diff options
Diffstat (limited to 'arch')
269 files changed, 3980 insertions, 2174 deletions
diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 0ce9d0f71f2a..12bfc1fa51f0 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -157,6 +157,7 @@ machine-$(CONFIG_ARCH_EBSA110) += ebsa110 machine-$(CONFIG_ARCH_EFM32) += efm32 machine-$(CONFIG_ARCH_EP93XX) += ep93xx machine-$(CONFIG_ARCH_EXYNOS) += exynos +machine-$(CONFIG_ARCH_FOOTBRIDGE) += footbridge machine-$(CONFIG_ARCH_GEMINI) += gemini machine-$(CONFIG_ARCH_HIGHBANK) += highbank machine-$(CONFIG_ARCH_HISI) += hisi @@ -205,7 +206,6 @@ machine-$(CONFIG_ARCH_VEXPRESS) += vexpress machine-$(CONFIG_ARCH_VT8500) += vt8500 machine-$(CONFIG_ARCH_W90X900) += w90x900 machine-$(CONFIG_ARCH_ZYNQ) += zynq -machine-$(CONFIG_FOOTBRIDGE) += footbridge machine-$(CONFIG_PLAT_SPEAR) += spear # Platform directory name. This list is sorted alphanumerically diff --git a/arch/arm/boot/bootp/Makefile b/arch/arm/boot/bootp/Makefile index c394e305447c..5761f0039133 100644 --- a/arch/arm/boot/bootp/Makefile +++ b/arch/arm/boot/bootp/Makefile @@ -5,6 +5,8 @@ # architecture-specific flags and dependencies. # +GCOV_PROFILE := n + LDFLAGS_bootp :=-p --no-undefined -X \ --defsym initrd_phys=$(INITRD_PHYS) \ --defsym params_phys=$(PARAMS_PHYS) -T diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 76a50ecae1c3..3ea230aa94b7 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -37,6 +37,8 @@ ifeq ($(CONFIG_ARM_VIRT_EXT),y) OBJS += hyp-stub.o endif +GCOV_PROFILE := n + # # Architecture dependencies # diff --git a/arch/arm/boot/dts/dra7-evm.dts b/arch/arm/boot/dts/dra7-evm.dts index e03fbf3c6889..b40cdadb1f87 100644 --- a/arch/arm/boot/dts/dra7-evm.dts +++ b/arch/arm/boot/dts/dra7-evm.dts @@ -447,22 +447,19 @@ gpmc,device-width = <2>; gpmc,sync-clk-ps = <0>; gpmc,cs-on-ns = <0>; - gpmc,cs-rd-off-ns = <40>; - gpmc,cs-wr-off-ns = <40>; + gpmc,cs-rd-off-ns = <80>; + gpmc,cs-wr-off-ns = <80>; gpmc,adv-on-ns = <0>; - gpmc,adv-rd-off-ns = <30>; - gpmc,adv-wr-off-ns = <30>; - gpmc,we-on-ns = <5>; - gpmc,we-off-ns = <25>; - gpmc,oe-on-ns = <2>; - gpmc,oe-off-ns = <20>; - gpmc,access-ns = <20>; - gpmc,wr-access-ns = <40>; - gpmc,rd-cycle-ns = <40>; - gpmc,wr-cycle-ns = <40>; - gpmc,wait-pin = <0>; - gpmc,wait-on-read; - gpmc,wait-on-write; + gpmc,adv-rd-off-ns = <60>; + gpmc,adv-wr-off-ns = <60>; + gpmc,we-on-ns = <10>; + gpmc,we-off-ns = <50>; + gpmc,oe-on-ns = <4>; + gpmc,oe-off-ns = <40>; + gpmc,access-ns = <40>; + gpmc,wr-access-ns = <80>; + gpmc,rd-cycle-ns = <80>; + gpmc,wr-cycle-ns = <80>; gpmc,bus-turnaround-ns = <0>; gpmc,cycle2cycle-delay-ns = <0>; gpmc,clk-activation-ns = <0>; diff --git a/arch/arm/boot/dts/imx53.dtsi b/arch/arm/boot/dts/imx53.dtsi index c6c58c1c00e3..6b675a02066f 100644 --- a/arch/arm/boot/dts/imx53.dtsi +++ b/arch/arm/boot/dts/imx53.dtsi @@ -423,10 +423,14 @@ status = "disabled"; lvds-channel@0 { + #address-cells = <1>; + #size-cells = <0>; reg = <0>; status = "disabled"; - port { + port@0 { + reg = <0>; + lvds0_in: endpoint { remote-endpoint = <&ipu_di0_lvds0>; }; @@ -434,10 +438,14 @@ }; lvds-channel@1 { + #address-cells = <1>; + #size-cells = <0>; reg = <1>; status = "disabled"; - port { + port@1 { + reg = <1>; + lvds1_in: endpoint { remote-endpoint = <&ipu_di1_lvds1>; }; diff --git a/arch/arm/boot/dts/k2e-clocks.dtsi b/arch/arm/boot/dts/k2e-clocks.dtsi index 598afe91c676..4773d6af66a0 100644 --- a/arch/arm/boot/dts/k2e-clocks.dtsi +++ b/arch/arm/boot/dts/k2e-clocks.dtsi @@ -40,7 +40,7 @@ clocks { #clock-cells = <0>; compatible = "ti,keystone,psc-clock"; clocks = <&chipclk16>; - clock-output-names = "usb"; + clock-output-names = "usb1"; reg = <0x02350004 0xb00>, <0x02350000 0x400>; reg-names = "control", "domain"; domain-id = <0>; @@ -60,8 +60,8 @@ clocks { #clock-cells = <0>; compatible = "ti,keystone,psc-clock"; clocks = <&chipclk12>; - clock-output-names = "pcie"; - reg = <0x0235006c 0xb00>, <0x02350000 0x400>; + clock-output-names = "pcie1"; + reg = <0x0235006c 0xb00>, <0x02350048 0x400>; reg-names = "control", "domain"; domain-id = <18>; }; diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts index 1fe45d1f75ec..4361777a08d8 100644 --- a/arch/arm/boot/dts/omap3-n900.dts +++ b/arch/arm/boot/dts/omap3-n900.dts @@ -93,7 +93,7 @@ }; tv: connector { - compatible = "composite-connector"; + compatible = "composite-video-connector"; label = "tv"; port { diff --git a/arch/arm/boot/dts/omap5-cm-t54.dts b/arch/arm/boot/dts/omap5-cm-t54.dts index b8698ca68647..429471aa7a1f 100644 --- a/arch/arm/boot/dts/omap5-cm-t54.dts +++ b/arch/arm/boot/dts/omap5-cm-t54.dts @@ -353,13 +353,12 @@ }; ldo8_reg: ldo8 { - /* VDD_3v0: Does not go anywhere */ + /* VDD_3V_GP: act led/serial console */ regulator-name = "ldo8"; regulator-min-microvolt = <3000000>; regulator-max-microvolt = <3000000>; + regulator-always-on; regulator-boot-on; - /* Unused */ - status = "disabled"; }; ldo9_reg: ldo9 { diff --git a/arch/arm/configs/ep93xx_defconfig b/arch/arm/configs/ep93xx_defconfig index 1b650c85bdd0..72233b9c9d07 100644 --- a/arch/arm/configs/ep93xx_defconfig +++ b/arch/arm/configs/ep93xx_defconfig @@ -107,5 +107,6 @@ CONFIG_DEBUG_SPINLOCK=y CONFIG_DEBUG_MUTEXES=y CONFIG_DEBUG_USER=y CONFIG_DEBUG_LL=y +CONFIG_DEBUG_LL_UART_PL01X=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_LIBCRC32C=y diff --git a/arch/arm/configs/versatile_defconfig b/arch/arm/configs/versatile_defconfig index d52b4ffe2012..ea49d37564da 100644 --- a/arch/arm/configs/versatile_defconfig +++ b/arch/arm/configs/versatile_defconfig @@ -82,5 +82,6 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_USER=y CONFIG_DEBUG_LL=y +CONFIG_DEBUG_LL_UART_PL01X=y CONFIG_FONTS=y CONFIG_FONT_ACORN_8x8=y diff --git a/arch/arm/crypto/sha1-armv7-neon.S b/arch/arm/crypto/sha1-armv7-neon.S index 50013c0e2864..dcd01f3f0bb0 100644 --- a/arch/arm/crypto/sha1-armv7-neon.S +++ b/arch/arm/crypto/sha1-armv7-neon.S @@ -9,7 +9,7 @@ */ #include <linux/linkage.h> - +#include <asm/assembler.h> .syntax unified .code 32 @@ -61,13 +61,13 @@ #define RT3 r12 #define W0 q0 -#define W1 q1 +#define W1 q7 #define W2 q2 #define W3 q3 #define W4 q4 -#define W5 q5 -#define W6 q6 -#define W7 q7 +#define W5 q6 +#define W6 q5 +#define W7 q1 #define tmp0 q8 #define tmp1 q9 @@ -79,6 +79,11 @@ #define qK3 q14 #define qK4 q15 +#ifdef CONFIG_CPU_BIG_ENDIAN +#define ARM_LE(code...) +#else +#define ARM_LE(code...) code +#endif /* Round function macros. */ @@ -150,45 +155,45 @@ #define W_PRECALC_00_15() \ add RWK, sp, #(WK_offs(0)); \ \ - vld1.32 {tmp0, tmp1}, [RDATA]!; \ - vrev32.8 W0, tmp0; /* big => little */ \ - vld1.32 {tmp2, tmp3}, [RDATA]!; \ + vld1.32 {W0, W7}, [RDATA]!; \ + ARM_LE(vrev32.8 W0, W0; ) /* big => little */ \ + vld1.32 {W6, W5}, [RDATA]!; \ vadd.u32 tmp0, W0, curK; \ - vrev32.8 W7, tmp1; /* big => little */ \ - vrev32.8 W6, tmp2; /* big => little */ \ + ARM_LE(vrev32.8 W7, W7; ) /* big => little */ \ + ARM_LE(vrev32.8 W6, W6; ) /* big => little */ \ vadd.u32 tmp1, W7, curK; \ - vrev32.8 W5, tmp3; /* big => little */ \ + ARM_LE(vrev32.8 W5, W5; ) /* big => little */ \ vadd.u32 tmp2, W6, curK; \ vst1.32 {tmp0, tmp1}, [RWK]!; \ vadd.u32 tmp3, W5, curK; \ vst1.32 {tmp2, tmp3}, [RWK]; \ #define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vld1.32 {tmp0, tmp1}, [RDATA]!; \ + vld1.32 {W0, W7}, [RDATA]!; \ #define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ add RWK, sp, #(WK_offs(0)); \ #define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vrev32.8 W0, tmp0; /* big => little */ \ + ARM_LE(vrev32.8 W0, W0; ) /* big => little */ \ #define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vld1.32 {tmp2, tmp3}, [RDATA]!; \ + vld1.32 {W6, W5}, [RDATA]!; \ #define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ vadd.u32 tmp0, W0, curK; \ #define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vrev32.8 W7, tmp1; /* big => little */ \ + ARM_LE(vrev32.8 W7, W7; ) /* big => little */ \ #define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vrev32.8 W6, tmp2; /* big => little */ \ + ARM_LE(vrev32.8 W6, W6; ) /* big => little */ \ #define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ vadd.u32 tmp1, W7, curK; \ #define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vrev32.8 W5, tmp3; /* big => little */ \ + ARM_LE(vrev32.8 W5, W5; ) /* big => little */ \ #define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ vadd.u32 tmp2, W6, curK; \ diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 79ecb4f34ffb..10e78d00a0bb 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -466,6 +466,7 @@ static inline void __sync_cache_range_r(volatile void *p, size_t size) */ #define v7_exit_coherency_flush(level) \ asm volatile( \ + ".arch armv7-a \n\t" \ "stmfd sp!, {fp, ip} \n\t" \ "mrc p15, 0, r0, c1, c0, 0 @ get SCTLR \n\t" \ "bic r0, r0, #"__stringify(CR_C)" \n\t" \ diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h index 39eb16b0066f..bfe2a2f5a644 100644 --- a/arch/arm/include/asm/ftrace.h +++ b/arch/arm/include/asm/ftrace.h @@ -45,7 +45,7 @@ void *return_address(unsigned int); #else -extern inline void *return_address(unsigned int level) +static inline void *return_address(unsigned int level) { return NULL; } diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 69b746955fca..b9db269c6e61 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -149,6 +149,11 @@ static inline bool kvm_vcpu_trap_is_iabt(struct kvm_vcpu *vcpu) static inline u8 kvm_vcpu_trap_get_fault(struct kvm_vcpu *vcpu) { + return kvm_vcpu_get_hsr(vcpu) & HSR_FSC; +} + +static inline u8 kvm_vcpu_trap_get_fault_type(struct kvm_vcpu *vcpu) +{ return kvm_vcpu_get_hsr(vcpu) & HSR_FSC_TYPE; } diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 6dfb404f6c46..53036e21756b 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -19,6 +19,8 @@ #ifndef __ARM_KVM_HOST_H__ #define __ARM_KVM_HOST_H__ +#include <linux/types.h> +#include <linux/kvm_types.h> #include <asm/kvm.h> #include <asm/kvm_asm.h> #include <asm/kvm_mmio.h> @@ -40,9 +42,8 @@ #include <kvm/arm_vgic.h> -struct kvm_vcpu; u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); -int kvm_target_cpu(void); +int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); void kvm_reset_coprocs(struct kvm_vcpu *vcpu); @@ -149,20 +150,17 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -struct kvm_vcpu_init; int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init); int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); -struct kvm_one_reg; int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); u64 kvm_call_hyp(void *hypfn, ...); void force_vm_exit(const cpumask_t *mask); #define KVM_ARCH_WANT_MMU_NOTIFIER -struct kvm; int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); @@ -172,7 +170,8 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); /* We do not have shadow page tables, hence the empty hooks */ -static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva) +static inline int kvm_age_hva(struct kvm *kvm, unsigned long start, + unsigned long end) { return 0; } @@ -182,12 +181,16 @@ static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) return 0; } +static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, + unsigned long address) +{ +} + struct kvm_vcpu *kvm_arm_get_running_vcpu(void); struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu); -struct kvm_one_reg; int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); @@ -233,4 +236,10 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic) int kvm_perf_init(void); int kvm_perf_teardown(void); +static inline void kvm_arch_hardware_disable(void) {} +static inline void kvm_arch_hardware_unsetup(void) {} +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 5cc0b0f5f72f..3f688b458143 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -78,17 +78,6 @@ static inline void kvm_set_pte(pte_t *pte, pte_t new_pte) flush_pmd_entry(pte); } -static inline bool kvm_is_write_fault(unsigned long hsr) -{ - unsigned long hsr_ec = hsr >> HSR_EC_SHIFT; - if (hsr_ec == HSR_EC_IABT) - return false; - else if ((hsr & HSR_ISV) && !(hsr & HSR_WNR)) - return false; - else - return true; -} - static inline void kvm_clean_pgd(pgd_t *pgd) { clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t)); diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index 2ec765c39ab4..18f5a554134f 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -49,12 +49,6 @@ extern void smp_init_cpus(void); extern void set_smp_cross_call(void (*)(const struct cpumask *, unsigned int)); /* - * Boot a secondary CPU, and assign it the specified idle task. - * This also gives us the initial stack to use for this CPU. - */ -extern int boot_secondary(unsigned int cpu, struct task_struct *); - -/* * Called from platform specific assembly code, this is the * secondary CPU entry point. */ diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h index 4651f6999b7d..e86c985b8c7a 100644 --- a/arch/arm/include/asm/syscall.h +++ b/arch/arm/include/asm/syscall.h @@ -63,8 +63,8 @@ static inline void syscall_get_arguments(struct task_struct *task, if (i + n > SYSCALL_MAX_ARGS) { unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i; unsigned int n_bad = n + i - SYSCALL_MAX_ARGS; - pr_warning("%s called with max args %d, handling only %d\n", - __func__, i + n, SYSCALL_MAX_ARGS); + pr_warn("%s called with max args %d, handling only %d\n", + __func__, i + n, SYSCALL_MAX_ARGS); memset(args_bad, 0, n_bad * sizeof(args[0])); n = SYSCALL_MAX_ARGS - i; } @@ -88,8 +88,8 @@ static inline void syscall_set_arguments(struct task_struct *task, return; if (i + n > SYSCALL_MAX_ARGS) { - pr_warning("%s called with max args %d, handling only %d\n", - __func__, i + n, SYSCALL_MAX_ARGS); + pr_warn("%s called with max args %d, handling only %d\n", + __func__, i + n, SYSCALL_MAX_ARGS); n = SYSCALL_MAX_ARGS - i; } diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h index 83259b873333..5f833f7adba1 100644 --- a/arch/arm/include/asm/tls.h +++ b/arch/arm/include/asm/tls.h @@ -1,6 +1,9 @@ #ifndef __ASMARM_TLS_H #define __ASMARM_TLS_H +#include <linux/compiler.h> +#include <asm/thread_info.h> + #ifdef __ASSEMBLY__ #include <asm/asm-offsets.h> .macro switch_tls_none, base, tp, tpuser, tmp1, tmp2 @@ -50,6 +53,49 @@ #endif #ifndef __ASSEMBLY__ + +static inline void set_tls(unsigned long val) +{ + struct thread_info *thread; + + thread = current_thread_info(); + + thread->tp_value[0] = val; + + /* + * This code runs with preemption enabled and therefore must + * be reentrant with respect to switch_tls. + * + * We need to ensure ordering between the shadow state and the + * hardware state, so that we don't corrupt the hardware state + * with a stale shadow state during context switch. + * + * If we're preempted here, switch_tls will load TPIDRURO from + * thread_info upon resuming execution and the following mcr + * is merely redundant. + */ + barrier(); + + if (!tls_emu) { + if (has_tls_reg) { + asm("mcr p15, 0, %0, c13, c0, 3" + : : "r" (val)); + } else { +#ifdef CONFIG_KUSER_HELPERS + /* + * User space must never try to access this + * directly. Expect your app to break + * eventually if you do so. The user helper + * at 0xffff0fe0 must be used instead. (see + * entry-armv.S for details) + */ + *((unsigned int *)0xffff0ff0) = val; +#endif + } + + } +} + static inline unsigned long get_tpuser(void) { unsigned long reg = 0; @@ -59,5 +105,23 @@ static inline unsigned long get_tpuser(void) return reg; } + +static inline void set_tpuser(unsigned long val) +{ + /* Since TPIDRURW is fully context-switched (unlike TPIDRURO), + * we need not update thread_info. + */ + if (has_tls_reg && !tls_emu) { + asm("mcr p15, 0, %0, c13, c0, 2" + : : "r" (val)); + } +} + +static inline void flush_tls(void) +{ + set_tls(0); + set_tpuser(0); +} + #endif #endif /* __ASMARM_TLS_H */ diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index a4cd7af475e9..4767eb9caa78 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -107,8 +107,11 @@ static inline void set_fs(mm_segment_t fs) extern int __get_user_1(void *); extern int __get_user_2(void *); extern int __get_user_4(void *); -extern int __get_user_lo8(void *); +extern int __get_user_32t_8(void *); extern int __get_user_8(void *); +extern int __get_user_64t_1(void *); +extern int __get_user_64t_2(void *); +extern int __get_user_64t_4(void *); #define __GUP_CLOBBER_1 "lr", "cc" #ifdef CONFIG_CPU_USE_DOMAINS @@ -117,7 +120,7 @@ extern int __get_user_8(void *); #define __GUP_CLOBBER_2 "lr", "cc" #endif #define __GUP_CLOBBER_4 "lr", "cc" -#define __GUP_CLOBBER_lo8 "lr", "cc" +#define __GUP_CLOBBER_32t_8 "lr", "cc" #define __GUP_CLOBBER_8 "lr", "cc" #define __get_user_x(__r2,__p,__e,__l,__s) \ @@ -131,12 +134,30 @@ extern int __get_user_8(void *); /* narrowing a double-word get into a single 32bit word register: */ #ifdef __ARMEB__ -#define __get_user_xb(__r2, __p, __e, __l, __s) \ - __get_user_x(__r2, __p, __e, __l, lo8) +#define __get_user_x_32t(__r2, __p, __e, __l, __s) \ + __get_user_x(__r2, __p, __e, __l, 32t_8) #else -#define __get_user_xb __get_user_x +#define __get_user_x_32t __get_user_x #endif +/* + * storing result into proper least significant word of 64bit target var, + * different only for big endian case where 64 bit __r2 lsw is r3: + */ +#ifdef __ARMEB__ +#define __get_user_x_64t(__r2, __p, __e, __l, __s) \ + __asm__ __volatile__ ( \ + __asmeq("%0", "r0") __asmeq("%1", "r2") \ + __asmeq("%3", "r1") \ + "bl __get_user_64t_" #__s \ + : "=&r" (__e), "=r" (__r2) \ + : "0" (__p), "r" (__l) \ + : __GUP_CLOBBER_##__s) +#else +#define __get_user_x_64t __get_user_x +#endif + + #define __get_user_check(x,p) \ ({ \ unsigned long __limit = current_thread_info()->addr_limit - 1; \ @@ -146,17 +167,26 @@ extern int __get_user_8(void *); register int __e asm("r0"); \ switch (sizeof(*(__p))) { \ case 1: \ - __get_user_x(__r2, __p, __e, __l, 1); \ + if (sizeof((x)) >= 8) \ + __get_user_x_64t(__r2, __p, __e, __l, 1); \ + else \ + __get_user_x(__r2, __p, __e, __l, 1); \ break; \ case 2: \ - __get_user_x(__r2, __p, __e, __l, 2); \ + if (sizeof((x)) >= 8) \ + __get_user_x_64t(__r2, __p, __e, __l, 2); \ + else \ + __get_user_x(__r2, __p, __e, __l, 2); \ break; \ case 4: \ - __get_user_x(__r2, __p, __e, __l, 4); \ + if (sizeof((x)) >= 8) \ + __get_user_x_64t(__r2, __p, __e, __l, 4); \ + else \ + __get_user_x(__r2, __p, __e, __l, 4); \ break; \ case 8: \ if (sizeof((x)) < 8) \ - __get_user_xb(__r2, __p, __e, __l, 4); \ + __get_user_x_32t(__r2, __p, __e, __l, 4); \ else \ __get_user_x(__r2, __p, __e, __l, 8); \ break; \ diff --git a/arch/arm/include/asm/xen/page-coherent.h b/arch/arm/include/asm/xen/page-coherent.h index 1109017499e5..e8275ea88e88 100644 --- a/arch/arm/include/asm/xen/page-coherent.h +++ b/arch/arm/include/asm/xen/page-coherent.h @@ -26,25 +26,14 @@ static inline void xen_dma_map_page(struct device *hwdev, struct page *page, __generic_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs); } -static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, +void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) -{ - if (__generic_dma_ops(hwdev)->unmap_page) - __generic_dma_ops(hwdev)->unmap_page(hwdev, handle, size, dir, attrs); -} + struct dma_attrs *attrs); -static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - if (__generic_dma_ops(hwdev)->sync_single_for_cpu) - __generic_dma_ops(hwdev)->sync_single_for_cpu(hwdev, handle, size, dir); -} +void xen_dma_sync_single_for_cpu(struct device *hwdev, + dma_addr_t handle, size_t size, enum dma_data_direction dir); + +void xen_dma_sync_single_for_device(struct device *hwdev, + dma_addr_t handle, size_t size, enum dma_data_direction dir); -static inline void xen_dma_sync_single_for_device(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - if (__generic_dma_ops(hwdev)->sync_single_for_device) - __generic_dma_ops(hwdev)->sync_single_for_device(hwdev, handle, size, dir); -} #endif /* _ASM_ARM_XEN_PAGE_COHERENT_H */ diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h index ded062f9b358..135c24a5ba26 100644 --- a/arch/arm/include/asm/xen/page.h +++ b/arch/arm/include/asm/xen/page.h @@ -33,7 +33,6 @@ typedef struct xpaddr { #define INVALID_P2M_ENTRY (~0UL) unsigned long __pfn_to_mfn(unsigned long pfn); -unsigned long __mfn_to_pfn(unsigned long mfn); extern struct rb_root phys_to_mach; static inline unsigned long pfn_to_mfn(unsigned long pfn) @@ -51,14 +50,6 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn) static inline unsigned long mfn_to_pfn(unsigned long mfn) { - unsigned long pfn; - - if (phys_to_mach.rb_node != NULL) { - pfn = __mfn_to_pfn(mfn); - if (pfn != INVALID_P2M_ENTRY) - return pfn; - } - return mfn; } diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index e6ebdd3471e5..09ee408c1a67 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -25,6 +25,7 @@ #define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE +#define __KVM_HAVE_READONLY_MEM #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) @@ -173,6 +174,7 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) +#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index f7b450f97e68..a88671cfe1ff 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -98,6 +98,14 @@ EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(__get_user_1); EXPORT_SYMBOL(__get_user_2); EXPORT_SYMBOL(__get_user_4); +EXPORT_SYMBOL(__get_user_8); + +#ifdef __ARMEB__ +EXPORT_SYMBOL(__get_user_64t_1); +EXPORT_SYMBOL(__get_user_64t_2); +EXPORT_SYMBOL(__get_user_64t_4); +EXPORT_SYMBOL(__get_user_32t_8); +#endif EXPORT_SYMBOL(__put_user_1); EXPORT_SYMBOL(__put_user_2); diff --git a/arch/arm/kernel/atags_parse.c b/arch/arm/kernel/atags_parse.c index 7807ef58a2ab..528f8af2addb 100644 --- a/arch/arm/kernel/atags_parse.c +++ b/arch/arm/kernel/atags_parse.c @@ -130,7 +130,7 @@ static int __init parse_tag_cmdline(const struct tag *tag) strlcat(default_command_line, tag->u.cmdline.cmdline, COMMAND_LINE_SIZE); #elif defined(CONFIG_CMDLINE_FORCE) - pr_warning("Ignoring tag cmdline (using the default kernel command line)\n"); + pr_warn("Ignoring tag cmdline (using the default kernel command line)\n"); #else strlcpy(default_command_line, tag->u.cmdline.cmdline, COMMAND_LINE_SIZE); diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 36276cdccfbc..2f5555d307b3 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -146,7 +146,7 @@ ENDPROC(__und_invalid) #define SPFIX(code...) #endif - .macro svc_entry, stack_hole=0 + .macro svc_entry, stack_hole=0, trace=1 UNWIND(.fnstart ) UNWIND(.save {r0 - pc} ) sub sp, sp, #(S_FRAME_SIZE + \stack_hole - 4) @@ -182,9 +182,11 @@ ENDPROC(__und_invalid) @ stmia r7, {r2 - r6} + .if \trace #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off #endif + .endif .endm .align 5 @@ -295,6 +297,15 @@ __pabt_svc: ENDPROC(__pabt_svc) .align 5 +__fiq_svc: + svc_entry trace=0 + mov r0, sp @ struct pt_regs *regs + bl handle_fiq_as_nmi + svc_exit_via_fiq + UNWIND(.fnend ) +ENDPROC(__fiq_svc) + + .align 5 .LCcralign: .word cr_alignment #ifdef MULTI_DABORT @@ -305,6 +316,46 @@ ENDPROC(__pabt_svc) .word fp_enter /* + * Abort mode handlers + */ + +@ +@ Taking a FIQ in abort mode is similar to taking a FIQ in SVC mode +@ and reuses the same macros. However in abort mode we must also +@ save/restore lr_abt and spsr_abt to make nested aborts safe. +@ + .align 5 +__fiq_abt: + svc_entry trace=0 + + ARM( msr cpsr_c, #ABT_MODE | PSR_I_BIT | PSR_F_BIT ) + THUMB( mov r0, #ABT_MODE | PSR_I_BIT | PSR_F_BIT ) + THUMB( msr cpsr_c, r0 ) + mov r1, lr @ Save lr_abt + mrs r2, spsr @ Save spsr_abt, abort is now safe + ARM( msr cpsr_c, #SVC_MODE | PSR_I_BIT | PSR_F_BIT ) + THUMB( mov r0, #SVC_MODE | PSR_I_BIT | PSR_F_BIT ) + THUMB( msr cpsr_c, r0 ) + stmfd sp!, {r1 - r2} + + add r0, sp, #8 @ struct pt_regs *regs + bl handle_fiq_as_nmi + + ldmfd sp!, {r1 - r2} + ARM( msr cpsr_c, #ABT_MODE | PSR_I_BIT | PSR_F_BIT ) + THUMB( mov r0, #ABT_MODE | PSR_I_BIT | PSR_F_BIT ) + THUMB( msr cpsr_c, r0 ) + mov lr, r1 @ Restore lr_abt, abort is unsafe + msr spsr_cxsf, r2 @ Restore spsr_abt + ARM( msr cpsr_c, #SVC_MODE | PSR_I_BIT | PSR_F_BIT ) + THUMB( mov r0, #SVC_MODE | PSR_I_BIT | PSR_F_BIT ) + THUMB( msr cpsr_c, r0 ) + + svc_exit_via_fiq + UNWIND(.fnend ) +ENDPROC(__fiq_abt) + +/* * User mode handlers * * EABI note: sp_svc is always 64-bit aligned here, so should S_FRAME_SIZE @@ -314,13 +365,16 @@ ENDPROC(__pabt_svc) #error "sizeof(struct pt_regs) must be a multiple of 8" #endif - .macro usr_entry + .macro usr_entry, trace=1 UNWIND(.fnstart ) UNWIND(.cantunwind ) @ don't unwind the user space sub sp, sp, #S_FRAME_SIZE ARM( stmib sp, {r1 - r12} ) THUMB( stmia sp, {r0 - r12} ) + ATRAP( mrc p15, 0, r7, c1, c0, 0) + ATRAP( ldr r8, .LCcralign) + ldmia r0, {r3 - r5} add r0, sp, #S_PC @ here for interlock avoidance mov r6, #-1 @ "" "" "" "" @@ -328,6 +382,8 @@ ENDPROC(__pabt_svc) str r3, [sp] @ save the "real" r0 copied @ from the exception stack + ATRAP( ldr r8, [r8, #0]) + @ @ We are now ready to fill in the remaining blanks on the stack: @ @@ -341,20 +397,21 @@ ENDPROC(__pabt_svc) ARM( stmdb r0, {sp, lr}^ ) THUMB( store_user_sp_lr r0, r1, S_SP - S_PC ) - @ @ Enable the alignment trap while in kernel mode - @ - alignment_trap r0, .LCcralign + ATRAP( teq r8, r7) + ATRAP( mcrne p15, 0, r8, c1, c0, 0) @ @ Clear FP to mark the first stack frame @ zero_fp + .if \trace #ifdef CONFIG_IRQSOFF_TRACER bl trace_hardirqs_off #endif ct_user_exit save = 0 + .endif .endm .macro kuser_cmpxchg_check @@ -683,6 +740,17 @@ ENTRY(ret_from_exception) ENDPROC(__pabt_usr) ENDPROC(ret_from_exception) + .align 5 +__fiq_usr: + usr_entry trace=0 + kuser_cmpxchg_check + mov r0, sp @ struct pt_regs *regs + bl handle_fiq_as_nmi + get_thread_info tsk + restore_user_regs fast = 0, offset = 0 + UNWIND(.fnend ) +ENDPROC(__fiq_usr) + /* * Register switch for ARMv3 and ARMv4 processors * r0 = previous task_struct, r1 = previous thread_info, r2 = next thread_info @@ -1118,17 +1186,29 @@ vector_addrexcptn: b vector_addrexcptn /*============================================================================= - * Undefined FIQs + * FIQ "NMI" handler *----------------------------------------------------------------------------- - * Enter in FIQ mode, spsr = ANY CPSR, lr = ANY PC - * MUST PRESERVE SVC SPSR, but need to switch to SVC mode to show our msg. - * Basically to switch modes, we *HAVE* to clobber one register... brain - * damage alert! I don't think that we can execute any code in here in any - * other mode than FIQ... Ok you can switch to another mode, but you can't - * get out of that mode without clobbering one register. + * Handle a FIQ using the SVC stack allowing FIQ act like NMI on x86 + * systems. */ -vector_fiq: - subs pc, lr, #4 + vector_stub fiq, FIQ_MODE, 4 + + .long __fiq_usr @ 0 (USR_26 / USR_32) + .long __fiq_svc @ 1 (FIQ_26 / FIQ_32) + .long __fiq_svc @ 2 (IRQ_26 / IRQ_32) + .long __fiq_svc @ 3 (SVC_26 / SVC_32) + .long __fiq_svc @ 4 + .long __fiq_svc @ 5 + .long __fiq_svc @ 6 + .long __fiq_abt @ 7 + .long __fiq_svc @ 8 + .long __fiq_svc @ 9 + .long __fiq_svc @ a + .long __fiq_svc @ b + .long __fiq_svc @ c + .long __fiq_svc @ d + .long __fiq_svc @ e + .long __fiq_svc @ f .globl vector_fiq_offset .equ vector_fiq_offset, vector_fiq diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index e52fe5a2d843..6bb09d4abdea 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -366,7 +366,7 @@ ENTRY(vector_swi) str r0, [sp, #S_OLD_R0] @ Save OLD_R0 #endif zero_fp - alignment_trap ip, __cr_alignment + alignment_trap r10, ip, __cr_alignment enable_irq ct_user_exit get_thread_info tsk diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index 2fdf8679b46e..4176df721bf0 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -37,11 +37,19 @@ #endif .endm - .macro alignment_trap, rtemp, label #ifdef CONFIG_ALIGNMENT_TRAP - ldr \rtemp, \label - ldr \rtemp, [\rtemp] - mcr p15, 0, \rtemp, c1, c0 +#define ATRAP(x...) x +#else +#define ATRAP(x...) +#endif + + .macro alignment_trap, rtmp1, rtmp2, label +#ifdef CONFIG_ALIGNMENT_TRAP + mrc p15, 0, \rtmp2, c1, c0, 0 + ldr \rtmp1, \label + ldr \rtmp1, [\rtmp1] + teq \rtmp1, \rtmp2 + mcrne p15, 0, \rtmp1, c1, c0, 0 #endif .endm @@ -216,6 +224,34 @@ ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr .endm + @ + @ svc_exit_via_fiq - like svc_exit but switches to FIQ mode before exit + @ + @ This macro acts in a similar manner to svc_exit but switches to FIQ + @ mode to restore the final part of the register state. + @ + @ We cannot use the normal svc_exit procedure because that would + @ clobber spsr_svc (FIQ could be delivered during the first few + @ instructions of vector_swi meaning its contents have not been + @ saved anywhere). + @ + @ Note that, unlike svc_exit, this macro also does not allow a caller + @ supplied rpsr. This is because the FIQ exceptions are not re-entrant + @ and the handlers cannot call into the scheduler (meaning the value + @ on the stack remains correct). + @ + .macro svc_exit_via_fiq + mov r0, sp + ldmib r0, {r1 - r14} @ abort is deadly from here onward (it will + @ clobber state restored below) + msr cpsr_c, #FIQ_MODE | PSR_I_BIT | PSR_F_BIT + add r8, r0, #S_PC + ldr r9, [r0, #S_PSR] + msr spsr_cxsf, r9 + ldr r0, [r0, #S_R0] + ldmia r8, {pc}^ + .endm + .macro restore_user_regs, fast = 0, offset = 0 ldr r1, [sp, #\offset + S_PSR] @ get calling cpsr ldr lr, [sp, #\offset + S_PC]! @ get pc @@ -267,6 +303,25 @@ rfeia sp! .endm + @ + @ svc_exit_via_fiq - like svc_exit but switches to FIQ mode before exit + @ + @ For full details see non-Thumb implementation above. + @ + .macro svc_exit_via_fiq + add r0, sp, #S_R2 + ldr lr, [sp, #S_LR] + ldr sp, [sp, #S_SP] @ abort is deadly from here onward (it will + @ clobber state restored below) + ldmia r0, {r2 - r12} + mov r1, #FIQ_MODE | PSR_I_BIT | PSR_F_BIT + msr cpsr_c, r1 + sub r0, #S_R2 + add r8, r0, #S_PC + ldmia r0, {r0 - r1} + rfeia r8 + .endm + #ifdef CONFIG_CPU_V7M /* * Note we don't need to do clrex here as clearing the local monitor is diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c index 918875d96d5d..b37752a96652 100644 --- a/arch/arm/kernel/fiq.c +++ b/arch/arm/kernel/fiq.c @@ -52,7 +52,8 @@ (unsigned)&vector_fiq_offset; \ }) -static unsigned long no_fiq_insn; +static unsigned long dfl_fiq_insn; +static struct pt_regs dfl_fiq_regs; /* Default reacquire function * - we always relinquish FIQ control @@ -60,8 +61,15 @@ static unsigned long no_fiq_insn; */ static int fiq_def_op(void *ref, int relinquish) { - if (!relinquish) - set_fiq_handler(&no_fiq_insn, sizeof(no_fiq_insn)); + if (!relinquish) { + /* Restore default handler and registers */ + local_fiq_disable(); + set_fiq_regs(&dfl_fiq_regs); + set_fiq_handler(&dfl_fiq_insn, sizeof(dfl_fiq_insn)); + local_fiq_enable(); + + /* FIXME: notify irq controller to standard enable FIQs */ + } return 0; } @@ -150,6 +158,7 @@ EXPORT_SYMBOL(disable_fiq); void __init init_FIQ(int start) { unsigned offset = FIQ_OFFSET; - no_fiq_insn = *(unsigned long *)(0xffff0000 + offset); + dfl_fiq_insn = *(unsigned long *)(0xffff0000 + offset); + get_fiq_regs(&dfl_fiq_regs); fiq_start = start; } diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 4d963fb66e3f..b5b452f90f76 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -113,8 +113,8 @@ static u32 read_wb_reg(int n) GEN_READ_WB_REG_CASES(ARM_OP2_WVR, val); GEN_READ_WB_REG_CASES(ARM_OP2_WCR, val); default: - pr_warning("attempt to read from unknown breakpoint " - "register %d\n", n); + pr_warn("attempt to read from unknown breakpoint register %d\n", + n); } return val; @@ -128,8 +128,8 @@ static void write_wb_reg(int n, u32 val) GEN_WRITE_WB_REG_CASES(ARM_OP2_WVR, val); GEN_WRITE_WB_REG_CASES(ARM_OP2_WCR, val); default: - pr_warning("attempt to write to unknown breakpoint " - "register %d\n", n); + pr_warn("attempt to write to unknown breakpoint register %d\n", + n); } isb(); } @@ -292,7 +292,7 @@ int hw_breakpoint_slots(int type) case TYPE_DATA: return get_num_wrps(); default: - pr_warning("unknown slot type: %d\n", type); + pr_warn("unknown slot type: %d\n", type); return 0; } } @@ -365,7 +365,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) } if (i == max_slots) { - pr_warning("Can't find any breakpoint slot\n"); + pr_warn("Can't find any breakpoint slot\n"); return -EBUSY; } @@ -417,7 +417,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) } if (i == max_slots) { - pr_warning("Can't find any breakpoint slot\n"); + pr_warn("Can't find any breakpoint slot\n"); return; } @@ -894,8 +894,8 @@ static int debug_reg_trap(struct pt_regs *regs, unsigned int instr) { int cpu = smp_processor_id(); - pr_warning("Debug register access (0x%x) caused undefined instruction on CPU %d\n", - instr, cpu); + pr_warn("Debug register access (0x%x) caused undefined instruction on CPU %d\n", + instr, cpu); /* Set the error flag for this CPU and skip the faulting instruction. */ cpumask_set_cpu(cpu, &debug_err_mask); diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 2c4257604513..88de943eebd6 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -175,7 +175,7 @@ static bool migrate_one_irq(struct irq_desc *desc) c = irq_data_get_irq_chip(d); if (!c->irq_set_affinity) pr_debug("IRQ%u: unable to set affinity\n", d->irq); - else if (c->irq_set_affinity(d, affinity, true) == IRQ_SET_MASK_OK && ret) + else if (c->irq_set_affinity(d, affinity, false) == IRQ_SET_MASK_OK && ret) cpumask_copy(d->affinity, affinity); return ret; @@ -205,8 +205,8 @@ void migrate_irqs(void) raw_spin_unlock(&desc->lock); if (affinity_broken && printk_ratelimit()) - pr_warning("IRQ%u no longer affine to CPU%u\n", i, - smp_processor_id()); + pr_warn("IRQ%u no longer affine to CPU%u\n", + i, smp_processor_id()); } local_irq_restore(flags); diff --git a/arch/arm/kernel/kprobes-test.c b/arch/arm/kernel/kprobes-test.c index 08d731294bcd..b206d7790c77 100644 --- a/arch/arm/kernel/kprobes-test.c +++ b/arch/arm/kernel/kprobes-test.c @@ -110,10 +110,13 @@ * * @ TESTCASE_START * bl __kprobes_test_case_start - * @ start of inline data... + * .pushsection .rodata + * "10: * .ascii "mov r0, r7" @ text title for test case * .byte 0 - * .align 2, 0 + * .popsection + * @ start of inline data... + * .word 10b @ pointer to title in .rodata section * * @ TEST_ARG_REG * .byte ARG_TYPE_REG @@ -971,7 +974,7 @@ void __naked __kprobes_test_case_start(void) __asm__ __volatile__ ( "stmdb sp!, {r4-r11} \n\t" "sub sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t" - "bic r0, lr, #1 @ r0 = inline title string \n\t" + "bic r0, lr, #1 @ r0 = inline data \n\t" "mov r1, sp \n\t" "bl kprobes_test_case_start \n\t" "bx r0 \n\t" @@ -1349,15 +1352,14 @@ static unsigned long next_instruction(unsigned long pc) return pc + 4; } -static uintptr_t __used kprobes_test_case_start(const char *title, void *stack) +static uintptr_t __used kprobes_test_case_start(const char **title, void *stack) { struct test_arg *args; struct test_arg_end *end_arg; unsigned long test_code; - args = (struct test_arg *)PTR_ALIGN(title + strlen(title) + 1, 4); - - current_title = title; + current_title = *title++; + args = (struct test_arg *)title; current_args = args; current_stack = stack; diff --git a/arch/arm/kernel/kprobes-test.h b/arch/arm/kernel/kprobes-test.h index eecc90a0fd91..4430990e90e7 100644 --- a/arch/arm/kernel/kprobes-test.h +++ b/arch/arm/kernel/kprobes-test.h @@ -111,11 +111,14 @@ struct test_arg_end { #define TESTCASE_START(title) \ __asm__ __volatile__ ( \ "bl __kprobes_test_case_start \n\t" \ + ".pushsection .rodata \n\t" \ + "10: \n\t" \ /* don't use .asciz here as 'title' may be */ \ /* multiple strings to be concatenated. */ \ ".ascii "#title" \n\t" \ ".byte 0 \n\t" \ - ".align 2, 0 \n\t" + ".popsection \n\t" \ + ".word 10b \n\t" #define TEST_ARG_REG(reg, val) \ ".byte "__stringify(ARG_TYPE_REG)" \n\t" \ diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index e6a6edbec613..eb2c4d55666b 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -76,21 +76,15 @@ static struct pmu_hw_events *cpu_pmu_get_cpu_events(void) static void cpu_pmu_enable_percpu_irq(void *data) { - struct arm_pmu *cpu_pmu = data; - struct platform_device *pmu_device = cpu_pmu->plat_device; - int irq = platform_get_irq(pmu_device, 0); + int irq = *(int *)data; enable_percpu_irq(irq, IRQ_TYPE_NONE); - cpumask_set_cpu(smp_processor_id(), &cpu_pmu->active_irqs); } static void cpu_pmu_disable_percpu_irq(void *data) { - struct arm_pmu *cpu_pmu = data; - struct platform_device *pmu_device = cpu_pmu->plat_device; - int irq = platform_get_irq(pmu_device, 0); + int irq = *(int *)data; - cpumask_clear_cpu(smp_processor_id(), &cpu_pmu->active_irqs); disable_percpu_irq(irq); } @@ -103,7 +97,7 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) irq = platform_get_irq(pmu_device, 0); if (irq >= 0 && irq_is_percpu(irq)) { - on_each_cpu(cpu_pmu_disable_percpu_irq, cpu_pmu, 1); + on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1); free_percpu_irq(irq, &percpu_pmu); } else { for (i = 0; i < irqs; ++i) { @@ -138,7 +132,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) irq); return err; } - on_each_cpu(cpu_pmu_enable_percpu_irq, cpu_pmu, 1); + on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1); } else { for (i = 0; i < irqs; ++i) { err = 0; @@ -152,8 +146,8 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) * continue. Otherwise, continue without this interrupt. */ if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) { - pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", - irq, i); + pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n", + irq, i); continue; } diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 81ef686a91ca..a0a691d1cbee 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -306,7 +306,6 @@ void __show_regs(struct pt_regs *regs) void show_regs(struct pt_regs * regs) { - printk("\n"); __show_regs(regs); dump_stack(); } @@ -334,6 +333,8 @@ void flush_thread(void) memset(&tsk->thread.debug, 0, sizeof(struct debug_info)); memset(&thread->fpstate, 0, sizeof(union fp_state)); + flush_tls(); + thread_notify(THREAD_NOTIFY_FLUSH, thread); } @@ -472,19 +473,57 @@ int in_gate_area_no_mm(unsigned long addr) const char *arch_vma_name(struct vm_area_struct *vma) { - return is_gate_vma(vma) ? "[vectors]" : - (vma->vm_mm && vma->vm_start == vma->vm_mm->context.sigpage) ? - "[sigpage]" : NULL; + return is_gate_vma(vma) ? "[vectors]" : NULL; +} + +/* If possible, provide a placement hint at a random offset from the + * stack for the signal page. + */ +static unsigned long sigpage_addr(const struct mm_struct *mm, + unsigned int npages) +{ + unsigned long offset; + unsigned long first; + unsigned long last; + unsigned long addr; + unsigned int slots; + + first = PAGE_ALIGN(mm->start_stack); + + last = TASK_SIZE - (npages << PAGE_SHIFT); + + /* No room after stack? */ + if (first > last) + return 0; + + /* Just enough room? */ + if (first == last) + return first; + + slots = ((last - first) >> PAGE_SHIFT) + 1; + + offset = get_random_int() % slots; + + addr = first + (offset << PAGE_SHIFT); + + return addr; } static struct page *signal_page; extern struct page *get_signal_page(void); +static const struct vm_special_mapping sigpage_mapping = { + .name = "[sigpage]", + .pages = &signal_page, +}; + int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; unsigned long addr; - int ret; + unsigned long hint; + int ret = 0; if (!signal_page) signal_page = get_signal_page(); @@ -492,18 +531,23 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) return -ENOMEM; down_write(&mm->mmap_sem); - addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); + hint = sigpage_addr(mm, 1); + addr = get_unmapped_area(NULL, hint, PAGE_SIZE, 0, 0); if (IS_ERR_VALUE(addr)) { ret = addr; goto up_fail; } - ret = install_special_mapping(mm, addr, PAGE_SIZE, + vma = _install_special_mapping(mm, addr, PAGE_SIZE, VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, - &signal_page); + &sigpage_mapping); + + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto up_fail; + } - if (ret == 0) - mm->context.sigpage = addr; + mm->context.sigpage = addr; up_fail: up_write(&mm->mmap_sem); diff --git a/arch/arm/kernel/return_address.c b/arch/arm/kernel/return_address.c index fafedd86885d..98ea4b7eb406 100644 --- a/arch/arm/kernel/return_address.c +++ b/arch/arm/kernel/return_address.c @@ -59,15 +59,6 @@ void *return_address(unsigned int level) #else /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) */ -#if defined(CONFIG_ARM_UNWIND) -#warning "TODO: return_address should use unwind tables" -#endif - -void *return_address(unsigned int level) -{ - return NULL; -} - #endif /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) / else */ EXPORT_SYMBOL_GPL(return_address); diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 84db893dedc2..c03106378b49 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -133,6 +133,7 @@ struct stack { u32 irq[3]; u32 abt[3]; u32 und[3]; + u32 fiq[3]; } ____cacheline_aligned; #ifndef CONFIG_CPU_V7M @@ -470,7 +471,10 @@ void notrace cpu_init(void) "msr cpsr_c, %5\n\t" "add r14, %0, %6\n\t" "mov sp, r14\n\t" - "msr cpsr_c, %7" + "msr cpsr_c, %7\n\t" + "add r14, %0, %8\n\t" + "mov sp, r14\n\t" + "msr cpsr_c, %9" : : "r" (stk), PLC (PSR_F_BIT | PSR_I_BIT | IRQ_MODE), @@ -479,6 +483,8 @@ void notrace cpu_init(void) "I" (offsetof(struct stack, abt[0])), PLC (PSR_F_BIT | PSR_I_BIT | UND_MODE), "I" (offsetof(struct stack, und[0])), + PLC (PSR_F_BIT | PSR_I_BIT | FIQ_MODE), + "I" (offsetof(struct stack, fiq[0])), PLC (PSR_F_BIT | PSR_I_BIT | SVC_MODE) : "r14"); #endif diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 9388a3d479e1..39c74a2c3df9 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -95,6 +95,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) { int ret; + if (!smp_ops.smp_boot_secondary) + return -ENOSYS; + /* * We need to tell the secondary core where to find * its stack and the page tables. @@ -113,7 +116,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) /* * Now bring the CPU into our world. */ - ret = boot_secondary(cpu, idle); + ret = smp_ops.smp_boot_secondary(cpu, idle); if (ret == 0) { /* * CPU was successfully started, wait for it @@ -142,13 +145,6 @@ void __init smp_init_cpus(void) smp_ops.smp_init_cpus(); } -int boot_secondary(unsigned int cpu, struct task_struct *idle) -{ - if (smp_ops.smp_boot_secondary) - return smp_ops.smp_boot_secondary(cpu, idle); - return -ENOSYS; -} - int platform_can_cpu_hotplug(void) { #ifdef CONFIG_HOTPLUG_CPU @@ -650,7 +646,7 @@ void smp_send_stop(void) udelay(1); if (num_online_cpus() > 1) - pr_warning("SMP: failed to stop secondary CPUs\n"); + pr_warn("SMP: failed to stop secondary CPUs\n"); } /* diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c index 67ca8578c6d8..587fdfe1a72c 100644 --- a/arch/arm/kernel/swp_emulate.c +++ b/arch/arm/kernel/swp_emulate.c @@ -142,14 +142,6 @@ static int emulate_swpX(unsigned int address, unsigned int *data, while (1) { unsigned long temp; - /* - * Barrier required between accessing protected resource and - * releasing a lock for it. Legacy code might not have done - * this, and we cannot determine that this is not the case - * being emulated, so insert always. - */ - smp_mb(); - if (type == TYPE_SWPB) __user_swpb_asm(*data, address, res, temp); else @@ -162,13 +154,6 @@ static int emulate_swpX(unsigned int address, unsigned int *data, } if (res == 0) { - /* - * Barrier also required between acquiring a lock for a - * protected resource and accessing the resource. Inserted for - * same reason as above. - */ - smp_mb(); - if (type == TYPE_SWPB) swpbcounter++; else diff --git a/arch/arm/kernel/thumbee.c b/arch/arm/kernel/thumbee.c index 7b8403b76666..80f0d69205e7 100644 --- a/arch/arm/kernel/thumbee.c +++ b/arch/arm/kernel/thumbee.c @@ -45,7 +45,7 @@ static int thumbee_notifier(struct notifier_block *self, unsigned long cmd, void switch (cmd) { case THREAD_NOTIFY_FLUSH: - thread->thumbee_state = 0; + teehbr_write(0); break; case THREAD_NOTIFY_SWITCH: current_thread_info()->thumbee_state = teehbr_read(); diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index c8e4bb714944..0c8b10801d36 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -25,6 +25,7 @@ #include <linux/delay.h> #include <linux/init.h> #include <linux/sched.h> +#include <linux/irq.h> #include <linux/atomic.h> #include <asm/cacheflush.h> @@ -460,10 +461,29 @@ die_sig: arm_notify_die("Oops - undefined instruction", regs, &info, 0, 6); } -asmlinkage void do_unexp_fiq (struct pt_regs *regs) +/* + * Handle FIQ similarly to NMI on x86 systems. + * + * The runtime environment for NMIs is extremely restrictive + * (NMIs can pre-empt critical sections meaning almost all locking is + * forbidden) meaning this default FIQ handling must only be used in + * circumstances where non-maskability improves robustness, such as + * watchdog or debug logic. + * + * This handler is not appropriate for general purpose use in drivers + * platform code and can be overrideen using set_fiq_handler. + */ +asmlinkage void __exception_irq_entry handle_fiq_as_nmi(struct pt_regs *regs) { - printk("Hmm. Unexpected FIQ received, but trying to continue\n"); - printk("You may have a hardware problem...\n"); + struct pt_regs *old_regs = set_irq_regs(regs); + + nmi_enter(); + + /* nop. FIQ handlers for special arch/arm features can be added here. */ + + nmi_exit(); + + set_irq_regs(old_regs); } /* @@ -581,7 +601,6 @@ do_cache_op(unsigned long start, unsigned long end, int flags) #define NR(x) ((__ARM_NR_##x) - __ARM_NR_BASE) asmlinkage int arm_syscall(int no, struct pt_regs *regs) { - struct thread_info *thread = current_thread_info(); siginfo_t info; if ((no >> 16) != (__ARM_NR_BASE>> 16)) @@ -632,21 +651,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs) return regs->ARM_r0; case NR(set_tls): - thread->tp_value[0] = regs->ARM_r0; - if (tls_emu) - return 0; - if (has_tls_reg) { - asm ("mcr p15, 0, %0, c13, c0, 3" - : : "r" (regs->ARM_r0)); - } else { - /* - * User space must never try to access this directly. - * Expect your app to break eventually if you do so. - * The user helper at 0xffff0fe0 must be used instead. - * (see entry-armv.S for details) - */ - *((unsigned int *)0xffff0ff0) = regs->ARM_r0; - } + set_tls(regs->ARM_r0); return 0; #ifdef CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c index a61a1dfbb0db..cbb85c5fabf9 100644 --- a/arch/arm/kernel/unwind.c +++ b/arch/arm/kernel/unwind.c @@ -157,7 +157,7 @@ static const struct unwind_idx *search_index(unsigned long addr, if (likely(start->addr_offset <= addr_prel31)) return start; else { - pr_warning("unwind: Unknown symbol address %08lx\n", addr); + pr_warn("unwind: Unknown symbol address %08lx\n", addr); return NULL; } } @@ -225,7 +225,7 @@ static unsigned long unwind_get_byte(struct unwind_ctrl_block *ctrl) unsigned long ret; if (ctrl->entries <= 0) { - pr_warning("unwind: Corrupt unwind table\n"); + pr_warn("unwind: Corrupt unwind table\n"); return 0; } @@ -333,8 +333,8 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl) insn = (insn << 8) | unwind_get_byte(ctrl); mask = insn & 0x0fff; if (mask == 0) { - pr_warning("unwind: 'Refuse to unwind' instruction %04lx\n", - insn); + pr_warn("unwind: 'Refuse to unwind' instruction %04lx\n", + insn); return -URC_FAILURE; } @@ -357,8 +357,8 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl) unsigned long mask = unwind_get_byte(ctrl); if (mask == 0 || mask & 0xf0) { - pr_warning("unwind: Spare encoding %04lx\n", - (insn << 8) | mask); + pr_warn("unwind: Spare encoding %04lx\n", + (insn << 8) | mask); return -URC_FAILURE; } @@ -370,7 +370,7 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl) ctrl->vrs[SP] += 0x204 + (uleb128 << 2); } else { - pr_warning("unwind: Unhandled instruction %02lx\n", insn); + pr_warn("unwind: Unhandled instruction %02lx\n", insn); return -URC_FAILURE; } @@ -403,7 +403,7 @@ int unwind_frame(struct stackframe *frame) idx = unwind_find_idx(frame->pc); if (!idx) { - pr_warning("unwind: Index not found %08lx\n", frame->pc); + pr_warn("unwind: Index not found %08lx\n", frame->pc); return -URC_FAILURE; } @@ -422,8 +422,8 @@ int unwind_frame(struct stackframe *frame) /* only personality routine 0 supported in the index */ ctrl.insn = &idx->insn; else { - pr_warning("unwind: Unsupported personality routine %08lx in the index at %p\n", - idx->insn, idx); + pr_warn("unwind: Unsupported personality routine %08lx in the index at %p\n", + idx->insn, idx); return -URC_FAILURE; } @@ -435,8 +435,8 @@ int unwind_frame(struct stackframe *frame) ctrl.byte = 1; ctrl.entries = 1 + ((*ctrl.insn & 0x00ff0000) >> 16); } else { - pr_warning("unwind: Unsupported personality routine %08lx at %p\n", - *ctrl.insn, ctrl.insn); + pr_warn("unwind: Unsupported personality routine %08lx at %p\n", + *ctrl.insn, ctrl.insn); return -URC_FAILURE; } diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 6f57cb94367f..8e95aa47457a 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -219,8 +219,8 @@ SECTIONS __data_loc = ALIGN(4); /* location in binary */ . = PAGE_OFFSET + TEXT_OFFSET; #else - __init_end = .; . = ALIGN(THREAD_SIZE); + __init_end = .; __data_loc = .; #endif diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index a99e0cdf8ba2..779605122f32 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -82,12 +82,12 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void) /** * kvm_arm_get_running_vcpus - get the per-CPU array of currently running vcpus. */ -struct kvm_vcpu __percpu **kvm_get_running_vcpus(void) +struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void) { return &kvm_arm_running_vcpu; } -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { return 0; } @@ -97,27 +97,16 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; } -void kvm_arch_hardware_disable(void *garbage) -{ -} - int kvm_arch_hardware_setup(void) { return 0; } -void kvm_arch_hardware_unsetup(void) -{ -} - void kvm_arch_check_processor_compat(void *rtn) { *(int *)rtn = 0; } -void kvm_arch_sync_events(struct kvm *kvm) -{ -} /** * kvm_arch_init_vm - initializes a VM data structure @@ -172,6 +161,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm->vcpus[i] = NULL; } } + + kvm_vgic_destroy(kvm); } int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) @@ -188,6 +179,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ONE_REG: case KVM_CAP_ARM_PSCI: case KVM_CAP_ARM_PSCI_0_2: + case KVM_CAP_READONLY_MEM: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -253,6 +245,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { kvm_mmu_free_memory_caches(vcpu); kvm_timer_vcpu_terminate(vcpu); + kvm_vgic_vcpu_destroy(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu); } @@ -268,26 +261,15 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { - int ret; - /* Force users to call KVM_ARM_VCPU_INIT */ vcpu->arch.target = -1; - /* Set up VGIC */ - ret = kvm_vgic_vcpu_init(vcpu); - if (ret) - return ret; - /* Set up the timer */ kvm_timer_vcpu_init(vcpu); return 0; } -void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) -{ -} - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { vcpu->cpu = cpu; @@ -428,9 +410,9 @@ static void update_vttbr(struct kvm *kvm) /* update vttbr to be used with the new vmid */ pgd_phys = virt_to_phys(kvm->arch.pgd); + BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK; - kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK; - kvm->arch.vttbr |= vmid; + kvm->arch.vttbr = pgd_phys | vmid; spin_unlock(&kvm_vmid_lock); } diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 37a0fe1bb9bb..7928dbdf2102 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -791,7 +791,7 @@ static bool is_valid_cache(u32 val) u32 level, ctype; if (val >= CSSELR_MAX) - return -ENOENT; + return false; /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */ level = (val >> 1); diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 813e49258690..cc0b78769bd8 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -163,7 +163,7 @@ static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); if (ret != 0) - return ret; + return -EFAULT; return kvm_arm_timer_set_reg(vcpu, reg->id, val); } diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 16e7994bf347..eea03069161b 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -746,22 +746,29 @@ static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap) return false; } +static bool kvm_is_write_fault(struct kvm_vcpu *vcpu) +{ + if (kvm_vcpu_trap_is_iabt(vcpu)) + return false; + + return kvm_vcpu_dabt_iswrite(vcpu); +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, - struct kvm_memory_slot *memslot, + struct kvm_memory_slot *memslot, unsigned long hva, unsigned long fault_status) { int ret; bool write_fault, writable, hugetlb = false, force_pte = false; unsigned long mmu_seq; gfn_t gfn = fault_ipa >> PAGE_SHIFT; - unsigned long hva = gfn_to_hva(vcpu->kvm, gfn); struct kvm *kvm = vcpu->kvm; struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; struct vm_area_struct *vma; pfn_t pfn; pgprot_t mem_type = PAGE_S2; - write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu)); + write_fault = kvm_is_write_fault(vcpu); if (fault_status == FSC_PERM && !write_fault) { kvm_err("Unexpected L2 read permission error\n"); return -EFAULT; @@ -863,7 +870,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) unsigned long fault_status; phys_addr_t fault_ipa; struct kvm_memory_slot *memslot; - bool is_iabt; + unsigned long hva; + bool is_iabt, write_fault, writable; gfn_t gfn; int ret, idx; @@ -874,17 +882,22 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_vcpu_get_hfar(vcpu), fault_ipa); /* Check the stage-2 fault is trans. fault or write fault */ - fault_status = kvm_vcpu_trap_get_fault(vcpu); + fault_status = kvm_vcpu_trap_get_fault_type(vcpu); if (fault_status != FSC_FAULT && fault_status != FSC_PERM) { - kvm_err("Unsupported fault status: EC=%#x DFCS=%#lx\n", - kvm_vcpu_trap_get_class(vcpu), fault_status); + kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n", + kvm_vcpu_trap_get_class(vcpu), + (unsigned long)kvm_vcpu_trap_get_fault(vcpu), + (unsigned long)kvm_vcpu_get_hsr(vcpu)); return -EFAULT; } idx = srcu_read_lock(&vcpu->kvm->srcu); gfn = fault_ipa >> PAGE_SHIFT; - if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) { + memslot = gfn_to_memslot(vcpu->kvm, gfn); + hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable); + write_fault = kvm_is_write_fault(vcpu); + if (kvm_is_error_hva(hva) || (write_fault && !writable)) { if (is_iabt) { /* Prefetch Abort on I/O address */ kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu)); @@ -892,13 +905,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) goto out_unlock; } - if (fault_status != FSC_FAULT) { - kvm_err("Unsupported fault status on io memory: %#lx\n", - fault_status); - ret = -EFAULT; - goto out_unlock; - } - /* * The IPA is reported as [MAX:12], so we need to * complement it with the bottom 12 bits from the @@ -910,9 +916,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) goto out_unlock; } - memslot = gfn_to_memslot(vcpu->kvm, gfn); - - ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status); + ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status); if (ret == 0) ret = 1; out_unlock: diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S index 938600098b88..8ecfd15c3a02 100644 --- a/arch/arm/lib/getuser.S +++ b/arch/arm/lib/getuser.S @@ -80,7 +80,7 @@ ENTRY(__get_user_8) ENDPROC(__get_user_8) #ifdef __ARMEB__ -ENTRY(__get_user_lo8) +ENTRY(__get_user_32t_8) check_uaccess r0, 8, r1, r2, __get_user_bad #ifdef CONFIG_CPU_USE_DOMAINS add r0, r0, #4 @@ -90,7 +90,37 @@ ENTRY(__get_user_lo8) #endif mov r0, #0 ret lr -ENDPROC(__get_user_lo8) +ENDPROC(__get_user_32t_8) + +ENTRY(__get_user_64t_1) + check_uaccess r0, 1, r1, r2, __get_user_bad8 +8: TUSER(ldrb) r3, [r0] + mov r0, #0 + ret lr +ENDPROC(__get_user_64t_1) + +ENTRY(__get_user_64t_2) + check_uaccess r0, 2, r1, r2, __get_user_bad8 +#ifdef CONFIG_CPU_USE_DOMAINS +rb .req ip +9: ldrbt r3, [r0], #1 +10: ldrbt rb, [r0], #0 +#else +rb .req r0 +9: ldrb r3, [r0] +10: ldrb rb, [r0, #1] +#endif + orr r3, rb, r3, lsl #8 + mov r0, #0 + ret lr +ENDPROC(__get_user_64t_2) + +ENTRY(__get_user_64t_4) + check_uaccess r0, 4, r1, r2, __get_user_bad8 +11: TUSER(ldr) r3, [r0] + mov r0, #0 + ret lr +ENDPROC(__get_user_64t_4) #endif __get_user_bad8: @@ -111,5 +141,9 @@ ENDPROC(__get_user_bad8) .long 6b, __get_user_bad8 #ifdef __ARMEB__ .long 7b, __get_user_bad + .long 8b, __get_user_bad8 + .long 9b, __get_user_bad8 + .long 10b, __get_user_bad8 + .long 11b, __get_user_bad8 #endif .popsection diff --git a/arch/arm/mach-at91/include/mach/at91_pio.h b/arch/arm/mach-at91/include/mach/at91_pio.h index 732b11c37f1a..7b7366253ceb 100644 --- a/arch/arm/mach-at91/include/mach/at91_pio.h +++ b/arch/arm/mach-at91/include/mach/at91_pio.h @@ -71,4 +71,10 @@ #define ABCDSR_PERIPH_C 0x2 #define ABCDSR_PERIPH_D 0x3 +#define SAMA5D3_PIO_DRIVER1 0x118 /*PIO Driver 1 register offset*/ +#define SAMA5D3_PIO_DRIVER2 0x11C /*PIO Driver 2 register offset*/ + +#define AT91SAM9X5_PIO_DRIVER1 0x114 /*PIO Driver 1 register offset*/ +#define AT91SAM9X5_PIO_DRIVER2 0x118 /*PIO Driver 2 register offset*/ + #endif diff --git a/arch/arm/mach-imx/clk-gate2.c b/arch/arm/mach-imx/clk-gate2.c index 84acdfd1d715..5a75cdc81891 100644 --- a/arch/arm/mach-imx/clk-gate2.c +++ b/arch/arm/mach-imx/clk-gate2.c @@ -97,7 +97,7 @@ static int clk_gate2_is_enabled(struct clk_hw *hw) struct clk_gate2 *gate = to_clk_gate2(hw); if (gate->share_count) - return !!(*gate->share_count); + return !!__clk_get_enable_count(hw->clk); else return clk_gate2_reg_is_enabled(gate->reg, gate->bit_idx); } @@ -127,10 +127,6 @@ struct clk *clk_register_gate2(struct device *dev, const char *name, gate->bit_idx = bit_idx; gate->flags = clk_gate2_flags; gate->lock = lock; - - /* Initialize share_count per hardware state */ - if (share_count) - *share_count = clk_gate2_reg_is_enabled(reg, bit_idx) ? 1 : 0; gate->share_count = share_count; init.name = name; diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index e7189dcc9309..08d4167cc7c5 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -1,9 +1,6 @@ menu "TI OMAP/AM/DM/DRA Family" depends on ARCH_MULTI_V6 || ARCH_MULTI_V7 -config ARCH_OMAP - bool - config ARCH_OMAP2 bool "TI OMAP2" depends on ARCH_MULTI_V6 diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 8fd87a3055bf..9e91a4e7519a 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -2065,7 +2065,7 @@ static void _reconfigure_io_chain(void) spin_lock_irqsave(&io_chain_lock, flags); - if (cpu_is_omap34xx() && omap3_has_io_chain_ctrl()) + if (cpu_is_omap34xx()) omap3xxx_prm_reconfigure_io_chain(); else if (cpu_is_omap44xx()) omap44xx_prm_reconfigure_io_chain(); diff --git a/arch/arm/mach-omap2/prm3xxx.c b/arch/arm/mach-omap2/prm3xxx.c index 2458be6fc67b..372de3edf4a5 100644 --- a/arch/arm/mach-omap2/prm3xxx.c +++ b/arch/arm/mach-omap2/prm3xxx.c @@ -45,7 +45,7 @@ static struct omap_prcm_irq_setup omap3_prcm_irq_setup = { .ocp_barrier = &omap3xxx_prm_ocp_barrier, .save_and_clear_irqen = &omap3xxx_prm_save_and_clear_irqen, .restore_irqen = &omap3xxx_prm_restore_irqen, - .reconfigure_io_chain = &omap3xxx_prm_reconfigure_io_chain, + .reconfigure_io_chain = NULL, }; /* @@ -369,15 +369,30 @@ void __init omap3_prm_init_pm(bool has_uart4, bool has_iva) } /** - * omap3xxx_prm_reconfigure_io_chain - clear latches and reconfigure I/O chain + * omap3430_pre_es3_1_reconfigure_io_chain - restart wake-up daisy chain + * + * The ST_IO_CHAIN bit does not exist in 3430 before es3.1. The only + * thing we can do is toggle EN_IO bit for earlier omaps. + */ +void omap3430_pre_es3_1_reconfigure_io_chain(void) +{ + omap2_prm_clear_mod_reg_bits(OMAP3430_EN_IO_MASK, WKUP_MOD, + PM_WKEN); + omap2_prm_set_mod_reg_bits(OMAP3430_EN_IO_MASK, WKUP_MOD, + PM_WKEN); + omap2_prm_read_mod_reg(WKUP_MOD, PM_WKEN); +} + +/** + * omap3_prm_reconfigure_io_chain - clear latches and reconfigure I/O chain * * Clear any previously-latched I/O wakeup events and ensure that the * I/O wakeup gates are aligned with the current mux settings. Works * by asserting WUCLKIN, waiting for WUCLKOUT to be asserted, and then * deasserting WUCLKIN and clearing the ST_IO_CHAIN WKST bit. No - * return value. + * return value. These registers are only available in 3430 es3.1 and later. */ -void omap3xxx_prm_reconfigure_io_chain(void) +void omap3_prm_reconfigure_io_chain(void) { int i = 0; @@ -400,6 +415,15 @@ void omap3xxx_prm_reconfigure_io_chain(void) } /** + * omap3xxx_prm_reconfigure_io_chain - reconfigure I/O chain + */ +void omap3xxx_prm_reconfigure_io_chain(void) +{ + if (omap3_prcm_irq_setup.reconfigure_io_chain) + omap3_prcm_irq_setup.reconfigure_io_chain(); +} + +/** * omap3xxx_prm_enable_io_wakeup - enable wakeup events from I/O wakeup latches * * Activates the I/O wakeup event latches and allows events logged by @@ -656,6 +680,13 @@ static int omap3xxx_prm_late_init(void) if (!(prm_features & PRM_HAS_IO_WAKEUP)) return 0; + if (omap3_has_io_chain_ctrl()) + omap3_prcm_irq_setup.reconfigure_io_chain = + omap3_prm_reconfigure_io_chain; + else + omap3_prcm_irq_setup.reconfigure_io_chain = + omap3430_pre_es3_1_reconfigure_io_chain; + omap3xxx_prm_enable_io_wakeup(); ret = omap_prcm_register_chain_handler(&omap3_prcm_irq_setup); if (!ret) diff --git a/arch/arm/mach-pxa/generic.c b/arch/arm/mach-pxa/generic.c index 630fa916bbc6..04b013fbc98f 100644 --- a/arch/arm/mach-pxa/generic.c +++ b/arch/arm/mach-pxa/generic.c @@ -61,7 +61,7 @@ EXPORT_SYMBOL(get_clock_tick_rate); /* * For non device-tree builds, keep legacy timer init */ -void pxa_timer_init(void) +void __init pxa_timer_init(void) { pxa_timer_nodt_init(IRQ_OST0, io_p2v(0x40a00000), get_clock_tick_rate()); diff --git a/arch/arm/mach-sa1100/Kconfig b/arch/arm/mach-sa1100/Kconfig index 04f9784ff0ed..c6f6ed1cbed0 100644 --- a/arch/arm/mach-sa1100/Kconfig +++ b/arch/arm/mach-sa1100/Kconfig @@ -58,6 +58,7 @@ config SA1100_H3100 bool "Compaq iPAQ H3100" select ARM_SA1110_CPUFREQ select HTC_EGPIO + select MFD_IPAQ_MICRO help Say Y here if you intend to run this kernel on the Compaq iPAQ H3100 handheld computer. Information about this machine and the @@ -69,6 +70,7 @@ config SA1100_H3600 bool "Compaq iPAQ H3600/H3700" select ARM_SA1110_CPUFREQ select HTC_EGPIO + select MFD_IPAQ_MICRO help Say Y here if you intend to run this kernel on the Compaq iPAQ H3600 handheld computer. Information about this machine and the diff --git a/arch/arm/mach-sa1100/h3xxx.c b/arch/arm/mach-sa1100/h3xxx.c index c79bf467fb7f..b1d4faa12f9a 100644 --- a/arch/arm/mach-sa1100/h3xxx.c +++ b/arch/arm/mach-sa1100/h3xxx.c @@ -25,6 +25,7 @@ #include <asm/mach/map.h> #include <mach/h3xxx.h> +#include <mach/irqs.h> #include "generic.h" @@ -244,9 +245,23 @@ static struct platform_device h3xxx_keys = { }, }; +static struct resource h3xxx_micro_resources[] = { + DEFINE_RES_MEM(0x80010000, SZ_4K), + DEFINE_RES_MEM(0x80020000, SZ_4K), + DEFINE_RES_IRQ(IRQ_Ser1UART), +}; + +struct platform_device h3xxx_micro_asic = { + .name = "ipaq-h3xxx-micro", + .id = -1, + .resource = h3xxx_micro_resources, + .num_resources = ARRAY_SIZE(h3xxx_micro_resources), +}; + static struct platform_device *h3xxx_devices[] = { &h3xxx_egpio, &h3xxx_keys, + &h3xxx_micro_asic, }; void __init h3xxx_mach_init(void) diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 0c1ab49e5f7b..83792f4324ea 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -41,6 +41,7 @@ * This code is not portable to processors with late data abort handling. */ #define CODING_BITS(i) (i & 0x0e000000) +#define COND_BITS(i) (i & 0xf0000000) #define LDST_I_BIT(i) (i & (1 << 26)) /* Immediate constant */ #define LDST_P_BIT(i) (i & (1 << 24)) /* Preindex */ @@ -821,6 +822,8 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) break; case 0x04000000: /* ldr or str immediate */ + if (COND_BITS(instr) == 0xf0000000) /* NEON VLDn, VSTn */ + goto bad; offset.un = OFFSET_BITS(instr); handler = do_alignment_ldrstr; break; diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 5f2c988a06ac..55f9d6e0cc88 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -21,6 +21,7 @@ #include <linux/init.h> #include <linux/smp.h> #include <linux/spinlock.h> +#include <linux/log2.h> #include <linux/io.h> #include <linux/of.h> #include <linux/of_address.h> @@ -945,6 +946,98 @@ static int l2_wt_override; * pass it though the device tree */ static u32 cache_id_part_number_from_dt; +/** + * l2x0_cache_size_of_parse() - read cache size parameters from DT + * @np: the device tree node for the l2 cache + * @aux_val: pointer to machine-supplied auxilary register value, to + * be augmented by the call (bits to be set to 1) + * @aux_mask: pointer to machine-supplied auxilary register mask, to + * be augmented by the call (bits to be set to 0) + * @associativity: variable to return the calculated associativity in + * @max_way_size: the maximum size in bytes for the cache ways + */ +static void __init l2x0_cache_size_of_parse(const struct device_node *np, + u32 *aux_val, u32 *aux_mask, + u32 *associativity, + u32 max_way_size) +{ + u32 mask = 0, val = 0; + u32 cache_size = 0, sets = 0; + u32 way_size_bits = 1; + u32 way_size = 0; + u32 block_size = 0; + u32 line_size = 0; + + of_property_read_u32(np, "cache-size", &cache_size); + of_property_read_u32(np, "cache-sets", &sets); + of_property_read_u32(np, "cache-block-size", &block_size); + of_property_read_u32(np, "cache-line-size", &line_size); + + if (!cache_size || !sets) + return; + + /* All these l2 caches have the same line = block size actually */ + if (!line_size) { + if (block_size) { + /* If linesize if not given, it is equal to blocksize */ + line_size = block_size; + } else { + /* Fall back to known size */ + pr_warn("L2C OF: no cache block/line size given: " + "falling back to default size %d bytes\n", + CACHE_LINE_SIZE); + line_size = CACHE_LINE_SIZE; + } + } + + if (line_size != CACHE_LINE_SIZE) + pr_warn("L2C OF: DT supplied line size %d bytes does " + "not match hardware line size of %d bytes\n", + line_size, + CACHE_LINE_SIZE); + + /* + * Since: + * set size = cache size / sets + * ways = cache size / (sets * line size) + * way size = cache size / (cache size / (sets * line size)) + * way size = sets * line size + * associativity = ways = cache size / way size + */ + way_size = sets * line_size; + *associativity = cache_size / way_size; + + if (way_size > max_way_size) { + pr_err("L2C OF: set size %dKB is too large\n", way_size); + return; + } + + pr_info("L2C OF: override cache size: %d bytes (%dKB)\n", + cache_size, cache_size >> 10); + pr_info("L2C OF: override line size: %d bytes\n", line_size); + pr_info("L2C OF: override way size: %d bytes (%dKB)\n", + way_size, way_size >> 10); + pr_info("L2C OF: override associativity: %d\n", *associativity); + + /* + * Calculates the bits 17:19 to set for way size: + * 512KB -> 6, 256KB -> 5, ... 16KB -> 1 + */ + way_size_bits = ilog2(way_size >> 10) - 3; + if (way_size_bits < 1 || way_size_bits > 6) { + pr_err("L2C OF: cache way size illegal: %dKB is not mapped\n", + way_size); + return; + } + + mask |= L2C_AUX_CTRL_WAY_SIZE_MASK; + val |= (way_size_bits << L2C_AUX_CTRL_WAY_SIZE_SHIFT); + + *aux_val &= ~mask; + *aux_val |= val; + *aux_mask &= ~mask; +} + static void __init l2x0_of_parse(const struct device_node *np, u32 *aux_val, u32 *aux_mask) { @@ -952,6 +1045,7 @@ static void __init l2x0_of_parse(const struct device_node *np, u32 tag = 0; u32 dirty = 0; u32 val = 0, mask = 0; + u32 assoc; of_property_read_u32(np, "arm,tag-latency", &tag); if (tag) { @@ -974,6 +1068,15 @@ static void __init l2x0_of_parse(const struct device_node *np, val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT; } + l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_256K); + if (assoc > 8) { + pr_err("l2x0 of: cache setting yield too high associativity\n"); + pr_err("l2x0 of: %d calculated, max 8\n", assoc); + } else { + mask |= L2X0_AUX_CTRL_ASSOC_MASK; + val |= (assoc << L2X0_AUX_CTRL_ASSOC_SHIFT); + } + *aux_val &= ~mask; *aux_val |= val; *aux_mask &= ~mask; @@ -1021,6 +1124,7 @@ static void __init l2c310_of_parse(const struct device_node *np, u32 data[3] = { 0, 0, 0 }; u32 tag[3] = { 0, 0, 0 }; u32 filter[2] = { 0, 0 }; + u32 assoc; of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag)); if (tag[0] && tag[1] && tag[2]) @@ -1047,6 +1151,23 @@ static void __init l2c310_of_parse(const struct device_node *np, writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L310_ADDR_FILTER_EN, l2x0_base + L310_ADDR_FILTER_START); } + + l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_512K); + switch (assoc) { + case 16: + *aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK; + *aux_val |= L310_AUX_CTRL_ASSOCIATIVITY_16; + *aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK; + break; + case 8: + *aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK; + *aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK; + break; + default: + pr_err("PL310 OF: cache setting yield illegal associativity\n"); + pr_err("PL310 OF: %d calculated, only 8 and 16 legal\n", assoc); + break; + } } static const struct l2c_init_data of_l2c310_data __initconst = { diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c index c447ec70e868..e7a81cebbb2e 100644 --- a/arch/arm/mm/idmap.c +++ b/arch/arm/mm/idmap.c @@ -27,7 +27,7 @@ static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end, if (pud_none_or_clear_bad(pud) || (pud_val(*pud) & L_PGD_SWAPPER)) { pmd = pmd_alloc_one(&init_mm, addr); if (!pmd) { - pr_warning("Failed to allocate identity pmd.\n"); + pr_warn("Failed to allocate identity pmd.\n"); return; } /* diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 659c75d808dc..9221645dd192 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -636,6 +636,11 @@ static int keep_initrd; void free_initrd_mem(unsigned long start, unsigned long end) { if (!keep_initrd) { + if (start == initrd_start) + start = round_down(start, PAGE_SIZE); + if (end == initrd_end) + end = round_up(end, PAGE_SIZE); + poison_init_mem((void *)start, PAGE_ALIGN(end) - start); free_reserved_area((void *)start, (void *)end, -1, "initrd"); } diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 8348ed6b2efe..9f98cec7fe1e 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -223,13 +223,13 @@ early_param("ecc", early_ecc); static int __init early_cachepolicy(char *p) { - pr_warning("cachepolicy kernel parameter not supported without cp15\n"); + pr_warn("cachepolicy kernel parameter not supported without cp15\n"); } early_param("cachepolicy", early_cachepolicy); static int __init noalign_setup(char *__unused) { - pr_warning("noalign kernel parameter not supported without cp15\n"); + pr_warn("noalign kernel parameter not supported without cp15\n"); } __setup("noalign", noalign_setup); diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S index 1a24e9232ec8..d3daed0ae0ad 100644 --- a/arch/arm/mm/proc-v7-3level.S +++ b/arch/arm/mm/proc-v7-3level.S @@ -146,7 +146,6 @@ ENDPROC(cpu_v7_set_pte_ext) mov \tmp, \ttbr1, lsr #(32 - ARCH_PGD_SHIFT) @ upper bits mov \ttbr1, \ttbr1, lsl #ARCH_PGD_SHIFT @ lower bits addls \ttbr1, \ttbr1, #TTBR1_OFFSET - adcls \tmp, \tmp, #0 mcrr p15, 1, \ttbr1, \tmp, c2 @ load TTBR1 mov \tmp, \ttbr0, lsr #(32 - ARCH_PGD_SHIFT) @ upper bits mov \ttbr0, \ttbr0, lsl #ARCH_PGD_SHIFT @ lower bits @@ -158,9 +157,9 @@ ENDPROC(cpu_v7_set_pte_ext) * TFR EV X F IHD LR S * .EEE ..EE PUI. .TAT 4RVI ZWRS BLDP WCAM * rxxx rrxx xxx0 0101 xxxx xxxx x111 xxxx < forced - * 11 0 110 1 0011 1100 .111 1101 < we want + * 11 0 110 0 0011 1100 .111 1101 < we want */ .align 2 .type v7_crval, #object v7_crval: - crval clear=0x0120c302, mmuset=0x30c23c7d, ucset=0x00c01c7c + crval clear=0x0122c302, mmuset=0x30c03c7d, ucset=0x00c01c7c diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index b5d67db20897..b3a947863ac7 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -570,7 +570,7 @@ __v7_ca15mp_proc_info: __v7_b15mp_proc_info: .long 0x420f00f0 .long 0xff0ffff0 - __v7_proc __v7_b15mp_setup, hwcaps = HWCAP_IDIV + __v7_proc __v7_b15mp_setup .size __v7_b15mp_proc_info, . - __v7_b15mp_proc_info /* diff --git a/arch/arm/plat-omap/Kconfig b/arch/arm/plat-omap/Kconfig index 02fc10d2d63b..d055db32ffcb 100644 --- a/arch/arm/plat-omap/Kconfig +++ b/arch/arm/plat-omap/Kconfig @@ -1,3 +1,6 @@ +config ARCH_OMAP + bool + if ARCH_OMAP menu "TI OMAP Common Features" diff --git a/arch/arm/xen/Makefile b/arch/arm/xen/Makefile index 12969523414c..1f85bfe6b470 100644 --- a/arch/arm/xen/Makefile +++ b/arch/arm/xen/Makefile @@ -1 +1 @@ -obj-y := enlighten.o hypercall.o grant-table.o p2m.o mm.o +obj-y := enlighten.o hypercall.o grant-table.o p2m.o mm.o mm32.o diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 98544c5f86e9..0e15f011f9c8 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -260,6 +260,12 @@ static int __init xen_guest_init(void) xen_domain_type = XEN_HVM_DOMAIN; xen_setup_features(); + + if (!xen_feature(XENFEAT_grant_map_identity)) { + pr_warn("Please upgrade your Xen.\n" + "If your platform has any non-coherent DMA devices, they won't work properly.\n"); + } + if (xen_feature(XENFEAT_dom0)) xen_start_info->flags |= SIF_INITDOMAIN|SIF_PRIVILEGED; else diff --git a/arch/arm/xen/mm32.c b/arch/arm/xen/mm32.c new file mode 100644 index 000000000000..3b99860fd7ae --- /dev/null +++ b/arch/arm/xen/mm32.c @@ -0,0 +1,202 @@ +#include <linux/cpu.h> +#include <linux/dma-mapping.h> +#include <linux/gfp.h> +#include <linux/highmem.h> + +#include <xen/features.h> + +static DEFINE_PER_CPU(unsigned long, xen_mm32_scratch_virt); +static DEFINE_PER_CPU(pte_t *, xen_mm32_scratch_ptep); + +static int alloc_xen_mm32_scratch_page(int cpu) +{ + struct page *page; + unsigned long virt; + pmd_t *pmdp; + pte_t *ptep; + + if (per_cpu(xen_mm32_scratch_ptep, cpu) != NULL) + return 0; + + page = alloc_page(GFP_KERNEL); + if (page == NULL) { + pr_warn("Failed to allocate xen_mm32_scratch_page for cpu %d\n", cpu); + return -ENOMEM; + } + + virt = (unsigned long)__va(page_to_phys(page)); + pmdp = pmd_offset(pud_offset(pgd_offset_k(virt), virt), virt); + ptep = pte_offset_kernel(pmdp, virt); + + per_cpu(xen_mm32_scratch_virt, cpu) = virt; + per_cpu(xen_mm32_scratch_ptep, cpu) = ptep; + + return 0; +} + +static int xen_mm32_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + int cpu = (long)hcpu; + switch (action) { + case CPU_UP_PREPARE: + if (alloc_xen_mm32_scratch_page(cpu)) + return NOTIFY_BAD; + break; + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block xen_mm32_cpu_notifier = { + .notifier_call = xen_mm32_cpu_notify, +}; + +static void* xen_mm32_remap_page(dma_addr_t handle) +{ + unsigned long virt = get_cpu_var(xen_mm32_scratch_virt); + pte_t *ptep = __get_cpu_var(xen_mm32_scratch_ptep); + + *ptep = pfn_pte(handle >> PAGE_SHIFT, PAGE_KERNEL); + local_flush_tlb_kernel_page(virt); + + return (void*)virt; +} + +static void xen_mm32_unmap(void *vaddr) +{ + put_cpu_var(xen_mm32_scratch_virt); +} + + +/* functions called by SWIOTLB */ + +static void dma_cache_maint(dma_addr_t handle, unsigned long offset, + size_t size, enum dma_data_direction dir, + void (*op)(const void *, size_t, int)) +{ + unsigned long pfn; + size_t left = size; + + pfn = (handle >> PAGE_SHIFT) + offset / PAGE_SIZE; + offset %= PAGE_SIZE; + + do { + size_t len = left; + void *vaddr; + + if (!pfn_valid(pfn)) + { + /* Cannot map the page, we don't know its physical address. + * Return and hope for the best */ + if (!xen_feature(XENFEAT_grant_map_identity)) + return; + vaddr = xen_mm32_remap_page(handle) + offset; + op(vaddr, len, dir); + xen_mm32_unmap(vaddr - offset); + } else { + struct page *page = pfn_to_page(pfn); + + if (PageHighMem(page)) { + if (len + offset > PAGE_SIZE) + len = PAGE_SIZE - offset; + + if (cache_is_vipt_nonaliasing()) { + vaddr = kmap_atomic(page); + op(vaddr + offset, len, dir); + kunmap_atomic(vaddr); + } else { + vaddr = kmap_high_get(page); + if (vaddr) { + op(vaddr + offset, len, dir); + kunmap_high(page); + } + } + } else { + vaddr = page_address(page) + offset; + op(vaddr, len, dir); + } + } + + offset = 0; + pfn++; + left -= len; + } while (left); +} + +static void __xen_dma_page_dev_to_cpu(struct device *hwdev, dma_addr_t handle, + size_t size, enum dma_data_direction dir) +{ + /* Cannot use __dma_page_dev_to_cpu because we don't have a + * struct page for handle */ + + if (dir != DMA_TO_DEVICE) + outer_inv_range(handle, handle + size); + + dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, dmac_unmap_area); +} + +static void __xen_dma_page_cpu_to_dev(struct device *hwdev, dma_addr_t handle, + size_t size, enum dma_data_direction dir) +{ + + dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, dmac_map_area); + + if (dir == DMA_FROM_DEVICE) { + outer_inv_range(handle, handle + size); + } else { + outer_clean_range(handle, handle + size); + } +} + +void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) + +{ + if (!__generic_dma_ops(hwdev)->unmap_page) + return; + if (dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + return; + + __xen_dma_page_dev_to_cpu(hwdev, handle, size, dir); +} + +void xen_dma_sync_single_for_cpu(struct device *hwdev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + if (!__generic_dma_ops(hwdev)->sync_single_for_cpu) + return; + __xen_dma_page_dev_to_cpu(hwdev, handle, size, dir); +} + +void xen_dma_sync_single_for_device(struct device *hwdev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + if (!__generic_dma_ops(hwdev)->sync_single_for_device) + return; + __xen_dma_page_cpu_to_dev(hwdev, handle, size, dir); +} + +int __init xen_mm32_init(void) +{ + int cpu; + + if (!xen_initial_domain()) + return 0; + + register_cpu_notifier(&xen_mm32_cpu_notifier); + get_online_cpus(); + for_each_online_cpu(cpu) { + if (alloc_xen_mm32_scratch_page(cpu)) { + put_online_cpus(); + unregister_cpu_notifier(&xen_mm32_cpu_notifier); + return -ENOMEM; + } + } + put_online_cpus(); + + return 0; +} +arch_initcall(xen_mm32_init); diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index 97baf4427817..054857776254 100644 --- a/arch/arm/xen/p2m.c +++ b/arch/arm/xen/p2m.c @@ -21,14 +21,12 @@ struct xen_p2m_entry { unsigned long pfn; unsigned long mfn; unsigned long nr_pages; - struct rb_node rbnode_mach; struct rb_node rbnode_phys; }; static rwlock_t p2m_lock; struct rb_root phys_to_mach = RB_ROOT; EXPORT_SYMBOL_GPL(phys_to_mach); -static struct rb_root mach_to_phys = RB_ROOT; static int xen_add_phys_to_mach_entry(struct xen_p2m_entry *new) { @@ -41,8 +39,6 @@ static int xen_add_phys_to_mach_entry(struct xen_p2m_entry *new) parent = *link; entry = rb_entry(parent, struct xen_p2m_entry, rbnode_phys); - if (new->mfn == entry->mfn) - goto err_out; if (new->pfn == entry->pfn) goto err_out; @@ -88,64 +84,6 @@ unsigned long __pfn_to_mfn(unsigned long pfn) } EXPORT_SYMBOL_GPL(__pfn_to_mfn); -static int xen_add_mach_to_phys_entry(struct xen_p2m_entry *new) -{ - struct rb_node **link = &mach_to_phys.rb_node; - struct rb_node *parent = NULL; - struct xen_p2m_entry *entry; - int rc = 0; - - while (*link) { - parent = *link; - entry = rb_entry(parent, struct xen_p2m_entry, rbnode_mach); - - if (new->mfn == entry->mfn) - goto err_out; - if (new->pfn == entry->pfn) - goto err_out; - - if (new->mfn < entry->mfn) - link = &(*link)->rb_left; - else - link = &(*link)->rb_right; - } - rb_link_node(&new->rbnode_mach, parent, link); - rb_insert_color(&new->rbnode_mach, &mach_to_phys); - goto out; - -err_out: - rc = -EINVAL; - pr_warn("%s: cannot add pfn=%pa -> mfn=%pa: pfn=%pa -> mfn=%pa already exists\n", - __func__, &new->pfn, &new->mfn, &entry->pfn, &entry->mfn); -out: - return rc; -} - -unsigned long __mfn_to_pfn(unsigned long mfn) -{ - struct rb_node *n = mach_to_phys.rb_node; - struct xen_p2m_entry *entry; - unsigned long irqflags; - - read_lock_irqsave(&p2m_lock, irqflags); - while (n) { - entry = rb_entry(n, struct xen_p2m_entry, rbnode_mach); - if (entry->mfn <= mfn && - entry->mfn + entry->nr_pages > mfn) { - read_unlock_irqrestore(&p2m_lock, irqflags); - return entry->pfn + (mfn - entry->mfn); - } - if (mfn < entry->mfn) - n = n->rb_left; - else - n = n->rb_right; - } - read_unlock_irqrestore(&p2m_lock, irqflags); - - return INVALID_P2M_ENTRY; -} -EXPORT_SYMBOL_GPL(__mfn_to_pfn); - int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, struct gnttab_map_grant_ref *kmap_ops, struct page **pages, unsigned int count) @@ -192,7 +130,6 @@ bool __set_phys_to_machine_multi(unsigned long pfn, p2m_entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys); if (p2m_entry->pfn <= pfn && p2m_entry->pfn + p2m_entry->nr_pages > pfn) { - rb_erase(&p2m_entry->rbnode_mach, &mach_to_phys); rb_erase(&p2m_entry->rbnode_phys, &phys_to_mach); write_unlock_irqrestore(&p2m_lock, irqflags); kfree(p2m_entry); @@ -217,8 +154,7 @@ bool __set_phys_to_machine_multi(unsigned long pfn, p2m_entry->mfn = mfn; write_lock_irqsave(&p2m_lock, irqflags); - if ((rc = xen_add_phys_to_mach_entry(p2m_entry) < 0) || - (rc = xen_add_mach_to_phys_entry(p2m_entry) < 0)) { + if ((rc = xen_add_phys_to_mach_entry(p2m_entry)) < 0) { write_unlock_irqrestore(&p2m_lock, irqflags); return false; } diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index cc83520459ed..7fd3e27e3ccc 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -122,6 +122,17 @@ #define VTCR_EL2_T0SZ_MASK 0x3f #define VTCR_EL2_T0SZ_40B 24 +/* + * We configure the Stage-2 page tables to always restrict the IPA space to be + * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are + * not known to exist and will break with this configuration. + * + * Note that when using 4K pages, we concatenate two first level page tables + * together. + * + * The magic numbers used for VTTBR_X in this patch can be found in Tables + * D4-23 and D4-25 in ARM DDI 0487A.b. + */ #ifdef CONFIG_ARM64_64K_PAGES /* * Stage2 translation configuration: @@ -149,7 +160,7 @@ #endif #define VTTBR_BADDR_SHIFT (VTTBR_X - 1) -#define VTTBR_BADDR_MASK (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) +#define VTTBR_BADDR_MASK (((1LLU << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) #define VTTBR_VMID_SHIFT (48LLU) #define VTTBR_VMID_MASK (0xffLLU << VTTBR_VMID_SHIFT) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index fdc3e21abd8d..5674a55b5518 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -174,6 +174,11 @@ static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu) static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) { + return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC; +} + +static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu) +{ return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE; } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e10c45a578e3..2012c4ba8d67 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -22,6 +22,8 @@ #ifndef __ARM64_KVM_HOST_H__ #define __ARM64_KVM_HOST_H__ +#include <linux/types.h> +#include <linux/kvm_types.h> #include <asm/kvm.h> #include <asm/kvm_asm.h> #include <asm/kvm_mmio.h> @@ -41,8 +43,7 @@ #define KVM_VCPU_MAX_FEATURES 3 -struct kvm_vcpu; -int kvm_target_cpu(void); +int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); int kvm_arch_dev_ioctl_check_extension(long ext); @@ -164,25 +165,23 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -struct kvm_vcpu_init; int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init); int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); -struct kvm_one_reg; int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); #define KVM_ARCH_WANT_MMU_NOTIFIER -struct kvm; int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); /* We do not have shadow page tables, hence the empty hooks */ -static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva) +static inline int kvm_age_hva(struct kvm *kvm, unsigned long start, + unsigned long end) { return 0; } @@ -192,8 +191,13 @@ static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) return 0; } +static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, + unsigned long address) +{ +} + struct kvm_vcpu *kvm_arm_get_running_vcpu(void); -struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); +struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); u64 kvm_call_hyp(void *hypfn, ...); @@ -244,4 +248,10 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic) } } +static inline void kvm_arch_hardware_disable(void) {} +static inline void kvm_arch_hardware_unsetup(void) {} +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 8e138c7c53ac..a030d163840b 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -59,10 +59,9 @@ #define KERN_TO_HYP(kva) ((unsigned long)kva - PAGE_OFFSET + HYP_PAGE_OFFSET) /* - * Align KVM with the kernel's view of physical memory. Should be - * 40bit IPA, with PGD being 8kB aligned in the 4KB page configuration. + * We currently only support a 40bit IPA. */ -#define KVM_PHYS_SHIFT PHYS_MASK_SHIFT +#define KVM_PHYS_SHIFT (40) #define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT) #define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL) @@ -93,19 +92,6 @@ void kvm_clear_hyp_idmap(void); #define kvm_set_pte(ptep, pte) set_pte(ptep, pte) #define kvm_set_pmd(pmdp, pmd) set_pmd(pmdp, pmd) -static inline bool kvm_is_write_fault(unsigned long esr) -{ - unsigned long esr_ec = esr >> ESR_EL2_EC_SHIFT; - - if (esr_ec == ESR_EL2_EC_IABT) - return false; - - if ((esr & ESR_EL2_ISV) && !(esr & ESR_EL2_WNR)) - return false; - - return true; -} - static inline void kvm_clean_pgd(pgd_t *pgd) {} static inline void kvm_clean_pmd_entry(pmd_t *pmd) {} static inline void kvm_clean_pte(pte_t *pte) {} diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index e633ff8cdec8..8e38878c87c6 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -37,6 +37,7 @@ #define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE +#define __KVM_HAVE_READONLY_MEM #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) @@ -159,6 +160,7 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) +#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 0f08dfd69ebc..dfa6e3e74fdd 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -97,19 +97,15 @@ static bool migrate_one_irq(struct irq_desc *desc) if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity)) return false; - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + affinity = cpu_online_mask; ret = true; + } - /* - * when using forced irq_set_affinity we must ensure that the cpu - * being offlined is not present in the affinity mask, it may be - * selected as the target CPU otherwise - */ - affinity = cpu_online_mask; c = irq_data_get_irq_chip(d); if (!c->irq_set_affinity) pr_debug("IRQ%u: unable to set affinity\n", d->irq); - else if (c->irq_set_affinity(d, affinity, true) == IRQ_SET_MASK_OK && ret) + else if (c->irq_set_affinity(d, affinity, false) == IRQ_SET_MASK_OK && ret) cpumask_copy(d->affinity, affinity); return ret; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index bf669228a59f..89f41f7d27dd 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -204,9 +204,27 @@ void exit_thread(void) { } +static void tls_thread_flush(void) +{ + asm ("msr tpidr_el0, xzr"); + + if (is_compat_task()) { + current->thread.tp_value = 0; + + /* + * We need to ensure ordering between the shadow state and the + * hardware state, so that we don't corrupt the hardware state + * with a stale shadow state during context switch. + */ + barrier(); + asm ("msr tpidrro_el0, xzr"); + } +} + void flush_thread(void) { fpsimd_flush_thread(); + tls_thread_flush(); flush_ptrace_hw_breakpoint(current); } diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index de2b0226e06d..dc47e53e9e28 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -79,6 +79,12 @@ long compat_arm_syscall(struct pt_regs *regs) case __ARM_NR_compat_set_tls: current->thread.tp_value = regs->regs[0]; + + /* + * Protect against register corruption from context switch. + * See comment in tls_thread_flush. + */ + barrier(); asm ("msr tpidrro_el0, %0" : : "r" (regs->regs[0])); return 0; diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 97f0c0429dfa..edf8715ba39b 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -97,9 +97,9 @@ SECTIONS PERCPU_SECTION(64) + . = ALIGN(PAGE_SIZE); __init_end = .; - . = ALIGN(PAGE_SIZE); _data = .; _sdata = .; RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 8d1ec2887a26..76794692c20b 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -174,7 +174,7 @@ static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); if (ret != 0) - return ret; + return -EFAULT; return kvm_arm_timer_set_reg(vcpu, reg->id, val); } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 5805e7c4a4dd..4cc3b719208e 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1218,7 +1218,7 @@ static bool is_valid_cache(u32 val) u32 level, ctype; if (val >= CSSELR_MAX) - return -ENOENT; + return false; /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */ level = (val >> 1); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 271e654d852d..494297c698ca 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -149,8 +149,7 @@ void __init arm64_memblock_init(void) memblock_reserve(__virt_to_phys(initrd_start), initrd_end - initrd_start); #endif - if (!efi_enabled(EFI_MEMMAP)) - early_init_fdt_scan_reserved_mem(); + early_init_fdt_scan_reserved_mem(); /* 4GB maximum for 32-bit only capable devices */ if (IS_ENABLED(CONFIG_ZONE_DMA)) @@ -334,8 +333,14 @@ static int keep_initrd; void free_initrd_mem(unsigned long start, unsigned long end) { - if (!keep_initrd) + if (!keep_initrd) { + if (start == initrd_start) + start = round_down(start, PAGE_SIZE); + if (end == initrd_end) + end = round_up(end, PAGE_SIZE); + free_reserved_area((void *)start, (void *)end, 0, "initrd"); + } } static int __init keepinitrd_setup(char *__unused) diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index ed30699cc635..af76634f8d98 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -671,7 +671,7 @@ config TICKSOURCE_CORETMR default y endmenu -menu "Clock souce" +menu "Clock source" depends on GENERIC_CLOCKEVENTS config CYCLES_CLOCKSOURCE bool "CYCLES" diff --git a/arch/blackfin/mach-common/ints-priority.c b/arch/blackfin/mach-common/ints-priority.c index 1f94784eab6d..694619365265 100644 --- a/arch/blackfin/mach-common/ints-priority.c +++ b/arch/blackfin/mach-common/ints-priority.c @@ -455,7 +455,7 @@ void handle_sec_sci_fault(uint32_t gstat) printk(KERN_DEBUG "sec ack err\n"); break; default: - printk(KERN_DEBUG "sec sci unknow err\n"); + printk(KERN_DEBUG "sec sci unknown err\n"); } } diff --git a/arch/ia64/configs/bigsur_defconfig b/arch/ia64/configs/bigsur_defconfig index 4c4ac163c600..b6bda1838629 100644 --- a/arch/ia64/configs/bigsur_defconfig +++ b/arch/ia64/configs/bigsur_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=16 @@ -6,6 +5,8 @@ CONFIG_PROFILING=y CONFIG_OPROFILE=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_SGI_PARTITION=y CONFIG_IA64_DIG=y CONFIG_SMP=y CONFIG_NR_CPUS=2 @@ -51,9 +52,6 @@ CONFIG_DM_MIRROR=m CONFIG_DM_ZERO=m CONFIG_NETDEVICES=y CONFIG_DUMMY=y -CONFIG_NET_ETHERNET=y -CONFIG_MII=y -CONFIG_NET_PCI=y CONFIG_INPUT_EVDEV=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y @@ -85,7 +83,6 @@ CONFIG_EXT3_FS=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y -CONFIG_AUTOFS_FS=m CONFIG_AUTOFS4_FS=m CONFIG_ISO9660_FS=m CONFIG_JOLIET=y @@ -95,17 +92,13 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y CONFIG_NFS_FS=m -CONFIG_NFS_V3=y -CONFIG_NFS_V4=y +CONFIG_NFS_V4=m CONFIG_NFSD=m CONFIG_NFSD_V4=y CONFIG_CIFS=m CONFIG_CIFS_STATS=y CONFIG_CIFS_XATTR=y CONFIG_CIFS_POSIX=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_SGI_PARTITION=y -CONFIG_EFI_PARTITION=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=m diff --git a/arch/ia64/configs/generic_defconfig b/arch/ia64/configs/generic_defconfig index e8ed3ae70aae..81f686dee53c 100644 --- a/arch/ia64/configs/generic_defconfig +++ b/arch/ia64/configs/generic_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_IKCONFIG=y @@ -6,13 +5,13 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=20 CONFIG_CGROUPS=y CONFIG_CPUSETS=y -CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y CONFIG_KALLSYMS_ALL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y -# CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +CONFIG_SGI_PARTITION=y CONFIG_MCKINLEY=y CONFIG_IA64_PAGE_SIZE_64KB=y CONFIG_IA64_CYCLONE=y @@ -29,14 +28,13 @@ CONFIG_ACPI_BUTTON=m CONFIG_ACPI_FAN=m CONFIG_ACPI_DOCK=y CONFIG_ACPI_PROCESSOR=m -CONFIG_ACPI_CONTAINER=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_ACPI=y +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y CONFIG_IP_MULTICAST=y -CONFIG_ARPD=y CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" @@ -82,16 +80,13 @@ CONFIG_FUSION_FC=m CONFIG_FUSION_SAS=y CONFIG_NETDEVICES=y CONFIG_DUMMY=m -CONFIG_NET_ETHERNET=y +CONFIG_NETCONSOLE=y +CONFIG_TIGON3=y CONFIG_NET_TULIP=y CONFIG_TULIP=m -CONFIG_NET_PCI=y -CONFIG_NET_VENDOR_INTEL=y CONFIG_E100=m CONFIG_E1000=y CONFIG_IGB=y -CONFIG_TIGON3=y -CONFIG_NETCONSOLE=y # CONFIG_SERIO_SERPORT is not set CONFIG_GAMEPORT=m CONFIG_SERIAL_NONSTANDARD=y @@ -151,6 +146,7 @@ CONFIG_USB_STORAGE=m CONFIG_INFINIBAND=m CONFIG_INFINIBAND_MTHCA=m CONFIG_INFINIBAND_IPOIB=m +CONFIG_INTEL_IOMMU=y CONFIG_MSPEC=m CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y @@ -164,7 +160,6 @@ CONFIG_REISERFS_FS_XATTR=y CONFIG_REISERFS_FS_POSIX_ACL=y CONFIG_REISERFS_FS_SECURITY=y CONFIG_XFS_FS=y -CONFIG_AUTOFS_FS=m CONFIG_AUTOFS4_FS=m CONFIG_ISO9660_FS=m CONFIG_JOLIET=y @@ -175,16 +170,10 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y CONFIG_NFS_FS=m -CONFIG_NFS_V3=y -CONFIG_NFS_V4=y +CONFIG_NFS_V4=m CONFIG_NFSD=m CONFIG_NFSD_V4=y -CONFIG_SMB_FS=m -CONFIG_SMB_NLS_DEFAULT=y CONFIG_CIFS=m -CONFIG_PARTITION_ADVANCED=y -CONFIG_SGI_PARTITION=y -CONFIG_EFI_PARTITION=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_737=m CONFIG_NLS_CODEPAGE_775=m @@ -225,11 +214,7 @@ CONFIG_NLS_UTF8=m CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_MUTEXES=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRC_T10DIF=y -CONFIG_INTEL_IOMMU=y diff --git a/arch/ia64/configs/gensparse_defconfig b/arch/ia64/configs/gensparse_defconfig index d663efd1e4db..5b4fcdd51457 100644 --- a/arch/ia64/configs/gensparse_defconfig +++ b/arch/ia64/configs/gensparse_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_IKCONFIG=y @@ -9,6 +8,8 @@ CONFIG_KALLSYMS_ALL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_SGI_PARTITION=y CONFIG_MCKINLEY=y CONFIG_IA64_CYCLONE=y CONFIG_SMP=y @@ -24,14 +25,12 @@ CONFIG_BINFMT_MISC=m CONFIG_ACPI_BUTTON=m CONFIG_ACPI_FAN=m CONFIG_ACPI_PROCESSOR=m -CONFIG_ACPI_CONTAINER=m CONFIG_HOTPLUG_PCI=y -CONFIG_HOTPLUG_PCI_ACPI=m +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y CONFIG_IP_MULTICAST=y -CONFIG_ARPD=y CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set CONFIG_BLK_DEV_LOOP=m @@ -71,15 +70,12 @@ CONFIG_FUSION_SPI=y CONFIG_FUSION_FC=m CONFIG_NETDEVICES=y CONFIG_DUMMY=m -CONFIG_NET_ETHERNET=y +CONFIG_NETCONSOLE=y +CONFIG_TIGON3=y CONFIG_NET_TULIP=y CONFIG_TULIP=m -CONFIG_NET_PCI=y -CONFIG_NET_VENDOR_INTEL=y CONFIG_E100=m CONFIG_E1000=y -CONFIG_TIGON3=y -CONFIG_NETCONSOLE=y # CONFIG_SERIO_SERPORT is not set CONFIG_GAMEPORT=m CONFIG_SERIAL_NONSTANDARD=y @@ -146,7 +142,6 @@ CONFIG_REISERFS_FS_XATTR=y CONFIG_REISERFS_FS_POSIX_ACL=y CONFIG_REISERFS_FS_SECURITY=y CONFIG_XFS_FS=y -CONFIG_AUTOFS_FS=y CONFIG_AUTOFS4_FS=y CONFIG_ISO9660_FS=m CONFIG_JOLIET=y @@ -157,16 +152,10 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y CONFIG_NFS_FS=m -CONFIG_NFS_V3=y -CONFIG_NFS_V4=y +CONFIG_NFS_V4=m CONFIG_NFSD=m CONFIG_NFSD_V4=y -CONFIG_SMB_FS=m -CONFIG_SMB_NLS_DEFAULT=y CONFIG_CIFS=m -CONFIG_PARTITION_ADVANCED=y -CONFIG_SGI_PARTITION=y -CONFIG_EFI_PARTITION=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_737=m CONFIG_NLS_CODEPAGE_775=m diff --git a/arch/ia64/configs/sim_defconfig b/arch/ia64/configs/sim_defconfig index b4548a3e82d5..f0f69fdbddae 100644 --- a/arch/ia64/configs/sim_defconfig +++ b/arch/ia64/configs/sim_defconfig @@ -1,13 +1,12 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=16 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y +CONFIG_PARTITION_ADVANCED=y CONFIG_IA64_HP_SIM=y CONFIG_MCKINLEY=y CONFIG_IA64_PAGE_SIZE_64KB=y @@ -27,7 +26,6 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y -CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y CONFIG_SCSI_SPI_ATTRS=y @@ -49,8 +47,6 @@ CONFIG_HUGETLBFS=y CONFIG_NFS_FS=y CONFIG_NFSD=y CONFIG_NFSD_V3=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_EFI_PARTITION=y +CONFIG_DEBUG_INFO=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_MUTEXES=y -CONFIG_DEBUG_INFO=y diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig index c8a3f40e77f6..192ed157c9ce 100644 --- a/arch/ia64/configs/tiger_defconfig +++ b/arch/ia64/configs/tiger_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_IKCONFIG=y @@ -11,6 +10,8 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +CONFIG_SGI_PARTITION=y CONFIG_IA64_DIG=y CONFIG_MCKINLEY=y CONFIG_IA64_PAGE_SIZE_64KB=y @@ -29,14 +30,12 @@ CONFIG_BINFMT_MISC=m CONFIG_ACPI_BUTTON=m CONFIG_ACPI_FAN=m CONFIG_ACPI_PROCESSOR=m -CONFIG_ACPI_CONTAINER=m CONFIG_HOTPLUG_PCI=y -CONFIG_HOTPLUG_PCI_ACPI=m +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y CONFIG_IP_MULTICAST=y -CONFIG_ARPD=y CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set CONFIG_BLK_DEV_LOOP=m @@ -53,6 +52,7 @@ CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=m CONFIG_CHR_DEV_SG=m +CONFIG_SCSI_FC_ATTRS=y CONFIG_SCSI_SYM53C8XX_2=y CONFIG_SCSI_QLOGIC_1280=y CONFIG_MD=y @@ -72,15 +72,12 @@ CONFIG_FUSION_FC=y CONFIG_FUSION_CTL=y CONFIG_NETDEVICES=y CONFIG_DUMMY=m -CONFIG_NET_ETHERNET=y +CONFIG_NETCONSOLE=y +CONFIG_TIGON3=y CONFIG_NET_TULIP=y CONFIG_TULIP=m -CONFIG_NET_PCI=y -CONFIG_NET_VENDOR_INTEL=y CONFIG_E100=m CONFIG_E1000=y -CONFIG_TIGON3=y -CONFIG_NETCONSOLE=y # CONFIG_SERIO_SERPORT is not set CONFIG_GAMEPORT=m CONFIG_SERIAL_NONSTANDARD=y @@ -118,7 +115,6 @@ CONFIG_REISERFS_FS_XATTR=y CONFIG_REISERFS_FS_POSIX_ACL=y CONFIG_REISERFS_FS_SECURITY=y CONFIG_XFS_FS=y -CONFIG_AUTOFS_FS=y CONFIG_AUTOFS4_FS=y CONFIG_ISO9660_FS=m CONFIG_JOLIET=y @@ -129,16 +125,10 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y CONFIG_NFS_FS=m -CONFIG_NFS_V3=y -CONFIG_NFS_V4=y +CONFIG_NFS_V4=m CONFIG_NFSD=m CONFIG_NFSD_V4=y -CONFIG_SMB_FS=m -CONFIG_SMB_NLS_DEFAULT=y CONFIG_CIFS=m -CONFIG_PARTITION_ADVANCED=y -CONFIG_SGI_PARTITION=y -CONFIG_EFI_PARTITION=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_737=m CONFIG_NLS_CODEPAGE_775=m @@ -180,6 +170,5 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_MUTEXES=y CONFIG_IA64_GRANULE_16MB=y -CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_MD5=y diff --git a/arch/ia64/configs/zx1_defconfig b/arch/ia64/configs/zx1_defconfig index 54bc72eda30d..b504c8e2fd52 100644 --- a/arch/ia64/configs/zx1_defconfig +++ b/arch/ia64/configs/zx1_defconfig @@ -1,9 +1,9 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BLK_DEV_INITRD=y CONFIG_KPROBES=y CONFIG_MODULES=y +CONFIG_PARTITION_ADVANCED=y CONFIG_IA64_HP_ZX1=y CONFIG_MCKINLEY=y CONFIG_SMP=y @@ -18,6 +18,7 @@ CONFIG_EFI_VARS=y CONFIG_BINFMT_MISC=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_ACPI=y +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y @@ -37,9 +38,9 @@ CONFIG_CHR_DEV_OSST=y CONFIG_BLK_DEV_SR=y CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y +CONFIG_SCSI_FC_ATTRS=y CONFIG_SCSI_SYM53C8XX_2=y CONFIG_SCSI_QLOGIC_1280=y CONFIG_FUSION=y @@ -48,18 +49,15 @@ CONFIG_FUSION_FC=y CONFIG_FUSION_CTL=m CONFIG_NETDEVICES=y CONFIG_DUMMY=y -CONFIG_NET_ETHERNET=y +CONFIG_TIGON3=y CONFIG_NET_TULIP=y CONFIG_TULIP=y CONFIG_TULIP_MWI=y CONFIG_TULIP_MMIO=y CONFIG_TULIP_NAPI=y CONFIG_TULIP_NAPI_HW_MITIGATION=y -CONFIG_NET_PCI=y -CONFIG_NET_VENDOR_INTEL=y CONFIG_E100=y CONFIG_E1000=y -CONFIG_TIGON3=y CONFIG_INPUT_JOYDEV=y CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_KEYBOARD is not set @@ -100,7 +98,6 @@ CONFIG_USB_STORAGE=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT3_FS=y -CONFIG_AUTOFS_FS=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_UDF_FS=y @@ -110,12 +107,9 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_NFSD=y CONFIG_NFSD_V3=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_EFI_PARTITION=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_737=y CONFIG_NLS_CODEPAGE_775=y diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index db95f570705f..4729752b7256 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -234,9 +234,6 @@ struct kvm_vm_data { #define KVM_REQ_PTC_G 32 #define KVM_REQ_RESUME 33 -struct kvm; -struct kvm_vcpu; - struct kvm_mmio_req { uint64_t addr; /* physical address */ uint64_t size; /* size in bytes */ @@ -595,6 +592,18 @@ void kvm_sal_emul(struct kvm_vcpu *vcpu); struct kvm *kvm_arch_alloc_vm(void); void kvm_arch_free_vm(struct kvm *kvm); +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_free_memslot(struct kvm *kvm, + struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} +static inline void kvm_arch_commit_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + const struct kvm_memory_slot *old, + enum kvm_mr_change change) {} +static inline void kvm_arch_hardware_unsetup(void) {} + #endif /* __ASSEMBLY__*/ #endif diff --git a/arch/ia64/include/uapi/asm/unistd.h b/arch/ia64/include/uapi/asm/unistd.h index 6a65bb7d0657..18026b2eb582 100644 --- a/arch/ia64/include/uapi/asm/unistd.h +++ b/arch/ia64/include/uapi/asm/unistd.h @@ -329,6 +329,6 @@ #define __NR_sched_getattr 1337 #define __NR_renameat2 1338 #define __NR_getrandom 1339 -#define __NR_memfd_create 1339 +#define __NR_memfd_create 1340 #endif /* _UAPI_ASM_IA64_UNISTD_H */ diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 0729ba6acddf..ec6b9acb6bea 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -125,7 +125,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler) static DEFINE_SPINLOCK(vp_lock); -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { long status; long tmp_base; @@ -160,7 +160,7 @@ int kvm_arch_hardware_enable(void *garbage) return 0; } -void kvm_arch_hardware_disable(void *garbage) +void kvm_arch_hardware_disable(void) { long status; @@ -1364,10 +1364,6 @@ static void kvm_release_vm_pages(struct kvm *kvm) } } -void kvm_arch_sync_events(struct kvm *kvm) -{ -} - void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_iommu_unmap_guest(kvm); @@ -1376,10 +1372,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_release_vm_pages(kvm); } -void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) -{ -} - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { if (cpu != vcpu->cpu) { @@ -1468,7 +1460,6 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) kfree(vcpu->arch.apic); } - long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -1551,21 +1542,12 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) -{ -} - int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { return 0; } -void kvm_arch_memslots_updated(struct kvm *kvm) -{ -} - int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem, @@ -1597,14 +1579,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, return 0; } -void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old, - enum kvm_mr_change change) -{ - return; -} - void kvm_arch_flush_shadow_all(struct kvm *kvm) { kvm_flush_remote_tlbs(kvm); @@ -1853,10 +1827,6 @@ int kvm_arch_hardware_setup(void) return 0; } -void kvm_arch_hardware_unsetup(void) -{ -} - int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) { return __apic_accept_irq(vcpu, irq->vector); diff --git a/arch/ia64/pci/fixup.c b/arch/ia64/pci/fixup.c index ec73b2cf912a..fc505d58f078 100644 --- a/arch/ia64/pci/fixup.c +++ b/arch/ia64/pci/fixup.c @@ -38,27 +38,6 @@ static void pci_fixup_video(struct pci_dev *pdev) return; /* Maybe, this machine supports legacy memory map. */ - if (!vga_default_device()) { - resource_size_t start, end; - int i; - - /* Does firmware framebuffer belong to us? */ - for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { - if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM)) - continue; - - start = pci_resource_start(pdev, i); - end = pci_resource_end(pdev, i); - - if (!start || !end) - continue; - - if (screen_info.lfb_base >= start && - (screen_info.lfb_base + screen_info.lfb_size) < end) - vga_set_default_device(pdev); - } - } - /* Is VGA routed to us? */ bus = pdev->bus; while (bus) { @@ -83,8 +62,7 @@ static void pci_fixup_video(struct pci_dev *pdev) pci_read_config_word(pdev, PCI_COMMAND, &config); if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) { pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW; - dev_printk(KERN_DEBUG, &pdev->dev, "Boot video device\n"); - vga_set_default_device(pdev); + dev_printk(KERN_DEBUG, &pdev->dev, "Video device with shadowed ROM\n"); } } } diff --git a/arch/m68k/Kconfig.devices b/arch/m68k/Kconfig.devices index d163991c5717..2a74777667fd 100644 --- a/arch/m68k/Kconfig.devices +++ b/arch/m68k/Kconfig.devices @@ -73,7 +73,7 @@ config ATARI_ETHERNEC ROM port. The driver works by polling instead of interrupts, so it is quite slow. - This driver also suppports the ethernet part of the NetUSBee ROM + This driver also supports the ethernet part of the NetUSBee ROM port combined Ethernet/USB adapter. To compile the actual ethernet driver, choose Y or M in for the NE2000 diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 40e1c1dd0e24..6feded3b0c4c 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -127,7 +127,7 @@ config SECCOMP endmenu -menu "Advanced setup" +menu "Kernel features" config ADVANCED_OPTIONS bool "Prompt for advanced kernel configuration options" @@ -248,10 +248,10 @@ config MICROBLAZE_64K_PAGES endchoice -endmenu - source "mm/Kconfig" +endmenu + menu "Executable file formats" source "fs/Kconfig.binfmt" diff --git a/arch/microblaze/include/asm/entry.h b/arch/microblaze/include/asm/entry.h index b4a4cb150aa9..596e485ae707 100644 --- a/arch/microblaze/include/asm/entry.h +++ b/arch/microblaze/include/asm/entry.h @@ -15,6 +15,7 @@ #include <asm/percpu.h> #include <asm/ptrace.h> +#include <linux/linkage.h> /* * These are per-cpu variables required in entry.S, among other diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h index 0aa005703a0b..59a89a64a865 100644 --- a/arch/microblaze/include/asm/uaccess.h +++ b/arch/microblaze/include/asm/uaccess.h @@ -98,13 +98,13 @@ static inline int access_ok(int type, const void __user *addr, if ((get_fs().seg < ((unsigned long)addr)) || (get_fs().seg < ((unsigned long)addr + size - 1))) { - pr_debug("ACCESS fail: %s at 0x%08x (size 0x%x), seg 0x%08x\n", + pr_devel("ACCESS fail: %s at 0x%08x (size 0x%x), seg 0x%08x\n", type ? "WRITE" : "READ ", (__force u32)addr, (u32)size, (u32)get_fs().seg); return 0; } ok: - pr_debug("ACCESS OK: %s at 0x%08x (size 0x%x), seg 0x%08x\n", + pr_devel("ACCESS OK: %s at 0x%08x (size 0x%x), seg 0x%08x\n", type ? "WRITE" : "READ ", (__force u32)addr, (u32)size, (u32)get_fs().seg); return 1; diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h index fd56a8f66489..ea4b233647c1 100644 --- a/arch/microblaze/include/asm/unistd.h +++ b/arch/microblaze/include/asm/unistd.h @@ -38,6 +38,6 @@ #endif /* __ASSEMBLY__ */ -#define __NR_syscalls 381 +#define __NR_syscalls 387 #endif /* _ASM_MICROBLAZE_UNISTD_H */ diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 900c7e5333b6..574c43000699 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -546,6 +546,7 @@ config SGI_IP28 # select SYS_HAS_EARLY_PRINTK select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN + select MIPS_L1_CACHE_SHIFT_7 help This is the SGI Indigo2 with R10000 processor. To compile a Linux kernel that runs on these, say Y here. @@ -2029,7 +2030,9 @@ config MIPS_CMP bool "MIPS CMP framework support (DEPRECATED)" depends on SYS_SUPPORTS_MIPS_CMP select MIPS_GIC_IPI + select SMP select SYNC_R4K + select SYS_SUPPORTS_SMP select WEAK_ORDERING default n help diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 9336509f47ad..bbac51e11179 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -113,7 +113,16 @@ predef-le += -DMIPSEL -D_MIPSEL -D__MIPSEL -D__MIPSEL__ cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(shell $(CC) -dumpmachine |grep -q 'mips.*el-.*' && echo -EB $(undef-all) $(predef-be)) cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += $(shell $(CC) -dumpmachine |grep -q 'mips.*el-.*' || echo -EL $(undef-all) $(predef-le)) -cflags-$(CONFIG_CPU_HAS_SMARTMIPS) += $(call cc-option,-msmartmips) +# For smartmips configurations, there are hundreds of warnings due to ISA overrides +# in assembly and header files. smartmips is only supported for MIPS32r1 onwards +# and there is no support for 64-bit. Various '.set mips2' or '.set mips3' or +# similar directives in the kernel will spam the build logs with the following warnings: +# Warning: the `smartmips' extension requires MIPS32 revision 1 or greater +# or +# Warning: the 64-bit MIPS architecture does not support the `smartmips' extension +# Pass -Wa,--no-warn to disable all assembler warnings until the kernel code has +# been fixed properly. +cflags-$(CONFIG_CPU_HAS_SMARTMIPS) += $(call cc-option,-msmartmips) -Wa,--no-warn cflags-$(CONFIG_CPU_MICROMIPS) += $(call cc-option,-mmicromips) cflags-$(CONFIG_SB1XXX_CORELIS) += $(call cc-option,-mno-sched-prolog) \ diff --git a/arch/mips/bcm63xx/irq.c b/arch/mips/bcm63xx/irq.c index 37eb2d1fa69a..b94bf44d8d8e 100644 --- a/arch/mips/bcm63xx/irq.c +++ b/arch/mips/bcm63xx/irq.c @@ -434,7 +434,7 @@ static void bcm63xx_init_irq(void) irq_stat_addr[0] += PERF_IRQSTAT_3368_REG; irq_mask_addr[0] += PERF_IRQMASK_3368_REG; irq_stat_addr[1] = 0; - irq_stat_addr[1] = 0; + irq_mask_addr[1] = 0; irq_bits = 32; ext_irq_count = 4; ext_irq_cfg_reg1 = PERF_EXTIRQ_CFG_REG_3368; @@ -443,7 +443,7 @@ static void bcm63xx_init_irq(void) irq_stat_addr[0] += PERF_IRQSTAT_6328_REG(0); irq_mask_addr[0] += PERF_IRQMASK_6328_REG(0); irq_stat_addr[1] += PERF_IRQSTAT_6328_REG(1); - irq_stat_addr[1] += PERF_IRQMASK_6328_REG(1); + irq_mask_addr[1] += PERF_IRQMASK_6328_REG(1); irq_bits = 64; ext_irq_count = 4; is_ext_irq_cascaded = 1; diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c index b49c7adbfa89..31903cf9709d 100644 --- a/arch/mips/boot/compressed/decompress.c +++ b/arch/mips/boot/compressed/decompress.c @@ -13,6 +13,7 @@ #include <linux/types.h> #include <linux/kernel.h> +#include <linux/string.h> #include <asm/addrspace.h> diff --git a/arch/mips/cavium-octeon/Kconfig b/arch/mips/cavium-octeon/Kconfig index 602866657938..c370426a7322 100644 --- a/arch/mips/cavium-octeon/Kconfig +++ b/arch/mips/cavium-octeon/Kconfig @@ -1,7 +1,7 @@ if CPU_CAVIUM_OCTEON config CAVIUM_CN63XXP1 - bool "Enable CN63XXP1 errata worarounds" + bool "Enable CN63XXP1 errata workarounds" default "n" help The CN63XXP1 chip requires build time workarounds to diff --git a/arch/mips/configs/gpr_defconfig b/arch/mips/configs/gpr_defconfig index 8f219dac9598..e24feb0633aa 100644 --- a/arch/mips/configs/gpr_defconfig +++ b/arch/mips/configs/gpr_defconfig @@ -19,6 +19,7 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_PCI=y CONFIG_BINFMT_MISC=m +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig index cc0756021398..48e16d98b2cc 100644 --- a/arch/mips/configs/ip27_defconfig +++ b/arch/mips/configs/ip27_defconfig @@ -28,6 +28,7 @@ CONFIG_MIPS32_COMPAT=y CONFIG_MIPS32_O32=y CONFIG_MIPS32_N32=y CONFIG_PM=y +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_XFRM_USER=m diff --git a/arch/mips/configs/jazz_defconfig b/arch/mips/configs/jazz_defconfig index 2575302aa2be..4f37a5985459 100644 --- a/arch/mips/configs/jazz_defconfig +++ b/arch/mips/configs/jazz_defconfig @@ -18,6 +18,7 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_BINFMT_MISC=m CONFIG_PM=y +CONFIG_NET=y CONFIG_PACKET=m CONFIG_UNIX=y CONFIG_NET_KEY=m diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig index 4cb787ff273e..1c6191ebd583 100644 --- a/arch/mips/configs/loongson3_defconfig +++ b/arch/mips/configs/loongson3_defconfig @@ -59,6 +59,7 @@ CONFIG_MIPS32_COMPAT=y CONFIG_MIPS32_O32=y CONFIG_MIPS32_N32=y CONFIG_PM_RUNTIME=y +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_XFRM_USER=y diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig index e18741ea1771..f57b96dcf7df 100644 --- a/arch/mips/configs/malta_defconfig +++ b/arch/mips/configs/malta_defconfig @@ -19,6 +19,7 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y CONFIG_PCI=y +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_XFRM_USER=m diff --git a/arch/mips/configs/malta_kvm_defconfig b/arch/mips/configs/malta_kvm_defconfig index cf0e01f814e1..d41742dd26c8 100644 --- a/arch/mips/configs/malta_kvm_defconfig +++ b/arch/mips/configs/malta_kvm_defconfig @@ -20,6 +20,7 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y CONFIG_PCI=y +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_XFRM_USER=m diff --git a/arch/mips/configs/malta_kvm_guest_defconfig b/arch/mips/configs/malta_kvm_guest_defconfig index edd9ec9cb678..a7806e83ea0f 100644 --- a/arch/mips/configs/malta_kvm_guest_defconfig +++ b/arch/mips/configs/malta_kvm_guest_defconfig @@ -19,6 +19,7 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y CONFIG_PCI=y +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_XFRM_USER=m diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig index d269a5326a30..9b6926d6bb32 100644 --- a/arch/mips/configs/mtx1_defconfig +++ b/arch/mips/configs/mtx1_defconfig @@ -27,6 +27,7 @@ CONFIG_PD6729=m CONFIG_I82092=m CONFIG_BINFMT_MISC=m CONFIG_PM=y +CONFIG_NET=y CONFIG_PACKET=m CONFIG_UNIX=y CONFIG_XFRM_USER=m diff --git a/arch/mips/configs/nlm_xlp_defconfig b/arch/mips/configs/nlm_xlp_defconfig index 2f660e9a0da6..70509a48df82 100644 --- a/arch/mips/configs/nlm_xlp_defconfig +++ b/arch/mips/configs/nlm_xlp_defconfig @@ -63,6 +63,7 @@ CONFIG_MIPS32_O32=y CONFIG_MIPS32_N32=y CONFIG_PM_RUNTIME=y CONFIG_PM_DEBUG=y +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_XFRM_USER=m diff --git a/arch/mips/configs/nlm_xlr_defconfig b/arch/mips/configs/nlm_xlr_defconfig index c6f84655c98a..82207e8079f3 100644 --- a/arch/mips/configs/nlm_xlr_defconfig +++ b/arch/mips/configs/nlm_xlr_defconfig @@ -43,6 +43,7 @@ CONFIG_PCI_DEBUG=y CONFIG_BINFMT_MISC=m CONFIG_PM_RUNTIME=y CONFIG_PM_DEBUG=y +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_XFRM_USER=m diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig index 29d79ae8a823..db029f4ff759 100644 --- a/arch/mips/configs/rm200_defconfig +++ b/arch/mips/configs/rm200_defconfig @@ -20,6 +20,7 @@ CONFIG_MODVERSIONS=y CONFIG_PCI=y CONFIG_BINFMT_MISC=m CONFIG_PM=y +CONFIG_NET=y CONFIG_PACKET=m CONFIG_UNIX=y CONFIG_NET_KEY=m diff --git a/arch/mips/include/asm/cop2.h b/arch/mips/include/asm/cop2.h index d0352983b94d..51f80bd36fcc 100644 --- a/arch/mips/include/asm/cop2.h +++ b/arch/mips/include/asm/cop2.h @@ -16,8 +16,8 @@ extern void octeon_cop2_save(struct octeon_cop2_state *); extern void octeon_cop2_restore(struct octeon_cop2_state *); -#define cop2_save(r) octeon_cop2_save(r) -#define cop2_restore(r) octeon_cop2_restore(r) +#define cop2_save(r) octeon_cop2_save(&(r)->thread.cp2) +#define cop2_restore(r) octeon_cop2_restore(&(r)->thread.cp2) #define cop2_present 1 #define cop2_lazy_restore 1 @@ -26,26 +26,26 @@ extern void octeon_cop2_restore(struct octeon_cop2_state *); extern void nlm_cop2_save(struct nlm_cop2_state *); extern void nlm_cop2_restore(struct nlm_cop2_state *); -#define cop2_save(r) nlm_cop2_save(r) -#define cop2_restore(r) nlm_cop2_restore(r) + +#define cop2_save(r) nlm_cop2_save(&(r)->thread.cp2) +#define cop2_restore(r) nlm_cop2_restore(&(r)->thread.cp2) #define cop2_present 1 #define cop2_lazy_restore 0 #elif defined(CONFIG_CPU_LOONGSON3) -#define cop2_save(r) -#define cop2_restore(r) - #define cop2_present 1 #define cop2_lazy_restore 1 +#define cop2_save(r) do { (r); } while (0) +#define cop2_restore(r) do { (r); } while (0) #else #define cop2_present 0 #define cop2_lazy_restore 0 -#define cop2_save(r) -#define cop2_restore(r) +#define cop2_save(r) do { (r); } while (0) +#define cop2_restore(r) do { (r); } while (0) #endif enum cu2_ops { diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 7a3fc67bd7f9..f2c249796ea8 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -96,11 +96,6 @@ #define CAUSEB_DC 27 #define CAUSEF_DC (_ULCAST_(1) << 27) -struct kvm; -struct kvm_run; -struct kvm_vcpu; -struct kvm_interrupt; - extern atomic_t kvm_mips_instance; extern pfn_t(*kvm_mips_gfn_to_pfn) (struct kvm *kvm, gfn_t gfn); extern void (*kvm_mips_release_pfn_clean) (pfn_t pfn); @@ -767,5 +762,16 @@ extern int kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc, extern void kvm_mips_dump_stats(struct kvm_vcpu *vcpu); extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm); +static inline void kvm_arch_hardware_disable(void) {} +static inline void kvm_arch_hardware_unsetup(void) {} +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_free_memslot(struct kvm *kvm, + struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} +static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} +static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) {} +static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} #endif /* __MIPS_KVM_HOST_H__ */ diff --git a/arch/mips/include/asm/mach-ip28/spaces.h b/arch/mips/include/asm/mach-ip28/spaces.h index 5d6a76434d00..c4a912733b65 100644 --- a/arch/mips/include/asm/mach-ip28/spaces.h +++ b/arch/mips/include/asm/mach-ip28/spaces.h @@ -11,15 +11,8 @@ #ifndef _ASM_MACH_IP28_SPACES_H #define _ASM_MACH_IP28_SPACES_H -#define CAC_BASE _AC(0xa800000000000000, UL) - -#define HIGHMEM_START (~0UL) - #define PHYS_OFFSET _AC(0x20000000, UL) -#define UNCAC_BASE _AC(0xc0000000, UL) /* 0xa0000000 + PHYS_OFFSET */ -#define IO_BASE UNCAC_BASE - #include <asm/mach-generic/spaces.h> #endif /* _ASM_MACH_IP28_SPACES_H */ diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h index 5699ec3a71af..3be81803595d 100644 --- a/arch/mips/include/asm/page.h +++ b/arch/mips/include/asm/page.h @@ -37,7 +37,7 @@ /* * This is used for calculating the real page sizes - * for FTLB or VTLB + FTLB confugrations. + * for FTLB or VTLB + FTLB configurations. */ static inline unsigned int page_size_ftlb(unsigned int mmuextdef) { @@ -223,7 +223,8 @@ static inline int pfn_valid(unsigned long pfn) #endif -#define virt_to_page(kaddr) pfn_to_page(PFN_DOWN(virt_to_phys(kaddr))) +#define virt_to_page(kaddr) pfn_to_page(PFN_DOWN(virt_to_phys((void *) \ + (kaddr)))) extern int __virt_addr_valid(const volatile void *kaddr); #define virt_addr_valid(kaddr) \ diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h index 1e0f20a9cdda..eacf865d21c2 100644 --- a/arch/mips/include/asm/smp.h +++ b/arch/mips/include/asm/smp.h @@ -37,11 +37,6 @@ extern int __cpu_logical_map[NR_CPUS]; #define NO_PROC_ID (-1) -#define topology_physical_package_id(cpu) (cpu_data[cpu].package) -#define topology_core_id(cpu) (cpu_data[cpu].core) -#define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) -#define topology_thread_cpumask(cpu) (&cpu_sibling_map[cpu]) - #define SMP_RESCHEDULE_YOURSELF 0x1 /* XXX braindead */ #define SMP_CALL_FUNCTION 0x2 /* Octeon - Tell another core to flush its icache */ diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h index 495c1041a2cc..b928b6f898cd 100644 --- a/arch/mips/include/asm/switch_to.h +++ b/arch/mips/include/asm/switch_to.h @@ -92,7 +92,7 @@ do { \ KSTK_STATUS(prev) &= ~ST0_CU2; \ __c0_stat = read_c0_status(); \ write_c0_status(__c0_stat | ST0_CU2); \ - cop2_save(&prev->thread.cp2); \ + cop2_save(prev); \ write_c0_status(__c0_stat & ~ST0_CU2); \ } \ __clear_software_ll_bit(); \ @@ -111,7 +111,7 @@ do { \ (KSTK_STATUS(current) & ST0_CU2)) { \ __c0_stat = read_c0_status(); \ write_c0_status(__c0_stat | ST0_CU2); \ - cop2_restore(¤t->thread.cp2); \ + cop2_restore(current); \ write_c0_status(__c0_stat & ~ST0_CU2); \ } \ if (cpu_has_dsp) \ diff --git a/arch/mips/include/asm/topology.h b/arch/mips/include/asm/topology.h index 20ea4859c822..3e307ec2afba 100644 --- a/arch/mips/include/asm/topology.h +++ b/arch/mips/include/asm/topology.h @@ -9,5 +9,13 @@ #define __ASM_TOPOLOGY_H #include <topology.h> +#include <linux/smp.h> + +#ifdef CONFIG_SMP +#define topology_physical_package_id(cpu) (cpu_data[cpu].package) +#define topology_core_id(cpu) (cpu_data[cpu].core) +#define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) +#define topology_thread_cpumask(cpu) (&cpu_sibling_map[cpu]) +#endif #endif /* __ASM_TOPOLOGY_H */ diff --git a/arch/mips/include/uapi/asm/unistd.h b/arch/mips/include/uapi/asm/unistd.h index 9bc13eaf9d67..fdb4923777d1 100644 --- a/arch/mips/include/uapi/asm/unistd.h +++ b/arch/mips/include/uapi/asm/unistd.h @@ -373,16 +373,18 @@ #define __NR_sched_getattr (__NR_Linux + 350) #define __NR_renameat2 (__NR_Linux + 351) #define __NR_seccomp (__NR_Linux + 352) +#define __NR_getrandom (__NR_Linux + 353) +#define __NR_memfd_create (__NR_Linux + 354) /* * Offset of the last Linux o32 flavoured syscall */ -#define __NR_Linux_syscalls 352 +#define __NR_Linux_syscalls 354 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */ #define __NR_O32_Linux 4000 -#define __NR_O32_Linux_syscalls 352 +#define __NR_O32_Linux_syscalls 354 #if _MIPS_SIM == _MIPS_SIM_ABI64 @@ -703,16 +705,18 @@ #define __NR_sched_getattr (__NR_Linux + 310) #define __NR_renameat2 (__NR_Linux + 311) #define __NR_seccomp (__NR_Linux + 312) +#define __NR_getrandom (__NR_Linux + 313) +#define __NR_memfd_create (__NR_Linux + 314) /* * Offset of the last Linux 64-bit flavoured syscall */ -#define __NR_Linux_syscalls 312 +#define __NR_Linux_syscalls 314 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */ #define __NR_64_Linux 5000 -#define __NR_64_Linux_syscalls 312 +#define __NR_64_Linux_syscalls 314 #if _MIPS_SIM == _MIPS_SIM_NABI32 @@ -1037,15 +1041,17 @@ #define __NR_sched_getattr (__NR_Linux + 314) #define __NR_renameat2 (__NR_Linux + 315) #define __NR_seccomp (__NR_Linux + 316) +#define __NR_getrandom (__NR_Linux + 317) +#define __NR_memfd_create (__NR_Linux + 318) /* * Offset of the last N32 flavoured syscall */ -#define __NR_Linux_syscalls 316 +#define __NR_Linux_syscalls 318 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */ #define __NR_N32_Linux 6000 -#define __NR_N32_Linux_syscalls 316 +#define __NR_N32_Linux_syscalls 318 #endif /* _UAPI_ASM_UNISTD_H */ diff --git a/arch/mips/kernel/machine_kexec.c b/arch/mips/kernel/machine_kexec.c index 992e18474da5..50980bf3983e 100644 --- a/arch/mips/kernel/machine_kexec.c +++ b/arch/mips/kernel/machine_kexec.c @@ -71,8 +71,12 @@ machine_kexec(struct kimage *image) kexec_start_address = (unsigned long) phys_to_virt(image->start); - kexec_indirection_page = - (unsigned long) phys_to_virt(image->head & PAGE_MASK); + if (image->type == KEXEC_TYPE_DEFAULT) { + kexec_indirection_page = + (unsigned long) phys_to_virt(image->head & PAGE_MASK); + } else { + kexec_indirection_page = (unsigned long)&image->head; + } memcpy((void*)reboot_code_buffer, relocate_new_kernel, relocate_new_kernel_size); diff --git a/arch/mips/kernel/mcount.S b/arch/mips/kernel/mcount.S index 5d25462de8a6..2f7c734771f4 100644 --- a/arch/mips/kernel/mcount.S +++ b/arch/mips/kernel/mcount.S @@ -129,7 +129,11 @@ NESTED(_mcount, PT_SIZE, ra) nop #endif b ftrace_stub +#ifdef CONFIG_32BIT + addiu sp, sp, 8 +#else nop +#endif static_trace: MCOUNT_SAVE_REGS @@ -139,6 +143,9 @@ static_trace: move a1, AT /* arg2: parent's return address */ MCOUNT_RESTORE_REGS +#ifdef CONFIG_32BIT + addiu sp, sp, 8 +#endif .globl ftrace_stub ftrace_stub: RETURN_BACK @@ -183,6 +190,11 @@ NESTED(ftrace_graph_caller, PT_SIZE, ra) jal prepare_ftrace_return nop MCOUNT_RESTORE_REGS +#ifndef CONFIG_DYNAMIC_FTRACE +#ifdef CONFIG_32BIT + addiu sp, sp, 8 +#endif +#endif RETURN_BACK END(ftrace_graph_caller) diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index f93b4cbec739..744cd10ba599 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -577,3 +577,5 @@ EXPORT(sys_call_table) PTR sys_sched_getattr /* 4350 */ PTR sys_renameat2 PTR sys_seccomp + PTR sys_getrandom + PTR sys_memfd_create diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index 03ebd9979ad2..002b1bc09c38 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -432,4 +432,6 @@ EXPORT(sys_call_table) PTR sys_sched_getattr /* 5310 */ PTR sys_renameat2 PTR sys_seccomp + PTR sys_getrandom + PTR sys_memfd_create .size sys_call_table,.-sys_call_table diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index ebc9228e2e15..ca6cbbe9805b 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -425,4 +425,6 @@ EXPORT(sysn32_call_table) PTR sys_sched_getattr PTR sys_renameat2 /* 6315 */ PTR sys_seccomp + PTR sys_getrandom + PTR sys_memfd_create .size sysn32_call_table,.-sysn32_call_table diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 25bb8400156d..9e10d11fbb84 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -562,4 +562,6 @@ EXPORT(sys32_call_table) PTR sys_sched_getattr /* 4350 */ PTR sys_renameat2 PTR sys_seccomp + PTR sys_getrandom + PTR sys_memfd_create .size sys32_call_table,.-sys32_call_table diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index cd7114147ae7..e3b21e51ff7e 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -77,24 +77,16 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) return 1; } -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { return 0; } -void kvm_arch_hardware_disable(void *garbage) -{ -} - int kvm_arch_hardware_setup(void) { return 0; } -void kvm_arch_hardware_unsetup(void) -{ -} - void kvm_arch_check_processor_compat(void *rtn) { *(int *)rtn = 0; @@ -163,10 +155,6 @@ void kvm_mips_free_vcpus(struct kvm *kvm) mutex_unlock(&kvm->lock); } -void kvm_arch_sync_events(struct kvm *kvm) -{ -} - static void kvm_mips_uninit_tlbs(void *arg) { /* Restore wired count */ @@ -194,21 +182,12 @@ long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, return -ENOIOCTLCMD; } -void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) -{ -} - int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { return 0; } -void kvm_arch_memslots_updated(struct kvm *kvm) -{ -} - int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem, @@ -254,19 +233,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, } } -void kvm_arch_flush_shadow_all(struct kvm *kvm) -{ -} - -void kvm_arch_flush_shadow_memslot(struct kvm *kvm, - struct kvm_memory_slot *slot) -{ -} - -void kvm_arch_flush_shadow(struct kvm *kvm) -{ -} - struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { int err, size, offset; @@ -998,10 +964,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) return 0; } -void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) -{ -} - int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr) { diff --git a/arch/mips/lantiq/falcon/sysctrl.c b/arch/mips/lantiq/falcon/sysctrl.c index 8f1866d8124d..468ffa043607 100644 --- a/arch/mips/lantiq/falcon/sysctrl.c +++ b/arch/mips/lantiq/falcon/sysctrl.c @@ -221,7 +221,7 @@ void __init ltq_soc_init(void) (request_mem_region(res_sys[2].start, resource_size(&res_sys[2]), res_sys[2].name) < 0)) - pr_err("Failed to request core reources"); + pr_err("Failed to request core resources"); status_membase = ioremap_nocache(res_status.start, resource_size(&res_status)); diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c index 51804b10a036..2b15491de494 100644 --- a/arch/mips/lantiq/xway/sysctrl.c +++ b/arch/mips/lantiq/xway/sysctrl.c @@ -318,7 +318,7 @@ void __init ltq_soc_init(void) res_cgu.name) < 0) || (request_mem_region(res_ebu.start, resource_size(&res_ebu), res_ebu.name) < 0)) - pr_err("Failed to request core reources"); + pr_err("Failed to request core resources"); pmu_membase = ioremap_nocache(res_pmu.start, resource_size(&res_pmu)); ltq_cgu_membase = ioremap_nocache(res_cgu.start, diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index bf0fc6b16ad9..7a4727795a70 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -650,9 +650,9 @@ static inline int cop1_64bit(struct pt_regs *xcp) #define SIFROMREG(si, x) \ do { \ if (cop1_64bit(xcp)) \ - (si) = get_fpr32(&ctx->fpr[x], 0); \ + (si) = (int)get_fpr32(&ctx->fpr[x], 0); \ else \ - (si) = get_fpr32(&ctx->fpr[(x) & ~1], (x) & 1); \ + (si) = (int)get_fpr32(&ctx->fpr[(x) & ~1], (x) & 1); \ } while (0) #define SITOREG(si, x) \ @@ -667,7 +667,7 @@ do { \ } \ } while (0) -#define SIFROMHREG(si, x) ((si) = get_fpr32(&ctx->fpr[x], 1)) +#define SIFROMHREG(si, x) ((si) = (int)get_fpr32(&ctx->fpr[x], 1)) #define SITOHREG(si, x) \ do { \ diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 571aab064936..f42e35e42790 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -53,6 +53,7 @@ */ unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL_GPL(empty_zero_page); +EXPORT_SYMBOL(zero_page_mask); /* * Not static inline because used by IP27 special magic initialization code diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index 05a56619ece2..9f7ecbda250c 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -793,6 +793,7 @@ static int build_body(struct jit_ctx *ctx) const struct sock_filter *inst; unsigned int i, off, load_order, condt; u32 k, b_off __maybe_unused; + int tmp; for (i = 0; i < prog->len; i++) { u16 code; @@ -1332,9 +1333,9 @@ jmp_cmp: case BPF_ANC | SKF_AD_PKTTYPE: ctx->flags |= SEEN_SKB; - off = pkt_type_offset(); + tmp = off = pkt_type_offset(); - if (off < 0) + if (tmp < 0) return -1; emit_load_byte(r_tmp, r_skb, off, ctx); /* Keep only the last 3 bits */ diff --git a/arch/mips/pci/pci-lantiq.c b/arch/mips/pci/pci-lantiq.c index cb1ef9984069..37fe8e7887e2 100644 --- a/arch/mips/pci/pci-lantiq.c +++ b/arch/mips/pci/pci-lantiq.c @@ -218,7 +218,7 @@ static int ltq_pci_probe(struct platform_device *pdev) res_cfg = platform_get_resource(pdev, IORESOURCE_MEM, 0); res_bridge = platform_get_resource(pdev, IORESOURCE_MEM, 1); if (!res_cfg || !res_bridge) { - dev_err(&pdev->dev, "missing memory reources\n"); + dev_err(&pdev->dev, "missing memory resources\n"); return -EINVAL; } diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index a648de1b1096..4434b54e1d87 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig @@ -181,7 +181,7 @@ endmenu config SMP bool "Symmetric multi-processing support" default y - depends on MN10300_PROC_MN2WS0038 || MN10300_PROC_MN2WS0050 + depends on MN10300_PROC_MN2WS0050 ---help--- This enables support for systems with more than one CPU. If you have a system with only one CPU, say N. If you have a system with more diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 6e75e2030927..1554a6f2a5bb 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -321,6 +321,22 @@ source "fs/Kconfig" source "arch/parisc/Kconfig.debug" +config SECCOMP + def_bool y + prompt "Enable seccomp to safely compute untrusted bytecode" + ---help--- + This kernel feature is useful for number crunching applications + that may need to compute untrusted bytecode during their + execution. By using pipes or other transports made available to + the process as file descriptors supporting the read/write + syscalls, it's possible to isolate those applications in + their own address space using seccomp. Once seccomp is + enabled via prctl(PR_SET_SECCOMP), it cannot be disabled + and the task is only allowed to execute a few safe syscalls + defined by each seccomp mode. + + If unsure, say Y. Only embedded should say N here. + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 7187664034c3..5db8882f732c 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -48,7 +48,12 @@ cflags-y := -pipe # These flags should be implied by an hppa-linux configuration, but they # are not in gcc 3.2. -cflags-y += -mno-space-regs -mfast-indirect-calls +cflags-y += -mno-space-regs + +# -mfast-indirect-calls is only relevant for 32-bit kernels. +ifndef CONFIG_64BIT +cflags-y += -mfast-indirect-calls +endif # Currently we save and restore fpregs on all kernel entry/interruption paths. # If that gets optimized, we might need to disable the use of fpregs in the diff --git a/arch/parisc/configs/a500_defconfig b/arch/parisc/configs/a500_defconfig index 90025322b75e..0490199d7b15 100644 --- a/arch/parisc/configs/a500_defconfig +++ b/arch/parisc/configs/a500_defconfig @@ -31,6 +31,7 @@ CONFIG_PD6729=m CONFIG_I82092=m # CONFIG_SUPERIO is not set # CONFIG_CHASSIS_LCD_LED is not set +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_XFRM_USER=m diff --git a/arch/parisc/configs/c8000_defconfig b/arch/parisc/configs/c8000_defconfig index 8249ac9d9cfc..269c23d23fcb 100644 --- a/arch/parisc/configs/c8000_defconfig +++ b/arch/parisc/configs/c8000_defconfig @@ -33,6 +33,7 @@ CONFIG_PCI_LBA=y # CONFIG_PDC_CHASSIS_WARN is not set # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_BINFMT_MISC=m +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_XFRM_USER=m diff --git a/arch/parisc/hpux/sys_hpux.c b/arch/parisc/hpux/sys_hpux.c index d9dc6cd3b7d2..e5c4da035810 100644 --- a/arch/parisc/hpux/sys_hpux.c +++ b/arch/parisc/hpux/sys_hpux.c @@ -456,7 +456,7 @@ int hpux_sysfs(int opcode, unsigned long arg1, unsigned long arg2) } /* String could be altered by userspace after strlen_user() */ - fsname[len] = '\0'; + fsname[len - 1] = '\0'; printk(KERN_DEBUG "that is '%s' as (char *)\n", fsname); if ( !strcmp(fsname, "hfs") ) { diff --git a/arch/parisc/include/asm/seccomp.h b/arch/parisc/include/asm/seccomp.h new file mode 100644 index 000000000000..015f7887aa29 --- /dev/null +++ b/arch/parisc/include/asm/seccomp.h @@ -0,0 +1,16 @@ +#ifndef _ASM_PARISC_SECCOMP_H +#define _ASM_PARISC_SECCOMP_H + +#include <linux/unistd.h> + +#define __NR_seccomp_read __NR_read +#define __NR_seccomp_write __NR_write +#define __NR_seccomp_exit __NR_exit +#define __NR_seccomp_sigreturn __NR_rt_sigreturn + +#define __NR_seccomp_read_32 __NR_read +#define __NR_seccomp_write_32 __NR_write +#define __NR_seccomp_exit_32 __NR_exit +#define __NR_seccomp_sigreturn_32 __NR_rt_sigreturn + +#endif /* _ASM_PARISC_SECCOMP_H */ diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h index 4b9b10ce1f9d..a84611835549 100644 --- a/arch/parisc/include/asm/thread_info.h +++ b/arch/parisc/include/asm/thread_info.h @@ -60,6 +60,7 @@ struct thread_info { #define TIF_NOTIFY_RESUME 8 /* callback before returning to user */ #define TIF_SINGLESTEP 9 /* single stepping? */ #define TIF_BLOCKSTEP 10 /* branch stepping? */ +#define TIF_SECCOMP 11 /* secure computing */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) @@ -70,11 +71,13 @@ struct thread_info { #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP) +#define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | \ _TIF_NEED_RESCHED) #define _TIF_SYSCALL_TRACE_MASK (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP | \ - _TIF_BLOCKSTEP | _TIF_SYSCALL_AUDIT) + _TIF_BLOCKSTEP | _TIF_SYSCALL_AUDIT | \ + _TIF_SECCOMP) #ifdef CONFIG_64BIT # ifdef CONFIG_COMPAT diff --git a/arch/parisc/include/uapi/asm/unistd.h b/arch/parisc/include/uapi/asm/unistd.h index 47e0e21d2272..8667f18be238 100644 --- a/arch/parisc/include/uapi/asm/unistd.h +++ b/arch/parisc/include/uapi/asm/unistd.h @@ -830,8 +830,11 @@ #define __NR_sched_getattr (__NR_Linux + 335) #define __NR_utimes (__NR_Linux + 336) #define __NR_renameat2 (__NR_Linux + 337) +#define __NR_seccomp (__NR_Linux + 338) +#define __NR_getrandom (__NR_Linux + 339) +#define __NR_memfd_create (__NR_Linux + 340) -#define __NR_Linux_syscalls (__NR_renameat2 + 1) +#define __NR_Linux_syscalls (__NR_memfd_create + 1) #define __IGNORE_select /* newselect */ diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c index e842ee233db4..92438c21d453 100644 --- a/arch/parisc/kernel/ptrace.c +++ b/arch/parisc/kernel/ptrace.c @@ -17,6 +17,7 @@ #include <linux/user.h> #include <linux/personality.h> #include <linux/security.h> +#include <linux/seccomp.h> #include <linux/compat.h> #include <linux/signal.h> #include <linux/audit.h> @@ -270,6 +271,9 @@ long do_syscall_trace_enter(struct pt_regs *regs) { long ret = 0; + /* Do the secure computing check first. */ + secure_computing_strict(regs->gr[20]); + if (test_thread_flag(TIF_SYSCALL_TRACE) && tracehook_report_syscall_entry(regs)) ret = -1L; diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 838786011037..7ef22e3387e0 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -74,7 +74,7 @@ ENTRY(linux_gateway_page) /* ADDRESS 0xb0 to 0xb8, lws uses two insns for entry */ /* Light-weight-syscall entry must always be located at 0xb0 */ /* WARNING: Keep this number updated with table size changes */ -#define __NR_lws_entries (2) +#define __NR_lws_entries (3) lws_entry: gate lws_start, %r0 /* increase privilege */ @@ -502,7 +502,7 @@ lws_exit: /*************************************************** - Implementing CAS as an atomic operation: + Implementing 32bit CAS as an atomic operation: %r26 - Address to examine %r25 - Old value to check (old) @@ -659,6 +659,230 @@ cas_action: ASM_EXCEPTIONTABLE_ENTRY(2b-linux_gateway_page, 3b-linux_gateway_page) + /*************************************************** + New CAS implementation which uses pointers and variable size + information. The value pointed by old and new MUST NOT change + while performing CAS. The lock only protect the value at %r26. + + %r26 - Address to examine + %r25 - Pointer to the value to check (old) + %r24 - Pointer to the value to set (new) + %r23 - Size of the variable (0/1/2/3 for 8/16/32/64 bit) + %r28 - Return non-zero on failure + %r21 - Kernel error code + + %r21 has the following meanings: + + EAGAIN - CAS is busy, ldcw failed, try again. + EFAULT - Read or write failed. + + Scratch: r20, r22, r28, r29, r1, fr4 (32bit for 64bit CAS only) + + ****************************************************/ + + /* ELF32 Process entry path */ +lws_compare_and_swap_2: +#ifdef CONFIG_64BIT + /* Clip the input registers */ + depdi 0, 31, 32, %r26 + depdi 0, 31, 32, %r25 + depdi 0, 31, 32, %r24 + depdi 0, 31, 32, %r23 +#endif + + /* Check the validity of the size pointer */ + subi,>>= 4, %r23, %r0 + b,n lws_exit_nosys + + /* Jump to the functions which will load the old and new values into + registers depending on the their size */ + shlw %r23, 2, %r29 + blr %r29, %r0 + nop + + /* 8bit load */ +4: ldb 0(%sr3,%r25), %r25 + b cas2_lock_start +5: ldb 0(%sr3,%r24), %r24 + nop + nop + nop + nop + nop + + /* 16bit load */ +6: ldh 0(%sr3,%r25), %r25 + b cas2_lock_start +7: ldh 0(%sr3,%r24), %r24 + nop + nop + nop + nop + nop + + /* 32bit load */ +8: ldw 0(%sr3,%r25), %r25 + b cas2_lock_start +9: ldw 0(%sr3,%r24), %r24 + nop + nop + nop + nop + nop + + /* 64bit load */ +#ifdef CONFIG_64BIT +10: ldd 0(%sr3,%r25), %r25 +11: ldd 0(%sr3,%r24), %r24 +#else + /* Load new value into r22/r23 - high/low */ +10: ldw 0(%sr3,%r25), %r22 +11: ldw 4(%sr3,%r25), %r23 + /* Load new value into fr4 for atomic store later */ +12: flddx 0(%sr3,%r24), %fr4 +#endif + +cas2_lock_start: + /* Load start of lock table */ + ldil L%lws_lock_start, %r20 + ldo R%lws_lock_start(%r20), %r28 + + /* Extract four bits from r26 and hash lock (Bits 4-7) */ + extru %r26, 27, 4, %r20 + + /* Find lock to use, the hash is either one of 0 to + 15, multiplied by 16 (keep it 16-byte aligned) + and add to the lock table offset. */ + shlw %r20, 4, %r20 + add %r20, %r28, %r20 + + rsm PSW_SM_I, %r0 /* Disable interrupts */ + /* COW breaks can cause contention on UP systems */ + LDCW 0(%sr2,%r20), %r28 /* Try to acquire the lock */ + cmpb,<>,n %r0, %r28, cas2_action /* Did we get it? */ +cas2_wouldblock: + ldo 2(%r0), %r28 /* 2nd case */ + ssm PSW_SM_I, %r0 + b lws_exit /* Contended... */ + ldo -EAGAIN(%r0), %r21 /* Spin in userspace */ + + /* + prev = *addr; + if ( prev == old ) + *addr = new; + return prev; + */ + + /* NOTES: + This all works becuse intr_do_signal + and schedule both check the return iasq + and see that we are on the kernel page + so this process is never scheduled off + or is ever sent any signal of any sort, + thus it is wholly atomic from usrspaces + perspective + */ +cas2_action: + /* Jump to the correct function */ + blr %r29, %r0 + /* Set %r28 as non-zero for now */ + ldo 1(%r0),%r28 + + /* 8bit CAS */ +13: ldb,ma 0(%sr3,%r26), %r29 + sub,= %r29, %r25, %r0 + b,n cas2_end +14: stb,ma %r24, 0(%sr3,%r26) + b cas2_end + copy %r0, %r28 + nop + nop + + /* 16bit CAS */ +15: ldh,ma 0(%sr3,%r26), %r29 + sub,= %r29, %r25, %r0 + b,n cas2_end +16: sth,ma %r24, 0(%sr3,%r26) + b cas2_end + copy %r0, %r28 + nop + nop + + /* 32bit CAS */ +17: ldw,ma 0(%sr3,%r26), %r29 + sub,= %r29, %r25, %r0 + b,n cas2_end +18: stw,ma %r24, 0(%sr3,%r26) + b cas2_end + copy %r0, %r28 + nop + nop + + /* 64bit CAS */ +#ifdef CONFIG_64BIT +19: ldd,ma 0(%sr3,%r26), %r29 + sub,= %r29, %r25, %r0 + b,n cas2_end +20: std,ma %r24, 0(%sr3,%r26) + copy %r0, %r28 +#else + /* Compare first word */ +19: ldw,ma 0(%sr3,%r26), %r29 + sub,= %r29, %r22, %r0 + b,n cas2_end + /* Compare second word */ +20: ldw,ma 4(%sr3,%r26), %r29 + sub,= %r29, %r23, %r0 + b,n cas2_end + /* Perform the store */ +21: fstdx %fr4, 0(%sr3,%r26) + copy %r0, %r28 +#endif + +cas2_end: + /* Free lock */ + stw,ma %r20, 0(%sr2,%r20) + /* Enable interrupts */ + ssm PSW_SM_I, %r0 + /* Return to userspace, set no error */ + b lws_exit + copy %r0, %r21 + +22: + /* Error occurred on load or store */ + /* Free lock */ + stw %r20, 0(%sr2,%r20) + ssm PSW_SM_I, %r0 + ldo 1(%r0),%r28 + b lws_exit + ldo -EFAULT(%r0),%r21 /* set errno */ + nop + nop + nop + + /* Exception table entries, for the load and store, return EFAULT. + Each of the entries must be relocated. */ + ASM_EXCEPTIONTABLE_ENTRY(4b-linux_gateway_page, 22b-linux_gateway_page) + ASM_EXCEPTIONTABLE_ENTRY(5b-linux_gateway_page, 22b-linux_gateway_page) + ASM_EXCEPTIONTABLE_ENTRY(6b-linux_gateway_page, 22b-linux_gateway_page) + ASM_EXCEPTIONTABLE_ENTRY(7b-linux_gateway_page, 22b-linux_gateway_page) + ASM_EXCEPTIONTABLE_ENTRY(8b-linux_gateway_page, 22b-linux_gateway_page) + ASM_EXCEPTIONTABLE_ENTRY(9b-linux_gateway_page, 22b-linux_gateway_page) + ASM_EXCEPTIONTABLE_ENTRY(10b-linux_gateway_page, 22b-linux_gateway_page) + ASM_EXCEPTIONTABLE_ENTRY(11b-linux_gateway_page, 22b-linux_gateway_page) + ASM_EXCEPTIONTABLE_ENTRY(13b-linux_gateway_page, 22b-linux_gateway_page) + ASM_EXCEPTIONTABLE_ENTRY(14b-linux_gateway_page, 22b-linux_gateway_page) + ASM_EXCEPTIONTABLE_ENTRY(15b-linux_gateway_page, 22b-linux_gateway_page) + ASM_EXCEPTIONTABLE_ENTRY(16b-linux_gateway_page, 22b-linux_gateway_page) + ASM_EXCEPTIONTABLE_ENTRY(17b-linux_gateway_page, 22b-linux_gateway_page) + ASM_EXCEPTIONTABLE_ENTRY(18b-linux_gateway_page, 22b-linux_gateway_page) + ASM_EXCEPTIONTABLE_ENTRY(19b-linux_gateway_page, 22b-linux_gateway_page) + ASM_EXCEPTIONTABLE_ENTRY(20b-linux_gateway_page, 22b-linux_gateway_page) +#ifndef CONFIG_64BIT + ASM_EXCEPTIONTABLE_ENTRY(12b-linux_gateway_page, 22b-linux_gateway_page) + ASM_EXCEPTIONTABLE_ENTRY(21b-linux_gateway_page, 22b-linux_gateway_page) +#endif + /* Make sure nothing else is placed on this page */ .align PAGE_SIZE END(linux_gateway_page) @@ -675,8 +899,9 @@ ENTRY(end_linux_gateway_page) /* Light-weight-syscall table */ /* Start of lws table. */ ENTRY(lws_table) - LWS_ENTRY(compare_and_swap32) /* 0 - ELF32 Atomic compare and swap */ - LWS_ENTRY(compare_and_swap64) /* 1 - ELF64 Atomic compare and swap */ + LWS_ENTRY(compare_and_swap32) /* 0 - ELF32 Atomic 32bit CAS */ + LWS_ENTRY(compare_and_swap64) /* 1 - ELF64 Atomic 32bit CAS */ + LWS_ENTRY(compare_and_swap_2) /* 2 - ELF32 Atomic 64bit CAS */ END(lws_table) /* End of lws table */ diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 84c5d3a58fa1..b563d9c8268b 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -433,6 +433,9 @@ ENTRY_SAME(sched_getattr) /* 335 */ ENTRY_COMP(utimes) ENTRY_SAME(renameat2) + ENTRY_SAME(seccomp) + ENTRY_SAME(getrandom) + ENTRY_SAME(memfd_create) /* 340 */ /* Nothing yet */ diff --git a/arch/powerpc/boot/simpleboot.c b/arch/powerpc/boot/simpleboot.c index 21cd48074ec8..9f8c678f0d9a 100644 --- a/arch/powerpc/boot/simpleboot.c +++ b/arch/powerpc/boot/simpleboot.c @@ -61,7 +61,7 @@ void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, if (*reg++ != 0) fatal("Memory range is not based at address 0\n"); - /* get the memsize and trucate it to under 4G on 32 bit machines */ + /* get the memsize and truncate it to under 4G on 32 bit machines */ memsize64 = 0; for (i = 0; i < *ns; i++) memsize64 = (memsize64 << 32) | *reg++; diff --git a/arch/powerpc/configs/c2k_defconfig b/arch/powerpc/configs/c2k_defconfig index 5e2aa43562b5..59734916986a 100644 --- a/arch/powerpc/configs/c2k_defconfig +++ b/arch/powerpc/configs/c2k_defconfig @@ -29,6 +29,7 @@ CONFIG_PM=y CONFIG_PCI_MSI=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_SHPC=m +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_XFRM_USER=y diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig index 4bee1a6d41d0..45fd06cdc3e8 100644 --- a/arch/powerpc/configs/cell_defconfig +++ b/arch/powerpc/configs/cell_defconfig @@ -5,6 +5,7 @@ CONFIG_SMP=y CONFIG_NR_CPUS=4 CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y +CONFIG_FHANDLE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=15 diff --git a/arch/powerpc/configs/celleb_defconfig b/arch/powerpc/configs/celleb_defconfig index 6d7b22f41b50..77d7bf3ca2ac 100644 --- a/arch/powerpc/configs/celleb_defconfig +++ b/arch/powerpc/configs/celleb_defconfig @@ -5,6 +5,7 @@ CONFIG_SMP=y CONFIG_NR_CPUS=4 CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y +CONFIG_FHANDLE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=15 diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig index 4b07bade1ba9..269d6e47c67d 100644 --- a/arch/powerpc/configs/corenet64_smp_defconfig +++ b/arch/powerpc/configs/corenet64_smp_defconfig @@ -4,6 +4,7 @@ CONFIG_ALTIVEC=y CONFIG_SMP=y CONFIG_NR_CPUS=24 CONFIG_SYSVIPC=y +CONFIG_FHANDLE=y CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index 3c72fa615bd9..7594c5ac6481 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -5,6 +5,7 @@ CONFIG_NR_CPUS=4 CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_BLK_DEV_INITRD=y diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig index 95e545d9f25c..c8b6a9ddb21b 100644 --- a/arch/powerpc/configs/maple_defconfig +++ b/arch/powerpc/configs/maple_defconfig @@ -4,6 +4,7 @@ CONFIG_NR_CPUS=4 CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y # CONFIG_COMPAT_BRK is not set diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index cec044a3ff69..e5e7838af008 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig @@ -3,6 +3,7 @@ CONFIG_ALTIVEC=y CONFIG_SMP=y CONFIG_NR_CPUS=2 CONFIG_SYSVIPC=y +CONFIG_FHANDLE=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_BLK_DEV_INITRD=y diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index 553e66278010..0351b5ffdfef 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -31,6 +31,7 @@ CONFIG_HIBERNATION=y CONFIG_APM_EMULATION=y CONFIG_PCCARD=m CONFIG_YENTA=m +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_XFRM_USER=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index f26b267eb71f..36518870e6b2 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -4,6 +4,7 @@ CONFIG_VSX=y CONFIG_SMP=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y @@ -57,6 +58,7 @@ CONFIG_ELECTRA_CF=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_RPA=m CONFIG_HOTPLUG_PCI_RPA_DLPAR=m +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_XFRM_USER=m diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig index 438e813dc9cb..c3a3269b0865 100644 --- a/arch/powerpc/configs/ppc64e_defconfig +++ b/arch/powerpc/configs/ppc64e_defconfig @@ -3,6 +3,7 @@ CONFIG_PPC_BOOK3E_64=y CONFIG_SMP=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_TASKSTATS=y @@ -32,6 +33,7 @@ CONFIG_SPARSEMEM_MANUAL=y CONFIG_PCI_MSI=y CONFIG_PCCARD=y CONFIG_HOTPLUG_PCI=y +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_XFRM_USER=m diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index fdee37fab81c..2e637c881d2b 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -5,6 +5,7 @@ CONFIG_SMP=y CONFIG_NR_CPUS=2 CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_BLK_DEV_INITRD=y CONFIG_RD_LZMA=y diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index a905063281cc..dd2a9cab4b50 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -5,6 +5,7 @@ CONFIG_SMP=y CONFIG_NR_CPUS=2048 CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y CONFIG_AUDIT=y CONFIG_AUDITSYSCALL=y CONFIG_IRQ_DOMAIN_DEBUG=y @@ -52,6 +53,7 @@ CONFIG_SCHED_SMT=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_RPA=m CONFIG_HOTPLUG_PCI_RPA_DLPAR=m +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_XFRM_USER=m diff --git a/arch/powerpc/configs/pseries_le_defconfig b/arch/powerpc/configs/pseries_le_defconfig index 58e3dbf43ca4..63392f4b29a4 100644 --- a/arch/powerpc/configs/pseries_le_defconfig +++ b/arch/powerpc/configs/pseries_le_defconfig @@ -6,6 +6,7 @@ CONFIG_NR_CPUS=2048 CONFIG_CPU_LITTLE_ENDIAN=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y CONFIG_AUDIT=y CONFIG_AUDITSYSCALL=y CONFIG_IRQ_DOMAIN_DEBUG=y @@ -54,6 +55,7 @@ CONFIG_SCHED_SMT=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_RPA=m CONFIG_HOTPLUG_PCI_RPA_DLPAR=m +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_XFRM_USER=m diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 465dfcb82c92..5bca220bbb60 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -53,17 +53,17 @@ #define BOOKE_INTERRUPT_DEBUG 15 /* E500 */ -#define BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL 32 -#define BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST 33 -/* - * TODO: Unify 32-bit and 64-bit kernel exception handlers to use same defines - */ -#define BOOKE_INTERRUPT_SPE_UNAVAIL BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL -#define BOOKE_INTERRUPT_SPE_FP_DATA BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST -#define BOOKE_INTERRUPT_ALTIVEC_UNAVAIL BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL -#define BOOKE_INTERRUPT_ALTIVEC_ASSIST \ - BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST +#ifdef CONFIG_SPE_POSSIBLE +#define BOOKE_INTERRUPT_SPE_UNAVAIL 32 +#define BOOKE_INTERRUPT_SPE_FP_DATA 33 #define BOOKE_INTERRUPT_SPE_FP_ROUND 34 +#endif + +#ifdef CONFIG_PPC_E500MC +#define BOOKE_INTERRUPT_ALTIVEC_UNAVAIL 32 +#define BOOKE_INTERRUPT_ALTIVEC_ASSIST 33 +#endif + #define BOOKE_INTERRUPT_PERFORMANCE_MONITOR 35 #define BOOKE_INTERRUPT_DOORBELL 36 #define BOOKE_INTERRUPT_DOORBELL_CRITICAL 37 diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index f7aa5cc395c4..3286f0d6a86c 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h @@ -23,15 +23,16 @@ #include <linux/types.h> #include <linux/kvm_host.h> -/* LPIDs we support with this build -- runtime limit may be lower */ +/* + * Number of available lpids. Only the low-order 6 bits of LPID rgister are + * implemented on e500mc+ cores. + */ #define KVMPPC_NR_LPIDS 64 #define KVMPPC_INST_EHPRIV 0x7c00021c #define EHPRIV_OC_SHIFT 11 /* "ehpriv 1" : ehpriv with OC = 1 is used for debug emulation */ #define EHPRIV_OC_DEBUG 1 -#define KVMPPC_INST_EHPRIV_DEBUG (KVMPPC_INST_EHPRIV | \ - (EHPRIV_OC_DEBUG << EHPRIV_OC_SHIFT)) static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) { diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 98d9dd50d063..047855619cc4 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -53,14 +53,18 @@ #define KVM_ARCH_WANT_MMU_NOTIFIER -struct kvm; extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); extern int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); -extern int kvm_age_hva(struct kvm *kvm, unsigned long hva); +extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); +static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, + unsigned long address) +{ +} + #define HPTEG_CACHE_NUM (1 << 15) #define HPTEG_HASH_BITS_PTE 13 #define HPTEG_HASH_BITS_PTE_LONG 12 @@ -76,10 +80,6 @@ extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); /* Physical Address Mask - allowed range of real mode RAM access */ #define KVM_PAM 0x0fffffffffffffffULL -struct kvm; -struct kvm_run; -struct kvm_vcpu; - struct lppaca; struct slb_shadow; struct dtl_entry; @@ -144,6 +144,7 @@ enum kvm_exit_types { EMULATED_TLBWE_EXITS, EMULATED_RFI_EXITS, EMULATED_RFCI_EXITS, + EMULATED_RFDI_EXITS, DEC_EXITS, EXT_INTR_EXITS, HALT_WAKEUP, @@ -589,8 +590,6 @@ struct kvm_vcpu_arch { u32 crit_save; /* guest debug registers*/ struct debug_reg dbg_reg; - /* hardware visible debug registers when in guest state */ - struct debug_reg shadow_dbg_reg; #endif gpa_t paddr_accessed; gva_t vaddr_accessed; @@ -612,7 +611,6 @@ struct kvm_vcpu_arch { u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ struct hrtimer dec_timer; - struct tasklet_struct tasklet; u64 dec_jiffies; u64 dec_expires; unsigned long pending_exceptions; @@ -687,4 +685,12 @@ struct kvm_vcpu_arch { #define __KVM_HAVE_ARCH_WQP #define __KVM_HAVE_CREATE_DEVICE +static inline void kvm_arch_hardware_disable(void) {} +static inline void kvm_arch_hardware_unsetup(void) {} +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} +static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} +static inline void kvm_arch_exit(void) {} + #endif /* __POWERPC_KVM_HOST_H__ */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index fb86a2299d8a..a6dcdb6d13c1 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -38,6 +38,12 @@ #include <asm/paca.h> #endif +/* + * KVMPPC_INST_SW_BREAKPOINT is debug Instruction + * for supporting software breakpoint. + */ +#define KVMPPC_INST_SW_BREAKPOINT 0x00dddd00 + enum emulation_result { EMULATE_DONE, /* no further processing */ EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ @@ -89,7 +95,7 @@ extern int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu); extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); -extern void kvmppc_decrementer_func(unsigned long data); +extern void kvmppc_decrementer_func(struct kvm_vcpu *vcpu); extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu); extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu); extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu); @@ -206,6 +212,9 @@ extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq); extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq); +void kvmppc_core_dequeue_debug(struct kvm_vcpu *vcpu); +void kvmppc_core_queue_debug(struct kvm_vcpu *vcpu); + union kvmppc_one_reg { u32 wval; u64 dval; @@ -243,7 +252,7 @@ struct kvmppc_ops { int (*unmap_hva)(struct kvm *kvm, unsigned long hva); int (*unmap_hva_range)(struct kvm *kvm, unsigned long start, unsigned long end); - int (*age_hva)(struct kvm *kvm, unsigned long hva); + int (*age_hva)(struct kvm *kvm, unsigned long start, unsigned long end); int (*test_age_hva)(struct kvm *kvm, unsigned long hva); void (*set_spte_hva)(struct kvm *kvm, unsigned long hva, pte_t pte); void (*mmu_destroy)(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 279b80f3bb29..c0c61fa9cd9e 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -47,6 +47,12 @@ STACK_FRAME_OVERHEAD + KERNEL_REDZONE_SIZE) #define STACK_FRAME_MARKER 12 +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define STACK_FRAME_MIN_SIZE 32 +#else +#define STACK_FRAME_MIN_SIZE STACK_FRAME_OVERHEAD +#endif + /* Size of dummy stack frame allocated when calling signal handler. */ #define __SIGNAL_FRAMESIZE 128 #define __SIGNAL_FRAMESIZE32 64 @@ -60,6 +66,7 @@ #define STACK_FRAME_REGS_MARKER ASM_CONST(0x72656773) #define STACK_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) #define STACK_FRAME_MARKER 2 +#define STACK_FRAME_MIN_SIZE STACK_FRAME_OVERHEAD /* Size of stack frame allocated when calling signal handler. */ #define __SIGNAL_FRAMESIZE 64 diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 1d653308a33c..16547efa2d5a 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -319,6 +319,8 @@ * DBSR bits which have conflicting definitions on true Book E versus IBM 40x. */ #ifdef CONFIG_BOOKE +#define DBSR_IDE 0x80000000 /* Imprecise Debug Event */ +#define DBSR_MRR 0x30000000 /* Most Recent Reset */ #define DBSR_IC 0x08000000 /* Instruction Completion */ #define DBSR_BT 0x04000000 /* Branch Taken */ #define DBSR_IRPT 0x02000000 /* Exception Debug Event */ diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 542bc0f0673f..7d8a60068805 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -362,3 +362,6 @@ SYSCALL(ni_syscall) /* sys_kcmp */ SYSCALL_SPU(sched_setattr) SYSCALL_SPU(sched_getattr) SYSCALL_SPU(renameat2) +SYSCALL_SPU(seccomp) +SYSCALL_SPU(getrandom) +SYSCALL_SPU(memfd_create) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 5ce5552ab9f5..4e9af3fd43e7 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,7 +12,7 @@ #include <uapi/asm/unistd.h> -#define __NR_syscalls 358 +#define __NR_syscalls 361 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index e0e49dbb145d..ab4d4732c492 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -476,6 +476,11 @@ struct kvm_get_htab_header { /* FP and vector status/control registers */ #define KVM_REG_PPC_FPSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x80) +/* + * VSCR register is documented as a 32-bit register in the ISA, but it can + * only be accesses via a vector register. Expose VSCR as a 32-bit register + * even though the kernel represents it as a 128-bit vector. + */ #define KVM_REG_PPC_VSCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x81) /* Virtual processor areas */ @@ -557,6 +562,7 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_DABRX (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb8) #define KVM_REG_PPC_WORT (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb9) #define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba) +#define KVM_REG_PPC_DBSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbb) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index 2d526f7b48da..0688fc06e183 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -380,5 +380,8 @@ #define __NR_sched_setattr 355 #define __NR_sched_getattr 356 #define __NR_renameat2 357 +#define __NR_seccomp 358 +#define __NR_getrandom 359 +#define __NR_memfd_create 360 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 4f1393d20079..dddba3e94260 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -91,6 +91,7 @@ _GLOBAL(setup_altivec_idle) blr +#ifdef CONFIG_PPC_E500MC _GLOBAL(__setup_cpu_e6500) mflr r6 #ifdef CONFIG_PPC64 @@ -107,14 +108,20 @@ _GLOBAL(__setup_cpu_e6500) bl __setup_cpu_e5500 mtlr r6 blr +#endif /* CONFIG_PPC_E500MC */ #ifdef CONFIG_PPC32 +#ifdef CONFIG_E200 _GLOBAL(__setup_cpu_e200) /* enable dedicated debug exception handling resources (Debug APU) */ mfspr r3,SPRN_HID0 ori r3,r3,HID0_DAPUEN@l mtspr SPRN_HID0,r3 b __setup_e200_ivors +#endif /* CONFIG_E200 */ + +#ifdef CONFIG_E500 +#ifndef CONFIG_PPC_E500MC _GLOBAL(__setup_cpu_e500v1) _GLOBAL(__setup_cpu_e500v2) mflr r4 @@ -129,6 +136,7 @@ _GLOBAL(__setup_cpu_e500v2) #endif mtlr r4 blr +#else /* CONFIG_PPC_E500MC */ _GLOBAL(__setup_cpu_e500mc) _GLOBAL(__setup_cpu_e5500) mflr r5 @@ -159,7 +167,9 @@ _GLOBAL(__setup_cpu_e5500) 2: mtlr r5 blr -#endif +#endif /* CONFIG_PPC_E500MC */ +#endif /* CONFIG_E500 */ +#endif /* CONFIG_PPC32 */ #ifdef CONFIG_PPC_BOOK3E_64 _GLOBAL(__restore_cpu_e6500) diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 9b6dcaaec1a3..808405906336 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1961,6 +1961,7 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif /* CONFIG_PPC32 */ #ifdef CONFIG_E500 #ifdef CONFIG_PPC32 +#ifndef CONFIG_PPC_E500MC { /* e500 */ .pvr_mask = 0xffff0000, .pvr_value = 0x80200000, @@ -2000,6 +2001,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_e500, .platform = "ppc8548", }, +#else { /* e500mc */ .pvr_mask = 0xffff0000, .pvr_value = 0x80230000, @@ -2018,7 +2020,9 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_e500mc, .platform = "ppce500mc", }, +#endif /* CONFIG_PPC_E500MC */ #endif /* CONFIG_PPC32 */ +#ifdef CONFIG_PPC_E500MC { /* e5500 */ .pvr_mask = 0xffff0000, .pvr_value = 0x80240000, @@ -2062,6 +2066,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_e500mc, .platform = "ppce6500", }, +#endif /* CONFIG_PPC_E500MC */ #ifdef CONFIG_PPC32 { /* default match */ .pvr_mask = 0x00000000, diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index bb9cac6c8051..3e68d1c69718 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -635,7 +635,7 @@ interrupt_end_book3e: /* Altivec Unavailable Interrupt */ START_EXCEPTION(altivec_unavailable); - NORMAL_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL, + NORMAL_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_ALTIVEC_UNAVAIL, PROLOG_ADDITION_NONE) /* we can probably do a shorter exception entry for that one... */ EXCEPTION_COMMON(0x200) @@ -658,7 +658,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) /* AltiVec Assist */ START_EXCEPTION(altivec_assist); NORMAL_EXCEPTION_PROLOG(0x220, - BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST, + BOOKE_INTERRUPT_ALTIVEC_ASSIST, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x220) INTS_DISABLE diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index b497188a94a1..fffd1f96bb1d 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -613,34 +613,36 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) mfspr r10, SPRN_SPRG_RSCRATCH0 b InstructionStorage +/* Define SPE handlers for e200 and e500v2 */ #ifdef CONFIG_SPE /* SPE Unavailable */ START_EXCEPTION(SPEUnavailable) - NORMAL_EXCEPTION_PROLOG(SPE_ALTIVEC_UNAVAIL) + NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL) beq 1f bl load_up_spe b fast_exception_return 1: addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_EE_LITE(0x2010, KernelSPE) -#else - EXCEPTION(0x2020, SPE_ALTIVEC_UNAVAIL, SPEUnavailable, \ +#elif defined(CONFIG_SPE_POSSIBLE) + EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \ unknown_exception, EXC_XFER_EE) -#endif /* CONFIG_SPE */ +#endif /* CONFIG_SPE_POSSIBLE */ /* SPE Floating Point Data */ #ifdef CONFIG_SPE - EXCEPTION(0x2030, SPE_FP_DATA_ALTIVEC_ASSIST, SPEFloatingPointData, + EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData, SPEFloatingPointException, EXC_XFER_EE) /* SPE Floating Point Round */ EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ SPEFloatingPointRoundException, EXC_XFER_EE) -#else - EXCEPTION(0x2040, SPE_FP_DATA_ALTIVEC_ASSIST, SPEFloatingPointData, +#elif defined(CONFIG_SPE_POSSIBLE) + EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, unknown_exception, EXC_XFER_EE) EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ unknown_exception, EXC_XFER_EE) -#endif /* CONFIG_SPE */ +#endif /* CONFIG_SPE_POSSIBLE */ + /* Performance Monitor */ EXCEPTION(0x2060, PERFORMANCE_MONITOR, PerformanceMonitor, \ @@ -947,6 +949,7 @@ get_phys_addr: * Global functions */ +#ifdef CONFIG_E200 /* Adjust or setup IVORs for e200 */ _GLOBAL(__setup_e200_ivors) li r3,DebugDebug@l @@ -959,7 +962,10 @@ _GLOBAL(__setup_e200_ivors) mtspr SPRN_IVOR34,r3 sync blr +#endif +#ifdef CONFIG_E500 +#ifndef CONFIG_PPC_E500MC /* Adjust or setup IVORs for e500v1/v2 */ _GLOBAL(__setup_e500_ivors) li r3,DebugCrit@l @@ -974,7 +980,7 @@ _GLOBAL(__setup_e500_ivors) mtspr SPRN_IVOR35,r3 sync blr - +#else /* Adjust or setup IVORs for e500mc */ _GLOBAL(__setup_e500mc_ivors) li r3,DebugDebug@l @@ -1000,6 +1006,8 @@ _GLOBAL(__setup_ehv_ivors) mtspr SPRN_IVOR41,r3 sync blr +#endif /* CONFIG_PPC_E500MC */ +#endif /* CONFIG_E500 */ #ifdef CONFIG_SPE /* diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index dd03f6b299ba..b32db4b95361 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -535,174 +535,111 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return -ENOTSUPP; } -int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) { - int r; - union kvmppc_one_reg val; - int size; + int r = 0; long int i; - size = one_reg_size(reg->id); - if (size > sizeof(val)) - return -EINVAL; - - r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val); + r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, id, val); if (r == -EINVAL) { r = 0; - switch (reg->id) { + switch (id) { case KVM_REG_PPC_DAR: - val = get_reg_val(reg->id, kvmppc_get_dar(vcpu)); + *val = get_reg_val(id, kvmppc_get_dar(vcpu)); break; case KVM_REG_PPC_DSISR: - val = get_reg_val(reg->id, kvmppc_get_dsisr(vcpu)); + *val = get_reg_val(id, kvmppc_get_dsisr(vcpu)); break; case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: - i = reg->id - KVM_REG_PPC_FPR0; - val = get_reg_val(reg->id, VCPU_FPR(vcpu, i)); + i = id - KVM_REG_PPC_FPR0; + *val = get_reg_val(id, VCPU_FPR(vcpu, i)); break; case KVM_REG_PPC_FPSCR: - val = get_reg_val(reg->id, vcpu->arch.fp.fpscr); - break; -#ifdef CONFIG_ALTIVEC - case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: - if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { - r = -ENXIO; - break; - } - val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0]; - break; - case KVM_REG_PPC_VSCR: - if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { - r = -ENXIO; - break; - } - val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]); + *val = get_reg_val(id, vcpu->arch.fp.fpscr); break; - case KVM_REG_PPC_VRSAVE: - val = get_reg_val(reg->id, vcpu->arch.vrsave); - break; -#endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: if (cpu_has_feature(CPU_FTR_VSX)) { - long int i = reg->id - KVM_REG_PPC_VSR0; - val.vsxval[0] = vcpu->arch.fp.fpr[i][0]; - val.vsxval[1] = vcpu->arch.fp.fpr[i][1]; + i = id - KVM_REG_PPC_VSR0; + val->vsxval[0] = vcpu->arch.fp.fpr[i][0]; + val->vsxval[1] = vcpu->arch.fp.fpr[i][1]; } else { r = -ENXIO; } break; #endif /* CONFIG_VSX */ - case KVM_REG_PPC_DEBUG_INST: { - u32 opcode = INS_TW; - r = copy_to_user((u32 __user *)(long)reg->addr, - &opcode, sizeof(u32)); + case KVM_REG_PPC_DEBUG_INST: + *val = get_reg_val(id, INS_TW); break; - } #ifdef CONFIG_KVM_XICS case KVM_REG_PPC_ICP_STATE: if (!vcpu->arch.icp) { r = -ENXIO; break; } - val = get_reg_val(reg->id, kvmppc_xics_get_icp(vcpu)); + *val = get_reg_val(id, kvmppc_xics_get_icp(vcpu)); break; #endif /* CONFIG_KVM_XICS */ case KVM_REG_PPC_FSCR: - val = get_reg_val(reg->id, vcpu->arch.fscr); + *val = get_reg_val(id, vcpu->arch.fscr); break; case KVM_REG_PPC_TAR: - val = get_reg_val(reg->id, vcpu->arch.tar); + *val = get_reg_val(id, vcpu->arch.tar); break; case KVM_REG_PPC_EBBHR: - val = get_reg_val(reg->id, vcpu->arch.ebbhr); + *val = get_reg_val(id, vcpu->arch.ebbhr); break; case KVM_REG_PPC_EBBRR: - val = get_reg_val(reg->id, vcpu->arch.ebbrr); + *val = get_reg_val(id, vcpu->arch.ebbrr); break; case KVM_REG_PPC_BESCR: - val = get_reg_val(reg->id, vcpu->arch.bescr); + *val = get_reg_val(id, vcpu->arch.bescr); break; case KVM_REG_PPC_VTB: - val = get_reg_val(reg->id, vcpu->arch.vtb); + *val = get_reg_val(id, vcpu->arch.vtb); break; case KVM_REG_PPC_IC: - val = get_reg_val(reg->id, vcpu->arch.ic); + *val = get_reg_val(id, vcpu->arch.ic); break; default: r = -EINVAL; break; } } - if (r) - return r; - - if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size)) - r = -EFAULT; return r; } -int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) { - int r; - union kvmppc_one_reg val; - int size; + int r = 0; long int i; - size = one_reg_size(reg->id); - if (size > sizeof(val)) - return -EINVAL; - - if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size)) - return -EFAULT; - - r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val); + r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, id, val); if (r == -EINVAL) { r = 0; - switch (reg->id) { + switch (id) { case KVM_REG_PPC_DAR: - kvmppc_set_dar(vcpu, set_reg_val(reg->id, val)); + kvmppc_set_dar(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_DSISR: - kvmppc_set_dsisr(vcpu, set_reg_val(reg->id, val)); + kvmppc_set_dsisr(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: - i = reg->id - KVM_REG_PPC_FPR0; - VCPU_FPR(vcpu, i) = set_reg_val(reg->id, val); + i = id - KVM_REG_PPC_FPR0; + VCPU_FPR(vcpu, i) = set_reg_val(id, *val); break; case KVM_REG_PPC_FPSCR: - vcpu->arch.fp.fpscr = set_reg_val(reg->id, val); - break; -#ifdef CONFIG_ALTIVEC - case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: - if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { - r = -ENXIO; - break; - } - vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval; - break; - case KVM_REG_PPC_VSCR: - if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { - r = -ENXIO; - break; - } - vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val); - break; - case KVM_REG_PPC_VRSAVE: - if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { - r = -ENXIO; - break; - } - vcpu->arch.vrsave = set_reg_val(reg->id, val); + vcpu->arch.fp.fpscr = set_reg_val(id, *val); break; -#endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: if (cpu_has_feature(CPU_FTR_VSX)) { - long int i = reg->id - KVM_REG_PPC_VSR0; - vcpu->arch.fp.fpr[i][0] = val.vsxval[0]; - vcpu->arch.fp.fpr[i][1] = val.vsxval[1]; + i = id - KVM_REG_PPC_VSR0; + vcpu->arch.fp.fpr[i][0] = val->vsxval[0]; + vcpu->arch.fp.fpr[i][1] = val->vsxval[1]; } else { r = -ENXIO; } @@ -715,29 +652,29 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) break; } r = kvmppc_xics_set_icp(vcpu, - set_reg_val(reg->id, val)); + set_reg_val(id, *val)); break; #endif /* CONFIG_KVM_XICS */ case KVM_REG_PPC_FSCR: - vcpu->arch.fscr = set_reg_val(reg->id, val); + vcpu->arch.fscr = set_reg_val(id, *val); break; case KVM_REG_PPC_TAR: - vcpu->arch.tar = set_reg_val(reg->id, val); + vcpu->arch.tar = set_reg_val(id, *val); break; case KVM_REG_PPC_EBBHR: - vcpu->arch.ebbhr = set_reg_val(reg->id, val); + vcpu->arch.ebbhr = set_reg_val(id, *val); break; case KVM_REG_PPC_EBBRR: - vcpu->arch.ebbrr = set_reg_val(reg->id, val); + vcpu->arch.ebbrr = set_reg_val(id, *val); break; case KVM_REG_PPC_BESCR: - vcpu->arch.bescr = set_reg_val(reg->id, val); + vcpu->arch.bescr = set_reg_val(id, *val); break; case KVM_REG_PPC_VTB: - vcpu->arch.vtb = set_reg_val(reg->id, val); + vcpu->arch.vtb = set_reg_val(id, *val); break; case KVM_REG_PPC_IC: - vcpu->arch.ic = set_reg_val(reg->id, val); + vcpu->arch.ic = set_reg_val(id, *val); break; default: r = -EINVAL; @@ -778,13 +715,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - return -EINVAL; + vcpu->guest_debug = dbg->control; + return 0; } -void kvmppc_decrementer_func(unsigned long data) +void kvmppc_decrementer_func(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; - kvmppc_core_queue_dec(vcpu); kvm_vcpu_kick(vcpu); } @@ -851,9 +787,9 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end); } -int kvm_age_hva(struct kvm *kvm, unsigned long hva) +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) { - return kvm->arch.kvm_ops->age_hva(kvm, hva); + return kvm->arch.kvm_ops->age_hva(kvm, start, end); } int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h index 4bf956cf94d6..d2b3ec088b8c 100644 --- a/arch/powerpc/kvm/book3s.h +++ b/arch/powerpc/kvm/book3s.h @@ -17,7 +17,8 @@ extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm, extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva); extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end); -extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva); +extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, + unsigned long end); extern int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva); extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 79294c4c5015..d40770248b6a 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1002,11 +1002,11 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, return ret; } -int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva) +int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end) { if (!kvm->arch.using_mmu_notifiers) return 0; - return kvm_handle_hva(kvm, hva, kvm_age_rmapp); + return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp); } static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 27cced9c7249..e63587d30b70 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -725,6 +725,30 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd) return kvmppc_hcall_impl_hv_realmode(cmd); } +static int kvmppc_emulate_debug_inst(struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + u32 last_inst; + + if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) != + EMULATE_DONE) { + /* + * Fetch failed, so return to guest and + * try executing it again. + */ + return RESUME_GUEST; + } + + if (last_inst == KVMPPC_INST_SW_BREAKPOINT) { + run->exit_reason = KVM_EXIT_DEBUG; + run->debug.arch.address = kvmppc_get_pc(vcpu); + return RESUME_HOST; + } else { + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + return RESUME_GUEST; + } +} + static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, struct task_struct *tsk) { @@ -807,12 +831,18 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, break; /* * This occurs if the guest executes an illegal instruction. - * We just generate a program interrupt to the guest, since - * we don't emulate any guest instructions at this stage. + * If the guest debug is disabled, generate a program interrupt + * to the guest. If guest debug is enabled, we need to check + * whether the instruction is a software breakpoint instruction. + * Accordingly return to Guest or Host. */ case BOOK3S_INTERRUPT_H_EMUL_ASSIST: - kvmppc_core_queue_program(vcpu, SRR1_PROGILL); - r = RESUME_GUEST; + if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) { + r = kvmppc_emulate_debug_inst(run, vcpu); + } else { + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + r = RESUME_GUEST; + } break; /* * This occurs if the guest (kernel or userspace), does something that @@ -856,7 +886,9 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu, { int i, j; - kvmppc_set_pvr_hv(vcpu, sregs->pvr); + /* Only accept the same PVR as the host's, since we can't spoof it */ + if (sregs->pvr != vcpu->arch.pvr) + return -EINVAL; j = 0; for (i = 0; i < vcpu->arch.slb_nr; i++) { @@ -922,6 +954,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, long int i; switch (id) { + case KVM_REG_PPC_DEBUG_INST: + *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT); + break; case KVM_REG_PPC_HIOR: *val = get_reg_val(id, 0); break; @@ -1489,7 +1524,7 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, static int kvmppc_grab_hwthread(int cpu) { struct paca_struct *tpaca; - long timeout = 1000; + long timeout = 10000; tpaca = &paca[cpu]; diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index b9615ba5b083..4fdc27c80f4c 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -163,6 +163,12 @@ void __init kvm_cma_reserve(void) unsigned long align_size; struct memblock_region *reg; phys_addr_t selected_size = 0; + + /* + * We need CMA reservation only when we are in HV mode + */ + if (!cpu_has_feature(CPU_FTR_HVMODE)) + return; /* * We cannot use memblock_phys_mem_size() here, because * memblock_analyze() has not been called yet. diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index f0c4db7704c3..edb2ccdbb2ba 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -355,6 +355,7 @@ kvmppc_hv_entry: * MSR = ~IR|DR * R13 = PACA * R1 = host R1 + * R2 = TOC * all other volatile GPRS = free */ mflr r0 @@ -503,7 +504,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) toc_tlbie_lock: .tc native_tlbie_lock[TC],native_tlbie_lock .previous - ld r3,toc_tlbie_lock@toc(2) + ld r3,toc_tlbie_lock@toc(r2) #ifdef __BIG_ENDIAN__ lwz r8,PACA_LOCK_TOKEN(r13) #else diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index faffb27badd9..cf2eb16846d1 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -295,7 +295,8 @@ static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start, return 0; } -static int kvm_age_hva_pr(struct kvm *kvm, unsigned long hva) +static int kvm_age_hva_pr(struct kvm *kvm, unsigned long start, + unsigned long end) { /* XXX could be more clever ;) */ return 0; @@ -1319,6 +1320,9 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, int r = 0; switch (id) { + case KVM_REG_PPC_DEBUG_INST: + *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT); + break; case KVM_REG_PPC_HIOR: *val = get_reg_val(id, to_book3s(vcpu)->hior); break; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index b4c89fa6f109..9b55dec2d6cc 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -124,6 +124,40 @@ static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu) } #endif +/* + * Load up guest vcpu FP state if it's needed. + * It also set the MSR_FP in thread so that host know + * we're holding FPU, and then host can help to save + * guest vcpu FP state if other threads require to use FPU. + * This simulates an FP unavailable fault. + * + * It requires to be called with preemption disabled. + */ +static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_PPC_FPU + if (!(current->thread.regs->msr & MSR_FP)) { + enable_kernel_fp(); + load_fp_state(&vcpu->arch.fp); + current->thread.fp_save_area = &vcpu->arch.fp; + current->thread.regs->msr |= MSR_FP; + } +#endif +} + +/* + * Save guest vcpu FP state into thread. + * It requires to be called with preemption disabled. + */ +static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_PPC_FPU + if (current->thread.regs->msr & MSR_FP) + giveup_fpu(current); + current->thread.fp_save_area = NULL; +#endif +} + static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu) { #if defined(CONFIG_PPC_FPU) && !defined(CONFIG_KVM_BOOKE_HV) @@ -134,6 +168,40 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu) #endif } +/* + * Simulate AltiVec unavailable fault to load guest state + * from thread to AltiVec unit. + * It requires to be called with preemption disabled. + */ +static inline void kvmppc_load_guest_altivec(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_ALTIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC)) { + if (!(current->thread.regs->msr & MSR_VEC)) { + enable_kernel_altivec(); + load_vr_state(&vcpu->arch.vr); + current->thread.vr_save_area = &vcpu->arch.vr; + current->thread.regs->msr |= MSR_VEC; + } + } +#endif +} + +/* + * Save guest vcpu AltiVec state into thread. + * It requires to be called with preemption disabled. + */ +static inline void kvmppc_save_guest_altivec(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_ALTIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC)) { + if (current->thread.regs->msr & MSR_VEC) + giveup_altivec(current); + current->thread.vr_save_area = NULL; + } +#endif +} + static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu) { /* Synchronize guest's desire to get debug interrupts into shadow MSR */ @@ -267,6 +335,16 @@ static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu) clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions); } +void kvmppc_core_queue_debug(struct kvm_vcpu *vcpu) +{ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DEBUG); +} + +void kvmppc_core_dequeue_debug(struct kvm_vcpu *vcpu) +{ + clear_bit(BOOKE_IRQPRIO_DEBUG, &vcpu->arch.pending_exceptions); +} + static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) { kvmppc_set_srr0(vcpu, srr0); @@ -341,9 +419,15 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, case BOOKE_IRQPRIO_ITLB_MISS: case BOOKE_IRQPRIO_SYSCALL: case BOOKE_IRQPRIO_FP_UNAVAIL: +#ifdef CONFIG_SPE_POSSIBLE case BOOKE_IRQPRIO_SPE_UNAVAIL: case BOOKE_IRQPRIO_SPE_FP_DATA: case BOOKE_IRQPRIO_SPE_FP_ROUND: +#endif +#ifdef CONFIG_ALTIVEC + case BOOKE_IRQPRIO_ALTIVEC_UNAVAIL: + case BOOKE_IRQPRIO_ALTIVEC_ASSIST: +#endif case BOOKE_IRQPRIO_AP_UNAVAIL: allowed = 1; msr_mask = MSR_CE | MSR_ME | MSR_DE; @@ -377,7 +461,11 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, allowed = vcpu->arch.shared->msr & MSR_DE; allowed = allowed && !crit; msr_mask = MSR_ME; - int_class = INT_CLASS_CRIT; + if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) + int_class = INT_CLASS_DBG; + else + int_class = INT_CLASS_CRIT; + break; } @@ -654,20 +742,27 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) /* * Since we can't trap on MSR_FP in GS-mode, we consider the guest - * as always using the FPU. Kernel usage of FP (via - * enable_kernel_fp()) in this thread must not occur while - * vcpu->fpu_active is set. + * as always using the FPU. */ - vcpu->fpu_active = 1; - kvmppc_load_guest_fp(vcpu); #endif +#ifdef CONFIG_ALTIVEC + /* Save userspace AltiVec state in stack */ + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + enable_kernel_altivec(); + /* + * Since we can't trap on MSR_VEC in GS-mode, we consider the guest + * as always using the AltiVec. + */ + kvmppc_load_guest_altivec(vcpu); +#endif + /* Switch to guest debug context */ - debug = vcpu->arch.shadow_dbg_reg; + debug = vcpu->arch.dbg_reg; switch_booke_debug_regs(&debug); debug = current->thread.debug; - current->thread.debug = vcpu->arch.shadow_dbg_reg; + current->thread.debug = vcpu->arch.dbg_reg; vcpu->arch.pgdir = current->mm->pgd; kvmppc_fix_ee_before_entry(); @@ -683,8 +778,10 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) #ifdef CONFIG_PPC_FPU kvmppc_save_guest_fp(vcpu); +#endif - vcpu->fpu_active = 0; +#ifdef CONFIG_ALTIVEC + kvmppc_save_guest_altivec(vcpu); #endif out: @@ -728,9 +825,36 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu) { - struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg); + struct debug_reg *dbg_reg = &(vcpu->arch.dbg_reg); u32 dbsr = vcpu->arch.dbsr; + if (vcpu->guest_debug == 0) { + /* + * Debug resources belong to Guest. + * Imprecise debug event is not injected + */ + if (dbsr & DBSR_IDE) { + dbsr &= ~DBSR_IDE; + if (!dbsr) + return RESUME_GUEST; + } + + if (dbsr && (vcpu->arch.shared->msr & MSR_DE) && + (vcpu->arch.dbg_reg.dbcr0 & DBCR0_IDM)) + kvmppc_core_queue_debug(vcpu); + + /* Inject a program interrupt if trap debug is not allowed */ + if ((dbsr & DBSR_TIE) && !(vcpu->arch.shared->msr & MSR_DE)) + kvmppc_core_queue_program(vcpu, ESR_PTR); + + return RESUME_GUEST; + } + + /* + * Debug resource owned by userspace. + * Clear guest dbsr (vcpu->arch.dbsr) + */ + vcpu->arch.dbsr = 0; run->debug.arch.status = 0; run->debug.arch.address = vcpu->arch.pc; @@ -868,7 +992,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, case BOOKE_INTERRUPT_DATA_STORAGE: case BOOKE_INTERRUPT_DTLB_MISS: case BOOKE_INTERRUPT_HV_PRIV: - emulated = kvmppc_get_last_inst(vcpu, false, &last_inst); + emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); + break; + case BOOKE_INTERRUPT_PROGRAM: + /* SW breakpoints arrive as illegal instructions on HV */ + if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) + emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); break; default: break; @@ -947,6 +1076,18 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case BOOKE_INTERRUPT_PROGRAM: + if ((vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) && + (last_inst == KVMPPC_INST_SW_BREAKPOINT)) { + /* + * We are here because of an SW breakpoint instr, + * so lets return to host to handle. + */ + r = kvmppc_handle_debug(run, vcpu); + run->exit_reason = KVM_EXIT_DEBUG; + kvmppc_account_exit(vcpu, DEBUG_EXITS); + break; + } + if (vcpu->arch.shared->msr & (MSR_PR | MSR_GS)) { /* * Program traps generated by user-level software must @@ -991,7 +1132,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND); r = RESUME_GUEST; break; -#else +#elif defined(CONFIG_SPE_POSSIBLE) case BOOKE_INTERRUPT_SPE_UNAVAIL: /* * Guest wants SPE, but host kernel doesn't support it. Send @@ -1012,6 +1153,22 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, run->hw.hardware_exit_reason = exit_nr; r = RESUME_HOST; break; +#endif /* CONFIG_SPE_POSSIBLE */ + +/* + * On cores with Vector category, KVM is loaded only if CONFIG_ALTIVEC, + * see kvmppc_core_check_processor_compat(). + */ +#ifdef CONFIG_ALTIVEC + case BOOKE_INTERRUPT_ALTIVEC_UNAVAIL: + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_UNAVAIL); + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_ALTIVEC_ASSIST: + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_ASSIST); + r = RESUME_GUEST; + break; #endif case BOOKE_INTERRUPT_DATA_STORAGE: @@ -1188,6 +1345,8 @@ out: else { /* interrupts now hard-disabled */ kvmppc_fix_ee_before_entry(); + kvmppc_load_guest_fp(vcpu); + kvmppc_load_guest_altivec(vcpu); } } @@ -1243,6 +1402,11 @@ int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu) setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func, (unsigned long)vcpu); + /* + * Clear DBSR.MRR to avoid guest debug interrupt as + * this is of host interest + */ + mtspr(SPRN_DBSR, DBSR_MRR); return 0; } @@ -1457,144 +1621,125 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs); } -int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) { int r = 0; - union kvmppc_one_reg val; - int size; - - size = one_reg_size(reg->id); - if (size > sizeof(val)) - return -EINVAL; - switch (reg->id) { + switch (id) { case KVM_REG_PPC_IAC1: - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac1); + *val = get_reg_val(id, vcpu->arch.dbg_reg.iac1); break; case KVM_REG_PPC_IAC2: - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac2); + *val = get_reg_val(id, vcpu->arch.dbg_reg.iac2); break; #if CONFIG_PPC_ADV_DEBUG_IACS > 2 case KVM_REG_PPC_IAC3: - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac3); + *val = get_reg_val(id, vcpu->arch.dbg_reg.iac3); break; case KVM_REG_PPC_IAC4: - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac4); + *val = get_reg_val(id, vcpu->arch.dbg_reg.iac4); break; #endif case KVM_REG_PPC_DAC1: - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac1); + *val = get_reg_val(id, vcpu->arch.dbg_reg.dac1); break; case KVM_REG_PPC_DAC2: - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac2); + *val = get_reg_val(id, vcpu->arch.dbg_reg.dac2); break; case KVM_REG_PPC_EPR: { u32 epr = kvmppc_get_epr(vcpu); - val = get_reg_val(reg->id, epr); + *val = get_reg_val(id, epr); break; } #if defined(CONFIG_64BIT) case KVM_REG_PPC_EPCR: - val = get_reg_val(reg->id, vcpu->arch.epcr); + *val = get_reg_val(id, vcpu->arch.epcr); break; #endif case KVM_REG_PPC_TCR: - val = get_reg_val(reg->id, vcpu->arch.tcr); + *val = get_reg_val(id, vcpu->arch.tcr); break; case KVM_REG_PPC_TSR: - val = get_reg_val(reg->id, vcpu->arch.tsr); + *val = get_reg_val(id, vcpu->arch.tsr); break; case KVM_REG_PPC_DEBUG_INST: - val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV_DEBUG); + *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT); break; case KVM_REG_PPC_VRSAVE: - val = get_reg_val(reg->id, vcpu->arch.vrsave); + *val = get_reg_val(id, vcpu->arch.vrsave); break; default: - r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val); + r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, id, val); break; } - if (r) - return r; - - if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size)) - r = -EFAULT; - return r; } -int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) { int r = 0; - union kvmppc_one_reg val; - int size; - size = one_reg_size(reg->id); - if (size > sizeof(val)) - return -EINVAL; - - if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size)) - return -EFAULT; - - switch (reg->id) { + switch (id) { case KVM_REG_PPC_IAC1: - vcpu->arch.dbg_reg.iac1 = set_reg_val(reg->id, val); + vcpu->arch.dbg_reg.iac1 = set_reg_val(id, *val); break; case KVM_REG_PPC_IAC2: - vcpu->arch.dbg_reg.iac2 = set_reg_val(reg->id, val); + vcpu->arch.dbg_reg.iac2 = set_reg_val(id, *val); break; #if CONFIG_PPC_ADV_DEBUG_IACS > 2 case KVM_REG_PPC_IAC3: - vcpu->arch.dbg_reg.iac3 = set_reg_val(reg->id, val); + vcpu->arch.dbg_reg.iac3 = set_reg_val(id, *val); break; case KVM_REG_PPC_IAC4: - vcpu->arch.dbg_reg.iac4 = set_reg_val(reg->id, val); + vcpu->arch.dbg_reg.iac4 = set_reg_val(id, *val); break; #endif case KVM_REG_PPC_DAC1: - vcpu->arch.dbg_reg.dac1 = set_reg_val(reg->id, val); + vcpu->arch.dbg_reg.dac1 = set_reg_val(id, *val); break; case KVM_REG_PPC_DAC2: - vcpu->arch.dbg_reg.dac2 = set_reg_val(reg->id, val); + vcpu->arch.dbg_reg.dac2 = set_reg_val(id, *val); break; case KVM_REG_PPC_EPR: { - u32 new_epr = set_reg_val(reg->id, val); + u32 new_epr = set_reg_val(id, *val); kvmppc_set_epr(vcpu, new_epr); break; } #if defined(CONFIG_64BIT) case KVM_REG_PPC_EPCR: { - u32 new_epcr = set_reg_val(reg->id, val); + u32 new_epcr = set_reg_val(id, *val); kvmppc_set_epcr(vcpu, new_epcr); break; } #endif case KVM_REG_PPC_OR_TSR: { - u32 tsr_bits = set_reg_val(reg->id, val); + u32 tsr_bits = set_reg_val(id, *val); kvmppc_set_tsr_bits(vcpu, tsr_bits); break; } case KVM_REG_PPC_CLEAR_TSR: { - u32 tsr_bits = set_reg_val(reg->id, val); + u32 tsr_bits = set_reg_val(id, *val); kvmppc_clr_tsr_bits(vcpu, tsr_bits); break; } case KVM_REG_PPC_TSR: { - u32 tsr = set_reg_val(reg->id, val); + u32 tsr = set_reg_val(id, *val); kvmppc_set_tsr(vcpu, tsr); break; } case KVM_REG_PPC_TCR: { - u32 tcr = set_reg_val(reg->id, val); + u32 tcr = set_reg_val(id, *val); kvmppc_set_tcr(vcpu, tcr); break; } case KVM_REG_PPC_VRSAVE: - vcpu->arch.vrsave = set_reg_val(reg->id, val); + vcpu->arch.vrsave = set_reg_val(id, *val); break; default: - r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val); + r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, id, val); break; } @@ -1694,10 +1839,8 @@ void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits) update_timer_ints(vcpu); } -void kvmppc_decrementer_func(unsigned long data) +void kvmppc_decrementer_func(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; - if (vcpu->arch.tcr & TCR_ARE) { vcpu->arch.dec = vcpu->arch.decar; kvmppc_emulate_dec(vcpu); @@ -1842,7 +1985,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, int n, b = 0, w = 0; if (!(dbg->control & KVM_GUESTDBG_ENABLE)) { - vcpu->arch.shadow_dbg_reg.dbcr0 = 0; + vcpu->arch.dbg_reg.dbcr0 = 0; vcpu->guest_debug = 0; kvm_guest_protect_msr(vcpu, MSR_DE, false); return 0; @@ -1850,15 +1993,13 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, kvm_guest_protect_msr(vcpu, MSR_DE, true); vcpu->guest_debug = dbg->control; - vcpu->arch.shadow_dbg_reg.dbcr0 = 0; - /* Set DBCR0_EDM in guest visible DBCR0 register. */ - vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM; + vcpu->arch.dbg_reg.dbcr0 = 0; if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) - vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC; + vcpu->arch.dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC; /* Code below handles only HW breakpoints */ - dbg_reg = &(vcpu->arch.shadow_dbg_reg); + dbg_reg = &(vcpu->arch.dbg_reg); #ifdef CONFIG_KVM_BOOKE_HV /* diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index f753543c56fa..22ba08ea68e9 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -32,9 +32,15 @@ #define BOOKE_IRQPRIO_ALIGNMENT 2 #define BOOKE_IRQPRIO_PROGRAM 3 #define BOOKE_IRQPRIO_FP_UNAVAIL 4 +#ifdef CONFIG_SPE_POSSIBLE #define BOOKE_IRQPRIO_SPE_UNAVAIL 5 #define BOOKE_IRQPRIO_SPE_FP_DATA 6 #define BOOKE_IRQPRIO_SPE_FP_ROUND 7 +#endif +#ifdef CONFIG_PPC_E500MC +#define BOOKE_IRQPRIO_ALTIVEC_UNAVAIL 5 +#define BOOKE_IRQPRIO_ALTIVEC_ASSIST 6 +#endif #define BOOKE_IRQPRIO_SYSCALL 8 #define BOOKE_IRQPRIO_AP_UNAVAIL 9 #define BOOKE_IRQPRIO_DTLB_MISS 10 @@ -116,40 +122,6 @@ extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val); -/* - * Load up guest vcpu FP state if it's needed. - * It also set the MSR_FP in thread so that host know - * we're holding FPU, and then host can help to save - * guest vcpu FP state if other threads require to use FPU. - * This simulates an FP unavailable fault. - * - * It requires to be called with preemption disabled. - */ -static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_PPC_FPU - if (vcpu->fpu_active && !(current->thread.regs->msr & MSR_FP)) { - enable_kernel_fp(); - load_fp_state(&vcpu->arch.fp); - current->thread.fp_save_area = &vcpu->arch.fp; - current->thread.regs->msr |= MSR_FP; - } -#endif -} - -/* - * Save guest vcpu FP state into thread. - * It requires to be called with preemption disabled. - */ -static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_PPC_FPU - if (vcpu->fpu_active && (current->thread.regs->msr & MSR_FP)) - giveup_fpu(current); - current->thread.fp_save_area = NULL; -#endif -} - static inline void kvmppc_clear_dbsr(void) { mtspr(SPRN_DBSR, mfspr(SPRN_DBSR)); diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 28c158881d23..a82f64502de1 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -25,6 +25,7 @@ #define OP_19_XOP_RFI 50 #define OP_19_XOP_RFCI 51 +#define OP_19_XOP_RFDI 39 #define OP_31_XOP_MFMSR 83 #define OP_31_XOP_WRTEE 131 @@ -37,6 +38,12 @@ static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1); } +static void kvmppc_emul_rfdi(struct kvm_vcpu *vcpu) +{ + vcpu->arch.pc = vcpu->arch.dsrr0; + kvmppc_set_msr(vcpu, vcpu->arch.dsrr1); +} + static void kvmppc_emul_rfci(struct kvm_vcpu *vcpu) { vcpu->arch.pc = vcpu->arch.csrr0; @@ -65,6 +72,12 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, *advance = 0; break; + case OP_19_XOP_RFDI: + kvmppc_emul_rfdi(vcpu); + kvmppc_set_exit_type(vcpu, EMULATED_RFDI_EXITS); + *advance = 0; + break; + default: emulated = EMULATE_FAIL; break; @@ -118,6 +131,7 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) { int emulated = EMULATE_DONE; + bool debug_inst = false; switch (sprn) { case SPRN_DEAR: @@ -132,14 +146,128 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) case SPRN_CSRR1: vcpu->arch.csrr1 = spr_val; break; + case SPRN_DSRR0: + vcpu->arch.dsrr0 = spr_val; + break; + case SPRN_DSRR1: + vcpu->arch.dsrr1 = spr_val; + break; + case SPRN_IAC1: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + vcpu->arch.dbg_reg.iac1 = spr_val; + break; + case SPRN_IAC2: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + vcpu->arch.dbg_reg.iac2 = spr_val; + break; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + case SPRN_IAC3: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + vcpu->arch.dbg_reg.iac3 = spr_val; + break; + case SPRN_IAC4: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + vcpu->arch.dbg_reg.iac4 = spr_val; + break; +#endif + case SPRN_DAC1: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + vcpu->arch.dbg_reg.dac1 = spr_val; + break; + case SPRN_DAC2: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + vcpu->arch.dbg_reg.dac2 = spr_val; + break; case SPRN_DBCR0: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + spr_val &= (DBCR0_IDM | DBCR0_IC | DBCR0_BT | DBCR0_TIE | + DBCR0_IAC1 | DBCR0_IAC2 | DBCR0_IAC3 | DBCR0_IAC4 | + DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W); + vcpu->arch.dbg_reg.dbcr0 = spr_val; break; case SPRN_DBCR1: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; vcpu->arch.dbg_reg.dbcr1 = spr_val; break; + case SPRN_DBCR2: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + vcpu->arch.dbg_reg.dbcr2 = spr_val; + break; case SPRN_DBSR: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + vcpu->arch.dbsr &= ~spr_val; + if (!(vcpu->arch.dbsr & ~DBSR_IDE)) + kvmppc_core_dequeue_debug(vcpu); break; case SPRN_TSR: kvmppc_clr_tsr_bits(vcpu, spr_val); @@ -252,6 +380,10 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) emulated = EMULATE_FAIL; } + if (debug_inst) { + current->thread.debug = vcpu->arch.dbg_reg; + switch_booke_debug_regs(&vcpu->arch.dbg_reg); + } return emulated; } @@ -278,12 +410,43 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) case SPRN_CSRR1: *spr_val = vcpu->arch.csrr1; break; + case SPRN_DSRR0: + *spr_val = vcpu->arch.dsrr0; + break; + case SPRN_DSRR1: + *spr_val = vcpu->arch.dsrr1; + break; + case SPRN_IAC1: + *spr_val = vcpu->arch.dbg_reg.iac1; + break; + case SPRN_IAC2: + *spr_val = vcpu->arch.dbg_reg.iac2; + break; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + case SPRN_IAC3: + *spr_val = vcpu->arch.dbg_reg.iac3; + break; + case SPRN_IAC4: + *spr_val = vcpu->arch.dbg_reg.iac4; + break; +#endif + case SPRN_DAC1: + *spr_val = vcpu->arch.dbg_reg.dac1; + break; + case SPRN_DAC2: + *spr_val = vcpu->arch.dbg_reg.dac2; + break; case SPRN_DBCR0: *spr_val = vcpu->arch.dbg_reg.dbcr0; + if (vcpu->guest_debug) + *spr_val = *spr_val | DBCR0_EDM; break; case SPRN_DBCR1: *spr_val = vcpu->arch.dbg_reg.dbcr1; break; + case SPRN_DBCR2: + *spr_val = vcpu->arch.dbg_reg.dbcr2; + break; case SPRN_DBSR: *spr_val = vcpu->arch.dbsr; break; diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index e9fa56a911fd..81bd8a07aa51 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -238,7 +238,7 @@ kvm_handler BOOKE_INTERRUPT_EXTERNAL, EX_PARAMS(GEN), \ kvm_handler BOOKE_INTERRUPT_ALIGNMENT, EX_PARAMS(GEN), \ SPRN_SRR0, SPRN_SRR1,(NEED_DEAR | NEED_ESR) kvm_handler BOOKE_INTERRUPT_PROGRAM, EX_PARAMS(GEN), \ - SPRN_SRR0, SPRN_SRR1,NEED_ESR + SPRN_SRR0, SPRN_SRR1, (NEED_ESR | NEED_EMU) kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, EX_PARAMS(GEN), \ SPRN_SRR0, SPRN_SRR1, 0 kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, EX_PARAMS(GEN), \ @@ -256,11 +256,9 @@ kvm_handler BOOKE_INTERRUPT_DTLB_MISS, EX_PARAMS_TLB, \ SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR) kvm_handler BOOKE_INTERRUPT_ITLB_MISS, EX_PARAMS_TLB, \ SPRN_SRR0, SPRN_SRR1, 0 -kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, EX_PARAMS(GEN), \ +kvm_handler BOOKE_INTERRUPT_ALTIVEC_UNAVAIL, EX_PARAMS(GEN), \ SPRN_SRR0, SPRN_SRR1, 0 -kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, EX_PARAMS(GEN), \ - SPRN_SRR0, SPRN_SRR1, 0 -kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, EX_PARAMS(GEN), \ +kvm_handler BOOKE_INTERRUPT_ALTIVEC_ASSIST, EX_PARAMS(GEN), \ SPRN_SRR0, SPRN_SRR1, 0 kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, EX_PARAMS(GEN), \ SPRN_SRR0, SPRN_SRR1, 0 @@ -350,7 +348,7 @@ kvm_handler BOOKE_INTERRUPT_INST_STORAGE, SPRN_SRR0, SPRN_SRR1, NEED_ESR kvm_handler BOOKE_INTERRUPT_EXTERNAL, SPRN_SRR0, SPRN_SRR1, 0 kvm_handler BOOKE_INTERRUPT_ALIGNMENT, \ SPRN_SRR0, SPRN_SRR1, (NEED_DEAR | NEED_ESR) -kvm_handler BOOKE_INTERRUPT_PROGRAM, SPRN_SRR0, SPRN_SRR1, NEED_ESR +kvm_handler BOOKE_INTERRUPT_PROGRAM, SPRN_SRR0, SPRN_SRR1, (NEED_ESR | NEED_EMU) kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0 kvm_handler BOOKE_INTERRUPT_SYSCALL, SPRN_SRR0, SPRN_SRR1, 0 kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0 @@ -361,9 +359,6 @@ kvm_lvl_handler BOOKE_INTERRUPT_WATCHDOG, \ kvm_handler BOOKE_INTERRUPT_DTLB_MISS, \ SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR) kvm_handler BOOKE_INTERRUPT_ITLB_MISS, SPRN_SRR0, SPRN_SRR1, 0 -kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0 -kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, SPRN_SRR0, SPRN_SRR1, 0 -kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, SPRN_SRR0, SPRN_SRR1, 0 kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, SPRN_SRR0, SPRN_SRR1, 0 kvm_handler BOOKE_INTERRUPT_DOORBELL, SPRN_SRR0, SPRN_SRR1, 0 kvm_lvl_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, \ diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index a326178bdea5..72920bed3ac6 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -22,6 +22,7 @@ #include <linux/kvm_host.h> #include <asm/mmu-book3e.h> #include <asm/tlb.h> +#include <asm/cputhreads.h> enum vcpu_ftr { VCPU_FTR_MMU_V2 @@ -289,6 +290,25 @@ void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500); #define kvmppc_e500_get_tlb_stid(vcpu, gtlbe) get_tlb_tid(gtlbe) #define get_tlbmiss_tid(vcpu) get_cur_pid(vcpu) #define get_tlb_sts(gtlbe) (gtlbe->mas1 & MAS1_TS) + +/* + * These functions should be called with preemption disabled + * and the returned value is valid only in that context + */ +static inline int get_thread_specific_lpid(int vm_lpid) +{ + int vcpu_lpid = vm_lpid; + + if (threads_per_core == 2) + vcpu_lpid |= smp_processor_id() & 1; + + return vcpu_lpid; +} + +static inline int get_lpid(struct kvm_vcpu *vcpu) +{ + return get_thread_specific_lpid(vcpu->kvm->arch.lpid); +} #else unsigned int kvmppc_e500_get_tlb_stid(struct kvm_vcpu *vcpu, struct kvm_book3e_206_tlb_entry *gtlbe); diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index c99c40e9182a..ce7291c79f6c 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -259,6 +259,7 @@ int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_va break; /* extra exceptions */ +#ifdef CONFIG_SPE_POSSIBLE case SPRN_IVOR32: vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = spr_val; break; @@ -268,6 +269,15 @@ int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_va case SPRN_IVOR34: vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = spr_val; break; +#endif +#ifdef CONFIG_ALTIVEC + case SPRN_IVOR32: + vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_UNAVAIL] = spr_val; + break; + case SPRN_IVOR33: + vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_ASSIST] = spr_val; + break; +#endif case SPRN_IVOR35: vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val; break; @@ -381,6 +391,7 @@ int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_v break; /* extra exceptions */ +#ifdef CONFIG_SPE_POSSIBLE case SPRN_IVOR32: *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; break; @@ -390,6 +401,15 @@ int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_v case SPRN_IVOR34: *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; break; +#endif +#ifdef CONFIG_ALTIVEC + case SPRN_IVOR32: + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_UNAVAIL]; + break; + case SPRN_IVOR33: + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_ASSIST]; + break; +#endif case SPRN_IVOR35: *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; break; diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 08f14bb57897..769778f855b0 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -69,7 +69,8 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) * writing shadow tlb entry to host TLB */ static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe, - uint32_t mas0) + uint32_t mas0, + uint32_t lpid) { unsigned long flags; @@ -80,7 +81,7 @@ static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe, mtspr(SPRN_MAS3, (u32)stlbe->mas7_3); mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32)); #ifdef CONFIG_KVM_BOOKE_HV - mtspr(SPRN_MAS8, stlbe->mas8); + mtspr(SPRN_MAS8, MAS8_TGS | get_thread_specific_lpid(lpid)); #endif asm volatile("isync; tlbwe" : : : "memory"); @@ -129,11 +130,12 @@ static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, if (tlbsel == 0) { mas0 = get_host_mas0(stlbe->mas2); - __write_host_tlbe(stlbe, mas0); + __write_host_tlbe(stlbe, mas0, vcpu_e500->vcpu.kvm->arch.lpid); } else { __write_host_tlbe(stlbe, MAS0_TLBSEL(1) | - MAS0_ESEL(to_htlb1_esel(sesel))); + MAS0_ESEL(to_htlb1_esel(sesel)), + vcpu_e500->vcpu.kvm->arch.lpid); } } @@ -176,7 +178,7 @@ void kvmppc_map_magic(struct kvm_vcpu *vcpu) MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR; magic.mas8 = 0; - __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index)); + __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index), 0); preempt_enable(); } #endif @@ -317,10 +319,6 @@ static void kvmppc_e500_setup_stlbe( stlbe->mas2 = (gvaddr & MAS2_EPN) | (ref->flags & E500_TLB_MAS2_ATTR); stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) | e500_shadow_mas3_attrib(gtlbe->mas7_3, pr); - -#ifdef CONFIG_KVM_BOOKE_HV - stlbe->mas8 = MAS8_TGS | vcpu->kvm->arch.lpid; -#endif } static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, @@ -633,7 +631,7 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, local_irq_save(flags); mtspr(SPRN_MAS6, (vcpu->arch.pid << MAS6_SPID_SHIFT) | addr_space); - mtspr(SPRN_MAS5, MAS5_SGS | vcpu->kvm->arch.lpid); + mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(vcpu)); asm volatile("tlbsx 0, %[geaddr]\n" : : [geaddr] "r" (geaddr)); mtspr(SPRN_MAS5, 0); @@ -732,7 +730,7 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) return 0; } -int kvm_age_hva(struct kvm *kvm, unsigned long hva) +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) { /* XXX could be more clever ;) */ return 0; diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 164bad2a19bf..2fdc8722e324 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -48,10 +48,11 @@ void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type) return; } - - tag = PPC_DBELL_LPID(vcpu->kvm->arch.lpid) | vcpu->vcpu_id; + preempt_disable(); + tag = PPC_DBELL_LPID(get_lpid(vcpu)) | vcpu->vcpu_id; mb(); ppc_msgsnd(dbell_type, 0, tag); + preempt_enable(); } /* gtlbe must not be mapped by more than one host tlb entry */ @@ -60,12 +61,11 @@ void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500, { unsigned int tid, ts; gva_t eaddr; - u32 val, lpid; + u32 val; unsigned long flags; ts = get_tlb_ts(gtlbe); tid = get_tlb_tid(gtlbe); - lpid = vcpu_e500->vcpu.kvm->arch.lpid; /* We search the host TLB to invalidate its shadow TLB entry */ val = (tid << 16) | ts; @@ -74,7 +74,7 @@ void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500, local_irq_save(flags); mtspr(SPRN_MAS6, val); - mtspr(SPRN_MAS5, MAS5_SGS | lpid); + mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(&vcpu_e500->vcpu)); asm volatile("tlbsx 0, %[eaddr]\n" : : [eaddr] "r" (eaddr)); val = mfspr(SPRN_MAS1); @@ -95,7 +95,7 @@ void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500) unsigned long flags; local_irq_save(flags); - mtspr(SPRN_MAS5, MAS5_SGS | vcpu_e500->vcpu.kvm->arch.lpid); + mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(&vcpu_e500->vcpu)); asm volatile("tlbilxlpid"); mtspr(SPRN_MAS5, 0); local_irq_restore(flags); @@ -110,6 +110,7 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) { } +/* We use two lpids per VM */ static DEFINE_PER_CPU(struct kvm_vcpu *[KVMPPC_NR_LPIDS], last_vcpu_of_lpid); static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) @@ -118,10 +119,12 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) kvmppc_booke_vcpu_load(vcpu, cpu); - mtspr(SPRN_LPID, vcpu->kvm->arch.lpid); + mtspr(SPRN_LPID, get_lpid(vcpu)); mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr); mtspr(SPRN_GPIR, vcpu->vcpu_id); mtspr(SPRN_MSRP, vcpu->arch.shadow_msrp); + vcpu->arch.eplc = EPC_EGS | (get_lpid(vcpu) << EPC_ELPID_SHIFT); + vcpu->arch.epsc = vcpu->arch.eplc; mtspr(SPRN_EPLC, vcpu->arch.eplc); mtspr(SPRN_EPSC, vcpu->arch.epsc); @@ -141,12 +144,10 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) mtspr(SPRN_GESR, vcpu->arch.shared->esr); if (vcpu->arch.oldpir != mfspr(SPRN_PIR) || - __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] != vcpu) { + __get_cpu_var(last_vcpu_of_lpid)[get_lpid(vcpu)] != vcpu) { kvmppc_e500_tlbil_all(vcpu_e500); - __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] = vcpu; + __get_cpu_var(last_vcpu_of_lpid)[get_lpid(vcpu)] = vcpu; } - - kvmppc_load_guest_fp(vcpu); } static void kvmppc_core_vcpu_put_e500mc(struct kvm_vcpu *vcpu) @@ -179,6 +180,16 @@ int kvmppc_core_check_processor_compat(void) r = 0; else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0) r = 0; +#ifdef CONFIG_ALTIVEC + /* + * Since guests have the priviledge to enable AltiVec, we need AltiVec + * support in the host to save/restore their context. + * Don't use CPU_FTR_ALTIVEC to identify cores with AltiVec unit + * because it's cleared in the absence of CONFIG_ALTIVEC! + */ + else if (strcmp(cur_cpu_spec->cpu_name, "e6500") == 0) + r = 0; +#endif else r = -ENOTSUPP; @@ -194,9 +205,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) #ifdef CONFIG_64BIT vcpu->arch.shadow_epcr |= SPRN_EPCR_ICM; #endif - vcpu->arch.shadow_msrp = MSRP_UCLEP | MSRP_DEP | MSRP_PMMP; - vcpu->arch.eplc = EPC_EGS | (vcpu->kvm->arch.lpid << EPC_ELPID_SHIFT); - vcpu->arch.epsc = vcpu->arch.eplc; + vcpu->arch.shadow_msrp = MSRP_UCLEP | MSRP_PMMP; vcpu->arch.pvr = mfspr(SPRN_PVR); vcpu_e500->svr = mfspr(SPRN_SVR); @@ -356,13 +365,26 @@ static int kvmppc_core_init_vm_e500mc(struct kvm *kvm) if (lpid < 0) return lpid; + /* + * Use two lpids per VM on cores with two threads like e6500. Use + * even numbers to speedup vcpu lpid computation with consecutive lpids + * per VM. vm1 will use lpids 2 and 3, vm2 lpids 4 and 5, and so on. + */ + if (threads_per_core == 2) + lpid <<= 1; + kvm->arch.lpid = lpid; return 0; } static void kvmppc_core_destroy_vm_e500mc(struct kvm *kvm) { - kvmppc_free_lpid(kvm->arch.lpid); + int lpid = kvm->arch.lpid; + + if (threads_per_core == 2) + lpid >>= 1; + + kvmppc_free_lpid(lpid); } static struct kvmppc_ops kvm_ops_e500mc = { @@ -390,7 +412,13 @@ static int __init kvmppc_e500mc_init(void) if (r) goto err_out; - kvmppc_init_lpid(64); + /* + * Use two lpids per VM on dual threaded processors like e6500 + * to workarround the lack of tlb write conditional instruction. + * Expose half the number of available hardware lpids to the lpid + * allocator. + */ + kvmppc_init_lpid(KVMPPC_NR_LPIDS/threads_per_core); kvmppc_claim_lpid(0); /* host */ r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index e96b50d0bdab..5cc2e7af3a7b 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -219,7 +219,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) /* this default type might be overwritten by subcategories */ kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); - emulated = kvmppc_get_last_inst(vcpu, false, &inst); + emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst); if (emulated != EMULATE_DONE) return emulated; @@ -274,6 +274,21 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) } break; + case 0: + /* + * Instruction with primary opcode 0. Based on PowerISA + * these are illegal instructions. + */ + if (inst == KVMPPC_INST_SW_BREAKPOINT) { + run->exit_reason = KVM_EXIT_DEBUG; + run->debug.arch.address = kvmppc_get_pc(vcpu); + emulated = EMULATE_EXIT_USER; + advance = 0; + } else + emulated = EMULATE_FAIL; + + break; + default: emulated = EMULATE_FAIL; } diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index 0de4ffa175a9..6d3c0ee1d744 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -58,7 +58,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) /* this default type might be overwritten by subcategories */ kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); - emulated = kvmppc_get_last_inst(vcpu, false, &inst); + emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst); if (emulated != EMULATE_DONE) return emulated; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 4c79284b58be..c1f8f53cd312 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -294,7 +294,7 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) { u32 last_inst; - kvmppc_get_last_inst(vcpu, false, &last_inst); + kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); /* XXX Deliver Program interrupt to guest. */ pr_emerg("%s: emulation failed (%08x)\n", __func__, last_inst); r = RESUME_HOST; @@ -384,24 +384,16 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, } EXPORT_SYMBOL_GPL(kvmppc_ld); -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { return 0; } -void kvm_arch_hardware_disable(void *garbage) -{ -} - int kvm_arch_hardware_setup(void) { return 0; } -void kvm_arch_hardware_unsetup(void) -{ -} - void kvm_arch_check_processor_compat(void *rtn) { *(int *)rtn = kvmppc_core_check_processor_compat(); @@ -462,10 +454,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) module_put(kvm->arch.kvm_ops->owner); } -void kvm_arch_sync_events(struct kvm *kvm) -{ -} - int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; @@ -608,10 +596,6 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, return kvmppc_core_create_memslot(kvm, slot, npages); } -void kvm_arch_memslots_updated(struct kvm *kvm) -{ -} - int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem, @@ -628,10 +612,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, kvmppc_core_commit_memory_region(kvm, mem, old); } -void kvm_arch_flush_shadow_all(struct kvm *kvm) -{ -} - void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { @@ -658,7 +638,6 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { /* Make sure we're not using the vcpu anymore */ hrtimer_cancel(&vcpu->arch.dec_timer); - tasklet_kill(&vcpu->arch.tasklet); kvmppc_remove_vcpu_debugfs(vcpu); @@ -684,16 +663,12 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return kvmppc_core_pending_dec(vcpu); } -/* - * low level hrtimer wake routine. Because this runs in hardirq context - * we schedule a tasklet to do the real work. - */ enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer) { struct kvm_vcpu *vcpu; vcpu = container_of(timer, struct kvm_vcpu, arch.dec_timer); - tasklet_schedule(&vcpu->arch.tasklet); + kvmppc_decrementer_func(vcpu); return HRTIMER_NORESTART; } @@ -703,7 +678,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) int ret; hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); - tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu); vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; vcpu->arch.dec_expires = ~(u64)0; @@ -927,6 +901,103 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, } EXPORT_SYMBOL_GPL(kvmppc_handle_store); +int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r = 0; + union kvmppc_one_reg val; + int size; + + size = one_reg_size(reg->id); + if (size > sizeof(val)) + return -EINVAL; + + r = kvmppc_get_one_reg(vcpu, reg->id, &val); + if (r == -EINVAL) { + r = 0; + switch (reg->id) { +#ifdef CONFIG_ALTIVEC + case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval; + break; + case KVM_REG_PPC_VSCR: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val); + break; + case KVM_REG_PPC_VRSAVE: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + vcpu->arch.vrsave = set_reg_val(reg->id, val); + break; +#endif /* CONFIG_ALTIVEC */ + default: + r = -EINVAL; + break; + } + } + + if (r) + return r; + + if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size)) + r = -EFAULT; + + return r; +} + +int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r; + union kvmppc_one_reg val; + int size; + + size = one_reg_size(reg->id); + if (size > sizeof(val)) + return -EINVAL; + + if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size)) + return -EFAULT; + + r = kvmppc_set_one_reg(vcpu, reg->id, &val); + if (r == -EINVAL) { + r = 0; + switch (reg->id) { +#ifdef CONFIG_ALTIVEC + case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0]; + break; + case KVM_REG_PPC_VSCR: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]); + break; + case KVM_REG_PPC_VRSAVE: + val = get_reg_val(reg->id, vcpu->arch.vrsave); + break; +#endif /* CONFIG_ALTIVEC */ + default: + r = -EINVAL; + break; + } + } + + return r; +} + int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) { int r; @@ -1343,9 +1414,4 @@ int kvm_arch_init(void *opaque) return 0; } -void kvm_arch_exit(void) -{ - -} - EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr); diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index 74d1e780748b..2396dda282cd 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -35,7 +35,7 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp) return 0; /* must be 16-byte aligned */ if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) return 0; - if (sp >= prev_sp + STACK_FRAME_OVERHEAD) + if (sp >= prev_sp + STACK_FRAME_MIN_SIZE) return 1; /* * sp could decrease when we jump off an interrupt stack diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index e8bc40869cbd..7d9ee3d8c618 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -303,9 +303,13 @@ config PPC_ICSWX_USE_SIGILL If in doubt, say N here. +config SPE_POSSIBLE + def_bool y + depends on E200 || (E500 && !PPC_E500MC) + config SPE bool "SPE Support" - depends on E200 || (E500 && !PPC_E500MC) + depends on SPE_POSSIBLE default y ---help--- This option enables kernel support for the Signal Processing diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c index 97ac8dc33667..5e1ed1575aab 100644 --- a/arch/powerpc/platforms/powernv/opal-hmi.c +++ b/arch/powerpc/platforms/powernv/opal-hmi.c @@ -28,6 +28,7 @@ #include <asm/opal.h> #include <asm/cputable.h> +#include <asm/machdep.h> static int opal_hmi_handler_nb_init; struct OpalHmiEvtNode { @@ -185,4 +186,4 @@ static int __init opal_hmi_handler_init(void) } return 0; } -subsys_initcall(opal_hmi_handler_init); +machine_subsys_initcall(powernv, opal_hmi_handler_init); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index b44eec3e8dbd..4b005ae5dc4b 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -322,7 +322,7 @@ static void opal_handle_message(void) /* check for errors. */ if (ret) { - pr_warning("%s: Failed to retrive opal message, err=%lld\n", + pr_warning("%s: Failed to retrieve opal message, err=%lld\n", __func__, ret); return; } diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index c904583baf4b..34064f50945e 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -113,7 +113,7 @@ out: static int pseries_remove_mem_node(struct device_node *np) { const char *type; - const unsigned int *regs; + const __be32 *regs; unsigned long base; unsigned int lmb_size; int ret = -EINVAL; @@ -126,14 +126,14 @@ static int pseries_remove_mem_node(struct device_node *np) return 0; /* - * Find the bae address and size of the memblock + * Find the base address and size of the memblock */ regs = of_get_property(np, "reg", NULL); if (!regs) return ret; - base = *(unsigned long *)regs; - lmb_size = regs[3]; + base = be64_to_cpu(*(unsigned long *)regs); + lmb_size = be32_to_cpu(regs[3]); pseries_remove_memblock(base, lmb_size); return 0; @@ -153,7 +153,7 @@ static inline int pseries_remove_mem_node(struct device_node *np) static int pseries_add_mem_node(struct device_node *np) { const char *type; - const unsigned int *regs; + const __be32 *regs; unsigned long base; unsigned int lmb_size; int ret = -EINVAL; @@ -172,8 +172,8 @@ static int pseries_add_mem_node(struct device_node *np) if (!regs) return ret; - base = *(unsigned long *)regs; - lmb_size = regs[3]; + base = be64_to_cpu(*(unsigned long *)regs); + lmb_size = be32_to_cpu(regs[3]); /* * Update memory region to represent the memory add @@ -187,44 +187,46 @@ static int pseries_update_drconf_memory(struct of_prop_reconfig *pr) struct of_drconf_cell *new_drmem, *old_drmem; unsigned long memblock_size; u32 entries; - u32 *p; + __be32 *p; int i, rc = -EINVAL; memblock_size = pseries_memory_block_size(); if (!memblock_size) return -EINVAL; - p = (u32 *) pr->old_prop->value; + p = (__be32 *) pr->old_prop->value; if (!p) return -EINVAL; /* The first int of the property is the number of lmb's described * by the property. This is followed by an array of of_drconf_cell - * entries. Get the niumber of entries and skip to the array of + * entries. Get the number of entries and skip to the array of * of_drconf_cell's. */ - entries = *p++; + entries = be32_to_cpu(*p++); old_drmem = (struct of_drconf_cell *)p; - p = (u32 *)pr->prop->value; + p = (__be32 *)pr->prop->value; p++; new_drmem = (struct of_drconf_cell *)p; for (i = 0; i < entries; i++) { - if ((old_drmem[i].flags & DRCONF_MEM_ASSIGNED) && - (!(new_drmem[i].flags & DRCONF_MEM_ASSIGNED))) { - rc = pseries_remove_memblock(old_drmem[i].base_addr, + if ((be32_to_cpu(old_drmem[i].flags) & DRCONF_MEM_ASSIGNED) && + (!(be32_to_cpu(new_drmem[i].flags) & DRCONF_MEM_ASSIGNED))) { + rc = pseries_remove_memblock( + be64_to_cpu(old_drmem[i].base_addr), memblock_size); break; - } else if ((!(old_drmem[i].flags & DRCONF_MEM_ASSIGNED)) && - (new_drmem[i].flags & DRCONF_MEM_ASSIGNED)) { - rc = memblock_add(old_drmem[i].base_addr, + } else if ((!(be32_to_cpu(old_drmem[i].flags) & + DRCONF_MEM_ASSIGNED)) && + (be32_to_cpu(new_drmem[i].flags) & + DRCONF_MEM_ASSIGNED)) { + rc = memblock_add(be64_to_cpu(old_drmem[i].base_addr), memblock_size); rc = (rc < 0) ? -EINVAL : 0; break; } } - return rc; } diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index 3ca1894ade09..9d94fdd9f525 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -63,6 +63,7 @@ CONFIG_CRASH_DUMP=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_BINFMT_MISC=m CONFIG_HIBERNATION=y +CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=m CONFIG_UNIX=y diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index 4830aa6e6f53..90f514baa37d 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -61,6 +61,7 @@ CONFIG_CRASH_DUMP=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_BINFMT_MISC=m CONFIG_HIBERNATION=y +CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=m CONFIG_UNIX=y diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index 61db449bf309..13559d32af69 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -59,6 +59,7 @@ CONFIG_CRASH_DUMP=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_BINFMT_MISC=m CONFIG_HIBERNATION=y +CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=m CONFIG_UNIX=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 948e0e057a23..e376789f2d8d 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -23,6 +23,7 @@ CONFIG_CRASH_DUMP=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set # CONFIG_SECCOMP is not set # CONFIG_IUCV is not set +CONFIG_NET=y CONFIG_ATM=y CONFIG_ATM_LANE=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 2e56498a40df..fab35a8efa4f 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -50,6 +50,7 @@ CONFIG_CMA=y CONFIG_CRASH_DUMP=y CONFIG_BINFMT_MISC=m CONFIG_HIBERNATION=y +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_NET_KEY=y diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index 2fcccc0c997c..c81661e756a0 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -17,12 +17,12 @@ #define IPL_PARM_BLK_FCP_LEN (sizeof(struct ipl_list_hdr) + \ sizeof(struct ipl_block_fcp)) -#define IPL_PARM_BLK0_FCP_LEN (sizeof(struct ipl_block_fcp) + 8) +#define IPL_PARM_BLK0_FCP_LEN (sizeof(struct ipl_block_fcp) + 16) #define IPL_PARM_BLK_CCW_LEN (sizeof(struct ipl_list_hdr) + \ sizeof(struct ipl_block_ccw)) -#define IPL_PARM_BLK0_CCW_LEN (sizeof(struct ipl_block_ccw) + 8) +#define IPL_PARM_BLK0_CCW_LEN (sizeof(struct ipl_block_ccw) + 16) #define IPL_MAX_SUPPORTED_VERSION (0) @@ -38,10 +38,11 @@ struct ipl_list_hdr { u8 pbt; u8 flags; u16 reserved2; + u8 loadparm[8]; } __attribute__((packed)); struct ipl_block_fcp { - u8 reserved1[313-1]; + u8 reserved1[305-1]; u8 opt; u8 reserved2[3]; u16 reserved3; @@ -62,7 +63,6 @@ struct ipl_block_fcp { offsetof(struct ipl_block_fcp, scp_data))) struct ipl_block_ccw { - u8 load_parm[8]; u8 reserved1[84]; u8 reserved2[2]; u16 devno; diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 773bef7614d8..2175f911a73a 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -13,8 +13,11 @@ #ifndef ASM_KVM_HOST_H #define ASM_KVM_HOST_H + +#include <linux/types.h> #include <linux/hrtimer.h> #include <linux/interrupt.h> +#include <linux/kvm_types.h> #include <linux/kvm_host.h> #include <linux/kvm.h> #include <asm/debug.h> @@ -154,7 +157,9 @@ struct kvm_s390_sie_block { __u8 armid; /* 0x00e3 */ __u8 reservede4[4]; /* 0x00e4 */ __u64 tecmc; /* 0x00e8 */ - __u8 reservedf0[16]; /* 0x00f0 */ + __u8 reservedf0[12]; /* 0x00f0 */ +#define CRYCB_FORMAT1 0x00000001 + __u32 crycbd; /* 0x00fc */ __u64 gcr[16]; /* 0x0100 */ __u64 gbea; /* 0x0180 */ __u8 reserved188[24]; /* 0x0188 */ @@ -187,6 +192,7 @@ struct kvm_vcpu_stat { u32 exit_stop_request; u32 exit_validity; u32 exit_instruction; + u32 halt_wakeup; u32 instruction_lctl; u32 instruction_lctlg; u32 instruction_stctl; @@ -407,6 +413,15 @@ struct s390_io_adapter { #define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8) #define MAX_S390_ADAPTER_MAPS 256 +struct kvm_s390_crypto { + struct kvm_s390_crypto_cb *crycb; + __u32 crycbd; +}; + +struct kvm_s390_crypto_cb { + __u8 reserved00[128]; /* 0x0000 */ +}; + struct kvm_arch{ struct sca_block *sca; debug_info_t *dbf; @@ -420,6 +435,7 @@ struct kvm_arch{ struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS]; wait_queue_head_t ipte_wq; spinlock_t start_stop_lock; + struct kvm_s390_crypto crypto; }; #define KVM_HVA_ERR_BAD (-1UL) @@ -431,8 +447,6 @@ static inline bool kvm_is_error_hva(unsigned long addr) } #define ASYNC_PF_PER_VCPU 64 -struct kvm_vcpu; -struct kvm_async_pf; struct kvm_arch_async_pf { unsigned long pfault_token; }; @@ -450,4 +464,18 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, extern int sie64a(struct kvm_s390_sie_block *, u64 *); extern char sie_exit; + +static inline void kvm_arch_hardware_disable(void) {} +static inline void kvm_arch_check_processor_compat(void *rtn) {} +static inline void kvm_arch_exit(void) {} +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} +static inline void kvm_arch_free_memslot(struct kvm *kvm, + struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} +static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} +static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) {} + #endif diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 9e18a61d3df3..d39a31c3cdf2 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -18,9 +18,9 @@ unsigned long *crst_table_alloc(struct mm_struct *); void crst_table_free(struct mm_struct *, unsigned long *); -unsigned long *page_table_alloc(struct mm_struct *, unsigned long); +unsigned long *page_table_alloc(struct mm_struct *); void page_table_free(struct mm_struct *, unsigned long *); -void page_table_free_rcu(struct mmu_gather *, unsigned long *); +void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long); void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long, bool init_skey); @@ -145,8 +145,8 @@ static inline void pmd_populate(struct mm_struct *mm, /* * page table entry allocation/free routines. */ -#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr)) -#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr)) +#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm)) +#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm)) #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte) #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 5efb2fe186e7..b7054356cc98 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -30,6 +30,7 @@ #include <linux/sched.h> #include <linux/mm_types.h> #include <linux/page-flags.h> +#include <linux/radix-tree.h> #include <asm/bug.h> #include <asm/page.h> @@ -789,82 +790,67 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) /** * struct gmap_struct - guest address space + * @crst_list: list of all crst tables used in the guest address space * @mm: pointer to the parent mm_struct + * @guest_to_host: radix tree with guest to host address translation + * @host_to_guest: radix tree with pointer to segment table entries + * @guest_table_lock: spinlock to protect all entries in the guest page table * @table: pointer to the page directory * @asce: address space control element for gmap page table - * @crst_list: list of all crst tables used in the guest address space * @pfault_enabled: defines if pfaults are applicable for the guest */ struct gmap { struct list_head list; + struct list_head crst_list; struct mm_struct *mm; + struct radix_tree_root guest_to_host; + struct radix_tree_root host_to_guest; + spinlock_t guest_table_lock; unsigned long *table; unsigned long asce; + unsigned long asce_end; void *private; - struct list_head crst_list; bool pfault_enabled; }; /** - * struct gmap_rmap - reverse mapping for segment table entries - * @gmap: pointer to the gmap_struct - * @entry: pointer to a segment table entry - * @vmaddr: virtual address in the guest address space - */ -struct gmap_rmap { - struct list_head list; - struct gmap *gmap; - unsigned long *entry; - unsigned long vmaddr; -}; - -/** - * struct gmap_pgtable - gmap information attached to a page table - * @vmaddr: address of the 1MB segment in the process virtual memory - * @mapper: list of segment table entries mapping a page table - */ -struct gmap_pgtable { - unsigned long vmaddr; - struct list_head mapper; -}; - -/** * struct gmap_notifier - notify function block for page invalidation * @notifier_call: address of callback function */ struct gmap_notifier { struct list_head list; - void (*notifier_call)(struct gmap *gmap, unsigned long address); + void (*notifier_call)(struct gmap *gmap, unsigned long gaddr); }; -struct gmap *gmap_alloc(struct mm_struct *mm); +struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit); void gmap_free(struct gmap *gmap); void gmap_enable(struct gmap *gmap); void gmap_disable(struct gmap *gmap); int gmap_map_segment(struct gmap *gmap, unsigned long from, unsigned long to, unsigned long len); int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); -unsigned long __gmap_translate(unsigned long address, struct gmap *); -unsigned long gmap_translate(unsigned long address, struct gmap *); -unsigned long __gmap_fault(unsigned long address, struct gmap *); -unsigned long gmap_fault(unsigned long address, struct gmap *); -void gmap_discard(unsigned long from, unsigned long to, struct gmap *); -void __gmap_zap(unsigned long address, struct gmap *); +unsigned long __gmap_translate(struct gmap *, unsigned long gaddr); +unsigned long gmap_translate(struct gmap *, unsigned long gaddr); +int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr); +int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags); +void gmap_discard(struct gmap *, unsigned long from, unsigned long to); +void __gmap_zap(struct gmap *, unsigned long gaddr); bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *); void gmap_register_ipte_notifier(struct gmap_notifier *); void gmap_unregister_ipte_notifier(struct gmap_notifier *); int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len); -void gmap_do_ipte_notify(struct mm_struct *, pte_t *); +void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *); static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE if (pgste_val(pgste) & PGSTE_IN_BIT) { pgste_val(pgste) &= ~PGSTE_IN_BIT; - gmap_do_ipte_notify(mm, ptep); + gmap_do_ipte_notify(mm, addr, ptep); } #endif return pgste; @@ -1110,7 +1096,7 @@ static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm, pgste_val(pgste) &= ~PGSTE_UC_BIT; pte = *ptep; if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { - pgste = pgste_ipte_notify(mm, ptep, pgste); + pgste = pgste_ipte_notify(mm, addr, ptep, pgste); __ptep_ipte(addr, ptep); if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) pte_val(pte) |= _PAGE_PROTECT; @@ -1132,7 +1118,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, if (mm_has_pgste(vma->vm_mm)) { pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste); + pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste); } oldpte = pte = *ptep; @@ -1179,7 +1165,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(mm, ptep, pgste); + pgste = pgste_ipte_notify(mm, address, ptep, pgste); } pte = *ptep; @@ -1203,7 +1189,7 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); - pgste_ipte_notify(mm, ptep, pgste); + pgste_ipte_notify(mm, address, ptep, pgste); } pte = *ptep; @@ -1240,7 +1226,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, if (mm_has_pgste(vma->vm_mm)) { pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste); + pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste); } pte = *ptep; @@ -1274,7 +1260,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, if (!full && mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(mm, ptep, pgste); + pgste = pgste_ipte_notify(mm, address, ptep, pgste); } pte = *ptep; @@ -1299,7 +1285,7 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm, if (pte_write(pte)) { if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(mm, ptep, pgste); + pgste = pgste_ipte_notify(mm, address, ptep, pgste); } ptep_flush_lazy(mm, address, ptep); @@ -1325,7 +1311,7 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma, return 0; if (mm_has_pgste(vma->vm_mm)) { pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste); + pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste); } ptep_flush_direct(vma->vm_mm, address, ptep); diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index a25f09fbaf36..572c59949004 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -105,7 +105,7 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long address) { - page_table_free_rcu(tlb, (unsigned long *) pte); + page_table_free_rcu(tlb, (unsigned long *) pte, address); } /* diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 0fc26430a1e5..48eda3ab4944 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -111,12 +111,22 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_GPRS (1UL << 1) #define KVM_SYNC_ACRS (1UL << 2) #define KVM_SYNC_CRS (1UL << 3) +#define KVM_SYNC_ARCH0 (1UL << 4) +#define KVM_SYNC_PFAULT (1UL << 5) /* definition of registers in kvm_run */ struct kvm_sync_regs { __u64 prefix; /* prefix register */ __u64 gprs[16]; /* general purpose registers */ __u32 acrs[16]; /* access registers */ __u64 crs[16]; /* control registers */ + __u64 todpr; /* tod programmable register [ARCH0] */ + __u64 cputm; /* cpu timer [ARCH0] */ + __u64 ckc; /* clock comparator [ARCH0] */ + __u64 pp; /* program parameter [ARCH0] */ + __u64 gbea; /* guest breaking-event address [ARCH0] */ + __u64 pft; /* pfault token [PFAULT] */ + __u64 pfs; /* pfault select [PFAULT] */ + __u64 pfc; /* pfault compare [PFAULT] */ }; #define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 22aac5885ba2..39badb9ca0b3 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -455,22 +455,6 @@ DEFINE_IPL_ATTR_RO(ipl_fcp, bootprog, "%lld\n", (unsigned long long) DEFINE_IPL_ATTR_RO(ipl_fcp, br_lba, "%lld\n", (unsigned long long) IPL_PARMBLOCK_START->ipl_info.fcp.br_lba); -static struct attribute *ipl_fcp_attrs[] = { - &sys_ipl_type_attr.attr, - &sys_ipl_device_attr.attr, - &sys_ipl_fcp_wwpn_attr.attr, - &sys_ipl_fcp_lun_attr.attr, - &sys_ipl_fcp_bootprog_attr.attr, - &sys_ipl_fcp_br_lba_attr.attr, - NULL, -}; - -static struct attribute_group ipl_fcp_attr_group = { - .attrs = ipl_fcp_attrs, -}; - -/* CCW ipl device attributes */ - static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { @@ -487,6 +471,23 @@ static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj, static struct kobj_attribute sys_ipl_ccw_loadparm_attr = __ATTR(loadparm, 0444, ipl_ccw_loadparm_show, NULL); +static struct attribute *ipl_fcp_attrs[] = { + &sys_ipl_type_attr.attr, + &sys_ipl_device_attr.attr, + &sys_ipl_fcp_wwpn_attr.attr, + &sys_ipl_fcp_lun_attr.attr, + &sys_ipl_fcp_bootprog_attr.attr, + &sys_ipl_fcp_br_lba_attr.attr, + &sys_ipl_ccw_loadparm_attr.attr, + NULL, +}; + +static struct attribute_group ipl_fcp_attr_group = { + .attrs = ipl_fcp_attrs, +}; + +/* CCW ipl device attributes */ + static struct attribute *ipl_ccw_attrs_vm[] = { &sys_ipl_type_attr.attr, &sys_ipl_device_attr.attr, @@ -765,28 +766,10 @@ DEFINE_IPL_ATTR_RW(reipl_fcp, br_lba, "%lld\n", "%lld\n", DEFINE_IPL_ATTR_RW(reipl_fcp, device, "0.0.%04llx\n", "0.0.%llx\n", reipl_block_fcp->ipl_info.fcp.devno); -static struct attribute *reipl_fcp_attrs[] = { - &sys_reipl_fcp_device_attr.attr, - &sys_reipl_fcp_wwpn_attr.attr, - &sys_reipl_fcp_lun_attr.attr, - &sys_reipl_fcp_bootprog_attr.attr, - &sys_reipl_fcp_br_lba_attr.attr, - NULL, -}; - -static struct attribute_group reipl_fcp_attr_group = { - .attrs = reipl_fcp_attrs, -}; - -/* CCW reipl device attributes */ - -DEFINE_IPL_ATTR_RW(reipl_ccw, device, "0.0.%04llx\n", "0.0.%llx\n", - reipl_block_ccw->ipl_info.ccw.devno); - static void reipl_get_ascii_loadparm(char *loadparm, struct ipl_parameter_block *ibp) { - memcpy(loadparm, ibp->ipl_info.ccw.load_parm, LOADPARM_LEN); + memcpy(loadparm, ibp->hdr.loadparm, LOADPARM_LEN); EBCASC(loadparm, LOADPARM_LEN); loadparm[LOADPARM_LEN] = 0; strim(loadparm); @@ -821,13 +804,50 @@ static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb, return -EINVAL; } /* initialize loadparm with blanks */ - memset(ipb->ipl_info.ccw.load_parm, ' ', LOADPARM_LEN); + memset(ipb->hdr.loadparm, ' ', LOADPARM_LEN); /* copy and convert to ebcdic */ - memcpy(ipb->ipl_info.ccw.load_parm, buf, lp_len); - ASCEBC(ipb->ipl_info.ccw.load_parm, LOADPARM_LEN); + memcpy(ipb->hdr.loadparm, buf, lp_len); + ASCEBC(ipb->hdr.loadparm, LOADPARM_LEN); return len; } +/* FCP wrapper */ +static ssize_t reipl_fcp_loadparm_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return reipl_generic_loadparm_show(reipl_block_fcp, page); +} + +static ssize_t reipl_fcp_loadparm_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return reipl_generic_loadparm_store(reipl_block_fcp, buf, len); +} + +static struct kobj_attribute sys_reipl_fcp_loadparm_attr = + __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_fcp_loadparm_show, + reipl_fcp_loadparm_store); + +static struct attribute *reipl_fcp_attrs[] = { + &sys_reipl_fcp_device_attr.attr, + &sys_reipl_fcp_wwpn_attr.attr, + &sys_reipl_fcp_lun_attr.attr, + &sys_reipl_fcp_bootprog_attr.attr, + &sys_reipl_fcp_br_lba_attr.attr, + &sys_reipl_fcp_loadparm_attr.attr, + NULL, +}; + +static struct attribute_group reipl_fcp_attr_group = { + .attrs = reipl_fcp_attrs, +}; + +/* CCW reipl device attributes */ + +DEFINE_IPL_ATTR_RW(reipl_ccw, device, "0.0.%04llx\n", "0.0.%llx\n", + reipl_block_ccw->ipl_info.ccw.devno); + /* NSS wrapper */ static ssize_t reipl_nss_loadparm_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) @@ -1125,11 +1145,10 @@ static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb) /* LOADPARM */ /* check if read scp info worked and set loadparm */ if (sclp_ipl_info.is_valid) - memcpy(ipb->ipl_info.ccw.load_parm, - &sclp_ipl_info.loadparm, LOADPARM_LEN); + memcpy(ipb->hdr.loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN); else /* read scp info failed: set empty loadparm (EBCDIC blanks) */ - memset(ipb->ipl_info.ccw.load_parm, 0x40, LOADPARM_LEN); + memset(ipb->hdr.loadparm, 0x40, LOADPARM_LEN); ipb->hdr.flags = DIAG308_FLAGS_LP_VALID; /* VM PARM */ @@ -1251,9 +1270,16 @@ static int __init reipl_fcp_init(void) return rc; } - if (ipl_info.type == IPL_TYPE_FCP) + if (ipl_info.type == IPL_TYPE_FCP) { memcpy(reipl_block_fcp, IPL_PARMBLOCK_START, PAGE_SIZE); - else { + /* + * Fix loadparm: There are systems where the (SCSI) LOADPARM + * is invalid in the SCSI IPL parameter block, so take it + * always from sclp_ipl_info. + */ + memcpy(reipl_block_fcp->hdr.loadparm, sclp_ipl_info.loadparm, + LOADPARM_LEN); + } else { reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN; reipl_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION; reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN; @@ -1864,7 +1890,23 @@ static void __init shutdown_actions_init(void) static int __init s390_ipl_init(void) { + char str[8] = {0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40}; + sclp_get_ipl_info(&sclp_ipl_info); + /* + * Fix loadparm: There are systems where the (SCSI) LOADPARM + * returned by read SCP info is invalid (contains EBCDIC blanks) + * when the system has been booted via diag308. In that case we use + * the value from diag308, if available. + * + * There are also systems where diag308 store does not work in + * case the system is booted from HMC. Fortunately in this case + * READ SCP info provides the correct value. + */ + if (memcmp(sclp_ipl_info.loadparm, str, sizeof(str)) == 0 && + diag308_set_works) + memcpy(sclp_ipl_info.loadparm, ipl_block.hdr.loadparm, + LOADPARM_LEN); shutdown_actions_init(); shutdown_triggers_init(); return 0; diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S index 65fc3979c2f1..7cf18f8d4cb4 100644 --- a/arch/s390/kernel/vdso32/clock_gettime.S +++ b/arch/s390/kernel/vdso32/clock_gettime.S @@ -22,13 +22,11 @@ __kernel_clock_gettime: basr %r5,0 0: al %r5,21f-0b(%r5) /* get &_vdso_data */ chi %r2,__CLOCK_REALTIME - je 10f + je 11f chi %r2,__CLOCK_MONOTONIC jne 19f /* CLOCK_MONOTONIC */ - ltr %r3,%r3 - jz 9f /* tp == NULL */ 1: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ tml %r4,0x0001 /* pending update ? loop */ jnz 1b @@ -67,12 +65,10 @@ __kernel_clock_gettime: j 6b 8: st %r2,0(%r3) /* store tp->tv_sec */ st %r1,4(%r3) /* store tp->tv_nsec */ -9: lhi %r2,0 + lhi %r2,0 br %r14 /* CLOCK_REALTIME */ -10: ltr %r3,%r3 /* tp == NULL */ - jz 18f 11: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ tml %r4,0x0001 /* pending update ? loop */ jnz 11b @@ -111,7 +107,7 @@ __kernel_clock_gettime: j 15b 17: st %r2,0(%r3) /* store tp->tv_sec */ st %r1,4(%r3) /* store tp->tv_nsec */ -18: lhi %r2,0 + lhi %r2,0 br %r14 /* Fallback to system call */ diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S index 91940ed33a4a..3f34e09db5f4 100644 --- a/arch/s390/kernel/vdso64/clock_gettime.S +++ b/arch/s390/kernel/vdso64/clock_gettime.S @@ -21,7 +21,7 @@ __kernel_clock_gettime: .cfi_startproc larl %r5,_vdso_data cghi %r2,__CLOCK_REALTIME - je 4f + je 5f cghi %r2,__CLOCK_THREAD_CPUTIME_ID je 9f cghi %r2,-2 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */ @@ -30,8 +30,6 @@ __kernel_clock_gettime: jne 12f /* CLOCK_MONOTONIC */ - ltgr %r3,%r3 - jz 3f /* tp == NULL */ 0: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ tmll %r4,0x0001 /* pending update ? loop */ jnz 0b @@ -53,12 +51,10 @@ __kernel_clock_gettime: j 1b 2: stg %r0,0(%r3) /* store tp->tv_sec */ stg %r1,8(%r3) /* store tp->tv_nsec */ -3: lghi %r2,0 + lghi %r2,0 br %r14 /* CLOCK_REALTIME */ -4: ltr %r3,%r3 /* tp == NULL */ - jz 8f 5: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ tmll %r4,0x0001 /* pending update ? loop */ jnz 5b @@ -80,7 +76,7 @@ __kernel_clock_gettime: j 6b 7: stg %r0,0(%r3) /* store tp->tv_sec */ stg %r1,8(%r3) /* store tp->tv_nsec */ -8: lghi %r2,0 + lghi %r2,0 br %r14 /* CLOCK_THREAD_CPUTIME_ID for this thread */ diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 59bd8f991b98..9254afff250c 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -28,22 +28,32 @@ static int diag_release_pages(struct kvm_vcpu *vcpu) start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096; - if (start & ~PAGE_MASK || end & ~PAGE_MASK || start > end + if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end || start < 2 * PAGE_SIZE) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end); vcpu->stat.diagnose_10++; - /* we checked for start > end above */ - if (end < prefix || start >= prefix + 2 * PAGE_SIZE) { - gmap_discard(start, end, vcpu->arch.gmap); + /* + * We checked for start >= end above, so lets check for the + * fast path (no prefix swap page involved) + */ + if (end <= prefix || start >= prefix + 2 * PAGE_SIZE) { + gmap_discard(vcpu->arch.gmap, start, end); } else { - if (start < prefix) - gmap_discard(start, prefix, vcpu->arch.gmap); - if (end >= prefix) - gmap_discard(prefix + 2 * PAGE_SIZE, - end, vcpu->arch.gmap); + /* + * This is slow path. gmap_discard will check for start + * so lets split this into before prefix, prefix, after + * prefix and let gmap_discard make some of these calls + * NOPs. + */ + gmap_discard(vcpu->arch.gmap, start, prefix); + if (start <= prefix) + gmap_discard(vcpu->arch.gmap, 0, 4096); + if (end > prefix + 4096) + gmap_discard(vcpu->arch.gmap, 4096, 8192); + gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end); } return 0; } diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 4653ac6e182b..0f961a1c64b3 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -254,8 +254,7 @@ static void ipte_unlock_simple(struct kvm_vcpu *vcpu) new = old = ACCESS_ONCE(*ic); new.k = 0; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); - if (!ipte_lock_count) - wake_up(&vcpu->kvm->arch.ipte_wq); + wake_up(&vcpu->kvm->arch.ipte_wq); out: mutex_unlock(&ipte_mutex); } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index f4c819bfc193..a39838457f01 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -26,8 +26,9 @@ #define IOINT_SSID_MASK 0x00030000 #define IOINT_CSSID_MASK 0x03fc0000 #define IOINT_AI_MASK 0x04000000 +#define PFAULT_INIT 0x0600 -static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu); +static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu); static int is_ioint(u64 type) { @@ -76,7 +77,7 @@ static u64 int_word_to_isc_bits(u32 int_word) return (0x80 >> isc) << 24; } -static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, +static int __must_check __interrupt_is_deliverable(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt_info *inti) { switch (inti->type) { @@ -85,6 +86,7 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, return 0; if (vcpu->arch.sie_block->gcr[0] & 0x2000ul) return 1; + return 0; case KVM_S390_INT_EMERGENCY: if (psw_extint_disabled(vcpu)) return 0; @@ -205,11 +207,30 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu, } } -static int __deliver_prog_irq(struct kvm_vcpu *vcpu, - struct kvm_s390_pgm_info *pgm_info) +static u16 get_ilc(struct kvm_vcpu *vcpu) { const unsigned short table[] = { 2, 4, 4, 6 }; + + switch (vcpu->arch.sie_block->icptcode) { + case ICPT_INST: + case ICPT_INSTPROGI: + case ICPT_OPEREXC: + case ICPT_PARTEXEC: + case ICPT_IOINST: + /* last instruction only stored for these icptcodes */ + return table[vcpu->arch.sie_block->ipa >> 14]; + case ICPT_PROGI: + return vcpu->arch.sie_block->pgmilc; + default: + return 0; + } +} + +static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu, + struct kvm_s390_pgm_info *pgm_info) +{ int rc = 0; + u16 ilc = get_ilc(vcpu); switch (pgm_info->code & ~PGM_PER) { case PGM_AFX_TRANSLATION: @@ -276,25 +297,7 @@ static int __deliver_prog_irq(struct kvm_vcpu *vcpu, (u8 *) __LC_PER_ACCESS_ID); } - switch (vcpu->arch.sie_block->icptcode) { - case ICPT_INST: - case ICPT_INSTPROGI: - case ICPT_OPEREXC: - case ICPT_PARTEXEC: - case ICPT_IOINST: - /* last instruction only stored for these icptcodes */ - rc |= put_guest_lc(vcpu, table[vcpu->arch.sie_block->ipa >> 14], - (u16 *) __LC_PGM_ILC); - break; - case ICPT_PROGI: - rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->pgmilc, - (u16 *) __LC_PGM_ILC); - break; - default: - rc |= put_guest_lc(vcpu, 0, - (u16 *) __LC_PGM_ILC); - } - + rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC); rc |= put_guest_lc(vcpu, pgm_info->code, (u16 *)__LC_PGM_INT_CODE); rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW, @@ -305,7 +308,7 @@ static int __deliver_prog_irq(struct kvm_vcpu *vcpu, return rc; } -static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, +static int __must_check __do_deliver_interrupt(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt_info *inti) { const unsigned short table[] = { 2, 4, 4, 6 }; @@ -343,7 +346,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, case KVM_S390_INT_CLOCK_COMP: trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->ext.ext_params, 0); - deliver_ckc_interrupt(vcpu); + rc = deliver_ckc_interrupt(vcpu); break; case KVM_S390_INT_CPU_TIMER: trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, @@ -376,8 +379,9 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, case KVM_S390_INT_PFAULT_INIT: trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, inti->ext.ext_params2); - rc = put_guest_lc(vcpu, 0x2603, (u16 *) __LC_EXT_INT_CODE); - rc |= put_guest_lc(vcpu, 0x0600, (u16 *) __LC_EXT_CPU_ADDR); + rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, + (u16 *) __LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR); rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, @@ -501,14 +505,11 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, default: BUG(); } - if (rc) { - printk("kvm: The guest lowcore is not mapped during interrupt " - "delivery, killing userspace\n"); - do_exit(SIGKILL); - } + + return rc; } -static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu) +static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu) { int rc; @@ -518,11 +519,7 @@ static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu) rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc) { - printk("kvm: The guest lowcore is not mapped during interrupt " - "delivery, killing userspace\n"); - do_exit(SIGKILL); - } + return rc; } /* Check whether SIGP interpretation facility has an external call pending */ @@ -629,6 +626,7 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) */ vcpu->preempted = true; wake_up_interruptible(&vcpu->wq); + vcpu->stat.halt_wakeup++; } } @@ -661,12 +659,13 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl); } -void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) +int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; struct kvm_s390_interrupt_info *n, *inti = NULL; int deliver; + int rc = 0; __reset_intercept_indicators(vcpu); if (atomic_read(&li->active)) { @@ -685,16 +684,16 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) atomic_set(&li->active, 0); spin_unlock(&li->lock); if (deliver) { - __do_deliver_interrupt(vcpu, inti); + rc = __do_deliver_interrupt(vcpu, inti); kfree(inti); } - } while (deliver); + } while (!rc && deliver); } - if (kvm_cpu_has_pending_timer(vcpu)) - deliver_ckc_interrupt(vcpu); + if (!rc && kvm_cpu_has_pending_timer(vcpu)) + rc = deliver_ckc_interrupt(vcpu); - if (atomic_read(&fi->active)) { + if (!rc && atomic_read(&fi->active)) { do { deliver = 0; spin_lock(&fi->lock); @@ -711,67 +710,13 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) atomic_set(&fi->active, 0); spin_unlock(&fi->lock); if (deliver) { - __do_deliver_interrupt(vcpu, inti); - kfree(inti); - } - } while (deliver); - } -} - -void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu) -{ - struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; - struct kvm_s390_interrupt_info *n, *inti = NULL; - int deliver; - - __reset_intercept_indicators(vcpu); - if (atomic_read(&li->active)) { - do { - deliver = 0; - spin_lock(&li->lock); - list_for_each_entry_safe(inti, n, &li->list, list) { - if ((inti->type == KVM_S390_MCHK) && - __interrupt_is_deliverable(vcpu, inti)) { - list_del(&inti->list); - deliver = 1; - break; - } - __set_intercept_indicator(vcpu, inti); - } - if (list_empty(&li->list)) - atomic_set(&li->active, 0); - spin_unlock(&li->lock); - if (deliver) { - __do_deliver_interrupt(vcpu, inti); + rc = __do_deliver_interrupt(vcpu, inti); kfree(inti); } - } while (deliver); + } while (!rc && deliver); } - if (atomic_read(&fi->active)) { - do { - deliver = 0; - spin_lock(&fi->lock); - list_for_each_entry_safe(inti, n, &fi->list, list) { - if ((inti->type == KVM_S390_MCHK) && - __interrupt_is_deliverable(vcpu, inti)) { - list_del(&inti->list); - fi->irq_count--; - deliver = 1; - break; - } - __set_intercept_indicator(vcpu, inti); - } - if (list_empty(&fi->list)) - atomic_set(&fi->active, 0); - spin_unlock(&fi->lock); - if (deliver) { - __do_deliver_interrupt(vcpu, inti); - kfree(inti); - } - } while (deliver); - } + return rc; } int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) @@ -1048,7 +993,6 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, s390int->type, s390int->parm, s390int->parm64, 2); - mutex_lock(&vcpu->kvm->lock); li = &vcpu->arch.local_int; spin_lock(&li->lock); if (inti->type == KVM_S390_PROGRAM_INT) @@ -1060,7 +1004,6 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, li->action_bits |= ACTION_STOP_ON_STOP; atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); spin_unlock(&li->lock); - mutex_unlock(&vcpu->kvm->lock); kvm_s390_vcpu_wakeup(vcpu); return 0; } @@ -1300,7 +1243,7 @@ static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr) } INIT_LIST_HEAD(&map->list); map->guest_addr = addr; - map->addr = gmap_translate(addr, kvm->arch.gmap); + map->addr = gmap_translate(kvm->arch.gmap, addr); if (map->addr == -EFAULT) { ret = -EFAULT; goto out; @@ -1410,7 +1353,6 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) r = enqueue_floating_irq(dev, attr); break; case KVM_DEV_FLIC_CLEAR_IRQS: - r = 0; kvm_s390_clear_float_irqs(dev->kvm); break; case KVM_DEV_FLIC_APF_ENABLE: diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 81b0e11521e4..55aade49b6d1 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -50,6 +50,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "exit_instruction", VCPU_STAT(exit_instruction) }, { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, + { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, { "instruction_lctl", VCPU_STAT(instruction_lctl) }, { "instruction_stctl", VCPU_STAT(instruction_stctl) }, @@ -100,16 +101,12 @@ int test_vfacility(unsigned long nr) } /* Section: not file related */ -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { /* every s390 is virtualization enabled ;-) */ return 0; } -void kvm_arch_hardware_disable(void *garbage) -{ -} - static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address); int kvm_arch_hardware_setup(void) @@ -124,17 +121,10 @@ void kvm_arch_hardware_unsetup(void) gmap_unregister_ipte_notifier(&gmap_notifier); } -void kvm_arch_check_processor_compat(void *rtn) -{ -} - int kvm_arch_init(void *opaque) { - return 0; -} - -void kvm_arch_exit(void) -{ + /* Register floating interrupt controller interface. */ + return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); } /* Section: device related */ @@ -404,6 +394,22 @@ long kvm_arch_vm_ioctl(struct file *filp, return r; } +static int kvm_s390_crypto_init(struct kvm *kvm) +{ + if (!test_vfacility(76)) + return 0; + + kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb), + GFP_KERNEL | GFP_DMA); + if (!kvm->arch.crypto.crycb) + return -ENOMEM; + + kvm->arch.crypto.crycbd = (__u32) (unsigned long) kvm->arch.crypto.crycb | + CRYCB_FORMAT1; + + return 0; +} + int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int rc; @@ -441,6 +447,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (!kvm->arch.dbf) goto out_nodbf; + if (kvm_s390_crypto_init(kvm) < 0) + goto out_crypto; + spin_lock_init(&kvm->arch.float_int.lock); INIT_LIST_HEAD(&kvm->arch.float_int.list); init_waitqueue_head(&kvm->arch.ipte_wq); @@ -451,7 +460,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (type & KVM_VM_S390_UCONTROL) { kvm->arch.gmap = NULL; } else { - kvm->arch.gmap = gmap_alloc(current->mm); + kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1); if (!kvm->arch.gmap) goto out_nogmap; kvm->arch.gmap->private = kvm; @@ -465,6 +474,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) return 0; out_nogmap: + kfree(kvm->arch.crypto.crycb); +out_crypto: debug_unregister(kvm->arch.dbf); out_nodbf: free_page((unsigned long)(kvm->arch.sca)); @@ -514,15 +525,12 @@ static void kvm_free_vcpus(struct kvm *kvm) mutex_unlock(&kvm->lock); } -void kvm_arch_sync_events(struct kvm *kvm) -{ -} - void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_free_vcpus(kvm); free_page((unsigned long)(kvm->arch.sca)); debug_unregister(kvm->arch.dbf); + kfree(kvm->arch.crypto.crycb); if (!kvm_is_ucontrol(kvm)) gmap_free(kvm->arch.gmap); kvm_s390_destroy_adapters(kvm); @@ -535,7 +543,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; kvm_clear_async_pf_completion_queue(vcpu); if (kvm_is_ucontrol(vcpu->kvm)) { - vcpu->arch.gmap = gmap_alloc(current->mm); + vcpu->arch.gmap = gmap_alloc(current->mm, -1UL); if (!vcpu->arch.gmap) return -ENOMEM; vcpu->arch.gmap->private = vcpu->kvm; @@ -546,15 +554,12 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | KVM_SYNC_GPRS | KVM_SYNC_ACRS | - KVM_SYNC_CRS; + KVM_SYNC_CRS | + KVM_SYNC_ARCH0 | + KVM_SYNC_PFAULT; return 0; } -void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) -{ - /* Nothing todo */ -} - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { save_fp_ctl(&vcpu->arch.host_fpregs.fpc); @@ -607,6 +612,14 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) return 0; } +static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) +{ + if (!test_vfacility(76)) + return; + + vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd; +} + void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) { free_page(vcpu->arch.sie_block->cbrlo); @@ -653,6 +666,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; get_cpu_id(&vcpu->arch.cpu_id); vcpu->arch.cpu_id.version = 0xff; + + kvm_s390_vcpu_crypto_setup(vcpu); + return rc; } @@ -1049,6 +1065,11 @@ retry: goto retry; } + if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { + vcpu->arch.sie_block->ihcpu = 0xffff; + goto retry; + } + if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { if (!ibs_enabled(vcpu)) { trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); @@ -1085,18 +1106,8 @@ retry: */ long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) { - struct mm_struct *mm = current->mm; - hva_t hva; - long rc; - - hva = gmap_fault(gpa, vcpu->arch.gmap); - if (IS_ERR_VALUE(hva)) - return (long)hva; - down_read(&mm->mmap_sem); - rc = get_user_pages(current, mm, hva, 1, writable, 0, NULL, NULL); - up_read(&mm->mmap_sem); - - return rc < 0 ? rc : 0; + return gmap_fault(vcpu->arch.gmap, gpa, + writable ? FAULT_FLAG_WRITE : 0); } static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, @@ -1191,8 +1202,11 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) if (test_cpu_flag(CIF_MCCK_PENDING)) s390_handle_mcck(); - if (!kvm_is_ucontrol(vcpu->kvm)) - kvm_s390_deliver_pending_interrupts(vcpu); + if (!kvm_is_ucontrol(vcpu->kvm)) { + rc = kvm_s390_deliver_pending_interrupts(vcpu); + if (rc) + return rc; + } rc = kvm_s390_handle_requests(vcpu); if (rc) @@ -1296,6 +1310,48 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) return rc; } +static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; + vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; + if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) + kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); + if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { + memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); + /* some control register changes require a tlb flush */ + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + } + if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { + vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm; + vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; + vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; + vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; + vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea; + } + if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) { + vcpu->arch.pfault_token = kvm_run->s.regs.pft; + vcpu->arch.pfault_select = kvm_run->s.regs.pfs; + vcpu->arch.pfault_compare = kvm_run->s.regs.pfc; + } + kvm_run->kvm_dirty_regs = 0; +} + +static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; + kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; + kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); + memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); + kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm; + kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; + kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; + kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; + kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea; + kvm_run->s.regs.pft = vcpu->arch.pfault_token; + kvm_run->s.regs.pfs = vcpu->arch.pfault_select; + kvm_run->s.regs.pfc = vcpu->arch.pfault_compare; +} + int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int rc; @@ -1317,17 +1373,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return -EINVAL; } - vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; - vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; - if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) { - kvm_run->kvm_dirty_regs &= ~KVM_SYNC_PREFIX; - kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); - } - if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { - kvm_run->kvm_dirty_regs &= ~KVM_SYNC_CRS; - memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); - kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); - } + sync_regs(vcpu, kvm_run); might_fault(); rc = __vcpu_run(vcpu); @@ -1357,10 +1403,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) rc = 0; } - kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; - kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; - kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); - memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); + store_regs(vcpu, kvm_run); if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &sigsaved, NULL); @@ -1489,7 +1532,7 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) * Another VCPU might have used IBS while we were offline. * Let's play safe and flush the VCPU at startup. */ - vcpu->arch.sie_block->ihcpu = 0xffff; + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); spin_unlock(&vcpu->kvm->arch.start_stop_lock); return; } @@ -1644,9 +1687,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } #endif case KVM_S390_VCPU_FAULT: { - r = gmap_fault(arg, vcpu->arch.gmap); - if (!IS_ERR_VALUE(r)) - r = 0; + r = gmap_fault(vcpu->arch.gmap, arg, 0); break; } case KVM_ENABLE_CAP: @@ -1677,21 +1718,12 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) -{ -} - int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { return 0; } -void kvm_arch_memslots_updated(struct kvm *kvm) -{ -} - /* Section: memory related */ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, @@ -1737,15 +1769,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, return; } -void kvm_arch_flush_shadow_all(struct kvm *kvm) -{ -} - -void kvm_arch_flush_shadow_memslot(struct kvm *kvm, - struct kvm_memory_slot *slot) -{ -} - static int __init kvm_s390_init(void) { int ret; @@ -1764,7 +1787,7 @@ static int __init kvm_s390_init(void) return -ENOMEM; } memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16); - vfacilities[0] &= 0xff82fff3f4fc2000UL; + vfacilities[0] &= 0xff82fffbf47c2000UL; vfacilities[1] &= 0x005c000000000000UL; return 0; } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 3862fa2cefe0..244d02303182 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -70,7 +70,7 @@ static inline u32 kvm_s390_get_prefix(struct kvm_vcpu *vcpu) static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix) { vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT; - vcpu->arch.sie_block->ihcpu = 0xffff; + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); } @@ -138,8 +138,7 @@ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm) int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu); enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); -void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); -void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu); +int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu); void kvm_s390_clear_float_irqs(struct kvm *kvm); int __must_check kvm_s390_inject_vm(struct kvm *kvm, @@ -228,6 +227,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int psw_extint_disabled(struct kvm_vcpu *vcpu); void kvm_s390_destroy_adapters(struct kvm *kvm); int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu); +extern struct kvm_device_ops kvm_flic_ops; /* implemented in guestdbg.c */ void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index f89c1cd67751..72bb2dd8b9cd 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -352,13 +352,6 @@ static int handle_stfl(struct kvm_vcpu *vcpu) return 0; } -static void handle_new_psw(struct kvm_vcpu *vcpu) -{ - /* Check whether the new psw is enabled for machine checks. */ - if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_MCHECK) - kvm_s390_deliver_pending_machine_checks(vcpu); -} - #define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA) #define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL #define PSW_ADDR_24 0x0000000000ffffffUL @@ -405,7 +398,6 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE; if (!is_valid_psw(gpsw)) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - handle_new_psw(vcpu); return 0; } @@ -427,7 +419,6 @@ static int handle_lpswe(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->gpsw = new_psw; if (!is_valid_psw(&vcpu->arch.sie_block->gpsw)) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - handle_new_psw(vcpu); return 0; } @@ -738,7 +729,7 @@ static int handle_essa(struct kvm_vcpu *vcpu) /* invalid entry */ break; /* try to free backing */ - __gmap_zap(cbrle, gmap); + __gmap_zap(gmap, cbrle); } up_read(&gmap->mm->mmap_sem); if (i < entries) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 3f3b35403d0a..a2b81d6ce8a5 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -442,18 +442,15 @@ static inline int do_exception(struct pt_regs *regs, int access) down_read(&mm->mmap_sem); #ifdef CONFIG_PGSTE - gmap = (struct gmap *) - ((current->flags & PF_VCPU) ? S390_lowcore.gmap : 0); + gmap = (current->flags & PF_VCPU) ? + (struct gmap *) S390_lowcore.gmap : NULL; if (gmap) { - address = __gmap_fault(address, gmap); + current->thread.gmap_addr = address; + address = __gmap_translate(gmap, address); if (address == -EFAULT) { fault = VM_FAULT_BADMAP; goto out_up; } - if (address == -ENOMEM) { - fault = VM_FAULT_OOM; - goto out_up; - } if (gmap->pfault_enabled) flags |= FAULT_FLAG_RETRY_NOWAIT; } @@ -530,6 +527,20 @@ retry: goto retry; } } +#ifdef CONFIG_PGSTE + if (gmap) { + address = __gmap_link(gmap, current->thread.gmap_addr, + address); + if (address == -EFAULT) { + fault = VM_FAULT_BADMAP; + goto out_up; + } + if (address == -ENOMEM) { + fault = VM_FAULT_OOM; + goto out_up; + } + } +#endif fault = 0; out_up: up_read(&mm->mmap_sem); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 0c1073ed1e84..c7235e01fd67 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -43,6 +43,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); +EXPORT_SYMBOL(zero_page_mask); static void __init setup_zero_pages(void) { diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 5404a6261db9..296b61a4af59 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -145,30 +145,56 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) /** * gmap_alloc - allocate a guest address space * @mm: pointer to the parent mm_struct + * @limit: maximum size of the gmap address space * * Returns a guest address space structure. */ -struct gmap *gmap_alloc(struct mm_struct *mm) +struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit) { struct gmap *gmap; struct page *page; unsigned long *table; - + unsigned long etype, atype; + + if (limit < (1UL << 31)) { + limit = (1UL << 31) - 1; + atype = _ASCE_TYPE_SEGMENT; + etype = _SEGMENT_ENTRY_EMPTY; + } else if (limit < (1UL << 42)) { + limit = (1UL << 42) - 1; + atype = _ASCE_TYPE_REGION3; + etype = _REGION3_ENTRY_EMPTY; + } else if (limit < (1UL << 53)) { + limit = (1UL << 53) - 1; + atype = _ASCE_TYPE_REGION2; + etype = _REGION2_ENTRY_EMPTY; + } else { + limit = -1UL; + atype = _ASCE_TYPE_REGION1; + etype = _REGION1_ENTRY_EMPTY; + } gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL); if (!gmap) goto out; INIT_LIST_HEAD(&gmap->crst_list); + INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL); + INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC); + spin_lock_init(&gmap->guest_table_lock); gmap->mm = mm; page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); if (!page) goto out_free; + page->index = 0; list_add(&page->lru, &gmap->crst_list); table = (unsigned long *) page_to_phys(page); - crst_table_init(table, _REGION1_ENTRY_EMPTY); + crst_table_init(table, etype); gmap->table = table; - gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | __pa(table); + gmap->asce = atype | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | __pa(table); + gmap->asce_end = limit; + down_write(&mm->mmap_sem); list_add(&gmap->list, &mm->context.gmap_list); + up_write(&mm->mmap_sem); return gmap; out_free: @@ -178,36 +204,38 @@ out: } EXPORT_SYMBOL_GPL(gmap_alloc); -static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table) -{ - struct gmap_pgtable *mp; - struct gmap_rmap *rmap; - struct page *page; - - if (*table & _SEGMENT_ENTRY_INVALID) - return 0; - page = pfn_to_page(*table >> PAGE_SHIFT); - mp = (struct gmap_pgtable *) page->index; - list_for_each_entry(rmap, &mp->mapper, list) { - if (rmap->entry != table) - continue; - list_del(&rmap->list); - kfree(rmap); - break; - } - *table = mp->vmaddr | _SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_PROTECT; - return 1; -} - static void gmap_flush_tlb(struct gmap *gmap) { if (MACHINE_HAS_IDTE) - __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table | - _ASCE_TYPE_REGION1); + __tlb_flush_asce(gmap->mm, gmap->asce); else __tlb_flush_global(); } +static void gmap_radix_tree_free(struct radix_tree_root *root) +{ + struct radix_tree_iter iter; + unsigned long indices[16]; + unsigned long index; + void **slot; + int i, nr; + + /* A radix tree is freed by deleting all of its entries */ + index = 0; + do { + nr = 0; + radix_tree_for_each_slot(slot, root, &iter, index) { + indices[nr] = iter.index; + if (++nr == 16) + break; + } + for (i = 0; i < nr; i++) { + index = indices[i]; + radix_tree_delete(root, index); + } + } while (nr > 0); +} + /** * gmap_free - free a guest address space * @gmap: pointer to the guest address space structure @@ -215,31 +243,21 @@ static void gmap_flush_tlb(struct gmap *gmap) void gmap_free(struct gmap *gmap) { struct page *page, *next; - unsigned long *table; - int i; - /* Flush tlb. */ if (MACHINE_HAS_IDTE) - __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table | - _ASCE_TYPE_REGION1); + __tlb_flush_asce(gmap->mm, gmap->asce); else __tlb_flush_global(); /* Free all segment & region tables. */ - down_read(&gmap->mm->mmap_sem); - spin_lock(&gmap->mm->page_table_lock); - list_for_each_entry_safe(page, next, &gmap->crst_list, lru) { - table = (unsigned long *) page_to_phys(page); - if ((*table & _REGION_ENTRY_TYPE_MASK) == 0) - /* Remove gmap rmap structures for segment table. */ - for (i = 0; i < PTRS_PER_PMD; i++, table++) - gmap_unlink_segment(gmap, table); + list_for_each_entry_safe(page, next, &gmap->crst_list, lru) __free_pages(page, ALLOC_ORDER); - } - spin_unlock(&gmap->mm->page_table_lock); - up_read(&gmap->mm->mmap_sem); + gmap_radix_tree_free(&gmap->guest_to_host); + gmap_radix_tree_free(&gmap->host_to_guest); + down_write(&gmap->mm->mmap_sem); list_del(&gmap->list); + up_write(&gmap->mm->mmap_sem); kfree(gmap); } EXPORT_SYMBOL_GPL(gmap_free); @@ -267,42 +285,97 @@ EXPORT_SYMBOL_GPL(gmap_disable); /* * gmap_alloc_table is assumed to be called with mmap_sem held */ -static int gmap_alloc_table(struct gmap *gmap, - unsigned long *table, unsigned long init) - __releases(&gmap->mm->page_table_lock) - __acquires(&gmap->mm->page_table_lock) +static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, + unsigned long init, unsigned long gaddr) { struct page *page; unsigned long *new; /* since we dont free the gmap table until gmap_free we can unlock */ - spin_unlock(&gmap->mm->page_table_lock); page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); - spin_lock(&gmap->mm->page_table_lock); if (!page) return -ENOMEM; new = (unsigned long *) page_to_phys(page); crst_table_init(new, init); + spin_lock(&gmap->mm->page_table_lock); if (*table & _REGION_ENTRY_INVALID) { list_add(&page->lru, &gmap->crst_list); *table = (unsigned long) new | _REGION_ENTRY_LENGTH | (*table & _REGION_ENTRY_TYPE_MASK); - } else + page->index = gaddr; + page = NULL; + } + spin_unlock(&gmap->mm->page_table_lock); + if (page) __free_pages(page, ALLOC_ORDER); return 0; } /** + * __gmap_segment_gaddr - find virtual address from segment pointer + * @entry: pointer to a segment table entry in the guest address space + * + * Returns the virtual address in the guest address space for the segment + */ +static unsigned long __gmap_segment_gaddr(unsigned long *entry) +{ + struct page *page; + unsigned long offset; + + offset = (unsigned long) entry / sizeof(unsigned long); + offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE; + page = pmd_to_page((pmd_t *) entry); + return page->index + offset; +} + +/** + * __gmap_unlink_by_vmaddr - unlink a single segment via a host address + * @gmap: pointer to the guest address space structure + * @vmaddr: address in the host process address space + * + * Returns 1 if a TLB flush is required + */ +static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr) +{ + unsigned long *entry; + int flush = 0; + + spin_lock(&gmap->guest_table_lock); + entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); + if (entry) { + flush = (*entry != _SEGMENT_ENTRY_INVALID); + *entry = _SEGMENT_ENTRY_INVALID; + } + spin_unlock(&gmap->guest_table_lock); + return flush; +} + +/** + * __gmap_unmap_by_gaddr - unmap a single segment via a guest address + * @gmap: pointer to the guest address space structure + * @gaddr: address in the guest address space + * + * Returns 1 if a TLB flush is required + */ +static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr) +{ + unsigned long vmaddr; + + vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host, + gaddr >> PMD_SHIFT); + return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0; +} + +/** * gmap_unmap_segment - unmap segment from the guest address space * @gmap: pointer to the guest address space structure - * @addr: address in the guest address space + * @to: address in the guest address space * @len: length of the memory area to unmap * * Returns 0 if the unmap succeeded, -EINVAL if not. */ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) { - unsigned long *table; unsigned long off; int flush; @@ -312,31 +385,10 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) return -EINVAL; flush = 0; - down_read(&gmap->mm->mmap_sem); - spin_lock(&gmap->mm->page_table_lock); - for (off = 0; off < len; off += PMD_SIZE) { - /* Walk the guest addr space page table */ - table = gmap->table + (((to + off) >> 53) & 0x7ff); - if (*table & _REGION_ENTRY_INVALID) - goto out; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + (((to + off) >> 42) & 0x7ff); - if (*table & _REGION_ENTRY_INVALID) - goto out; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + (((to + off) >> 31) & 0x7ff); - if (*table & _REGION_ENTRY_INVALID) - goto out; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + (((to + off) >> 20) & 0x7ff); - - /* Clear segment table entry in guest address space. */ - flush |= gmap_unlink_segment(gmap, table); - *table = _SEGMENT_ENTRY_INVALID; - } -out: - spin_unlock(&gmap->mm->page_table_lock); - up_read(&gmap->mm->mmap_sem); + down_write(&gmap->mm->mmap_sem); + for (off = 0; off < len; off += PMD_SIZE) + flush |= __gmap_unmap_by_gaddr(gmap, to + off); + up_write(&gmap->mm->mmap_sem); if (flush) gmap_flush_tlb(gmap); return 0; @@ -348,87 +400,47 @@ EXPORT_SYMBOL_GPL(gmap_unmap_segment); * @gmap: pointer to the guest address space structure * @from: source address in the parent address space * @to: target address in the guest address space + * @len: length of the memory area to map * * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not. */ int gmap_map_segment(struct gmap *gmap, unsigned long from, unsigned long to, unsigned long len) { - unsigned long *table; unsigned long off; int flush; if ((from | to | len) & (PMD_SIZE - 1)) return -EINVAL; - if (len == 0 || from + len > TASK_MAX_SIZE || - from + len < from || to + len < to) + if (len == 0 || from + len < from || to + len < to || + from + len > TASK_MAX_SIZE || to + len > gmap->asce_end) return -EINVAL; flush = 0; - down_read(&gmap->mm->mmap_sem); - spin_lock(&gmap->mm->page_table_lock); + down_write(&gmap->mm->mmap_sem); for (off = 0; off < len; off += PMD_SIZE) { - /* Walk the gmap address space page table */ - table = gmap->table + (((to + off) >> 53) & 0x7ff); - if ((*table & _REGION_ENTRY_INVALID) && - gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY)) - goto out_unmap; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + (((to + off) >> 42) & 0x7ff); - if ((*table & _REGION_ENTRY_INVALID) && - gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY)) - goto out_unmap; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + (((to + off) >> 31) & 0x7ff); - if ((*table & _REGION_ENTRY_INVALID) && - gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY)) - goto out_unmap; - table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN); - table = table + (((to + off) >> 20) & 0x7ff); - - /* Store 'from' address in an invalid segment table entry. */ - flush |= gmap_unlink_segment(gmap, table); - *table = (from + off) | (_SEGMENT_ENTRY_INVALID | - _SEGMENT_ENTRY_PROTECT); + /* Remove old translation */ + flush |= __gmap_unmap_by_gaddr(gmap, to + off); + /* Store new translation */ + if (radix_tree_insert(&gmap->guest_to_host, + (to + off) >> PMD_SHIFT, + (void *) from + off)) + break; } - spin_unlock(&gmap->mm->page_table_lock); - up_read(&gmap->mm->mmap_sem); + up_write(&gmap->mm->mmap_sem); if (flush) gmap_flush_tlb(gmap); - return 0; - -out_unmap: - spin_unlock(&gmap->mm->page_table_lock); - up_read(&gmap->mm->mmap_sem); + if (off >= len) + return 0; gmap_unmap_segment(gmap, to, len); return -ENOMEM; } EXPORT_SYMBOL_GPL(gmap_map_segment); -static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap) -{ - unsigned long *table; - - table = gmap->table + ((address >> 53) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INVALID)) - return ERR_PTR(-EFAULT); - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((address >> 42) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INVALID)) - return ERR_PTR(-EFAULT); - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((address >> 31) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INVALID)) - return ERR_PTR(-EFAULT); - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((address >> 20) & 0x7ff); - return table; -} - /** * __gmap_translate - translate a guest address to a user space address - * @address: guest address * @gmap: pointer to guest mapping meta data structure + * @gaddr: guest address * * Returns user space address which corresponds to the guest address or * -EFAULT if no such mapping exists. @@ -436,168 +448,161 @@ static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap) * The mmap_sem of the mm that belongs to the address space must be held * when this function gets called. */ -unsigned long __gmap_translate(unsigned long address, struct gmap *gmap) +unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) { - unsigned long *segment_ptr, vmaddr, segment; - struct gmap_pgtable *mp; - struct page *page; + unsigned long vmaddr; - current->thread.gmap_addr = address; - segment_ptr = gmap_table_walk(address, gmap); - if (IS_ERR(segment_ptr)) - return PTR_ERR(segment_ptr); - /* Convert the gmap address to an mm address. */ - segment = *segment_ptr; - if (!(segment & _SEGMENT_ENTRY_INVALID)) { - page = pfn_to_page(segment >> PAGE_SHIFT); - mp = (struct gmap_pgtable *) page->index; - return mp->vmaddr | (address & ~PMD_MASK); - } else if (segment & _SEGMENT_ENTRY_PROTECT) { - vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; - return vmaddr | (address & ~PMD_MASK); - } - return -EFAULT; + vmaddr = (unsigned long) + radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT); + return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT; } EXPORT_SYMBOL_GPL(__gmap_translate); /** * gmap_translate - translate a guest address to a user space address - * @address: guest address * @gmap: pointer to guest mapping meta data structure + * @gaddr: guest address * * Returns user space address which corresponds to the guest address or * -EFAULT if no such mapping exists. * This function does not establish potentially missing page table entries. */ -unsigned long gmap_translate(unsigned long address, struct gmap *gmap) +unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr) { unsigned long rc; down_read(&gmap->mm->mmap_sem); - rc = __gmap_translate(address, gmap); + rc = __gmap_translate(gmap, gaddr); up_read(&gmap->mm->mmap_sem); return rc; } EXPORT_SYMBOL_GPL(gmap_translate); -static int gmap_connect_pgtable(unsigned long address, unsigned long segment, - unsigned long *segment_ptr, struct gmap *gmap) +/** + * gmap_unlink - disconnect a page table from the gmap shadow tables + * @gmap: pointer to guest mapping meta data structure + * @table: pointer to the host page table + * @vmaddr: vm address associated with the host page table + */ +static void gmap_unlink(struct mm_struct *mm, unsigned long *table, + unsigned long vmaddr) +{ + struct gmap *gmap; + int flush; + + list_for_each_entry(gmap, &mm->context.gmap_list, list) { + flush = __gmap_unlink_by_vmaddr(gmap, vmaddr); + if (flush) + gmap_flush_tlb(gmap); + } +} + +/** + * gmap_link - set up shadow page tables to connect a host to a guest address + * @gmap: pointer to guest mapping meta data structure + * @gaddr: guest address + * @vmaddr: vm address + * + * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT + * if the vm address is already mapped to a different guest segment. + * The mmap_sem of the mm that belongs to the address space must be held + * when this function gets called. + */ +int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) { - unsigned long vmaddr; - struct vm_area_struct *vma; - struct gmap_pgtable *mp; - struct gmap_rmap *rmap; struct mm_struct *mm; - struct page *page; + unsigned long *table; + spinlock_t *ptl; pgd_t *pgd; pud_t *pud; pmd_t *pmd; + int rc; - mm = gmap->mm; - vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; - vma = find_vma(mm, vmaddr); - if (!vma || vma->vm_start > vmaddr) - return -EFAULT; + /* Create higher level tables in the gmap page table */ + table = gmap->table; + if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) { + table += (gaddr >> 53) & 0x7ff; + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY, + gaddr & 0xffe0000000000000)) + return -ENOMEM; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + } + if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) { + table += (gaddr >> 42) & 0x7ff; + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY, + gaddr & 0xfffffc0000000000)) + return -ENOMEM; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + } + if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) { + table += (gaddr >> 31) & 0x7ff; + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY, + gaddr & 0xffffffff80000000)) + return -ENOMEM; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + } + table += (gaddr >> 20) & 0x7ff; /* Walk the parent mm page table */ + mm = gmap->mm; pgd = pgd_offset(mm, vmaddr); - pud = pud_alloc(mm, pgd, vmaddr); - if (!pud) - return -ENOMEM; - pmd = pmd_alloc(mm, pud, vmaddr); - if (!pmd) - return -ENOMEM; - if (!pmd_present(*pmd) && - __pte_alloc(mm, vma, pmd, vmaddr)) - return -ENOMEM; + VM_BUG_ON(pgd_none(*pgd)); + pud = pud_offset(pgd, vmaddr); + VM_BUG_ON(pud_none(*pud)); + pmd = pmd_offset(pud, vmaddr); + VM_BUG_ON(pmd_none(*pmd)); /* large pmds cannot yet be handled */ if (pmd_large(*pmd)) return -EFAULT; - /* pmd now points to a valid segment table entry. */ - rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT); - if (!rmap) - return -ENOMEM; /* Link gmap segment table entry location to page table. */ - page = pmd_page(*pmd); - mp = (struct gmap_pgtable *) page->index; - rmap->gmap = gmap; - rmap->entry = segment_ptr; - rmap->vmaddr = address & PMD_MASK; - spin_lock(&mm->page_table_lock); - if (*segment_ptr == segment) { - list_add(&rmap->list, &mp->mapper); - /* Set gmap segment table entry to page table. */ - *segment_ptr = pmd_val(*pmd) & PAGE_MASK; - rmap = NULL; - } - spin_unlock(&mm->page_table_lock); - kfree(rmap); - return 0; -} - -static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table) -{ - struct gmap_rmap *rmap, *next; - struct gmap_pgtable *mp; - struct page *page; - int flush; - - flush = 0; - spin_lock(&mm->page_table_lock); - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - mp = (struct gmap_pgtable *) page->index; - list_for_each_entry_safe(rmap, next, &mp->mapper, list) { - *rmap->entry = mp->vmaddr | (_SEGMENT_ENTRY_INVALID | - _SEGMENT_ENTRY_PROTECT); - list_del(&rmap->list); - kfree(rmap); - flush = 1; - } - spin_unlock(&mm->page_table_lock); - if (flush) - __tlb_flush_global(); + rc = radix_tree_preload(GFP_KERNEL); + if (rc) + return rc; + ptl = pmd_lock(mm, pmd); + spin_lock(&gmap->guest_table_lock); + if (*table == _SEGMENT_ENTRY_INVALID) { + rc = radix_tree_insert(&gmap->host_to_guest, + vmaddr >> PMD_SHIFT, table); + if (!rc) + *table = pmd_val(*pmd); + } else + rc = 0; + spin_unlock(&gmap->guest_table_lock); + spin_unlock(ptl); + radix_tree_preload_end(); + return rc; } -/* - * this function is assumed to be called with mmap_sem held +/** + * gmap_fault - resolve a fault on a guest address + * @gmap: pointer to guest mapping meta data structure + * @gaddr: guest address + * @fault_flags: flags to pass down to handle_mm_fault() + * + * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT + * if the vm address is already mapped to a different guest segment. */ -unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) +int gmap_fault(struct gmap *gmap, unsigned long gaddr, + unsigned int fault_flags) { - unsigned long *segment_ptr, segment; - struct gmap_pgtable *mp; - struct page *page; + unsigned long vmaddr; int rc; - current->thread.gmap_addr = address; - segment_ptr = gmap_table_walk(address, gmap); - if (IS_ERR(segment_ptr)) - return -EFAULT; - /* Convert the gmap address to an mm address. */ - while (1) { - segment = *segment_ptr; - if (!(segment & _SEGMENT_ENTRY_INVALID)) { - /* Page table is present */ - page = pfn_to_page(segment >> PAGE_SHIFT); - mp = (struct gmap_pgtable *) page->index; - return mp->vmaddr | (address & ~PMD_MASK); - } - if (!(segment & _SEGMENT_ENTRY_PROTECT)) - /* Nothing mapped in the gmap address space. */ - break; - rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap); - if (rc) - return rc; - } - return -EFAULT; -} - -unsigned long gmap_fault(unsigned long address, struct gmap *gmap) -{ - unsigned long rc; - down_read(&gmap->mm->mmap_sem); - rc = __gmap_fault(address, gmap); + vmaddr = __gmap_translate(gmap, gaddr); + if (IS_ERR_VALUE(vmaddr)) { + rc = vmaddr; + goto out_up; + } + if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags)) { + rc = -EFAULT; + goto out_up; + } + rc = __gmap_link(gmap, gaddr, vmaddr); +out_up: up_read(&gmap->mm->mmap_sem); - return rc; } EXPORT_SYMBOL_GPL(gmap_fault); @@ -617,17 +622,24 @@ static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm) free_swap_and_cache(entry); } -/** - * The mm->mmap_sem lock must be held +/* + * this function is assumed to be called with mmap_sem held */ -static void gmap_zap_unused(struct mm_struct *mm, unsigned long address) +void __gmap_zap(struct gmap *gmap, unsigned long gaddr) { - unsigned long ptev, pgstev; + unsigned long vmaddr, ptev, pgstev; + pte_t *ptep, pte; spinlock_t *ptl; pgste_t pgste; - pte_t *ptep, pte; - ptep = get_locked_pte(mm, address, &ptl); + /* Find the vm address for the guest address */ + vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host, + gaddr >> PMD_SHIFT); + if (!vmaddr) + return; + vmaddr |= gaddr & ~PMD_MASK; + /* Get pointer to the page table entry */ + ptep = get_locked_pte(gmap->mm, vmaddr, &ptl); if (unlikely(!ptep)) return; pte = *ptep; @@ -639,87 +651,34 @@ static void gmap_zap_unused(struct mm_struct *mm, unsigned long address) ptev = pte_val(pte); if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) || ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) { - gmap_zap_swap_entry(pte_to_swp_entry(pte), mm); - pte_clear(mm, address, ptep); + gmap_zap_swap_entry(pte_to_swp_entry(pte), gmap->mm); + pte_clear(gmap->mm, vmaddr, ptep); } pgste_set_unlock(ptep, pgste); out_pte: pte_unmap_unlock(*ptep, ptl); } - -/* - * this function is assumed to be called with mmap_sem held - */ -void __gmap_zap(unsigned long address, struct gmap *gmap) -{ - unsigned long *table, *segment_ptr; - unsigned long segment, pgstev, ptev; - struct gmap_pgtable *mp; - struct page *page; - - segment_ptr = gmap_table_walk(address, gmap); - if (IS_ERR(segment_ptr)) - return; - segment = *segment_ptr; - if (segment & _SEGMENT_ENTRY_INVALID) - return; - page = pfn_to_page(segment >> PAGE_SHIFT); - mp = (struct gmap_pgtable *) page->index; - address = mp->vmaddr | (address & ~PMD_MASK); - /* Page table is present */ - table = (unsigned long *)(segment & _SEGMENT_ENTRY_ORIGIN); - table = table + ((address >> 12) & 0xff); - pgstev = table[PTRS_PER_PTE]; - ptev = table[0]; - /* quick check, checked again with locks held */ - if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) || - ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) - gmap_zap_unused(gmap->mm, address); -} EXPORT_SYMBOL_GPL(__gmap_zap); -void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap) +void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) { - - unsigned long *table, address, size; + unsigned long gaddr, vmaddr, size; struct vm_area_struct *vma; - struct gmap_pgtable *mp; - struct page *page; down_read(&gmap->mm->mmap_sem); - address = from; - while (address < to) { - /* Walk the gmap address space page table */ - table = gmap->table + ((address >> 53) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INVALID)) { - address = (address + PMD_SIZE) & PMD_MASK; - continue; - } - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((address >> 42) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INVALID)) { - address = (address + PMD_SIZE) & PMD_MASK; + for (gaddr = from; gaddr < to; + gaddr = (gaddr + PMD_SIZE) & PMD_MASK) { + /* Find the vm address for the guest address */ + vmaddr = (unsigned long) + radix_tree_lookup(&gmap->guest_to_host, + gaddr >> PMD_SHIFT); + if (!vmaddr) continue; - } - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((address >> 31) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INVALID)) { - address = (address + PMD_SIZE) & PMD_MASK; - continue; - } - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((address >> 20) & 0x7ff); - if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) { - address = (address + PMD_SIZE) & PMD_MASK; - continue; - } - page = pfn_to_page(*table >> PAGE_SHIFT); - mp = (struct gmap_pgtable *) page->index; - vma = find_vma(gmap->mm, mp->vmaddr); - size = min(to - address, PMD_SIZE - (address & ~PMD_MASK)); - zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK), - size, NULL); - address = (address + PMD_SIZE) & PMD_MASK; + vmaddr |= gaddr & ~PMD_MASK; + /* Find vma in the parent mm */ + vma = find_vma(gmap->mm, vmaddr); + size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); + zap_page_range(vma, vmaddr, size, NULL); } up_read(&gmap->mm->mmap_sem); } @@ -755,7 +714,7 @@ EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier); /** * gmap_ipte_notify - mark a range of ptes for invalidation notification * @gmap: pointer to guest mapping meta data structure - * @start: virtual address in the guest address space + * @gaddr: virtual address in the guest address space * @len: size of area * * Returns 0 if for each page in the given range a gmap mapping exists and @@ -763,7 +722,7 @@ EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier); * for one or more pages -EFAULT is returned. If no memory could be allocated * -ENOMEM is returned. This function establishes missing page table entries. */ -int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) +int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len) { unsigned long addr; spinlock_t *ptl; @@ -771,12 +730,12 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) pgste_t pgste; int rc = 0; - if ((start & ~PAGE_MASK) || (len & ~PAGE_MASK)) + if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK)) return -EINVAL; down_read(&gmap->mm->mmap_sem); while (len) { /* Convert gmap address and connect the page tables */ - addr = __gmap_fault(start, gmap); + addr = __gmap_translate(gmap, gaddr); if (IS_ERR_VALUE(addr)) { rc = addr; break; @@ -786,6 +745,9 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) rc = -EFAULT; break; } + rc = __gmap_link(gmap, gaddr, addr); + if (rc) + break; /* Walk the process page table, lock and get pte pointer */ ptep = get_locked_pte(gmap->mm, addr, &ptl); if (unlikely(!ptep)) @@ -796,7 +758,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) pgste = pgste_get_lock(ptep); pgste_val(pgste) |= PGSTE_IN_BIT; pgste_set_unlock(ptep, pgste); - start += PAGE_SIZE; + gaddr += PAGE_SIZE; len -= PAGE_SIZE; } spin_unlock(ptl); @@ -809,28 +771,30 @@ EXPORT_SYMBOL_GPL(gmap_ipte_notify); /** * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte. * @mm: pointer to the process mm_struct + * @addr: virtual address in the process address space * @pte: pointer to the page table entry * * This function is assumed to be called with the page table lock held * for the pte to notify. */ -void gmap_do_ipte_notify(struct mm_struct *mm, pte_t *pte) +void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte) { - unsigned long segment_offset; + unsigned long offset, gaddr; + unsigned long *table; struct gmap_notifier *nb; - struct gmap_pgtable *mp; - struct gmap_rmap *rmap; - struct page *page; + struct gmap *gmap; - segment_offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); - segment_offset = segment_offset * (4096 / sizeof(pte_t)); - page = pfn_to_page(__pa(pte) >> PAGE_SHIFT); - mp = (struct gmap_pgtable *) page->index; + offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); + offset = offset * (4096 / sizeof(pte_t)); spin_lock(&gmap_notifier_lock); - list_for_each_entry(rmap, &mp->mapper, list) { + list_for_each_entry(gmap, &mm->context.gmap_list, list) { + table = radix_tree_lookup(&gmap->host_to_guest, + vmaddr >> PMD_SHIFT); + if (!table) + continue; + gaddr = __gmap_segment_gaddr(table) + offset; list_for_each_entry(nb, &gmap_notifier_list, list) - nb->notifier_call(rmap->gmap, - rmap->vmaddr + segment_offset); + nb->notifier_call(gmap, gaddr); } spin_unlock(&gmap_notifier_lock); } @@ -841,29 +805,18 @@ static inline int page_table_with_pgste(struct page *page) return atomic_read(&page->_mapcount) == 0; } -static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, - unsigned long vmaddr) +static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm) { struct page *page; unsigned long *table; - struct gmap_pgtable *mp; page = alloc_page(GFP_KERNEL|__GFP_REPEAT); if (!page) return NULL; - mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT); - if (!mp) { - __free_page(page); - return NULL; - } if (!pgtable_page_ctor(page)) { - kfree(mp); __free_page(page); return NULL; } - mp->vmaddr = vmaddr & PMD_MASK; - INIT_LIST_HEAD(&mp->mapper); - page->index = (unsigned long) mp; atomic_set(&page->_mapcount, 0); table = (unsigned long *) page_to_phys(page); clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); @@ -874,14 +827,10 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, static inline void page_table_free_pgste(unsigned long *table) { struct page *page; - struct gmap_pgtable *mp; page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - mp = (struct gmap_pgtable *) page->index; - BUG_ON(!list_empty(&mp->mapper)); pgtable_page_dtor(page); atomic_set(&page->_mapcount, -1); - kfree(mp); __free_page(page); } @@ -994,13 +943,13 @@ retry: } if (!(pte_val(*ptep) & _PAGE_INVALID) && (pte_val(*ptep) & _PAGE_PROTECT)) { - pte_unmap_unlock(*ptep, ptl); - if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) { - up_read(&mm->mmap_sem); - return -EFAULT; - } - goto retry; + pte_unmap_unlock(*ptep, ptl); + if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) { + up_read(&mm->mmap_sem); + return -EFAULT; } + goto retry; + } new = old = pgste_get_lock(ptep); pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | @@ -1038,8 +987,7 @@ static inline int page_table_with_pgste(struct page *page) return 0; } -static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, - unsigned long vmaddr) +static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm) { return NULL; } @@ -1053,8 +1001,8 @@ static inline void page_table_free_pgste(unsigned long *table) { } -static inline void gmap_disconnect_pgtable(struct mm_struct *mm, - unsigned long *table) +static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table, + unsigned long vmaddr) { } @@ -1074,14 +1022,14 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) /* * page table entry allocation/free routines. */ -unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr) +unsigned long *page_table_alloc(struct mm_struct *mm) { unsigned long *uninitialized_var(table); struct page *uninitialized_var(page); unsigned int mask, bit; if (mm_has_pgste(mm)) - return page_table_alloc_pgste(mm, vmaddr); + return page_table_alloc_pgste(mm); /* Allocate fragments of a 4K page as 1K/2K page table */ spin_lock_bh(&mm->context.list_lock); mask = FRAG_MASK; @@ -1123,10 +1071,8 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) unsigned int bit, mask; page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - if (page_table_with_pgste(page)) { - gmap_disconnect_pgtable(mm, table); + if (page_table_with_pgste(page)) return page_table_free_pgste(table); - } /* Free 1K/2K page table fragment of a 4K page */ bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); spin_lock_bh(&mm->context.list_lock); @@ -1158,7 +1104,8 @@ static void __page_table_free_rcu(void *table, unsigned bit) } } -void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) +void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, + unsigned long vmaddr) { struct mm_struct *mm; struct page *page; @@ -1167,7 +1114,7 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) mm = tlb->mm; page = pfn_to_page(__pa(table) >> PAGE_SHIFT); if (page_table_with_pgste(page)) { - gmap_disconnect_pgtable(mm, table); + gmap_unlink(mm, table, vmaddr); table = (unsigned long *) (__pa(table) | FRAG_MASK); tlb_remove_table(tlb, table); return; @@ -1303,7 +1250,7 @@ again: if (page_table_with_pgste(page)) continue; /* Allocate new page table with pgstes */ - new = page_table_alloc_pgste(mm, addr); + new = page_table_alloc_pgste(mm); if (!new) return -ENOMEM; @@ -1318,7 +1265,7 @@ again: /* Establish new table */ pmd_populate(mm, pmd, (pte_t *) new); /* Free old table with rcu, there might be a walker! */ - page_table_free_rcu(tlb, table); + page_table_free_rcu(tlb, table, addr); new = NULL; } spin_unlock(ptl); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index fe9012a49aa5..fdbd7888cb07 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -65,7 +65,7 @@ static pte_t __ref *vmem_pte_alloc(unsigned long address) pte_t *pte; if (slab_is_available()) - pte = (pte_t *) page_table_alloc(&init_mm, address); + pte = (pte_t *) page_table_alloc(&init_mm); else pte = alloc_bootmem_align(PTRS_PER_PTE * sizeof(pte_t), PTRS_PER_PTE * sizeof(pte_t)); diff --git a/arch/score/Kconfig b/arch/score/Kconfig index 4ac8cae5727c..366e1b599a7b 100644 --- a/arch/score/Kconfig +++ b/arch/score/Kconfig @@ -22,17 +22,14 @@ choice config ARCH_SCORE7 bool "SCORE7 processor" select SYS_SUPPORTS_32BIT_KERNEL - select GENERIC_HAS_IOMAP config MACH_SPCT6600 bool "SPCT6600 series based machines" select SYS_SUPPORTS_32BIT_KERNEL - select GENERIC_HAS_IOMAP config SCORE_SIM bool "Score simulator" select SYS_SUPPORTS_32BIT_KERNEL - select GENERIC_HAS_IOMAP endchoice endmenu diff --git a/arch/sh/configs/sdk7780_defconfig b/arch/sh/configs/sdk7780_defconfig index 6a96b9a2f7a5..bbd4c2298708 100644 --- a/arch/sh/configs/sdk7780_defconfig +++ b/arch/sh/configs/sdk7780_defconfig @@ -30,6 +30,7 @@ CONFIG_PCI_DEBUG=y CONFIG_PCCARD=y CONFIG_YENTA=y CONFIG_HOTPLUG_PCI=y +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y diff --git a/arch/sh/configs/sh2007_defconfig b/arch/sh/configs/sh2007_defconfig index e741b1e36acd..df25ae774ee0 100644 --- a/arch/sh/configs/sh2007_defconfig +++ b/arch/sh/configs/sh2007_defconfig @@ -25,6 +25,7 @@ CONFIG_CMDLINE_OVERWRITE=y CONFIG_CMDLINE="console=ttySC1,115200 ip=dhcp root=/dev/nfs rw nfsroot=/nfs/rootfs,rsize=1024,wsize=1024 earlyprintk=sh-sci.1" CONFIG_PCCARD=y CONFIG_BINFMT_MISC=y +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_XFRM_USER=y diff --git a/arch/sh/mm/gup.c b/arch/sh/mm/gup.c index bf8daf9d9c9b..37458f38b220 100644 --- a/arch/sh/mm/gup.c +++ b/arch/sh/mm/gup.c @@ -105,6 +105,8 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); get_page(page); + __flush_anon_page(page, addr); + flush_dcache_page(page); pages[*nr] = page; (*nr)++; diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig index 9d8521b8c854..6b68f12f29db 100644 --- a/arch/sparc/configs/sparc64_defconfig +++ b/arch/sparc/configs/sparc64_defconfig @@ -29,6 +29,7 @@ CONFIG_PCI=y CONFIG_PCI_MSI=y CONFIG_SUN_OPENPROMFS=m CONFIG_BINFMT_MISC=m +CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_XFRM_USER=m diff --git a/arch/sparc/net/bpf_jit_asm.S b/arch/sparc/net/bpf_jit_asm.S index 9d016c7017f7..8c83f4b8eb15 100644 --- a/arch/sparc/net/bpf_jit_asm.S +++ b/arch/sparc/net/bpf_jit_asm.S @@ -6,10 +6,12 @@ #define SAVE_SZ 176 #define SCRATCH_OFF STACK_BIAS + 128 #define BE_PTR(label) be,pn %xcc, label +#define SIGN_EXTEND(reg) sra reg, 0, reg #else #define SAVE_SZ 96 #define SCRATCH_OFF 72 #define BE_PTR(label) be label +#define SIGN_EXTEND(reg) #endif #define SKF_MAX_NEG_OFF (-0x200000) /* SKF_LL_OFF from filter.h */ @@ -135,6 +137,7 @@ bpf_slow_path_byte_msh: save %sp, -SAVE_SZ, %sp; \ mov %i0, %o0; \ mov r_OFF, %o1; \ + SIGN_EXTEND(%o1); \ call bpf_internal_load_pointer_neg_helper; \ mov (LEN), %o2; \ mov %o0, r_TMP; \ diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index 1f76c22a6a75..ece4af0575e9 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -184,7 +184,7 @@ do { \ */ #define emit_alu_K(OPCODE, K) \ do { \ - if (K) { \ + if (K || OPCODE == AND || OPCODE == MUL) { \ unsigned int _insn = OPCODE; \ _insn |= RS1(r_A) | RD(r_A); \ if (is_simm13(K)) { \ @@ -234,12 +234,18 @@ do { BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(u8)); \ __emit_load8(BASE, STRUCT, FIELD, DEST); \ } while (0) -#define emit_ldmem(OFF, DEST) \ -do { *prog++ = LD32I | RS1(FP) | S13(-(OFF)) | RD(DEST); \ +#ifdef CONFIG_SPARC64 +#define BIAS (STACK_BIAS - 4) +#else +#define BIAS (-4) +#endif + +#define emit_ldmem(OFF, DEST) \ +do { *prog++ = LD32I | RS1(SP) | S13(BIAS - (OFF)) | RD(DEST); \ } while (0) -#define emit_stmem(OFF, SRC) \ -do { *prog++ = LD32I | RS1(FP) | S13(-(OFF)) | RD(SRC); \ +#define emit_stmem(OFF, SRC) \ +do { *prog++ = ST32I | RS1(SP) | S13(BIAS - (OFF)) | RD(SRC); \ } while (0) #ifdef CONFIG_SMP @@ -615,10 +621,11 @@ void bpf_jit_compile(struct bpf_prog *fp) case BPF_ANC | SKF_AD_VLAN_TAG: case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: emit_skb_load16(vlan_tci, r_A); - if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) { - emit_andi(r_A, VLAN_VID_MASK, r_A); + if (code != (BPF_ANC | SKF_AD_VLAN_TAG)) { + emit_alu_K(SRL, 12); + emit_andi(r_A, 1, r_A); } else { - emit_loadimm(VLAN_TAG_PRESENT, r_TMP); + emit_loadimm(~VLAN_TAG_PRESENT, r_TMP); emit_and(r_A, r_TMP, r_A); } break; @@ -630,15 +637,19 @@ void bpf_jit_compile(struct bpf_prog *fp) emit_loadimm(K, r_X); break; case BPF_LD | BPF_MEM: + seen |= SEEN_MEM; emit_ldmem(K * 4, r_A); break; case BPF_LDX | BPF_MEM: + seen |= SEEN_MEM | SEEN_XREG; emit_ldmem(K * 4, r_X); break; case BPF_ST: + seen |= SEEN_MEM; emit_stmem(K * 4, r_A); break; case BPF_STX: + seen |= SEEN_MEM | SEEN_XREG; emit_stmem(K * 4, r_X); break; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 778178f4c7d1..e4b1f431c7ed 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -23,6 +23,7 @@ config X86 def_bool y select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS + select ARCH_HAS_FAST_MULTIPLIER select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select HAVE_AOUT if X86_32 @@ -136,6 +137,7 @@ config X86 select HAVE_ACPI_APEI if ACPI select HAVE_ACPI_APEI_NMI if ACPI select ACPI_LEGACY_TABLES_LOOKUP if ACPI + select X86_FEATURE_NAMES if PROC_FS config INSTRUCTION_DECODER def_bool y @@ -313,6 +315,17 @@ config SMP If you don't know what to do here, say N. +config X86_FEATURE_NAMES + bool "Processor feature human-readable names" if EMBEDDED + default y + ---help--- + This option compiles in a table of x86 feature bits and corresponding + names. This is required to support /proc/cpuinfo and a few kernel + messages. You can disable this to save space, at the expense of + making those few kernel messages show numeric feature bits instead. + + If in doubt, say Y. + config X86_X2APIC bool "Support x2apic" depends on X86_LOCAL_APIC && X86_64 && IRQ_REMAP diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 60087ca37679..5692d6ac0f18 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -253,12 +253,6 @@ archclean: $(Q)$(MAKE) $(clean)=arch/x86/tools $(Q)$(MAKE) $(clean)=arch/x86/purgatory -PHONY += kvmconfig -kvmconfig: - $(if $(wildcard $(objtree)/.config),, $(error You need an existing .config for this target)) - $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m -O $(objtree) $(objtree)/.config $(srctree)/arch/x86/configs/kvm_guest.config - $(Q)yes "" | $(MAKE) -f $(srctree)/Makefile oldconfig - define archhelp echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)' echo ' install - Install kernel using' @@ -272,5 +266,4 @@ define archhelp echo ' bzdisk/fdimage*/isoimage also accept:' echo ' FDARGS="..." arguments for the booted kernel' echo ' FDINITRD=file initrd for the booted kernel' - echo ' kvmconfig - Enable additional options for guest kernel support' endef diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index dbe8dd2fe247..5b016e2498f3 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -35,19 +35,22 @@ setup-y += video-vesa.o setup-y += video-bios.o targets += $(setup-y) -hostprogs-y := mkcpustr tools/build +hostprogs-y := tools/build +hostprogs-$(CONFIG_X86_FEATURE_NAMES) += mkcpustr HOST_EXTRACFLAGS += -I$(srctree)/tools/include \ -include include/generated/autoconf.h \ -D__EXPORTED_HEADERS__ +ifdef CONFIG_X86_FEATURE_NAMES $(obj)/cpu.o: $(obj)/cpustr.h quiet_cmd_cpustr = CPUSTR $@ cmd_cpustr = $(obj)/mkcpustr > $@ -targets += cpustr.h +targets += cpustr.h $(obj)/cpustr.h: $(obj)/mkcpustr FORCE $(call if_changed,cpustr) +endif # --------------------------------------------------------------------------- diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 7a801a310e37..704f58aa79cd 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -26,18 +26,18 @@ LDFLAGS_vmlinux := -T hostprogs-y := mkpiggy HOST_EXTRACFLAGS += -I$(srctree)/tools/include -VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ - $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \ - $(obj)/piggy.o $(obj)/cpuflags.o $(obj)/aslr.o +vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ + $(obj)/string.o $(obj)/cmdline.o \ + $(obj)/piggy.o $(obj)/cpuflags.o + +vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o +vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/aslr.o $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone -ifeq ($(CONFIG_EFI_STUB), y) - VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \ - $(objtree)/drivers/firmware/efi/libstub/lib.a -endif +vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o -$(obj)/vmlinux: $(VMLINUX_OBJS) FORCE +$(obj)/vmlinux: $(vmlinux-objs-y) FORCE $(call if_changed,ld) @: @@ -45,7 +45,7 @@ OBJCOPYFLAGS_vmlinux.bin := -R .comment -S $(obj)/vmlinux.bin: vmlinux FORCE $(call if_changed,objcopy) -targets += $(patsubst $(obj)/%,%,$(VMLINUX_OBJS)) vmlinux.bin.all vmlinux.relocs +targets += $(patsubst $(obj)/%,%,$(vmlinux-objs-y)) vmlinux.bin.all vmlinux.relocs CMD_RELOCS = arch/x86/tools/relocs quiet_cmd_relocs = RELOCS $@ diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c index fc6091abedb7..7c68808edeb7 100644 --- a/arch/x86/boot/compressed/aslr.c +++ b/arch/x86/boot/compressed/aslr.c @@ -1,6 +1,5 @@ #include "misc.h" -#ifdef CONFIG_RANDOMIZE_BASE #include <asm/msr.h> #include <asm/archrandom.h> #include <asm/e820.h> @@ -183,12 +182,27 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, static bool mem_avoid_overlap(struct mem_vector *img) { int i; + struct setup_data *ptr; for (i = 0; i < MEM_AVOID_MAX; i++) { if (mem_overlaps(img, &mem_avoid[i])) return true; } + /* Avoid all entries in the setup_data linked list. */ + ptr = (struct setup_data *)(unsigned long)real_mode->hdr.setup_data; + while (ptr) { + struct mem_vector avoid; + + avoid.start = (u64)ptr; + avoid.size = sizeof(*ptr) + ptr->len; + + if (mem_overlaps(img, &avoid)) + return true; + + ptr = (struct setup_data *)(unsigned long)ptr->next; + } + return false; } @@ -320,5 +334,3 @@ unsigned char *choose_kernel_location(unsigned char *input, out: return (unsigned char *)choice; } - -#endif /* CONFIG_RANDOMIZE_BASE */ diff --git a/arch/x86/boot/compressed/early_serial_console.c b/arch/x86/boot/compressed/early_serial_console.c index d3d003cb5481..261e81fb9582 100644 --- a/arch/x86/boot/compressed/early_serial_console.c +++ b/arch/x86/boot/compressed/early_serial_console.c @@ -1,9 +1,5 @@ #include "misc.h" -#ifdef CONFIG_EARLY_PRINTK - int early_serial_base; #include "../early_serial_console.c" - -#endif diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index f277184e2ac1..de8eebd6f67c 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -19,7 +19,10 @@ static efi_system_table_t *sys_table; -struct efi_config *efi_early; +static struct efi_config *efi_early; + +#define efi_call_early(f, ...) \ + efi_early->call(efi_early->f, __VA_ARGS__); #define BOOT_SERVICES(bits) \ static void setup_boot_services##bits(struct efi_config *c) \ @@ -265,21 +268,25 @@ void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str) offset = offsetof(typeof(*out), output_string); output_string = efi_early->text_output + offset; + out = (typeof(out))(unsigned long)efi_early->text_output; func = (u64 *)output_string; - efi_early->call(*func, efi_early->text_output, str); + efi_early->call(*func, out, str); } else { struct efi_simple_text_output_protocol_32 *out; u32 *func; offset = offsetof(typeof(*out), output_string); output_string = efi_early->text_output + offset; + out = (typeof(out))(unsigned long)efi_early->text_output; func = (u32 *)output_string; - efi_early->call(*func, efi_early->text_output, str); + efi_early->call(*func, out, str); } } +#include "../../../../drivers/firmware/efi/libstub/efi-stub-helper.c" + static void find_bits(unsigned long mask, u8 *pos, u8 *size) { u8 first, len; @@ -360,7 +367,7 @@ free_struct: return status; } -static efi_status_t +static void setup_efi_pci32(struct boot_params *params, void **pci_handle, unsigned long size) { @@ -403,8 +410,6 @@ setup_efi_pci32(struct boot_params *params, void **pci_handle, data = (struct setup_data *)rom; } - - return status; } static efi_status_t @@ -463,7 +468,7 @@ free_struct: } -static efi_status_t +static void setup_efi_pci64(struct boot_params *params, void **pci_handle, unsigned long size) { @@ -506,11 +511,18 @@ setup_efi_pci64(struct boot_params *params, void **pci_handle, data = (struct setup_data *)rom; } - - return status; } -static efi_status_t setup_efi_pci(struct boot_params *params) +/* + * There's no way to return an informative status from this function, + * because any analysis (and printing of error messages) needs to be + * done directly at the EFI function call-site. + * + * For example, EFI_INVALID_PARAMETER could indicate a bug or maybe we + * just didn't find any PCI devices, but there's no way to tell outside + * the context of the call. + */ +static void setup_efi_pci(struct boot_params *params) { efi_status_t status; void **pci_handle = NULL; @@ -527,7 +539,7 @@ static efi_status_t setup_efi_pci(struct boot_params *params) size, (void **)&pci_handle); if (status != EFI_SUCCESS) - return status; + return; status = efi_call_early(locate_handle, EFI_LOCATE_BY_PROTOCOL, &pci_proto, @@ -538,13 +550,12 @@ static efi_status_t setup_efi_pci(struct boot_params *params) goto free_handle; if (efi_early->is64) - status = setup_efi_pci64(params, pci_handle, size); + setup_efi_pci64(params, pci_handle, size); else - status = setup_efi_pci32(params, pci_handle, size); + setup_efi_pci32(params, pci_handle, size); free_handle: efi_call_early(free_pool, pci_handle); - return status; } static void @@ -1032,7 +1043,6 @@ struct boot_params *make_boot_params(struct efi_config *c) int i; unsigned long ramdisk_addr; unsigned long ramdisk_size; - unsigned long initrd_addr_max; efi_early = c; sys_table = (efi_system_table_t *)(unsigned long)efi_early->table; @@ -1095,15 +1105,20 @@ struct boot_params *make_boot_params(struct efi_config *c) memset(sdt, 0, sizeof(*sdt)); - if (hdr->xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G) - initrd_addr_max = -1UL; - else - initrd_addr_max = hdr->initrd_addr_max; - status = handle_cmdline_files(sys_table, image, (char *)(unsigned long)hdr->cmd_line_ptr, - "initrd=", initrd_addr_max, + "initrd=", hdr->initrd_addr_max, &ramdisk_addr, &ramdisk_size); + + if (status != EFI_SUCCESS && + hdr->xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G) { + efi_printk(sys_table, "Trying to load files to higher address\n"); + status = handle_cmdline_files(sys_table, image, + (char *)(unsigned long)hdr->cmd_line_ptr, + "initrd=", -1UL, + &ramdisk_addr, &ramdisk_size); + } + if (status != EFI_SUCCESS) goto fail2; hdr->ramdisk_image = ramdisk_addr & 0xffffffff; @@ -1376,10 +1391,7 @@ struct boot_params *efi_main(struct efi_config *c, setup_graphics(boot_params); - status = setup_efi_pci(boot_params); - if (status != EFI_SUCCESS) { - efi_printk(sys_table, "setup_efi_pci() failed!\n"); - } + setup_efi_pci(boot_params); status = efi_call_early(allocate_pool, EFI_LOADER_DATA, sizeof(*gdt), (void **)&gdt); diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h index d487e727f1ec..c88c31ecad12 100644 --- a/arch/x86/boot/compressed/eboot.h +++ b/arch/x86/boot/compressed/eboot.h @@ -103,4 +103,20 @@ struct efi_uga_draw_protocol { void *blt; }; +struct efi_config { + u64 image_handle; + u64 table; + u64 allocate_pool; + u64 allocate_pages; + u64 get_memory_map; + u64 free_pool; + u64 free_pages; + u64 locate_handle; + u64 handle_protocol; + u64 exit_boot_services; + u64 text_output; + efi_status_t (*call)(unsigned long, ...); + bool is64; +} __packed; + #endif /* BOOT_COMPRESSED_EBOOT_H */ diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c index 6ec6bb6e9957..29207f69ae8c 100644 --- a/arch/x86/boot/cpu.c +++ b/arch/x86/boot/cpu.c @@ -16,7 +16,9 @@ */ #include "boot.h" +#ifdef CONFIG_X86_FEATURE_NAMES #include "cpustr.h" +#endif static char *cpu_name(int level) { @@ -32,11 +34,48 @@ static char *cpu_name(int level) } } +static void show_cap_strs(u32 *err_flags) +{ + int i, j; +#ifdef CONFIG_X86_FEATURE_NAMES + const unsigned char *msg_strs = (const unsigned char *)x86_cap_strs; + for (i = 0; i < NCAPINTS; i++) { + u32 e = err_flags[i]; + for (j = 0; j < 32; j++) { + if (msg_strs[0] < i || + (msg_strs[0] == i && msg_strs[1] < j)) { + /* Skip to the next string */ + msg_strs += 2; + while (*msg_strs++) + ; + } + if (e & 1) { + if (msg_strs[0] == i && + msg_strs[1] == j && + msg_strs[2]) + printf("%s ", msg_strs+2); + else + printf("%d:%d ", i, j); + } + e >>= 1; + } + } +#else + for (i = 0; i < NCAPINTS; i++) { + u32 e = err_flags[i]; + for (j = 0; j < 32; j++) { + if (e & 1) + printf("%d:%d ", i, j); + e >>= 1; + } + } +#endif +} + int validate_cpu(void) { u32 *err_flags; int cpu_level, req_level; - const unsigned char *msg_strs; check_cpu(&cpu_level, &req_level, &err_flags); @@ -49,34 +88,9 @@ int validate_cpu(void) } if (err_flags) { - int i, j; puts("This kernel requires the following features " "not present on the CPU:\n"); - - msg_strs = (const unsigned char *)x86_cap_strs; - - for (i = 0; i < NCAPINTS; i++) { - u32 e = err_flags[i]; - - for (j = 0; j < 32; j++) { - if (msg_strs[0] < i || - (msg_strs[0] == i && msg_strs[1] < j)) { - /* Skip to the next string */ - msg_strs += 2; - while (*msg_strs++) - ; - } - if (e & 1) { - if (msg_strs[0] == i && - msg_strs[1] == j && - msg_strs[2]) - printf("%s ", msg_strs+2); - else - printf("%d:%d ", i, j); - } - e >>= 1; - } - } + show_cap_strs(err_flags); putchar('\n'); return -1; } else { diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config new file mode 100644 index 000000000000..4e2ecfa23c15 --- /dev/null +++ b/arch/x86/configs/tiny.config @@ -0,0 +1 @@ +CONFIG_NOHIGHMEM=y diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 888950f29fd9..a7ccd57f19e4 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -481,7 +481,7 @@ static void ctr_crypt_final(struct crypto_aes_ctx *ctx, crypto_inc(ctrblk, AES_BLOCK_SIZE); } -#ifdef CONFIG_AS_AVX +#if 0 /* temporary disabled due to failing crypto tests */ static void aesni_ctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv) { @@ -1522,7 +1522,7 @@ static int __init aesni_init(void) aesni_gcm_dec_tfm = aesni_gcm_dec; } aesni_ctr_enc_tfm = aesni_ctr_enc; -#ifdef CONFIG_AS_AVX +#if 0 /* temporary disabled due to failing crypto tests */ if (cpu_has_avx) { /* optimize performance of ctr mode encryption transform */ aesni_ctr_enc_tfm = aesni_ctr_enc_avx_tfm; diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index afcd35d331de..cfe3b954d5e4 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -497,8 +497,6 @@ static __always_inline int fls64(__u64 x) #include <asm-generic/bitops/sched.h> -#define ARCH_HAS_FAST_MULTIPLIER 1 - #include <asm/arch_hweight.h> #include <asm-generic/bitops/const_hweight.h> diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index bb9b258d60e7..094292a63e74 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -202,6 +202,7 @@ #define X86_FEATURE_DECODEASSISTS ( 8*32+12) /* AMD Decode Assists support */ #define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */ #define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */ +#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ @@ -250,8 +251,15 @@ #include <asm/asm.h> #include <linux/bitops.h> +#ifdef CONFIG_X86_FEATURE_NAMES extern const char * const x86_cap_flags[NCAPINTS*32]; extern const char * const x86_power_flags[32]; +#define X86_CAP_FMT "%s" +#define x86_cap_flag(flag) x86_cap_flags[flag] +#else +#define X86_CAP_FMT "%d:%d" +#define x86_cap_flag(flag) ((flag) >> 5), ((flag) & 31) +#endif /* * In order to save room, we index into this array by doing diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 044a2fd3c5fe..0ec241ede5a2 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -159,30 +159,6 @@ static inline efi_status_t efi_thunk_set_virtual_address_map( } #endif /* CONFIG_EFI_MIXED */ - -/* arch specific definitions used by the stub code */ - -struct efi_config { - u64 image_handle; - u64 table; - u64 allocate_pool; - u64 allocate_pages; - u64 get_memory_map; - u64 free_pool; - u64 free_pages; - u64 locate_handle; - u64 handle_protocol; - u64 exit_boot_services; - u64 text_output; - efi_status_t (*call)(unsigned long, ...); - bool is64; -} __packed; - -extern struct efi_config *efi_early; - -#define efi_call_early(f, ...) \ - efi_early->call(efi_early->f, __VA_ARGS__); - extern bool efi_reboot_required(void); #else diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index b0910f97a3ea..ffb1733ac91f 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -106,14 +106,14 @@ enum fixed_addresses { __end_of_permanent_fixed_addresses, /* - * 256 temporary boot-time mappings, used by early_ioremap(), + * 512 temporary boot-time mappings, used by early_ioremap(), * before ioremap() is functional. * - * If necessary we round it up to the next 256 pages boundary so + * If necessary we round it up to the next 512 pages boundary so * that we can have a single pgd entry and a single pte table: */ #define NR_FIX_BTMAPS 64 -#define FIX_BTMAPS_SLOTS 4 +#define FIX_BTMAPS_SLOTS 8 #define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS) FIX_BTMAP_END = (__end_of_permanent_fixed_addresses ^ diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 478c490f3654..1733ab49ac5e 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -239,6 +239,7 @@ static inline int mp_find_ioapic(u32 gsi) { return 0; } static inline u32 mp_pin_to_gsi(int ioapic, int pin) { return UINT_MAX; } static inline int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) { return gsi; } static inline void mp_unmap_irq(int irq) { } +static inline bool mp_should_keep_irq(struct device *dev) { return 1; } static inline int save_ioapic_entries(void) { diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 7c492ed9087b..7d603a71ab3a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -99,10 +99,6 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) #define ASYNC_PF_PER_VCPU 64 -struct kvm_vcpu; -struct kvm; -struct kvm_async_pf; - enum kvm_reg { VCPU_REGS_RAX = 0, VCPU_REGS_RCX = 1, @@ -266,7 +262,8 @@ struct kvm_mmu { struct x86_exception *fault); gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, struct x86_exception *exception); - gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); + gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, + struct x86_exception *exception); int (*sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp); void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); @@ -481,6 +478,7 @@ struct kvm_vcpu_arch { u64 mmio_gva; unsigned access; gfn_t mmio_gfn; + u64 mmio_gen; struct kvm_pmu pmu; @@ -576,11 +574,10 @@ struct kvm_arch { struct kvm_apic_map *apic_map; unsigned int tss_addr; - struct page *apic_access_page; + bool apic_access_page_done; gpa_t wall_clock; - struct page *ept_identity_pagetable; bool ept_identity_pagetable_done; gpa_t ept_identity_map_addr; @@ -665,8 +662,8 @@ struct msr_data { struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ - int (*hardware_enable)(void *dummy); - void (*hardware_disable)(void *dummy); + int (*hardware_enable)(void); + void (*hardware_disable)(void); void (*check_processor_compatibility)(void *rtn); int (*hardware_setup)(void); /* __init */ void (*hardware_unsetup)(void); /* __exit */ @@ -710,7 +707,6 @@ struct kvm_x86_ops { void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); - void (*fpu_activate)(struct kvm_vcpu *vcpu); void (*fpu_deactivate)(struct kvm_vcpu *vcpu); void (*tlb_flush)(struct kvm_vcpu *vcpu); @@ -740,6 +736,7 @@ struct kvm_x86_ops { void (*hwapic_isr_update)(struct kvm *kvm, int isr); void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); + void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); @@ -772,6 +769,8 @@ struct kvm_x86_ops { bool (*mpx_supported)(void); int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); + + void (*sched_in)(struct kvm_vcpu *kvm, int cpu); }; struct kvm_arch_async_pf { @@ -895,7 +894,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gfn_t gfn, void *data, int offset, int len, u32 access); -void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); static inline int __kvm_irq_line_state(unsigned long *irq_state, @@ -917,7 +915,6 @@ void kvm_inject_nmi(struct kvm_vcpu *vcpu); int fx_init(struct kvm_vcpu *vcpu); -void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, int bytes); int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); @@ -926,7 +923,8 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); int kvm_mmu_load(struct kvm_vcpu *vcpu); void kvm_mmu_unload(struct kvm_vcpu *vcpu); void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); -gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); +gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, + struct x86_exception *exception); gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception); gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, @@ -946,7 +944,8 @@ void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu); void kvm_enable_tdp(void); void kvm_disable_tdp(void); -static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) +static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, + struct x86_exception *exception) { return gpa; } @@ -1037,7 +1036,7 @@ asmlinkage void kvm_spurious_fault(void); #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); -int kvm_age_hva(struct kvm *kvm, unsigned long hva); +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); @@ -1046,6 +1045,9 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu); +void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu); +void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, + unsigned long address); void kvm_define_shared_msr(unsigned index, u32 msr); void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index c7678e43465b..e62cf897f781 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -2,6 +2,7 @@ #define _ASM_X86_KVM_PARA_H #include <asm/processor.h> +#include <asm/alternative.h> #include <uapi/asm/kvm_para.h> extern void kvmclock_init(void); @@ -16,10 +17,15 @@ static inline bool kvm_check_and_clear_guest_paused(void) } #endif /* CONFIG_KVM_GUEST */ -/* This instruction is vmcall. On non-VT architectures, it will generate a - * trap that we will then rewrite to the appropriate instruction. +#ifdef CONFIG_DEBUG_RODATA +#define KVM_HYPERCALL \ + ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL) +#else +/* On AMD processors, vmcall will generate a trap that we will + * then rewrite to the appropriate instruction. */ #define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1" +#endif /* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall * instruction. The hypervisor may replace it with something else but only the diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 5be9063545d2..3874693c0e53 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -19,6 +19,7 @@ extern pud_t level3_ident_pgt[512]; extern pmd_t level2_kernel_pgt[512]; extern pmd_t level2_fixmap_pgt[512]; extern pmd_t level2_ident_pgt[512]; +extern pte_t level1_fixmap_pgt[512]; extern pgd_t init_level4_pgt[]; #define swapper_pg_dir init_level4_pgt diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 7fd54f09b011..77dcab277710 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -13,10 +13,13 @@ nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_common.o := $(nostackp) obj-y := intel_cacheinfo.o scattered.o topology.o -obj-y += proc.o capflags.o powerflags.o common.o +obj-y += common.o obj-y += rdrand.o obj-y += match.o +obj-$(CONFIG_PROC_FS) += proc.o +obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o + obj-$(CONFIG_X86_32) += bugs.o obj-$(CONFIG_X86_64) += bugs_64.o @@ -48,6 +51,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o hypervisor.o mshyperv.o +ifdef CONFIG_X86_FEATURE_NAMES quiet_cmd_mkcapflags = MKCAP $@ cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@ @@ -56,3 +60,4 @@ cpufeature = $(src)/../../include/asm/cpufeature.h targets += capflags.c $(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE $(call if_changed,mkcapflags) +endif diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 60e5497681f5..813d29d00a17 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -525,6 +525,13 @@ static void early_init_amd(struct cpuinfo_x86 *c) } #endif + /* + * This is only needed to tell the kernel whether to use VMCALL + * and VMMCALL. VMMCALL is never executed except under virt, so + * we can set it unconditionally. + */ + set_cpu_cap(c, X86_FEATURE_VMMCALL); + /* F16h erratum 793, CVE-2013-6885 */ if (c->x86 == 0x16 && c->x86_model <= 0xf) msr_set_bit(MSR_AMD64_LS_CFG, 15); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e4ab2b42bd6f..c649f236e288 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -346,8 +346,8 @@ static void filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) continue; printk(KERN_WARNING - "CPU: CPU feature %s disabled, no CPUID level 0x%x\n", - x86_cap_flags[df->feature], df->level); + "CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n", + x86_cap_flag(df->feature), df->level); } } diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index f304773285ae..f1314d0bcf0a 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -338,8 +338,10 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op) * a relative jump. */ rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE; - if (abs(rel) > 0x7fffffff) + if (abs(rel) > 0x7fffffff) { + __arch_remove_optimized_kprobe(op, 0); return -ERANGE; + } buf = (u8 *)op->optinsn.insn; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2d872e08fab9..42a2dca984b3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1284,6 +1284,9 @@ static void remove_siblinginfo(int cpu) for_each_cpu(sibling, cpu_sibling_mask(cpu)) cpumask_clear_cpu(cpu, cpu_sibling_mask(sibling)); + for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) + cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling)); + cpumask_clear(cpu_llc_shared_mask(cpu)); cpumask_clear(cpu_sibling_mask(cpu)); cpumask_clear(cpu_core_mask(cpu)); c->phys_proc_id = 0; diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 38a0afe83c6b..976e3a57f9ea 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -53,14 +53,14 @@ u64 kvm_supported_xcr0(void) return xcr0; } -void kvm_update_cpuid(struct kvm_vcpu *vcpu) +int kvm_update_cpuid(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; struct kvm_lapic *apic = vcpu->arch.apic; best = kvm_find_cpuid_entry(vcpu, 1, 0); if (!best) - return; + return 0; /* Update OSXSAVE bit */ if (cpu_has_xsave && best->function == 0x1) { @@ -88,7 +88,17 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu) xstate_required_size(vcpu->arch.xcr0); } + /* + * The existing code assumes virtual address is 48-bit in the canonical + * address checks; exit if it is ever changed. + */ + best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); + if (best && ((best->eax & 0xff00) >> 8) != 48 && + ((best->eax & 0xff00) >> 8) != 0) + return -EINVAL; + kvm_pmu_cpuid_update(vcpu); + return 0; } static int is_efer_nx(void) @@ -112,8 +122,8 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) break; } } - if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) { - entry->edx &= ~(1 << 20); + if (entry && (entry->edx & bit(X86_FEATURE_NX)) && !is_efer_nx()) { + entry->edx &= ~bit(X86_FEATURE_NX); printk(KERN_INFO "kvm: guest NX capability removed\n"); } } @@ -151,10 +161,9 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, } vcpu->arch.cpuid_nent = cpuid->nent; cpuid_fix_nx_cap(vcpu); - r = 0; kvm_apic_set_version(vcpu); kvm_x86_ops->cpuid_update(vcpu); - kvm_update_cpuid(vcpu); + r = kvm_update_cpuid(vcpu); out_free: vfree(cpuid_entries); @@ -178,9 +187,7 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, vcpu->arch.cpuid_nent = cpuid->nent; kvm_apic_set_version(vcpu); kvm_x86_ops->cpuid_update(vcpu); - kvm_update_cpuid(vcpu); - return 0; - + r = kvm_update_cpuid(vcpu); out: return r; } @@ -767,6 +774,12 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) if (!best) best = check_cpuid_limit(vcpu, function, index); + /* + * Perfmon not yet supported for L2 guest. + */ + if (is_guest_mode(vcpu) && function == 0xa) + best = NULL; + if (best) { *eax = best->eax; *ebx = best->ebx; diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index a5380590ab0e..4452eedfaedd 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -3,7 +3,7 @@ #include "x86.h" -void kvm_update_cpuid(struct kvm_vcpu *vcpu); +int kvm_update_cpuid(struct kvm_vcpu *vcpu); struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, u32 function, u32 index); int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, @@ -88,6 +88,14 @@ static inline bool guest_cpuid_has_x2apic(struct kvm_vcpu *vcpu) return best && (best->ecx & bit(X86_FEATURE_X2APIC)); } +static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 0, 0); + return best && best->ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx; +} + static inline bool guest_cpuid_has_gbpages(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 03954f7900f5..a46207a05835 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -527,6 +527,7 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg) static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, u32 error, bool valid) { + WARN_ON(vec > 0x1f); ctxt->exception.vector = vec; ctxt->exception.error_code = error; ctxt->exception.error_code_valid = valid; @@ -1468,7 +1469,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, return ret; err_code = selector & 0xfffc; - err_vec = GP_VECTOR; + err_vec = in_task_switch ? TS_VECTOR : GP_VECTOR; /* can't load system descriptor into segment selector */ if (seg <= VCPU_SREG_GS && !seg_desc.s) @@ -1503,6 +1504,15 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, if (rpl > cpl || dpl != cpl) goto exception; } + /* in long-mode d/b must be clear if l is set */ + if (seg_desc.d && seg_desc.l) { + u64 efer = 0; + + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); + if (efer & EFER_LMA) + goto exception; + } + /* CS(RPL) <- CPL */ selector = (selector & 0xfffc) | cpl; break; @@ -1549,8 +1559,7 @@ load: ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg); return X86EMUL_CONTINUE; exception: - emulate_exception(ctxt, err_vec, err_code, true); - return X86EMUL_PROPAGATE_FAULT; + return emulate_exception(ctxt, err_vec, err_code, true); } static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, @@ -2723,8 +2732,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, if (!next_tss_desc.p || ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || desc_limit < 0x2b)) { - emulate_ts(ctxt, tss_selector & 0xfffc); - return X86EMUL_PROPAGATE_FAULT; + return emulate_ts(ctxt, tss_selector & 0xfffc); } if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { @@ -3016,7 +3024,7 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt) ctxt->dst.val = swab64(ctxt->src.val); break; default: - return X86EMUL_PROPAGATE_FAULT; + BUG(); } return X86EMUL_CONTINUE; } @@ -3140,12 +3148,8 @@ static int em_clts(struct x86_emulate_ctxt *ctxt) static int em_vmcall(struct x86_emulate_ctxt *ctxt) { - int rc; - - if (ctxt->modrm_mod != 3 || ctxt->modrm_rm != 1) - return X86EMUL_UNHANDLEABLE; + int rc = ctxt->ops->fix_hypercall(ctxt); - rc = ctxt->ops->fix_hypercall(ctxt); if (rc != X86EMUL_CONTINUE) return rc; @@ -3563,6 +3567,12 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) +static const struct opcode group7_rm0[] = { + N, + I(SrcNone | Priv | EmulateOnUD, em_vmcall), + N, N, N, N, N, N, +}; + static const struct opcode group7_rm1[] = { DI(SrcNone | Priv, monitor), DI(SrcNone | Priv, mwait), @@ -3656,7 +3666,7 @@ static const struct group_dual group7 = { { II(SrcMem16 | Mov | Priv, em_lmsw, lmsw), II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg), }, { - I(SrcNone | Priv | EmulateOnUD, em_vmcall), + EXT(0, group7_rm0), EXT(0, group7_rm1), N, EXT(0, group7_rm3), II(SrcNone | DstMem | Mov, em_smsw, smsw), N, @@ -3687,14 +3697,18 @@ static const struct gprefix pfx_0f_6f_0f_7f = { I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov), }; -static const struct gprefix pfx_vmovntpx = { - I(0, em_mov), N, N, N, +static const struct gprefix pfx_0f_2b = { + I(0, em_mov), I(0, em_mov), N, N, }; static const struct gprefix pfx_0f_28_0f_29 = { I(Aligned, em_mov), I(Aligned, em_mov), N, N, }; +static const struct gprefix pfx_0f_e7 = { + N, I(Sse, em_mov), N, N, +}; + static const struct escape escape_d9 = { { N, N, N, N, N, N, N, I(DstMem, em_fnstcw), }, { @@ -3901,7 +3915,7 @@ static const struct opcode twobyte_table[256] = { N, N, N, N, GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29), GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29), - N, GP(ModRM | DstMem | SrcReg | Sse | Mov | Aligned, &pfx_vmovntpx), + N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b), N, N, N, N, /* 0x30 - 0x3F */ II(ImplicitOps | Priv, em_wrmsr, wrmsr), @@ -3965,7 +3979,8 @@ static const struct opcode twobyte_table[256] = { /* 0xD0 - 0xDF */ N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, /* 0xE0 - 0xEF */ - N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, + N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7), + N, N, N, N, N, N, N, N, /* 0xF0 - 0xFF */ N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N }; @@ -4829,8 +4844,10 @@ writeback: ctxt->eip = ctxt->_eip; done: - if (rc == X86EMUL_PROPAGATE_FAULT) + if (rc == X86EMUL_PROPAGATE_FAULT) { + WARN_ON(ctxt->exception.vector > 0x1f); ctxt->have_exception = true; + } if (rc == X86EMUL_INTERCEPTED) return EMULATION_INTERCEPTED; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 08e8a899e005..b8345dd41b25 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -112,17 +112,6 @@ static inline int __apic_test_and_clear_vector(int vec, void *bitmap) struct static_key_deferred apic_hw_disabled __read_mostly; struct static_key_deferred apic_sw_disabled __read_mostly; -static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) -{ - if ((kvm_apic_get_reg(apic, APIC_SPIV) ^ val) & APIC_SPIV_APIC_ENABLED) { - if (val & APIC_SPIV_APIC_ENABLED) - static_key_slow_dec_deferred(&apic_sw_disabled); - else - static_key_slow_inc(&apic_sw_disabled.key); - } - apic_set_reg(apic, APIC_SPIV, val); -} - static inline int apic_enabled(struct kvm_lapic *apic) { return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic); @@ -210,6 +199,20 @@ out: kvm_vcpu_request_scan_ioapic(kvm); } +static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) +{ + u32 prev = kvm_apic_get_reg(apic, APIC_SPIV); + + apic_set_reg(apic, APIC_SPIV, val); + if ((prev ^ val) & APIC_SPIV_APIC_ENABLED) { + if (val & APIC_SPIV_APIC_ENABLED) { + static_key_slow_dec_deferred(&apic_sw_disabled); + recalculate_apic_map(apic->vcpu->kvm); + } else + static_key_slow_inc(&apic_sw_disabled.key); + } +} + static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) { apic_set_reg(apic, APIC_ID, id << 24); @@ -706,6 +709,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, int result = 0; struct kvm_vcpu *vcpu = apic->vcpu; + trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, + trig_mode, vector); switch (delivery_mode) { case APIC_DM_LOWEST: vcpu->arch.apic_arb_prio++; @@ -727,8 +732,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_vcpu_kick(vcpu); } - trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, - trig_mode, vector, false); break; case APIC_DM_REMRD: @@ -1352,6 +1355,9 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) return; hrtimer_cancel(&apic->lapic_timer.timer); + /* Inject here so clearing tscdeadline won't override new value */ + if (apic_has_pending_timer(vcpu)) + kvm_inject_apic_timer_irqs(vcpu); apic->lapic_timer.tscdeadline = data; start_apic_timer(apic); } @@ -1639,6 +1645,8 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) if (atomic_read(&apic->lapic_timer.pending) > 0) { kvm_apic_local_deliver(apic, APIC_LVTT); + if (apic_lvtt_tscdeadline(apic)) + apic->lapic_timer.tscdeadline = 0; atomic_set(&apic->lapic_timer.pending, 0); } } diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 931467881da7..3201e93ebd07 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -199,16 +199,20 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask) EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); /* - * spte bits of bit 3 ~ bit 11 are used as low 9 bits of generation number, - * the bits of bits 52 ~ bit 61 are used as high 10 bits of generation - * number. + * the low bit of the generation number is always presumed to be zero. + * This disables mmio caching during memslot updates. The concept is + * similar to a seqcount but instead of retrying the access we just punt + * and ignore the cache. + * + * spte bits 3-11 are used as bits 1-9 of the generation number, + * the bits 52-61 are used as bits 10-19 of the generation number. */ -#define MMIO_SPTE_GEN_LOW_SHIFT 3 +#define MMIO_SPTE_GEN_LOW_SHIFT 2 #define MMIO_SPTE_GEN_HIGH_SHIFT 52 -#define MMIO_GEN_SHIFT 19 -#define MMIO_GEN_LOW_SHIFT 9 -#define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 1) +#define MMIO_GEN_SHIFT 20 +#define MMIO_GEN_LOW_SHIFT 10 +#define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 2) #define MMIO_GEN_MASK ((1 << MMIO_GEN_SHIFT) - 1) #define MMIO_MAX_GEN ((1 << MMIO_GEN_SHIFT) - 1) @@ -236,12 +240,7 @@ static unsigned int get_mmio_spte_generation(u64 spte) static unsigned int kvm_current_mmio_generation(struct kvm *kvm) { - /* - * Init kvm generation close to MMIO_MAX_GEN to easily test the - * code of handling generation number wrap-around. - */ - return (kvm_memslots(kvm)->generation + - MMIO_MAX_GEN - 150) & MMIO_GEN_MASK; + return kvm_memslots(kvm)->generation & MMIO_GEN_MASK; } static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn, @@ -296,11 +295,6 @@ static bool check_mmio_spte(struct kvm *kvm, u64 spte) return likely(kvm_gen == spte_gen); } -static inline u64 rsvd_bits(int s, int e) -{ - return ((1ULL << (e - s + 1)) - 1) << s; -} - void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, u64 dirty_mask, u64 nx_mask, u64 x_mask) { @@ -1180,7 +1174,7 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) * Write-protect on the specified @sptep, @pt_protect indicates whether * spte write-protection is caused by protecting shadow page table. * - * Note: write protection is difference between drity logging and spte + * Note: write protection is difference between dirty logging and spte * protection: * - for dirty logging, the spte can be set to writable at anytime if * its dirty bitmap is properly set. @@ -1268,7 +1262,8 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn) } static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, - struct kvm_memory_slot *slot, unsigned long data) + struct kvm_memory_slot *slot, gfn_t gfn, int level, + unsigned long data) { u64 *sptep; struct rmap_iterator iter; @@ -1276,7 +1271,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, while ((sptep = rmap_get_first(*rmapp, &iter))) { BUG_ON(!(*sptep & PT_PRESENT_MASK)); - rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", sptep, *sptep); + rmap_printk("kvm_rmap_unmap_hva: spte %p %llx gfn %llx (%d)\n", + sptep, *sptep, gfn, level); drop_spte(kvm, sptep); need_tlb_flush = 1; @@ -1286,7 +1282,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, } static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, - struct kvm_memory_slot *slot, unsigned long data) + struct kvm_memory_slot *slot, gfn_t gfn, int level, + unsigned long data) { u64 *sptep; struct rmap_iterator iter; @@ -1300,7 +1297,8 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { BUG_ON(!is_shadow_present_pte(*sptep)); - rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", sptep, *sptep); + rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n", + sptep, *sptep, gfn, level); need_flush = 1; @@ -1334,6 +1332,8 @@ static int kvm_handle_hva_range(struct kvm *kvm, int (*handler)(struct kvm *kvm, unsigned long *rmapp, struct kvm_memory_slot *slot, + gfn_t gfn, + int level, unsigned long data)) { int j; @@ -1363,6 +1363,7 @@ static int kvm_handle_hva_range(struct kvm *kvm, j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) { unsigned long idx, idx_end; unsigned long *rmapp; + gfn_t gfn = gfn_start; /* * {idx(page_j) | page_j intersects with @@ -1373,8 +1374,10 @@ static int kvm_handle_hva_range(struct kvm *kvm, rmapp = __gfn_to_rmap(gfn_start, j, memslot); - for (; idx <= idx_end; ++idx) - ret |= handler(kvm, rmapp++, memslot, data); + for (; idx <= idx_end; + ++idx, gfn += (1UL << KVM_HPAGE_GFN_SHIFT(j))) + ret |= handler(kvm, rmapp++, memslot, + gfn, j, data); } } @@ -1385,6 +1388,7 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, unsigned long data, int (*handler)(struct kvm *kvm, unsigned long *rmapp, struct kvm_memory_slot *slot, + gfn_t gfn, int level, unsigned long data)) { return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler); @@ -1406,24 +1410,14 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) } static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, - struct kvm_memory_slot *slot, unsigned long data) + struct kvm_memory_slot *slot, gfn_t gfn, int level, + unsigned long data) { u64 *sptep; struct rmap_iterator uninitialized_var(iter); int young = 0; - /* - * In case of absence of EPT Access and Dirty Bits supports, - * emulate the accessed bit for EPT, by checking if this page has - * an EPT mapping, and clearing it if it does. On the next access, - * a new EPT mapping will be established. - * This has some overhead, but not as much as the cost of swapping - * out actively used pages or breaking up actively used hugepages. - */ - if (!shadow_accessed_mask) { - young = kvm_unmap_rmapp(kvm, rmapp, slot, data); - goto out; - } + BUG_ON(!shadow_accessed_mask); for (sptep = rmap_get_first(*rmapp, &iter); sptep; sptep = rmap_get_next(&iter)) { @@ -1435,14 +1429,13 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, (unsigned long *)sptep); } } -out: - /* @data has hva passed to kvm_age_hva(). */ - trace_kvm_age_page(data, slot, young); + trace_kvm_age_page(gfn, level, slot, young); return young; } static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, - struct kvm_memory_slot *slot, unsigned long data) + struct kvm_memory_slot *slot, gfn_t gfn, + int level, unsigned long data) { u64 *sptep; struct rmap_iterator iter; @@ -1480,13 +1473,33 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); - kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0); + kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, gfn, sp->role.level, 0); kvm_flush_remote_tlbs(vcpu->kvm); } -int kvm_age_hva(struct kvm *kvm, unsigned long hva) +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) { - return kvm_handle_hva(kvm, hva, hva, kvm_age_rmapp); + /* + * In case of absence of EPT Access and Dirty Bits supports, + * emulate the accessed bit for EPT, by checking if this page has + * an EPT mapping, and clearing it if it does. On the next access, + * a new EPT mapping will be established. + * This has some overhead, but not as much as the cost of swapping + * out actively used pages or breaking up actively used hugepages. + */ + if (!shadow_accessed_mask) { + /* + * We are holding the kvm->mmu_lock, and we are blowing up + * shadow PTEs. MMU notifier consumers need to be kept at bay. + * This is correct as long as we don't decouple the mmu_lock + * protected regions (like invalidate_range_start|end does). + */ + kvm->mmu_notifier_seq++; + return kvm_handle_hva_range(kvm, start, end, 0, + kvm_unmap_rmapp); + } + + return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp); } int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) @@ -1749,7 +1762,7 @@ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, return 1; } - kvm_mmu_flush_tlb(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); return 0; } @@ -1802,7 +1815,7 @@ static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); if (flush) - kvm_mmu_flush_tlb(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } struct mmu_page_path { @@ -2536,7 +2549,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, true, host_writable)) { if (write_fault) *emulate = 1; - kvm_mmu_flush_tlb(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } if (unlikely(is_mmio_spte(*sptep) && emulate)) @@ -3163,7 +3176,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) return; - vcpu_clear_mmio_info(vcpu, ~0ul); + vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { hpa_t root = vcpu->arch.mmu.root_hpa; @@ -3206,7 +3219,7 @@ static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr, { if (exception) exception->error_code = 0; - return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access); + return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access, exception); } static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct) @@ -3450,13 +3463,6 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu, context->nx = false; } -void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) -{ - ++vcpu->stat.tlb_flush; - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); -} -EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb); - void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu) { mmu_free_roots(vcpu); @@ -3518,6 +3524,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int maxphyaddr = cpuid_maxphyaddr(vcpu); u64 exb_bit_rsvd = 0; u64 gbpages_bit_rsvd = 0; + u64 nonleaf_bit8_rsvd = 0; context->bad_mt_xwr = 0; @@ -3525,6 +3532,14 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, exb_bit_rsvd = rsvd_bits(63, 63); if (!guest_cpuid_has_gbpages(vcpu)) gbpages_bit_rsvd = rsvd_bits(7, 7); + + /* + * Non-leaf PML4Es and PDPEs reserve bit 8 (which would be the G bit for + * leaf entries) on AMD CPUs only. + */ + if (guest_cpuid_is_amd(vcpu)) + nonleaf_bit8_rsvd = rsvd_bits(8, 8); + switch (context->root_level) { case PT32_ROOT_LEVEL: /* no rsvd bits for 2 level 4K page table entries */ @@ -3559,9 +3574,9 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, break; case PT64_ROOT_LEVEL: context->rsvd_bits_mask[0][3] = exb_bit_rsvd | - rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 7); + nonleaf_bit8_rsvd | rsvd_bits(7, 7) | rsvd_bits(maxphyaddr, 51); context->rsvd_bits_mask[0][2] = exb_bit_rsvd | - gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51); + nonleaf_bit8_rsvd | gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51); context->rsvd_bits_mask[0][1] = exb_bit_rsvd | rsvd_bits(maxphyaddr, 51); context->rsvd_bits_mask[0][0] = exb_bit_rsvd | @@ -3962,7 +3977,7 @@ static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page, if (remote_flush) kvm_flush_remote_tlbs(vcpu->kvm); else if (local_flush) - kvm_mmu_flush_tlb(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, @@ -4223,7 +4238,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) { vcpu->arch.mmu.invlpg(vcpu, gva); - kvm_mmu_flush_tlb(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); ++vcpu->stat.invlpg; } EXPORT_SYMBOL_GPL(kvm_mmu_invlpg); @@ -4433,7 +4448,7 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm) * The very rare case: if the generation-number is round, * zap all shadow pages. */ - if (unlikely(kvm_current_mmio_generation(kvm) >= MMIO_MAX_GEN)) { + if (unlikely(kvm_current_mmio_generation(kvm) == 0)) { printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n"); kvm_mmu_invalidate_zap_all_pages(kvm); } diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index b982112d2ca5..bde8ee725754 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -56,6 +56,11 @@ #define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT) #define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT) +static inline u64 rsvd_bits(int s, int e) +{ + return ((1ULL << (e - s + 1)) - 1) << s; +} + int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]); void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask); diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 410776528265..806d58e3c320 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -298,8 +298,7 @@ retry_walk: } #endif walker->max_level = walker->level; - ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || - (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0); + ASSERT(!is_long_mode(vcpu) && is_pae(vcpu)); accessed_dirty = PT_GUEST_ACCESSED_MASK; pt_access = pte_access = ACC_ALL; @@ -321,9 +320,22 @@ retry_walk: walker->pte_gpa[walker->level - 1] = pte_gpa; real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), - PFERR_USER_MASK|PFERR_WRITE_MASK); + PFERR_USER_MASK|PFERR_WRITE_MASK, + &walker->fault); + + /* + * FIXME: This can happen if emulation (for of an INS/OUTS + * instruction) triggers a nested page fault. The exit + * qualification / exit info field will incorrectly have + * "guest page access" as the nested page fault's cause, + * instead of "guest page structure access". To fix this, + * the x86_exception struct should be augmented with enough + * information to fix the exit_qualification or exit_info_1 + * fields. + */ if (unlikely(real_gfn == UNMAPPED_GVA)) - goto error; + return 0; + real_gfn = gpa_to_gfn(real_gfn); host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn, @@ -364,7 +376,7 @@ retry_walk: if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36()) gfn += pse36_gfn_delta(pte); - real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access); + real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access, &walker->fault); if (real_gpa == UNMAPPED_GVA) return 0; diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 3dd6accb64ec..8e6b7d869d2f 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -15,6 +15,7 @@ #include <linux/types.h> #include <linux/kvm_host.h> #include <linux/perf_event.h> +#include <asm/perf_event.h> #include "x86.h" #include "cpuid.h" #include "lapic.h" @@ -463,7 +464,8 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu) { struct kvm_pmu *pmu = &vcpu->arch.pmu; struct kvm_cpuid_entry2 *entry; - unsigned bitmap_len; + union cpuid10_eax eax; + union cpuid10_edx edx; pmu->nr_arch_gp_counters = 0; pmu->nr_arch_fixed_counters = 0; @@ -475,25 +477,27 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu) entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); if (!entry) return; + eax.full = entry->eax; + edx.full = entry->edx; - pmu->version = entry->eax & 0xff; + pmu->version = eax.split.version_id; if (!pmu->version) return; - pmu->nr_arch_gp_counters = min((int)(entry->eax >> 8) & 0xff, - INTEL_PMC_MAX_GENERIC); - pmu->counter_bitmask[KVM_PMC_GP] = - ((u64)1 << ((entry->eax >> 16) & 0xff)) - 1; - bitmap_len = (entry->eax >> 24) & 0xff; - pmu->available_event_types = ~entry->ebx & ((1ull << bitmap_len) - 1); + pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters, + INTEL_PMC_MAX_GENERIC); + pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1; + pmu->available_event_types = ~entry->ebx & + ((1ull << eax.split.mask_length) - 1); if (pmu->version == 1) { pmu->nr_arch_fixed_counters = 0; } else { - pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f), + pmu->nr_arch_fixed_counters = + min_t(int, edx.split.num_counters_fixed, INTEL_PMC_MAX_FIXED); pmu->counter_bitmask[KVM_PMC_FIXED] = - ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1; + ((u64)1 << edx.split.bit_width_fixed) - 1; } pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) | diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ddf742768ecf..f7f6a4a157a6 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -622,7 +622,7 @@ static int has_svm(void) return 1; } -static void svm_hardware_disable(void *garbage) +static void svm_hardware_disable(void) { /* Make sure we clean up behind us */ if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) @@ -633,7 +633,7 @@ static void svm_hardware_disable(void *garbage) amd_pmu_disable_virt(); } -static int svm_hardware_enable(void *garbage) +static int svm_hardware_enable(void) { struct svm_cpu_data *sd; @@ -1257,7 +1257,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) svm->asid_generation = 0; init_vmcb(svm); - svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; + svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | + MSR_IA32_APICBASE_ENABLE; if (kvm_vcpu_is_bsp(&svm->vcpu)) svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; @@ -1974,10 +1975,26 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, { struct vcpu_svm *svm = to_svm(vcpu); - svm->vmcb->control.exit_code = SVM_EXIT_NPF; - svm->vmcb->control.exit_code_hi = 0; - svm->vmcb->control.exit_info_1 = fault->error_code; - svm->vmcb->control.exit_info_2 = fault->address; + if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) { + /* + * TODO: track the cause of the nested page fault, and + * correctly fill in the high bits of exit_info_1. + */ + svm->vmcb->control.exit_code = SVM_EXIT_NPF; + svm->vmcb->control.exit_code_hi = 0; + svm->vmcb->control.exit_info_1 = (1ULL << 32); + svm->vmcb->control.exit_info_2 = fault->address; + } + + svm->vmcb->control.exit_info_1 &= ~0xffffffffULL; + svm->vmcb->control.exit_info_1 |= fault->error_code; + + /* + * The present bit is always zero for page structure faults on real + * hardware. + */ + if (svm->vmcb->control.exit_info_1 & (2ULL << 32)) + svm->vmcb->control.exit_info_1 &= ~1; nested_svm_vmexit(svm); } @@ -3031,7 +3048,7 @@ static int cr8_write_interception(struct vcpu_svm *svm) return 0; } -u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) +static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) { struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu)); return vmcb->control.tsc_offset + @@ -4305,6 +4322,10 @@ static void svm_handle_external_intr(struct kvm_vcpu *vcpu) local_irq_enable(); } +static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + static struct kvm_x86_ops svm_x86_ops = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -4349,7 +4370,6 @@ static struct kvm_x86_ops svm_x86_ops = { .cache_reg = svm_cache_reg, .get_rflags = svm_get_rflags, .set_rflags = svm_set_rflags, - .fpu_activate = svm_fpu_activate, .fpu_deactivate = svm_fpu_deactivate, .tlb_flush = svm_flush_tlb, @@ -4406,6 +4426,8 @@ static struct kvm_x86_ops svm_x86_ops = { .check_intercept = svm_check_intercept, .handle_external_intr = svm_handle_external_intr, + + .sched_in = svm_sched_in, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index e850a7d332be..6b06ab8748dd 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -415,15 +415,14 @@ TRACE_EVENT(kvm_apic_ipi, ); TRACE_EVENT(kvm_apic_accept_irq, - TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec, bool coalesced), - TP_ARGS(apicid, dm, tm, vec, coalesced), + TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec), + TP_ARGS(apicid, dm, tm, vec), TP_STRUCT__entry( __field( __u32, apicid ) __field( __u16, dm ) __field( __u8, tm ) __field( __u8, vec ) - __field( bool, coalesced ) ), TP_fast_assign( @@ -431,14 +430,12 @@ TRACE_EVENT(kvm_apic_accept_irq, __entry->dm = dm; __entry->tm = tm; __entry->vec = vec; - __entry->coalesced = coalesced; ), - TP_printk("apicid %x vec %u (%s|%s)%s", + TP_printk("apicid %x vec %u (%s|%s)", __entry->apicid, __entry->vec, __print_symbolic((__entry->dm >> 8 & 0x7), kvm_deliver_mode), - __entry->tm ? "level" : "edge", - __entry->coalesced ? " (coalesced)" : "") + __entry->tm ? "level" : "edge") ); TRACE_EVENT(kvm_eoi, @@ -850,6 +847,36 @@ TRACE_EVENT(kvm_track_tsc, #endif /* CONFIG_X86_64 */ +TRACE_EVENT(kvm_ple_window, + TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old), + TP_ARGS(grow, vcpu_id, new, old), + + TP_STRUCT__entry( + __field( bool, grow ) + __field( unsigned int, vcpu_id ) + __field( int, new ) + __field( int, old ) + ), + + TP_fast_assign( + __entry->grow = grow; + __entry->vcpu_id = vcpu_id; + __entry->new = new; + __entry->old = old; + ), + + TP_printk("vcpu %u: ple_window %d (%s %d)", + __entry->vcpu_id, + __entry->new, + __entry->grow ? "grow" : "shrink", + __entry->old) +); + +#define trace_kvm_ple_window_grow(vcpu_id, new, old) \ + trace_kvm_ple_window(true, vcpu_id, new, old) +#define trace_kvm_ple_window_shrink(vcpu_id, new, old) \ + trace_kvm_ple_window(false, vcpu_id, new, old) + #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bfe11cf124a1..04fa1b8298c8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -125,14 +125,32 @@ module_param(nested, bool, S_IRUGO); * Time is measured based on a counter that runs at the same rate as the TSC, * refer SDM volume 3b section 21.6.13 & 22.1.3. */ -#define KVM_VMX_DEFAULT_PLE_GAP 128 -#define KVM_VMX_DEFAULT_PLE_WINDOW 4096 +#define KVM_VMX_DEFAULT_PLE_GAP 128 +#define KVM_VMX_DEFAULT_PLE_WINDOW 4096 +#define KVM_VMX_DEFAULT_PLE_WINDOW_GROW 2 +#define KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK 0 +#define KVM_VMX_DEFAULT_PLE_WINDOW_MAX \ + INT_MAX / KVM_VMX_DEFAULT_PLE_WINDOW_GROW + static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP; module_param(ple_gap, int, S_IRUGO); static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; module_param(ple_window, int, S_IRUGO); +/* Default doubles per-vcpu window every exit. */ +static int ple_window_grow = KVM_VMX_DEFAULT_PLE_WINDOW_GROW; +module_param(ple_window_grow, int, S_IRUGO); + +/* Default resets per-vcpu window every exit to ple_window. */ +static int ple_window_shrink = KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK; +module_param(ple_window_shrink, int, S_IRUGO); + +/* Default is to compute the maximum so we can never overflow. */ +static int ple_window_actual_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; +static int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; +module_param(ple_window_max, int, S_IRUGO); + extern const ulong vmx_return; #define NR_AUTOLOAD_MSRS 8 @@ -379,6 +397,7 @@ struct nested_vmx { * we must keep them pinned while L2 runs. */ struct page *apic_access_page; + struct page *virtual_apic_page; u64 msr_ia32_feature_control; struct hrtimer preemption_timer; @@ -484,6 +503,10 @@ struct vcpu_vmx { /* Support for a guest hypervisor (nested VMX) */ struct nested_vmx nested; + + /* Dynamic PLE window. */ + int ple_window; + bool ple_window_dirty; }; enum segment_cache_field { @@ -533,6 +556,7 @@ static int max_shadow_read_only_fields = ARRAY_SIZE(shadow_read_only_fields); static unsigned long shadow_read_write_fields[] = { + TPR_THRESHOLD, GUEST_RIP, GUEST_RSP, GUEST_CR0, @@ -743,6 +767,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var); static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); +static int alloc_identity_pagetable(struct kvm *kvm); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -2135,7 +2160,7 @@ static u64 guest_read_tsc(void) * Like guest_read_tsc, but always returns L1's notion of the timestamp * counter, even if a nested guest (L2) is currently running. */ -u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) +static u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) { u64 tsc_offset; @@ -2330,7 +2355,7 @@ static __init void nested_vmx_setup_ctls_msrs(void) CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING | - CPU_BASED_PAUSE_EXITING | + CPU_BASED_PAUSE_EXITING | CPU_BASED_TPR_SHADOW | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; /* * We can allow some features even when not supported by the @@ -2601,6 +2626,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_CR_PAT: if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { + if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) + return 1; vmcs_write64(GUEST_IA32_PAT, data); vcpu->arch.pat = data; break; @@ -2704,7 +2731,7 @@ static void kvm_cpu_vmxon(u64 addr) : "memory", "cc"); } -static int hardware_enable(void *garbage) +static int hardware_enable(void) { int cpu = raw_smp_processor_id(); u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); @@ -2768,7 +2795,7 @@ static void kvm_cpu_vmxoff(void) asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); } -static void hardware_disable(void *garbage) +static void hardware_disable(void) { if (vmm_exclusive) { vmclear_local_loaded_vmcss(); @@ -3107,9 +3134,17 @@ static __init int hardware_setup(void) if (!cpu_has_vmx_unrestricted_guest()) enable_unrestricted_guest = 0; - if (!cpu_has_vmx_flexpriority()) + if (!cpu_has_vmx_flexpriority()) { flexpriority_enabled = 0; + /* + * set_apic_access_page_addr() is used to reload apic access + * page upon invalidation. No need to do anything if the + * processor does not have the APIC_ACCESS_ADDR VMCS field. + */ + kvm_x86_ops->set_apic_access_page_addr = NULL; + } + if (!cpu_has_vmx_tpr_shadow()) kvm_x86_ops->update_cr8_intercept = NULL; @@ -3905,7 +3940,7 @@ static int init_rmode_tss(struct kvm *kvm) { gfn_t fn; u16 data = 0; - int r, idx, ret = 0; + int idx, r; idx = srcu_read_lock(&kvm->srcu); fn = kvm->arch.tss_addr >> PAGE_SHIFT; @@ -3927,32 +3962,32 @@ static int init_rmode_tss(struct kvm *kvm) r = kvm_write_guest_page(kvm, fn, &data, RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1, sizeof(u8)); - if (r < 0) - goto out; - - ret = 1; out: srcu_read_unlock(&kvm->srcu, idx); - return ret; + return r; } static int init_rmode_identity_map(struct kvm *kvm) { - int i, idx, r, ret; + int i, idx, r = 0; pfn_t identity_map_pfn; u32 tmp; if (!enable_ept) - return 1; - if (unlikely(!kvm->arch.ept_identity_pagetable)) { - printk(KERN_ERR "EPT: identity-mapping pagetable " - "haven't been allocated!\n"); return 0; - } + + /* Protect kvm->arch.ept_identity_pagetable_done. */ + mutex_lock(&kvm->slots_lock); + if (likely(kvm->arch.ept_identity_pagetable_done)) - return 1; - ret = 0; + goto out2; + identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT; + + r = alloc_identity_pagetable(kvm); + if (r < 0) + goto out2; + idx = srcu_read_lock(&kvm->srcu); r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); if (r < 0) @@ -3967,10 +4002,13 @@ static int init_rmode_identity_map(struct kvm *kvm) goto out; } kvm->arch.ept_identity_pagetable_done = true; - ret = 1; + out: srcu_read_unlock(&kvm->srcu, idx); - return ret; + +out2: + mutex_unlock(&kvm->slots_lock); + return r; } static void seg_setup(int seg) @@ -3995,23 +4033,28 @@ static int alloc_apic_access_page(struct kvm *kvm) int r = 0; mutex_lock(&kvm->slots_lock); - if (kvm->arch.apic_access_page) + if (kvm->arch.apic_access_page_done) goto out; kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; kvm_userspace_mem.flags = 0; - kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL; + kvm_userspace_mem.guest_phys_addr = APIC_DEFAULT_PHYS_BASE; kvm_userspace_mem.memory_size = PAGE_SIZE; r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); if (r) goto out; - page = gfn_to_page(kvm, 0xfee00); + page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); if (is_error_page(page)) { r = -EFAULT; goto out; } - kvm->arch.apic_access_page = page; + /* + * Do not pin the page in memory, so that memory hot-unplug + * is able to migrate it. + */ + put_page(page); + kvm->arch.apic_access_page_done = true; out: mutex_unlock(&kvm->slots_lock); return r; @@ -4019,31 +4062,20 @@ out: static int alloc_identity_pagetable(struct kvm *kvm) { - struct page *page; + /* Called with kvm->slots_lock held. */ + struct kvm_userspace_memory_region kvm_userspace_mem; int r = 0; - mutex_lock(&kvm->slots_lock); - if (kvm->arch.ept_identity_pagetable) - goto out; + BUG_ON(kvm->arch.ept_identity_pagetable_done); + kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; kvm_userspace_mem.flags = 0; kvm_userspace_mem.guest_phys_addr = kvm->arch.ept_identity_map_addr; kvm_userspace_mem.memory_size = PAGE_SIZE; r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); - if (r) - goto out; - - page = gfn_to_page(kvm, kvm->arch.ept_identity_map_addr >> PAGE_SHIFT); - if (is_error_page(page)) { - r = -EFAULT; - goto out; - } - kvm->arch.ept_identity_pagetable = page; -out: - mutex_unlock(&kvm->slots_lock); return r; } @@ -4402,7 +4434,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) if (ple_gap) { vmcs_write32(PLE_GAP, ple_gap); - vmcs_write32(PLE_WINDOW, ple_window); + vmx->ple_window = ple_window; + vmx->ple_window_dirty = true; } vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0); @@ -4477,7 +4510,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); kvm_set_cr8(&vmx->vcpu, 0); - apic_base_msr.data = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; + apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; if (kvm_vcpu_is_bsp(&vmx->vcpu)) apic_base_msr.data |= MSR_IA32_APICBASE_BSP; apic_base_msr.host_initiated = true; @@ -4537,9 +4570,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_write32(TPR_THRESHOLD, 0); } - if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) - vmcs_write64(APIC_ACCESS_ADDR, - page_to_phys(vmx->vcpu.kvm->arch.apic_access_page)); + kvm_vcpu_reload_apic_access_page(vcpu); if (vmx_vm_has_apicv(vcpu->kvm)) memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); @@ -4729,10 +4760,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) if (ret) return ret; kvm->arch.tss_addr = addr; - if (!init_rmode_tss(kvm)) - return -ENOMEM; - - return 0; + return init_rmode_tss(kvm); } static bool rmode_exception(struct kvm_vcpu *vcpu, int vec) @@ -5521,17 +5549,18 @@ static u64 ept_rsvd_mask(u64 spte, int level) for (i = 51; i > boot_cpu_data.x86_phys_bits; i--) mask |= (1ULL << i); - if (level > 2) + if (level == 4) /* bits 7:3 reserved */ mask |= 0xf8; - else if (level == 2) { - if (spte & (1ULL << 7)) - /* 2MB ref, bits 20:12 reserved */ - mask |= 0x1ff000; - else - /* bits 6:3 reserved */ - mask |= 0x78; - } + else if (spte & (1ULL << 7)) + /* + * 1GB/2MB page, bits 29:12 or 20:12 reserved respectively, + * level == 1 if the hypervisor is using the ignored bit 7. + */ + mask |= (PAGE_SIZE << ((level - 1) * 9)) - PAGE_SIZE; + else if (level > 1) + /* bits 6:3 reserved */ + mask |= 0x78; return mask; } @@ -5561,7 +5590,8 @@ static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte, WARN_ON(1); } - if (level == 1 || (level == 2 && (spte & (1ULL << 7)))) { + /* bits 5:3 are _not_ reserved for large page or leaf page */ + if ((rsvd_bits & 0x38) == 0) { u64 ept_mem_type = (spte & 0x38) >> 3; if (ept_mem_type == 2 || ept_mem_type == 3 || @@ -5676,12 +5706,85 @@ out: return ret; } +static int __grow_ple_window(int val) +{ + if (ple_window_grow < 1) + return ple_window; + + val = min(val, ple_window_actual_max); + + if (ple_window_grow < ple_window) + val *= ple_window_grow; + else + val += ple_window_grow; + + return val; +} + +static int __shrink_ple_window(int val, int modifier, int minimum) +{ + if (modifier < 1) + return ple_window; + + if (modifier < ple_window) + val /= modifier; + else + val -= modifier; + + return max(val, minimum); +} + +static void grow_ple_window(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + int old = vmx->ple_window; + + vmx->ple_window = __grow_ple_window(old); + + if (vmx->ple_window != old) + vmx->ple_window_dirty = true; + + trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old); +} + +static void shrink_ple_window(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + int old = vmx->ple_window; + + vmx->ple_window = __shrink_ple_window(old, + ple_window_shrink, ple_window); + + if (vmx->ple_window != old) + vmx->ple_window_dirty = true; + + trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old); +} + +/* + * ple_window_actual_max is computed to be one grow_ple_window() below + * ple_window_max. (See __grow_ple_window for the reason.) + * This prevents overflows, because ple_window_max is int. + * ple_window_max effectively rounded down to a multiple of ple_window_grow in + * this process. + * ple_window_max is also prevented from setting vmx->ple_window < ple_window. + */ +static void update_ple_window_actual_max(void) +{ + ple_window_actual_max = + __shrink_ple_window(max(ple_window_max, ple_window), + ple_window_grow, INT_MIN); +} + /* * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE * exiting, so only get here on cpu with PAUSE-Loop-Exiting. */ static int handle_pause(struct kvm_vcpu *vcpu) { + if (ple_gap) + grow_ple_window(vcpu); + skip_emulated_instruction(vcpu); kvm_vcpu_on_spin(vcpu); @@ -6146,7 +6249,11 @@ static void free_nested(struct vcpu_vmx *vmx) /* Unpin physical memory we referred to in current vmcs02 */ if (vmx->nested.apic_access_page) { nested_release_page(vmx->nested.apic_access_page); - vmx->nested.apic_access_page = 0; + vmx->nested.apic_access_page = NULL; + } + if (vmx->nested.virtual_apic_page) { + nested_release_page(vmx->nested.virtual_apic_page); + vmx->nested.virtual_apic_page = NULL; } nested_free_all_saved_vmcss(vmx); @@ -6617,7 +6724,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) switch (type) { case VMX_EPT_EXTENT_GLOBAL: kvm_mmu_sync_roots(vcpu); - kvm_mmu_flush_tlb(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); nested_vmx_succeed(vcpu); break; default: @@ -6892,6 +6999,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_TASK_SWITCH: return 1; case EXIT_REASON_CPUID: + if (kvm_register_read(vcpu, VCPU_REGS_RAX) == 0xa) + return 0; return 1; case EXIT_REASON_HLT: return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING); @@ -6936,7 +7045,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_MCE_DURING_VMENTRY: return 0; case EXIT_REASON_TPR_BELOW_THRESHOLD: - return 1; + return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW); case EXIT_REASON_APIC_ACCESS: return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); @@ -7057,6 +7166,12 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) { + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + + if (is_guest_mode(vcpu) && + nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) + return; + if (irr == -1 || tpr < irr) { vmcs_write32(TPR_THRESHOLD, 0); return; @@ -7094,6 +7209,29 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) vmx_set_msr_bitmap(vcpu); } +static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + /* + * Currently we do not handle the nested case where L2 has an + * APIC access page of its own; that page is still pinned. + * Hence, we skip the case where the VCPU is in guest mode _and_ + * L1 prepared an APIC access page for L2. + * + * For the case where L1 and L2 share the same APIC access page + * (flexpriority=Y but SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES clear + * in the vmcs12), this function will only update either the vmcs01 + * or the vmcs02. If the former, the vmcs02 will be updated by + * prepare_vmcs02. If the latter, the vmcs01 will be updated in + * the next L2->L1 exit. + */ + if (!is_guest_mode(vcpu) || + !nested_cpu_has2(vmx->nested.current_vmcs12, + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) + vmcs_write64(APIC_ACCESS_ADDR, hpa); +} + static void vmx_hwapic_isr_update(struct kvm *kvm, int isr) { u16 status; @@ -7387,6 +7525,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) if (vmx->emulation_required) return; + if (vmx->ple_window_dirty) { + vmx->ple_window_dirty = false; + vmcs_write32(PLE_WINDOW, vmx->ple_window); + } + if (vmx->nested.sync_shadow_vmcs) { copy_vmcs12_to_shadow(vmx); vmx->nested.sync_shadow_vmcs = false; @@ -7642,10 +7785,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (!kvm->arch.ept_identity_map_addr) kvm->arch.ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; - err = -ENOMEM; - if (alloc_identity_pagetable(kvm) != 0) - goto free_vmcs; - if (!init_rmode_identity_map(kvm)) + err = init_rmode_identity_map(kvm); + if (err) goto free_vmcs; } @@ -7824,6 +7965,55 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, kvm_inject_page_fault(vcpu, fault); } +static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { + /* TODO: Also verify bits beyond physical address width are 0 */ + if (!PAGE_ALIGNED(vmcs12->apic_access_addr)) + return false; + + /* + * Translate L1 physical address to host physical + * address for vmcs02. Keep the page pinned, so this + * physical address remains valid. We keep a reference + * to it so we can release it later. + */ + if (vmx->nested.apic_access_page) /* shouldn't happen */ + nested_release_page(vmx->nested.apic_access_page); + vmx->nested.apic_access_page = + nested_get_page(vcpu, vmcs12->apic_access_addr); + } + + if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { + /* TODO: Also verify bits beyond physical address width are 0 */ + if (!PAGE_ALIGNED(vmcs12->virtual_apic_page_addr)) + return false; + + if (vmx->nested.virtual_apic_page) /* shouldn't happen */ + nested_release_page(vmx->nested.virtual_apic_page); + vmx->nested.virtual_apic_page = + nested_get_page(vcpu, vmcs12->virtual_apic_page_addr); + + /* + * Failing the vm entry is _not_ what the processor does + * but it's basically the only possibility we have. + * We could still enter the guest if CR8 load exits are + * enabled, CR8 store exits are enabled, and virtualize APIC + * access is disabled; in this case the processor would never + * use the TPR shadow and we could simply clear the bit from + * the execution control. But such a configuration is useless, + * so let's keep the code simple. + */ + if (!vmx->nested.virtual_apic_page) + return false; + } + + return true; +} + static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) { u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value; @@ -7849,7 +8039,7 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) /* * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it - * with L0's requirements for its guest (a.k.a. vmsc01), so we can run the L2 + * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2 * guest in a way that will both be appropriate to L1's requests, and our * needs. In addition to modifying the active vmcs (which is vmcs02), this * function also has additional necessary side-effects, like setting various @@ -7970,16 +8160,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) { /* - * Translate L1 physical address to host physical - * address for vmcs02. Keep the page pinned, so this - * physical address remains valid. We keep a reference - * to it so we can release it later. - */ - if (vmx->nested.apic_access_page) /* shouldn't happen */ - nested_release_page(vmx->nested.apic_access_page); - vmx->nested.apic_access_page = - nested_get_page(vcpu, vmcs12->apic_access_addr); - /* * If translation failed, no matter: This feature asks * to exit when accessing the given address, and if it * can never be accessed, this feature won't do @@ -7994,8 +8174,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) } else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) { exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; - vmcs_write64(APIC_ACCESS_ADDR, - page_to_phys(vcpu->kvm->arch.apic_access_page)); + kvm_vcpu_reload_apic_access_page(vcpu); } vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); @@ -8024,6 +8203,13 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING; exec_control &= ~CPU_BASED_TPR_SHADOW; exec_control |= vmcs12->cpu_based_vm_exec_control; + + if (exec_control & CPU_BASED_TPR_SHADOW) { + vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, + page_to_phys(vmx->nested.virtual_apic_page)); + vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold); + } + /* * Merging of IO and MSR bitmaps not currently supported. * Rather, exit every time. @@ -8185,8 +8371,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) return 1; } - if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) && - !PAGE_ALIGNED(vmcs12->apic_access_addr)) { + if (!nested_get_vmcs12_pages(vcpu, vmcs12)) { /*TODO: Also verify bits beyond physical address width are 0*/ nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); return 1; @@ -8790,10 +8975,20 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, /* Unpin physical memory we referred to in vmcs02 */ if (vmx->nested.apic_access_page) { nested_release_page(vmx->nested.apic_access_page); - vmx->nested.apic_access_page = 0; + vmx->nested.apic_access_page = NULL; + } + if (vmx->nested.virtual_apic_page) { + nested_release_page(vmx->nested.virtual_apic_page); + vmx->nested.virtual_apic_page = NULL; } /* + * We are now running in L2, mmu_notifier will force to reload the + * page's hpa for L2 vmcs. Need to reload it for L1 before entering L1. + */ + kvm_vcpu_reload_apic_access_page(vcpu); + + /* * Exiting from L2 to L1, we're now back to L1 which thinks it just * finished a VMLAUNCH or VMRESUME instruction, so we need to set the * success or failure flag accordingly. @@ -8846,6 +9041,12 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, return X86EMUL_CONTINUE; } +static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ + if (ple_gap) + shrink_ple_window(vcpu); +} + static struct kvm_x86_ops vmx_x86_ops = { .cpu_has_kvm_support = cpu_has_kvm_support, .disabled_by_bios = vmx_disabled_by_bios, @@ -8890,7 +9091,6 @@ static struct kvm_x86_ops vmx_x86_ops = { .cache_reg = vmx_cache_reg, .get_rflags = vmx_get_rflags, .set_rflags = vmx_set_rflags, - .fpu_activate = vmx_fpu_activate, .fpu_deactivate = vmx_fpu_deactivate, .tlb_flush = vmx_flush_tlb, @@ -8913,6 +9113,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode, + .set_apic_access_page_addr = vmx_set_apic_access_page_addr, .vm_has_apicv = vmx_vm_has_apicv, .load_eoi_exitmap = vmx_load_eoi_exitmap, .hwapic_irr_update = vmx_hwapic_irr_update, @@ -8951,6 +9152,8 @@ static struct kvm_x86_ops vmx_x86_ops = { .mpx_supported = vmx_mpx_supported, .check_nested_events = vmx_check_nested_events, + + .sched_in = vmx_sched_in, }; static int __init vmx_init(void) @@ -9065,6 +9268,8 @@ static int __init vmx_init(void) } else kvm_disable_tdp(); + update_ple_window_actual_max(); + return 0; out7: @@ -9098,7 +9303,7 @@ static void __exit vmx_exit(void) free_page((unsigned long)vmx_vmread_bitmap); #ifdef CONFIG_KEXEC - rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL); + RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL); synchronize_rcu(); #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8f1e22d3b286..5430e4b0af29 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -246,7 +246,7 @@ void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) } EXPORT_SYMBOL_GPL(kvm_set_shared_msr); -static void drop_user_return_notifiers(void *ignore) +static void drop_user_return_notifiers(void) { unsigned int cpu = smp_processor_id(); struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); @@ -408,12 +408,14 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) } EXPORT_SYMBOL_GPL(kvm_inject_page_fault); -void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) +static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) { if (mmu_is_nested(vcpu) && !fault->nested_page_fault) vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault); else vcpu->arch.mmu.inject_page_fault(vcpu, fault); + + return fault->nested_page_fault; } void kvm_inject_nmi(struct kvm_vcpu *vcpu) @@ -457,11 +459,12 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gfn_t ngfn, void *data, int offset, int len, u32 access) { + struct x86_exception exception; gfn_t real_gfn; gpa_t ngpa; ngpa = gfn_to_gpa(ngfn); - real_gfn = mmu->translate_gpa(vcpu, ngpa, access); + real_gfn = mmu->translate_gpa(vcpu, ngpa, access, &exception); if (real_gfn == UNMAPPED_GVA) return -EFAULT; @@ -726,7 +729,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) { kvm_mmu_sync_roots(vcpu); - kvm_mmu_flush_tlb(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); return 0; } @@ -1518,7 +1521,7 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) pvclock_update_vm_gtod_copy(kvm); kvm_for_each_vcpu(i, vcpu, kvm) - set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); + kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); /* guest entries allowed */ kvm_for_each_vcpu(i, vcpu, kvm) @@ -1661,7 +1664,7 @@ static void kvmclock_update_fn(struct work_struct *work) struct kvm_vcpu *vcpu; kvm_for_each_vcpu(i, vcpu, kvm) { - set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); + kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); kvm_vcpu_kick(vcpu); } } @@ -1670,7 +1673,7 @@ static void kvm_gen_kvmclock_update(struct kvm_vcpu *v) { struct kvm *kvm = v->kvm; - set_bit(KVM_REQ_CLOCK_UPDATE, &v->requests); + kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); schedule_delayed_work(&kvm->arch.kvmclock_update_work, KVMCLOCK_UPDATE_DELAY); } @@ -1723,9 +1726,10 @@ static bool valid_mtrr_type(unsigned t) return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */ } -static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) +bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) { int i; + u64 mask; if (!msr_mtrr_valid(msr)) return false; @@ -1747,14 +1751,31 @@ static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) } /* variable MTRRs */ - return valid_mtrr_type(data & 0xff); + WARN_ON(!(msr >= 0x200 && msr < 0x200 + 2 * KVM_NR_VAR_MTRR)); + + mask = (~0ULL) << cpuid_maxphyaddr(vcpu); + if ((msr & 1) == 0) { + /* MTRR base */ + if (!valid_mtrr_type(data & 0xff)) + return false; + mask |= 0xf00; + } else + /* MTRR mask */ + mask |= 0x7ff; + if (data & mask) { + kvm_inject_gp(vcpu, 0); + return false; + } + + return true; } +EXPORT_SYMBOL_GPL(kvm_mtrr_valid); static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) { u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; - if (!mtrr_valid(vcpu, msr, data)) + if (!kvm_mtrr_valid(vcpu, msr, data)) return 1; if (msr == MSR_MTRRdefType) { @@ -1805,7 +1826,7 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data) break; default: if (msr >= MSR_IA32_MC0_CTL && - msr < MSR_IA32_MC0_CTL + 4 * bank_num) { + msr < MSR_IA32_MCx_CTL(bank_num)) { u32 offset = msr - MSR_IA32_MC0_CTL; /* only 0 or all 1s can be written to IA32_MCi_CTL * some Linux kernels though clear bit 10 in bank 4 to @@ -2164,7 +2185,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_MCG_CTL: case MSR_IA32_MCG_STATUS: - case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: + case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: return set_msr_mce(vcpu, msr, data); /* Performance counters are not protected by a CPUID bit, @@ -2330,7 +2351,7 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) break; default: if (msr >= MSR_IA32_MC0_CTL && - msr < MSR_IA32_MC0_CTL + 4 * bank_num) { + msr < MSR_IA32_MCx_CTL(bank_num)) { u32 offset = msr - MSR_IA32_MC0_CTL; data = vcpu->arch.mce_banks[offset]; break; @@ -2419,7 +2440,13 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case MSR_K7_HWCR: case MSR_VM_HSAVE_PA: case MSR_K7_EVNTSEL0: + case MSR_K7_EVNTSEL1: + case MSR_K7_EVNTSEL2: + case MSR_K7_EVNTSEL3: case MSR_K7_PERFCTR0: + case MSR_K7_PERFCTR1: + case MSR_K7_PERFCTR2: + case MSR_K7_PERFCTR3: case MSR_K8_INT_PENDING_MSG: case MSR_AMD64_NB_CFG: case MSR_FAM10H_MMIO_CONF_BASE: @@ -2505,7 +2532,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case MSR_IA32_MCG_CAP: case MSR_IA32_MCG_CTL: case MSR_IA32_MCG_STATUS: - case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: + case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: return get_msr_mce(vcpu, msr, pdata); case MSR_K7_CLK_CTL: /* @@ -2823,7 +2850,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (unlikely(vcpu->arch.tsc_offset_adjustment)) { adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment); vcpu->arch.tsc_offset_adjustment = 0; - set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); + kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); } if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { @@ -4040,16 +4067,16 @@ void kvm_get_segment(struct kvm_vcpu *vcpu, kvm_x86_ops->get_segment(vcpu, var, seg); } -gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) +gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, + struct x86_exception *exception) { gpa_t t_gpa; - struct x86_exception exception; BUG_ON(!mmu_is_nested(vcpu)); /* NPT walks are always user-walks */ access |= PFERR_USER_MASK; - t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &exception); + t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, exception); return t_gpa; } @@ -4906,16 +4933,18 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) } } -static void inject_emulated_exception(struct kvm_vcpu *vcpu) +static bool inject_emulated_exception(struct kvm_vcpu *vcpu) { struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; if (ctxt->exception.vector == PF_VECTOR) - kvm_propagate_fault(vcpu, &ctxt->exception); - else if (ctxt->exception.error_code_valid) + return kvm_propagate_fault(vcpu, &ctxt->exception); + + if (ctxt->exception.error_code_valid) kvm_queue_exception_e(vcpu, ctxt->exception.vector, ctxt->exception.error_code); else kvm_queue_exception(vcpu, ctxt->exception.vector); + return false; } static void init_emulate_ctxt(struct kvm_vcpu *vcpu) @@ -4972,7 +5001,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) ++vcpu->stat.insn_emulation_fail; trace_kvm_emulate_insn_failed(vcpu); - if (!is_guest_mode(vcpu)) { + if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; vcpu->run->internal.ndata = 0; @@ -5224,6 +5253,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, ctxt->interruptibility = 0; ctxt->have_exception = false; + ctxt->exception.vector = -1; ctxt->perm_ok = false; ctxt->ud = emulation_type & EMULTYPE_TRAP_UD; @@ -5276,8 +5306,9 @@ restart: } if (ctxt->have_exception) { - inject_emulated_exception(vcpu); r = EMULATE_DONE; + if (inject_emulated_exception(vcpu)) + return r; } else if (vcpu->arch.pio.count) { if (!vcpu->arch.pio.in) { /* FIXME: return into emulator if single-stepping. */ @@ -5545,7 +5576,7 @@ static void kvm_set_mmio_spte_mask(void) * entry to generate page fault with PFER.RSV = 1. */ /* Mask the reserved physical address bits. */ - mask = ((1ull << (51 - maxphyaddr + 1)) - 1) << maxphyaddr; + mask = rsvd_bits(maxphyaddr, 51); /* Bit 62 is always reserved for 32bit host. */ mask |= 0x3ull << 62; @@ -5576,7 +5607,7 @@ static void pvclock_gtod_update_fn(struct work_struct *work) spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) kvm_for_each_vcpu(i, vcpu, kvm) - set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests); + kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); atomic_set(&kvm_guest_has_master_clock, 0); spin_unlock(&kvm_lock); } @@ -5989,6 +6020,44 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) kvm_apic_update_tmr(vcpu, tmr); } +static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) +{ + ++vcpu->stat.tlb_flush; + kvm_x86_ops->tlb_flush(vcpu); +} + +void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) +{ + struct page *page = NULL; + + if (!irqchip_in_kernel(vcpu->kvm)) + return; + + if (!kvm_x86_ops->set_apic_access_page_addr) + return; + + page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); + kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page)); + + /* + * Do not pin apic access page in memory, the MMU notifier + * will call us again if it is migrated or swapped out. + */ + put_page(page); +} +EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page); + +void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, + unsigned long address) +{ + /* + * The physical address of apic access page is stored in the VMCS. + * Update it when it becomes invalid. + */ + if (address == gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT)) + kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); +} + /* * Returns 1 to let __vcpu_run() continue the guest execution loop without * exiting to the userspace. Otherwise, the value will be returned to the @@ -6018,7 +6087,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu)) kvm_mmu_sync_roots(vcpu); if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) - kvm_x86_ops->tlb_flush(vcpu); + kvm_vcpu_flush_tlb(vcpu); if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) { vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; r = 0; @@ -6049,6 +6118,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_deliver_pmi(vcpu); if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) vcpu_scan_ioapic(vcpu); + if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) + kvm_vcpu_reload_apic_access_page(vcpu); } if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { @@ -6934,7 +7005,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector) kvm_rip_write(vcpu, 0); } -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { struct kvm *kvm; struct kvm_vcpu *vcpu; @@ -6945,7 +7016,7 @@ int kvm_arch_hardware_enable(void *garbage) bool stable, backwards_tsc = false; kvm_shared_msr_cpu_online(); - ret = kvm_x86_ops->hardware_enable(garbage); + ret = kvm_x86_ops->hardware_enable(); if (ret != 0) return ret; @@ -6954,7 +7025,7 @@ int kvm_arch_hardware_enable(void *garbage) list_for_each_entry(kvm, &vm_list, vm_list) { kvm_for_each_vcpu(i, vcpu, kvm) { if (!stable && vcpu->cpu == smp_processor_id()) - set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); + kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); if (stable && vcpu->arch.last_host_tsc > local_tsc) { backwards_tsc = true; if (vcpu->arch.last_host_tsc > max_tsc) @@ -7008,8 +7079,7 @@ int kvm_arch_hardware_enable(void *garbage) kvm_for_each_vcpu(i, vcpu, kvm) { vcpu->arch.tsc_offset_adjustment += delta_cyc; vcpu->arch.last_host_tsc = local_tsc; - set_bit(KVM_REQ_MASTERCLOCK_UPDATE, - &vcpu->requests); + kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); } /* @@ -7026,10 +7096,10 @@ int kvm_arch_hardware_enable(void *garbage) return 0; } -void kvm_arch_hardware_disable(void *garbage) +void kvm_arch_hardware_disable(void) { - kvm_x86_ops->hardware_disable(garbage); - drop_user_return_notifiers(garbage); + kvm_x86_ops->hardware_disable(); + drop_user_return_notifiers(); } int kvm_arch_hardware_setup(void) @@ -7146,6 +7216,11 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) static_key_slow_dec(&kvm_no_apic_vcpu); } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ + kvm_x86_ops->sched_in(vcpu, cpu); +} + int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { if (type) @@ -7237,10 +7312,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kfree(kvm->arch.vpic); kfree(kvm->arch.vioapic); kvm_free_vcpus(kvm); - if (kvm->arch.apic_access_page) - put_page(kvm->arch.apic_access_page); - if (kvm->arch.ept_identity_pagetable) - put_page(kvm->arch.ept_identity_pagetable); kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); } @@ -7643,3 +7714,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 306a1b77581f..7cb9c45a5fe0 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -88,15 +88,23 @@ static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, vcpu->arch.mmio_gva = gva & PAGE_MASK; vcpu->arch.access = access; vcpu->arch.mmio_gfn = gfn; + vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation; +} + +static inline bool vcpu_match_mmio_gen(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.mmio_gen == kvm_memslots(vcpu->kvm)->generation; } /* - * Clear the mmio cache info for the given gva, - * specially, if gva is ~0ul, we clear all mmio cache info. + * Clear the mmio cache info for the given gva. If gva is MMIO_GVA_ANY, we + * clear all mmio cache info. */ +#define MMIO_GVA_ANY (~(gva_t)0) + static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva) { - if (gva != (~0ul) && vcpu->arch.mmio_gva != (gva & PAGE_MASK)) + if (gva != MMIO_GVA_ANY && vcpu->arch.mmio_gva != (gva & PAGE_MASK)) return; vcpu->arch.mmio_gva = 0; @@ -104,7 +112,8 @@ static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva) static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva) { - if (vcpu->arch.mmio_gva && vcpu->arch.mmio_gva == (gva & PAGE_MASK)) + if (vcpu_match_mmio_gen(vcpu) && vcpu->arch.mmio_gva && + vcpu->arch.mmio_gva == (gva & PAGE_MASK)) return true; return false; @@ -112,7 +121,8 @@ static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva) static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) { - if (vcpu->arch.mmio_gfn && vcpu->arch.mmio_gfn == gpa >> PAGE_SHIFT) + if (vcpu_match_mmio_gen(vcpu) && vcpu->arch.mmio_gfn && + vcpu->arch.mmio_gfn == gpa >> PAGE_SHIFT) return true; return false; @@ -149,6 +159,8 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val, unsigned int bytes, struct x86_exception *exception); +bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data); + #define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ | XSTATE_BNDREGS | XSTATE_BNDCSR) extern u64 host_xcr0; diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 167ffcac16ed..95a427e57887 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -48,7 +48,9 @@ enum address_markers_idx { LOW_KERNEL_NR, VMALLOC_START_NR, VMEMMAP_START_NR, +# ifdef CONFIG_X86_ESPFIX64 ESPFIX_START_NR, +# endif HIGH_KERNEL_NR, MODULES_VADDR_NR, MODULES_END_NR, @@ -71,7 +73,9 @@ static struct addr_marker address_markers[] = { { PAGE_OFFSET, "Low Kernel Mapping" }, { VMALLOC_START, "vmalloc() Area" }, { VMEMMAP_START, "Vmemmap" }, +# ifdef CONFIG_X86_ESPFIX64 { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, +# endif { __START_KERNEL_map, "High Kernel Mapping" }, { MODULES_VADDR, "Modules" }, { MODULES_END, "End Modules" }, diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 25e7e1372bb2..919b91205cd4 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -31,7 +31,7 @@ #include <linux/sched.h> #include <asm/elf.h> -struct __read_mostly va_alignment va_align = { +struct va_alignment __read_mostly va_align = { .flags = -1, }; diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index c61ea57d1ba1..9a2b7101ae8a 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -326,27 +326,6 @@ static void pci_fixup_video(struct pci_dev *pdev) struct pci_bus *bus; u16 config; - if (!vga_default_device()) { - resource_size_t start, end; - int i; - - /* Does firmware framebuffer belong to us? */ - for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { - if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM)) - continue; - - start = pci_resource_start(pdev, i); - end = pci_resource_end(pdev, i); - - if (!start || !end) - continue; - - if (screen_info.lfb_base >= start && - (screen_info.lfb_base + screen_info.lfb_size) < end) - vga_set_default_device(pdev); - } - } - /* Is VGA routed to us? */ bus = pdev->bus; while (bus) { @@ -371,8 +350,7 @@ static void pci_fixup_video(struct pci_dev *pdev) pci_read_config_word(pdev, PCI_COMMAND, &config); if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) { pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW; - dev_printk(KERN_DEBUG, &pdev->dev, "Boot video device\n"); - vga_set_default_device(pdev); + dev_printk(KERN_DEBUG, &pdev->dev, "Video device with shadowed ROM\n"); } } } diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index e8a1201c3293..16fb0099b7f2 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1866,12 +1866,11 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end, * * We can construct this by grafting the Xen provided pagetable into * head_64.S's preconstructed pagetables. We copy the Xen L2's into - * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This - * means that only the kernel has a physical mapping to start with - - * but that's enough to get __va working. We need to fill in the rest - * of the physical mapping once some sort of allocator has been set - * up. - * NOTE: for PVH, the page tables are native. + * level2_ident_pgt, and level2_kernel_pgt. This means that only the + * kernel has a physical mapping to start with - but that's enough to + * get __va working. We need to fill in the rest of the physical + * mapping once some sort of allocator has been set up. NOTE: for + * PVH, the page tables are native. */ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) { @@ -1902,8 +1901,11 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) /* L3_i[0] -> level2_ident_pgt */ convert_pfn_mfn(level3_ident_pgt); /* L3_k[510] -> level2_kernel_pgt - * L3_i[511] -> level2_fixmap_pgt */ + * L3_k[511] -> level2_fixmap_pgt */ convert_pfn_mfn(level3_kernel_pgt); + + /* L3_k[511][506] -> level1_fixmap_pgt */ + convert_pfn_mfn(level2_fixmap_pgt); } /* We get [511][511] and have Xen's version of level2_kernel_pgt */ l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); @@ -1913,21 +1915,15 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) addr[1] = (unsigned long)l3; addr[2] = (unsigned long)l2; /* Graft it onto L4[272][0]. Note that we creating an aliasing problem: - * Both L4[272][0] and L4[511][511] have entries that point to the same + * Both L4[272][0] and L4[511][510] have entries that point to the same * L2 (PMD) tables. Meaning that if you modify it in __va space * it will be also modified in the __ka space! (But if you just * modify the PMD table to point to other PTE's or none, then you * are OK - which is what cleanup_highmap does) */ copy_page(level2_ident_pgt, l2); - /* Graft it onto L4[511][511] */ + /* Graft it onto L4[511][510] */ copy_page(level2_kernel_pgt, l2); - /* Get [511][510] and graft that in level2_fixmap_pgt */ - l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); - l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); - copy_page(level2_fixmap_pgt, l2); - /* Note that we don't do anything with level1_fixmap_pgt which - * we don't need. */ if (!xen_feature(XENFEAT_auto_translated_physmap)) { /* Make pagetable pieces RO */ set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); @@ -1937,6 +1933,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); + set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO); /* Pin down new L4 */ pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, |