Diffstat (limited to 'arch')
-rw-r--r--  arch/arm/Makefile | 2
-rw-r--r--  arch/arm/boot/bootp/Makefile | 2
-rw-r--r--  arch/arm/boot/compressed/Makefile | 2
-rw-r--r--  arch/arm/configs/ep93xx_defconfig | 1
-rw-r--r--  arch/arm/configs/versatile_defconfig | 1
-rw-r--r--  arch/arm/crypto/sha1-armv7-neon.S | 39
-rw-r--r--  arch/arm/include/asm/ftrace.h | 2
-rw-r--r--  arch/arm/include/asm/kvm_emulate.h | 5
-rw-r--r--  arch/arm/include/asm/kvm_host.h | 23
-rw-r--r--  arch/arm/include/asm/kvm_mmu.h | 11
-rw-r--r--  arch/arm/include/asm/smp.h | 6
-rw-r--r--  arch/arm/include/asm/syscall.h | 8
-rw-r--r--  arch/arm/include/uapi/asm/kvm.h | 2
-rw-r--r--  arch/arm/kernel/atags_parse.c | 2
-rw-r--r--  arch/arm/kernel/entry-armv.S | 108
-rw-r--r--  arch/arm/kernel/entry-common.S | 2
-rw-r--r--  arch/arm/kernel/entry-header.S | 63
-rw-r--r--  arch/arm/kernel/fiq.c | 17
-rw-r--r--  arch/arm/kernel/hw_breakpoint.c | 18
-rw-r--r--  arch/arm/kernel/irq.c | 4
-rw-r--r--  arch/arm/kernel/perf_event_cpu.c | 4
-rw-r--r--  arch/arm/kernel/process.c | 62
-rw-r--r--  arch/arm/kernel/return_address.c | 9
-rw-r--r--  arch/arm/kernel/setup.c | 8
-rw-r--r--  arch/arm/kernel/smp.c | 14
-rw-r--r--  arch/arm/kernel/traps.c | 26
-rw-r--r--  arch/arm/kernel/unwind.c | 24
-rw-r--r--  arch/arm/kernel/vmlinux.lds.S | 2
-rw-r--r--  arch/arm/kvm/arm.c | 34
-rw-r--r--  arch/arm/kvm/coproc.c | 2
-rw-r--r--  arch/arm/kvm/guest.c | 2
-rw-r--r--  arch/arm/kvm/mmu.c | 40
-rw-r--r--  arch/arm/mach-at91/include/mach/at91_pio.h | 6
-rw-r--r--  arch/arm/mach-sa1100/Kconfig | 2
-rw-r--r--  arch/arm/mach-sa1100/h3xxx.c | 15
-rw-r--r--  arch/arm/mm/cache-l2x0.c | 121
-rw-r--r--  arch/arm/mm/idmap.c | 2
-rw-r--r--  arch/arm/mm/init.c | 5
-rw-r--r--  arch/arm/mm/mmu.c | 4
-rw-r--r--  arch/arm/mm/proc-v7.S | 2
-rw-r--r--  arch/arm64/Kconfig | 7
-rw-r--r--  arch/arm64/Kconfig.debug | 11
-rw-r--r--  arch/arm64/Makefile | 1
-rw-r--r--  arch/arm64/include/asm/cacheflush.h | 4
-rw-r--r--  arch/arm64/include/asm/cachetype.h | 20
-rw-r--r--  arch/arm64/include/asm/cpu_ops.h | 7
-rw-r--r--  arch/arm64/include/asm/cpuidle.h | 13
-rw-r--r--  arch/arm64/include/asm/debug-monitors.h | 30
-rw-r--r--  arch/arm64/include/asm/dma-mapping.h | 7
-rw-r--r--  arch/arm64/include/asm/insn.h | 249
-rw-r--r--  arch/arm64/include/asm/io.h | 2
-rw-r--r--  arch/arm64/include/asm/kgdb.h | 2
-rw-r--r--  arch/arm64/include/asm/kvm_arm.h | 13
-rw-r--r--  arch/arm64/include/asm/kvm_emulate.h | 5
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 24
-rw-r--r--  arch/arm64/include/asm/kvm_mmu.h | 18
-rw-r--r--  arch/arm64/include/asm/percpu.h | 4
-rw-r--r--  arch/arm64/include/asm/pgtable.h | 33
-rw-r--r--  arch/arm64/include/asm/proc-fns.h | 2
-rw-r--r--  arch/arm64/include/asm/suspend.h | 1
-rw-r--r--  arch/arm64/include/asm/thread_info.h | 9
-rw-r--r--  arch/arm64/include/uapi/asm/kvm.h | 2
-rw-r--r--  arch/arm64/kernel/Makefile | 1
-rw-r--r--  arch/arm64/kernel/cpuidle.c | 31
-rw-r--r--  arch/arm64/kernel/cpuinfo.c | 28
-rw-r--r--  arch/arm64/kernel/efi-stub.c | 16
-rw-r--r--  arch/arm64/kernel/entry.S | 1
-rw-r--r--  arch/arm64/kernel/ftrace.c | 10
-rw-r--r--  arch/arm64/kernel/head.S | 6
-rw-r--r--  arch/arm64/kernel/insn.c | 671
-rw-r--r--  arch/arm64/kernel/kgdb.c | 4
-rw-r--r--  arch/arm64/kernel/perf_event.c | 2
-rw-r--r--  arch/arm64/kernel/process.c | 30
-rw-r--r--  arch/arm64/kernel/psci.c | 104
-rw-r--r--  arch/arm64/kernel/return_address.c | 3
-rw-r--r--  arch/arm64/kernel/setup.c | 11
-rw-r--r--  arch/arm64/kernel/sleep.S | 47
-rw-r--r--  arch/arm64/kernel/smp_spin_table.c | 22
-rw-r--r--  arch/arm64/kernel/stacktrace.c | 3
-rw-r--r--  arch/arm64/kernel/suspend.c | 48
-rw-r--r--  arch/arm64/kernel/traps.c | 3
-rw-r--r--  arch/arm64/kernel/vmlinux.lds.S | 2
-rw-r--r--  arch/arm64/kvm/guest.c | 2
-rw-r--r--  arch/arm64/kvm/sys_regs.c | 2
-rw-r--r--  arch/arm64/mm/Makefile | 2
-rw-r--r--  arch/arm64/mm/dma-mapping.c | 33
-rw-r--r--  arch/arm64/mm/init.c | 10
-rw-r--r--  arch/arm64/mm/mmap.c | 2
-rw-r--r--  arch/arm64/mm/mmu.c | 2
-rw-r--r--  arch/arm64/mm/pageattr.c | 97
-rw-r--r--  arch/arm64/mm/proc.S | 15
-rw-r--r--  arch/arm64/net/Makefile | 4
-rw-r--r--  arch/arm64/net/bpf_jit.h | 169
-rw-r--r--  arch/arm64/net/bpf_jit_comp.c | 679
-rw-r--r--  arch/blackfin/Kconfig | 2
-rw-r--r--  arch/blackfin/mach-common/ints-priority.c | 2
-rw-r--r--  arch/ia64/include/asm/kvm_host.h | 15
-rw-r--r--  arch/ia64/kvm/kvm-ia64.c | 34
-rw-r--r--  arch/m68k/Kconfig.devices | 2
-rw-r--r--  arch/mips/cavium-octeon/Kconfig | 2
-rw-r--r--  arch/mips/include/asm/kvm_host.h | 16
-rw-r--r--  arch/mips/kvm/mips.c | 40
-rw-r--r--  arch/mips/lantiq/falcon/sysctrl.c | 2
-rw-r--r--  arch/mips/lantiq/xway/sysctrl.c | 2
-rw-r--r--  arch/mips/pci/pci-lantiq.c | 2
-rw-r--r--  arch/mn10300/Kconfig | 2
-rw-r--r--  arch/powerpc/boot/simpleboot.c | 2
-rw-r--r--  arch/powerpc/include/asm/kvm_asm.h | 20
-rw-r--r--  arch/powerpc/include/asm/kvm_booke.h | 7
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h | 24
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h | 13
-rw-r--r--  arch/powerpc/include/asm/reg_booke.h | 2
-rw-r--r--  arch/powerpc/include/uapi/asm/kvm.h | 6
-rw-r--r--  arch/powerpc/kernel/cpu_setup_fsl_booke.S | 12
-rw-r--r--  arch/powerpc/kernel/cputable.c | 5
-rw-r--r--  arch/powerpc/kernel/exceptions-64e.S | 4
-rw-r--r--  arch/powerpc/kernel/head_fsl_booke.S | 26
-rw-r--r--  arch/powerpc/kvm/book3s.c | 162
-rw-r--r--  arch/powerpc/kvm/book3s.h | 3
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c | 4
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 47
-rw-r--r--  arch/powerpc/kvm/book3s_hv_builtin.c | 6
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 3
-rw-r--r--  arch/powerpc/kvm/book3s_pr.c | 6
-rw-r--r--  arch/powerpc/kvm/booke.c | 287
-rw-r--r--  arch/powerpc/kvm/booke.h | 40
-rw-r--r--  arch/powerpc/kvm/booke_emulate.c | 163
-rw-r--r--  arch/powerpc/kvm/bookehv_interrupts.S | 13
-rw-r--r--  arch/powerpc/kvm/e500.h | 20
-rw-r--r--  arch/powerpc/kvm/e500_emulate.c | 20
-rw-r--r--  arch/powerpc/kvm/e500_mmu_host.c | 20
-rw-r--r--  arch/powerpc/kvm/e500mc.c | 60
-rw-r--r--  arch/powerpc/kvm/emulate.c | 17
-rw-r--r--  arch/powerpc/kvm/emulate_loadstore.c | 2
-rw-r--r--  arch/powerpc/kvm/powerpc.c | 134
-rw-r--r--  arch/powerpc/platforms/Kconfig.cputype | 6
-rw-r--r--  arch/powerpc/platforms/powernv/opal.c | 2
-rw-r--r--  arch/powerpc/platforms/pseries/hotplug-memory.c | 4
-rw-r--r--  arch/s390/include/asm/kvm_host.h | 34
-rw-r--r--  arch/s390/include/asm/pgalloc.h | 8
-rw-r--r--  arch/s390/include/asm/pgtable.h | 72
-rw-r--r--  arch/s390/include/asm/tlb.h | 2
-rw-r--r--  arch/s390/include/uapi/asm/kvm.h | 10
-rw-r--r--  arch/s390/kvm/diag.c | 28
-rw-r--r--  arch/s390/kvm/gaccess.c | 3
-rw-r--r--  arch/s390/kvm/interrupt.c | 152
-rw-r--r--  arch/s390/kvm/kvm-s390.c | 179
-rw-r--r--  arch/s390/kvm/kvm-s390.h | 6
-rw-r--r--  arch/s390/kvm/priv.c | 11
-rw-r--r--  arch/s390/mm/fault.c | 25
-rw-r--r--  arch/s390/mm/pgtable.c | 707
-rw-r--r--  arch/s390/mm/vmem.c | 2
-rw-r--r--  arch/tile/Kconfig | 1
-rw-r--r--  arch/tile/gxio/mpipe.c | 37
-rw-r--r--  arch/tile/include/asm/sections.h | 3
-rw-r--r--  arch/tile/include/asm/vdso.h | 20
-rw-r--r--  arch/tile/include/uapi/arch/sim_def.h | 10
-rw-r--r--  arch/tile/kernel/time.c | 61
-rw-r--r--  arch/tile/kernel/traps.c | 2
-rw-r--r--  arch/tile/kernel/vdso/vdso.lds.S | 2
-rw-r--r--  arch/tile/kernel/vdso/vgettimeofday.c | 176
-rw-r--r--  arch/tile/kernel/vmlinux.lds.S | 2
-rw-r--r--  arch/tile/mm/init.c | 12
-rw-r--r--  arch/x86/Kconfig | 12
-rw-r--r--  arch/x86/Makefile | 7
-rw-r--r--  arch/x86/boot/Makefile | 7
-rw-r--r--  arch/x86/boot/compressed/Makefile | 17
-rw-r--r--  arch/x86/boot/compressed/aslr.c | 3
-rw-r--r--  arch/x86/boot/compressed/early_serial_console.c | 4
-rw-r--r--  arch/x86/boot/cpu.c | 68
-rw-r--r--  arch/x86/configs/tiny.config | 1
-rw-r--r--  arch/x86/include/asm/cpufeature.h | 8
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 32
-rw-r--r--  arch/x86/include/asm/kvm_para.h | 10
-rw-r--r--  arch/x86/kernel/cpu/Makefile | 7
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 7
-rw-r--r--  arch/x86/kernel/cpu/common.c | 4
-rw-r--r--  arch/x86/kvm/cpuid.c | 31
-rw-r--r--  arch/x86/kvm/cpuid.h | 10
-rw-r--r--  arch/x86/kvm/emulate.c | 51
-rw-r--r--  arch/x86/kvm/lapic.c | 34
-rw-r--r--  arch/x86/kvm/mmu.c | 139
-rw-r--r--  arch/x86/kvm/mmu.h | 5
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 22
-rw-r--r--  arch/x86/kvm/pmu.c | 24
-rw-r--r--  arch/x86/kvm/svm.c | 40
-rw-r--r--  arch/x86/kvm/trace.h | 41
-rw-r--r--  arch/x86/kvm/vmx.c | 377
-rw-r--r--  arch/x86/kvm/x86.c | 148
-rw-r--r--  arch/x86/kvm/x86.h | 22
190 files changed, 5295 insertions, 1888 deletions
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 0ce9d0f71f2a..12bfc1fa51f0 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -157,6 +157,7 @@ machine-$(CONFIG_ARCH_EBSA110) += ebsa110
machine-$(CONFIG_ARCH_EFM32) += efm32
machine-$(CONFIG_ARCH_EP93XX) += ep93xx
machine-$(CONFIG_ARCH_EXYNOS) += exynos
+machine-$(CONFIG_ARCH_FOOTBRIDGE) += footbridge
machine-$(CONFIG_ARCH_GEMINI) += gemini
machine-$(CONFIG_ARCH_HIGHBANK) += highbank
machine-$(CONFIG_ARCH_HISI) += hisi
@@ -205,7 +206,6 @@ machine-$(CONFIG_ARCH_VEXPRESS) += vexpress
machine-$(CONFIG_ARCH_VT8500) += vt8500
machine-$(CONFIG_ARCH_W90X900) += w90x900
machine-$(CONFIG_ARCH_ZYNQ) += zynq
-machine-$(CONFIG_FOOTBRIDGE) += footbridge
machine-$(CONFIG_PLAT_SPEAR) += spear
# Platform directory name. This list is sorted alphanumerically
diff --git a/arch/arm/boot/bootp/Makefile b/arch/arm/boot/bootp/Makefile
index c394e305447c..5761f0039133 100644
--- a/arch/arm/boot/bootp/Makefile
+++ b/arch/arm/boot/bootp/Makefile
@@ -5,6 +5,8 @@
# architecture-specific flags and dependencies.
#
+GCOV_PROFILE := n
+
LDFLAGS_bootp :=-p --no-undefined -X \
--defsym initrd_phys=$(INITRD_PHYS) \
--defsym params_phys=$(PARAMS_PHYS) -T
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 76a50ecae1c3..3ea230aa94b7 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -37,6 +37,8 @@ ifeq ($(CONFIG_ARM_VIRT_EXT),y)
OBJS += hyp-stub.o
endif
+GCOV_PROFILE := n
+
#
# Architecture dependencies
#
diff --git a/arch/arm/configs/ep93xx_defconfig b/arch/arm/configs/ep93xx_defconfig
index 1b650c85bdd0..72233b9c9d07 100644
--- a/arch/arm/configs/ep93xx_defconfig
+++ b/arch/arm/configs/ep93xx_defconfig
@@ -107,5 +107,6 @@ CONFIG_DEBUG_SPINLOCK=y
CONFIG_DEBUG_MUTEXES=y
CONFIG_DEBUG_USER=y
CONFIG_DEBUG_LL=y
+CONFIG_DEBUG_LL_UART_PL01X=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_LIBCRC32C=y
diff --git a/arch/arm/configs/versatile_defconfig b/arch/arm/configs/versatile_defconfig
index d52b4ffe2012..ea49d37564da 100644
--- a/arch/arm/configs/versatile_defconfig
+++ b/arch/arm/configs/versatile_defconfig
@@ -82,5 +82,6 @@ CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_USER=y
CONFIG_DEBUG_LL=y
+CONFIG_DEBUG_LL_UART_PL01X=y
CONFIG_FONTS=y
CONFIG_FONT_ACORN_8x8=y
diff --git a/arch/arm/crypto/sha1-armv7-neon.S b/arch/arm/crypto/sha1-armv7-neon.S
index 50013c0e2864..dcd01f3f0bb0 100644
--- a/arch/arm/crypto/sha1-armv7-neon.S
+++ b/arch/arm/crypto/sha1-armv7-neon.S
@@ -9,7 +9,7 @@
*/
#include <linux/linkage.h>
-
+#include <asm/assembler.h>
.syntax unified
.code 32
@@ -61,13 +61,13 @@
#define RT3 r12
#define W0 q0
-#define W1 q1
+#define W1 q7
#define W2 q2
#define W3 q3
#define W4 q4
-#define W5 q5
-#define W6 q6
-#define W7 q7
+#define W5 q6
+#define W6 q5
+#define W7 q1
#define tmp0 q8
#define tmp1 q9
@@ -79,6 +79,11 @@
#define qK3 q14
#define qK4 q15
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define ARM_LE(code...)
+#else
+#define ARM_LE(code...) code
+#endif
/* Round function macros. */
@@ -150,45 +155,45 @@
#define W_PRECALC_00_15() \
add RWK, sp, #(WK_offs(0)); \
\
- vld1.32 {tmp0, tmp1}, [RDATA]!; \
- vrev32.8 W0, tmp0; /* big => little */ \
- vld1.32 {tmp2, tmp3}, [RDATA]!; \
+ vld1.32 {W0, W7}, [RDATA]!; \
+ ARM_LE(vrev32.8 W0, W0; ) /* big => little */ \
+ vld1.32 {W6, W5}, [RDATA]!; \
vadd.u32 tmp0, W0, curK; \
- vrev32.8 W7, tmp1; /* big => little */ \
- vrev32.8 W6, tmp2; /* big => little */ \
+ ARM_LE(vrev32.8 W7, W7; ) /* big => little */ \
+ ARM_LE(vrev32.8 W6, W6; ) /* big => little */ \
vadd.u32 tmp1, W7, curK; \
- vrev32.8 W5, tmp3; /* big => little */ \
+ ARM_LE(vrev32.8 W5, W5; ) /* big => little */ \
vadd.u32 tmp2, W6, curK; \
vst1.32 {tmp0, tmp1}, [RWK]!; \
vadd.u32 tmp3, W5, curK; \
vst1.32 {tmp2, tmp3}, [RWK]; \
#define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
- vld1.32 {tmp0, tmp1}, [RDATA]!; \
+ vld1.32 {W0, W7}, [RDATA]!; \
#define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
add RWK, sp, #(WK_offs(0)); \
#define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
- vrev32.8 W0, tmp0; /* big => little */ \
+ ARM_LE(vrev32.8 W0, W0; ) /* big => little */ \
#define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
- vld1.32 {tmp2, tmp3}, [RDATA]!; \
+ vld1.32 {W6, W5}, [RDATA]!; \
#define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vadd.u32 tmp0, W0, curK; \
#define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
- vrev32.8 W7, tmp1; /* big => little */ \
+ ARM_LE(vrev32.8 W7, W7; ) /* big => little */ \
#define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
- vrev32.8 W6, tmp2; /* big => little */ \
+ ARM_LE(vrev32.8 W6, W6; ) /* big => little */ \
#define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vadd.u32 tmp1, W7, curK; \
#define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
- vrev32.8 W5, tmp3; /* big => little */ \
+ ARM_LE(vrev32.8 W5, W5; ) /* big => little */ \
#define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vadd.u32 tmp2, W6, curK; \
diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h
index 39eb16b0066f..bfe2a2f5a644 100644
--- a/arch/arm/include/asm/ftrace.h
+++ b/arch/arm/include/asm/ftrace.h
@@ -45,7 +45,7 @@ void *return_address(unsigned int);
#else
-extern inline void *return_address(unsigned int level)
+static inline void *return_address(unsigned int level)
{
return NULL;
}
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 69b746955fca..b9db269c6e61 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -149,6 +149,11 @@ static inline bool kvm_vcpu_trap_is_iabt(struct kvm_vcpu *vcpu)
static inline u8 kvm_vcpu_trap_get_fault(struct kvm_vcpu *vcpu)
{
+ return kvm_vcpu_get_hsr(vcpu) & HSR_FSC;
+}
+
+static inline u8 kvm_vcpu_trap_get_fault_type(struct kvm_vcpu *vcpu)
+{
return kvm_vcpu_get_hsr(vcpu) & HSR_FSC_TYPE;
}
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 6dfb404f6c46..53036e21756b 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -19,6 +19,8 @@
#ifndef __ARM_KVM_HOST_H__
#define __ARM_KVM_HOST_H__
+#include <linux/types.h>
+#include <linux/kvm_types.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmio.h>
@@ -40,9 +42,8 @@
#include <kvm/arm_vgic.h>
-struct kvm_vcpu;
u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
-int kvm_target_cpu(void);
+int __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
void kvm_reset_coprocs(struct kvm_vcpu *vcpu);
@@ -149,20 +150,17 @@ struct kvm_vcpu_stat {
u32 halt_wakeup;
};
-struct kvm_vcpu_init;
int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
const struct kvm_vcpu_init *init);
int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
-struct kvm_one_reg;
int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
u64 kvm_call_hyp(void *hypfn, ...);
void force_vm_exit(const cpumask_t *mask);
#define KVM_ARCH_WANT_MMU_NOTIFIER
-struct kvm;
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
int kvm_unmap_hva_range(struct kvm *kvm,
unsigned long start, unsigned long end);
@@ -172,7 +170,8 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
/* We do not have shadow page tables, hence the empty hooks */
-static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+static inline int kvm_age_hva(struct kvm *kvm, unsigned long start,
+ unsigned long end)
{
return 0;
}
@@ -182,12 +181,16 @@ static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
return 0;
}
+static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+ unsigned long address)
+{
+}
+
struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu);
-struct kvm_one_reg;
int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
@@ -233,4 +236,10 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic)
int kvm_perf_init(void);
int kvm_perf_teardown(void);
+static inline void kvm_arch_hardware_disable(void) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+
#endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 5cc0b0f5f72f..3f688b458143 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -78,17 +78,6 @@ static inline void kvm_set_pte(pte_t *pte, pte_t new_pte)
flush_pmd_entry(pte);
}
-static inline bool kvm_is_write_fault(unsigned long hsr)
-{
- unsigned long hsr_ec = hsr >> HSR_EC_SHIFT;
- if (hsr_ec == HSR_EC_IABT)
- return false;
- else if ((hsr & HSR_ISV) && !(hsr & HSR_WNR))
- return false;
- else
- return true;
-}
-
static inline void kvm_clean_pgd(pgd_t *pgd)
{
clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t));
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index 2ec765c39ab4..18f5a554134f 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -49,12 +49,6 @@ extern void smp_init_cpus(void);
extern void set_smp_cross_call(void (*)(const struct cpumask *, unsigned int));
/*
- * Boot a secondary CPU, and assign it the specified idle task.
- * This also gives us the initial stack to use for this CPU.
- */
-extern int boot_secondary(unsigned int cpu, struct task_struct *);
-
-/*
* Called from platform specific assembly code, this is the
* secondary CPU entry point.
*/
diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h
index 4651f6999b7d..e86c985b8c7a 100644
--- a/arch/arm/include/asm/syscall.h
+++ b/arch/arm/include/asm/syscall.h
@@ -63,8 +63,8 @@ static inline void syscall_get_arguments(struct task_struct *task,
if (i + n > SYSCALL_MAX_ARGS) {
unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i;
unsigned int n_bad = n + i - SYSCALL_MAX_ARGS;
- pr_warning("%s called with max args %d, handling only %d\n",
- __func__, i + n, SYSCALL_MAX_ARGS);
+ pr_warn("%s called with max args %d, handling only %d\n",
+ __func__, i + n, SYSCALL_MAX_ARGS);
memset(args_bad, 0, n_bad * sizeof(args[0]));
n = SYSCALL_MAX_ARGS - i;
}
@@ -88,8 +88,8 @@ static inline void syscall_set_arguments(struct task_struct *task,
return;
if (i + n > SYSCALL_MAX_ARGS) {
- pr_warning("%s called with max args %d, handling only %d\n",
- __func__, i + n, SYSCALL_MAX_ARGS);
+ pr_warn("%s called with max args %d, handling only %d\n",
+ __func__, i + n, SYSCALL_MAX_ARGS);
n = SYSCALL_MAX_ARGS - i;
}
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index e6ebdd3471e5..09ee408c1a67 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -25,6 +25,7 @@
#define __KVM_HAVE_GUEST_DEBUG
#define __KVM_HAVE_IRQ_LINE
+#define __KVM_HAVE_READONLY_MEM
#define KVM_REG_SIZE(id) \
(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
@@ -173,6 +174,7 @@ struct kvm_arch_memory_slot {
#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
/* KVM_IRQ_LINE irq field index values */
#define KVM_ARM_IRQ_TYPE_SHIFT 24
diff --git a/arch/arm/kernel/atags_parse.c b/arch/arm/kernel/atags_parse.c
index 7807ef58a2ab..528f8af2addb 100644
--- a/arch/arm/kernel/atags_parse.c
+++ b/arch/arm/kernel/atags_parse.c
@@ -130,7 +130,7 @@ static int __init parse_tag_cmdline(const struct tag *tag)
strlcat(default_command_line, tag->u.cmdline.cmdline,
COMMAND_LINE_SIZE);
#elif defined(CONFIG_CMDLINE_FORCE)
- pr_warning("Ignoring tag cmdline (using the default kernel command line)\n");
+ pr_warn("Ignoring tag cmdline (using the default kernel command line)\n");
#else
strlcpy(default_command_line, tag->u.cmdline.cmdline,
COMMAND_LINE_SIZE);
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 36276cdccfbc..2f5555d307b3 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -146,7 +146,7 @@ ENDPROC(__und_invalid)
#define SPFIX(code...)
#endif
- .macro svc_entry, stack_hole=0
+ .macro svc_entry, stack_hole=0, trace=1
UNWIND(.fnstart )
UNWIND(.save {r0 - pc} )
sub sp, sp, #(S_FRAME_SIZE + \stack_hole - 4)
@@ -182,9 +182,11 @@ ENDPROC(__und_invalid)
@
stmia r7, {r2 - r6}
+ .if \trace
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_off
#endif
+ .endif
.endm
.align 5
@@ -295,6 +297,15 @@ __pabt_svc:
ENDPROC(__pabt_svc)
.align 5
+__fiq_svc:
+ svc_entry trace=0
+ mov r0, sp @ struct pt_regs *regs
+ bl handle_fiq_as_nmi
+ svc_exit_via_fiq
+ UNWIND(.fnend )
+ENDPROC(__fiq_svc)
+
+ .align 5
.LCcralign:
.word cr_alignment
#ifdef MULTI_DABORT
@@ -305,6 +316,46 @@ ENDPROC(__pabt_svc)
.word fp_enter
/*
+ * Abort mode handlers
+ */
+
+@
+@ Taking a FIQ in abort mode is similar to taking a FIQ in SVC mode
+@ and reuses the same macros. However in abort mode we must also
+@ save/restore lr_abt and spsr_abt to make nested aborts safe.
+@
+ .align 5
+__fiq_abt:
+ svc_entry trace=0
+
+ ARM( msr cpsr_c, #ABT_MODE | PSR_I_BIT | PSR_F_BIT )
+ THUMB( mov r0, #ABT_MODE | PSR_I_BIT | PSR_F_BIT )
+ THUMB( msr cpsr_c, r0 )
+ mov r1, lr @ Save lr_abt
+ mrs r2, spsr @ Save spsr_abt, abort is now safe
+ ARM( msr cpsr_c, #SVC_MODE | PSR_I_BIT | PSR_F_BIT )
+ THUMB( mov r0, #SVC_MODE | PSR_I_BIT | PSR_F_BIT )
+ THUMB( msr cpsr_c, r0 )
+ stmfd sp!, {r1 - r2}
+
+ add r0, sp, #8 @ struct pt_regs *regs
+ bl handle_fiq_as_nmi
+
+ ldmfd sp!, {r1 - r2}
+ ARM( msr cpsr_c, #ABT_MODE | PSR_I_BIT | PSR_F_BIT )
+ THUMB( mov r0, #ABT_MODE | PSR_I_BIT | PSR_F_BIT )
+ THUMB( msr cpsr_c, r0 )
+ mov lr, r1 @ Restore lr_abt, abort is unsafe
+ msr spsr_cxsf, r2 @ Restore spsr_abt
+ ARM( msr cpsr_c, #SVC_MODE | PSR_I_BIT | PSR_F_BIT )
+ THUMB( mov r0, #SVC_MODE | PSR_I_BIT | PSR_F_BIT )
+ THUMB( msr cpsr_c, r0 )
+
+ svc_exit_via_fiq
+ UNWIND(.fnend )
+ENDPROC(__fiq_abt)
+
+/*
* User mode handlers
*
* EABI note: sp_svc is always 64-bit aligned here, so should S_FRAME_SIZE
@@ -314,13 +365,16 @@ ENDPROC(__pabt_svc)
#error "sizeof(struct pt_regs) must be a multiple of 8"
#endif
- .macro usr_entry
+ .macro usr_entry, trace=1
UNWIND(.fnstart )
UNWIND(.cantunwind ) @ don't unwind the user space
sub sp, sp, #S_FRAME_SIZE
ARM( stmib sp, {r1 - r12} )
THUMB( stmia sp, {r0 - r12} )
+ ATRAP( mrc p15, 0, r7, c1, c0, 0)
+ ATRAP( ldr r8, .LCcralign)
+
ldmia r0, {r3 - r5}
add r0, sp, #S_PC @ here for interlock avoidance
mov r6, #-1 @ "" "" "" ""
@@ -328,6 +382,8 @@ ENDPROC(__pabt_svc)
str r3, [sp] @ save the "real" r0 copied
@ from the exception stack
+ ATRAP( ldr r8, [r8, #0])
+
@
@ We are now ready to fill in the remaining blanks on the stack:
@
@@ -341,20 +397,21 @@ ENDPROC(__pabt_svc)
ARM( stmdb r0, {sp, lr}^ )
THUMB( store_user_sp_lr r0, r1, S_SP - S_PC )
- @
@ Enable the alignment trap while in kernel mode
- @
- alignment_trap r0, .LCcralign
+ ATRAP( teq r8, r7)
+ ATRAP( mcrne p15, 0, r8, c1, c0, 0)
@
@ Clear FP to mark the first stack frame
@
zero_fp
+ .if \trace
#ifdef CONFIG_IRQSOFF_TRACER
bl trace_hardirqs_off
#endif
ct_user_exit save = 0
+ .endif
.endm
.macro kuser_cmpxchg_check
@@ -683,6 +740,17 @@ ENTRY(ret_from_exception)
ENDPROC(__pabt_usr)
ENDPROC(ret_from_exception)
+ .align 5
+__fiq_usr:
+ usr_entry trace=0
+ kuser_cmpxchg_check
+ mov r0, sp @ struct pt_regs *regs
+ bl handle_fiq_as_nmi
+ get_thread_info tsk
+ restore_user_regs fast = 0, offset = 0
+ UNWIND(.fnend )
+ENDPROC(__fiq_usr)
+
/*
* Register switch for ARMv3 and ARMv4 processors
* r0 = previous task_struct, r1 = previous thread_info, r2 = next thread_info
@@ -1118,17 +1186,29 @@ vector_addrexcptn:
b vector_addrexcptn
/*=============================================================================
- * Undefined FIQs
+ * FIQ "NMI" handler
*-----------------------------------------------------------------------------
- * Enter in FIQ mode, spsr = ANY CPSR, lr = ANY PC
- * MUST PRESERVE SVC SPSR, but need to switch to SVC mode to show our msg.
- * Basically to switch modes, we *HAVE* to clobber one register... brain
- * damage alert! I don't think that we can execute any code in here in any
- * other mode than FIQ... Ok you can switch to another mode, but you can't
- * get out of that mode without clobbering one register.
+ * Handle a FIQ using the SVC stack, allowing FIQ to act like NMI on x86
+ * systems.
*/
-vector_fiq:
- subs pc, lr, #4
+ vector_stub fiq, FIQ_MODE, 4
+
+ .long __fiq_usr @ 0 (USR_26 / USR_32)
+ .long __fiq_svc @ 1 (FIQ_26 / FIQ_32)
+ .long __fiq_svc @ 2 (IRQ_26 / IRQ_32)
+ .long __fiq_svc @ 3 (SVC_26 / SVC_32)
+ .long __fiq_svc @ 4
+ .long __fiq_svc @ 5
+ .long __fiq_svc @ 6
+ .long __fiq_abt @ 7
+ .long __fiq_svc @ 8
+ .long __fiq_svc @ 9
+ .long __fiq_svc @ a
+ .long __fiq_svc @ b
+ .long __fiq_svc @ c
+ .long __fiq_svc @ d
+ .long __fiq_svc @ e
+ .long __fiq_svc @ f
.globl vector_fiq_offset
.equ vector_fiq_offset, vector_fiq
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index e52fe5a2d843..6bb09d4abdea 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -366,7 +366,7 @@ ENTRY(vector_swi)
str r0, [sp, #S_OLD_R0] @ Save OLD_R0
#endif
zero_fp
- alignment_trap ip, __cr_alignment
+ alignment_trap r10, ip, __cr_alignment
enable_irq
ct_user_exit
get_thread_info tsk
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index 2fdf8679b46e..4176df721bf0 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -37,11 +37,19 @@
#endif
.endm
- .macro alignment_trap, rtemp, label
#ifdef CONFIG_ALIGNMENT_TRAP
- ldr \rtemp, \label
- ldr \rtemp, [\rtemp]
- mcr p15, 0, \rtemp, c1, c0
+#define ATRAP(x...) x
+#else
+#define ATRAP(x...)
+#endif
+
+ .macro alignment_trap, rtmp1, rtmp2, label
+#ifdef CONFIG_ALIGNMENT_TRAP
+ mrc p15, 0, \rtmp2, c1, c0, 0
+ ldr \rtmp1, \label
+ ldr \rtmp1, [\rtmp1]
+ teq \rtmp1, \rtmp2
+ mcrne p15, 0, \rtmp1, c1, c0, 0
#endif
.endm
@@ -216,6 +224,34 @@
ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr
.endm
+ @
+ @ svc_exit_via_fiq - like svc_exit but switches to FIQ mode before exit
+ @
+ @ This macro acts in a similar manner to svc_exit but switches to FIQ
+ @ mode to restore the final part of the register state.
+ @
+ @ We cannot use the normal svc_exit procedure because that would
+ @ clobber spsr_svc (FIQ could be delivered during the first few
+ @ instructions of vector_swi meaning its contents have not been
+ @ saved anywhere).
+ @
+ @ Note that, unlike svc_exit, this macro also does not allow a caller
+ @ supplied rpsr. This is because the FIQ exceptions are not re-entrant
+ @ and the handlers cannot call into the scheduler (meaning the value
+ @ on the stack remains correct).
+ @
+ .macro svc_exit_via_fiq
+ mov r0, sp
+ ldmib r0, {r1 - r14} @ abort is deadly from here onward (it will
+ @ clobber state restored below)
+ msr cpsr_c, #FIQ_MODE | PSR_I_BIT | PSR_F_BIT
+ add r8, r0, #S_PC
+ ldr r9, [r0, #S_PSR]
+ msr spsr_cxsf, r9
+ ldr r0, [r0, #S_R0]
+ ldmia r8, {pc}^
+ .endm
+
.macro restore_user_regs, fast = 0, offset = 0
ldr r1, [sp, #\offset + S_PSR] @ get calling cpsr
ldr lr, [sp, #\offset + S_PC]! @ get pc
@@ -267,6 +303,25 @@
rfeia sp!
.endm
+ @
+ @ svc_exit_via_fiq - like svc_exit but switches to FIQ mode before exit
+ @
+ @ For full details see non-Thumb implementation above.
+ @
+ .macro svc_exit_via_fiq
+ add r0, sp, #S_R2
+ ldr lr, [sp, #S_LR]
+ ldr sp, [sp, #S_SP] @ abort is deadly from here onward (it will
+ @ clobber state restored below)
+ ldmia r0, {r2 - r12}
+ mov r1, #FIQ_MODE | PSR_I_BIT | PSR_F_BIT
+ msr cpsr_c, r1
+ sub r0, #S_R2
+ add r8, r0, #S_PC
+ ldmia r0, {r0 - r1}
+ rfeia r8
+ .endm
+
#ifdef CONFIG_CPU_V7M
/*
* Note we don't need to do clrex here as clearing the local monitor is
diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c
index 918875d96d5d..b37752a96652 100644
--- a/arch/arm/kernel/fiq.c
+++ b/arch/arm/kernel/fiq.c
@@ -52,7 +52,8 @@
(unsigned)&vector_fiq_offset; \
})
-static unsigned long no_fiq_insn;
+static unsigned long dfl_fiq_insn;
+static struct pt_regs dfl_fiq_regs;
/* Default reacquire function
* - we always relinquish FIQ control
@@ -60,8 +61,15 @@ static unsigned long no_fiq_insn;
*/
static int fiq_def_op(void *ref, int relinquish)
{
- if (!relinquish)
- set_fiq_handler(&no_fiq_insn, sizeof(no_fiq_insn));
+ if (!relinquish) {
+ /* Restore default handler and registers */
+ local_fiq_disable();
+ set_fiq_regs(&dfl_fiq_regs);
+ set_fiq_handler(&dfl_fiq_insn, sizeof(dfl_fiq_insn));
+ local_fiq_enable();
+
+ /* FIXME: notify irq controller to standard enable FIQs */
+ }
return 0;
}
@@ -150,6 +158,7 @@ EXPORT_SYMBOL(disable_fiq);
void __init init_FIQ(int start)
{
unsigned offset = FIQ_OFFSET;
- no_fiq_insn = *(unsigned long *)(0xffff0000 + offset);
+ dfl_fiq_insn = *(unsigned long *)(0xffff0000 + offset);
+ get_fiq_regs(&dfl_fiq_regs);
fiq_start = start;
}
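
The default-handler restore above uses the long-standing ARM FIQ ownership API (claim_fiq(), set_fiq_handler(), set_fiq_regs() and enable_fiq() from asm/fiq.h). A minimal sketch of how a driver claims the FIQ vector with that API might look like the following; my_fiq_start/my_fiq_end and the FIQ number are hypothetical placeholders, not part of this patch.

/* Hypothetical driver-side use of the FIQ API restored by fiq_def_op() above. */
#include <linux/errno.h>
#include <asm/fiq.h>

extern unsigned char my_fiq_start[], my_fiq_end[];	/* assembled FIQ routine */

static struct fiq_handler my_fiq_owner = {
	.name = "my-device",
};

static int my_fiq_setup(int fiq)
{
	struct pt_regs regs = { };
	int ret;

	ret = claim_fiq(&my_fiq_owner);		/* take ownership of the vector */
	if (ret)
		return ret;

	/* install the handler code and preload the banked FIQ registers */
	set_fiq_handler(my_fiq_start, my_fiq_end - my_fiq_start);
	set_fiq_regs(&regs);
	enable_fiq(fiq);
	return 0;
}
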
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 4d963fb66e3f..b5b452f90f76 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -113,8 +113,8 @@ static u32 read_wb_reg(int n)
GEN_READ_WB_REG_CASES(ARM_OP2_WVR, val);
GEN_READ_WB_REG_CASES(ARM_OP2_WCR, val);
default:
- pr_warning("attempt to read from unknown breakpoint "
- "register %d\n", n);
+ pr_warn("attempt to read from unknown breakpoint register %d\n",
+ n);
}
return val;
@@ -128,8 +128,8 @@ static void write_wb_reg(int n, u32 val)
GEN_WRITE_WB_REG_CASES(ARM_OP2_WVR, val);
GEN_WRITE_WB_REG_CASES(ARM_OP2_WCR, val);
default:
- pr_warning("attempt to write to unknown breakpoint "
- "register %d\n", n);
+ pr_warn("attempt to write to unknown breakpoint register %d\n",
+ n);
}
isb();
}
@@ -292,7 +292,7 @@ int hw_breakpoint_slots(int type)
case TYPE_DATA:
return get_num_wrps();
default:
- pr_warning("unknown slot type: %d\n", type);
+ pr_warn("unknown slot type: %d\n", type);
return 0;
}
}
@@ -365,7 +365,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
}
if (i == max_slots) {
- pr_warning("Can't find any breakpoint slot\n");
+ pr_warn("Can't find any breakpoint slot\n");
return -EBUSY;
}
@@ -417,7 +417,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
}
if (i == max_slots) {
- pr_warning("Can't find any breakpoint slot\n");
+ pr_warn("Can't find any breakpoint slot\n");
return;
}
@@ -894,8 +894,8 @@ static int debug_reg_trap(struct pt_regs *regs, unsigned int instr)
{
int cpu = smp_processor_id();
- pr_warning("Debug register access (0x%x) caused undefined instruction on CPU %d\n",
- instr, cpu);
+ pr_warn("Debug register access (0x%x) caused undefined instruction on CPU %d\n",
+ instr, cpu);
/* Set the error flag for this CPU and skip the faulting instruction. */
cpumask_set_cpu(cpu, &debug_err_mask);
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 5c4d38e32a51..88de943eebd6 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -205,8 +205,8 @@ void migrate_irqs(void)
raw_spin_unlock(&desc->lock);
if (affinity_broken && printk_ratelimit())
- pr_warning("IRQ%u no longer affine to CPU%u\n", i,
- smp_processor_id());
+ pr_warn("IRQ%u no longer affine to CPU%u\n",
+ i, smp_processor_id());
}
local_irq_restore(flags);
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 4bf4cce759fe..eb2c4d55666b 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -146,8 +146,8 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
* continue. Otherwise, continue without this interrupt.
*/
if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) {
- pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
- irq, i);
+ pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n",
+ irq, i);
continue;
}
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index a35f6ebbd2c2..a0a691d1cbee 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -306,7 +306,6 @@ void __show_regs(struct pt_regs *regs)
void show_regs(struct pt_regs * regs)
{
- printk("\n");
__show_regs(regs);
dump_stack();
}
@@ -474,19 +473,57 @@ int in_gate_area_no_mm(unsigned long addr)
const char *arch_vma_name(struct vm_area_struct *vma)
{
- return is_gate_vma(vma) ? "[vectors]" :
- (vma->vm_mm && vma->vm_start == vma->vm_mm->context.sigpage) ?
- "[sigpage]" : NULL;
+ return is_gate_vma(vma) ? "[vectors]" : NULL;
+}
+
+/* If possible, provide a placement hint at a random offset from the
+ * stack for the signal page.
+ */
+static unsigned long sigpage_addr(const struct mm_struct *mm,
+ unsigned int npages)
+{
+ unsigned long offset;
+ unsigned long first;
+ unsigned long last;
+ unsigned long addr;
+ unsigned int slots;
+
+ first = PAGE_ALIGN(mm->start_stack);
+
+ last = TASK_SIZE - (npages << PAGE_SHIFT);
+
+ /* No room after stack? */
+ if (first > last)
+ return 0;
+
+ /* Just enough room? */
+ if (first == last)
+ return first;
+
+ slots = ((last - first) >> PAGE_SHIFT) + 1;
+
+ offset = get_random_int() % slots;
+
+ addr = first + (offset << PAGE_SHIFT);
+
+ return addr;
}
static struct page *signal_page;
extern struct page *get_signal_page(void);
+static const struct vm_special_mapping sigpage_mapping = {
+ .name = "[sigpage]",
+ .pages = &signal_page,
+};
+
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
unsigned long addr;
- int ret;
+ unsigned long hint;
+ int ret = 0;
if (!signal_page)
signal_page = get_signal_page();
@@ -494,18 +531,23 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
return -ENOMEM;
down_write(&mm->mmap_sem);
- addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
+ hint = sigpage_addr(mm, 1);
+ addr = get_unmapped_area(NULL, hint, PAGE_SIZE, 0, 0);
if (IS_ERR_VALUE(addr)) {
ret = addr;
goto up_fail;
}
- ret = install_special_mapping(mm, addr, PAGE_SIZE,
+ vma = _install_special_mapping(mm, addr, PAGE_SIZE,
VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
- &signal_page);
+ &sigpage_mapping);
+
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ goto up_fail;
+ }
- if (ret == 0)
- mm->context.sigpage = addr;
+ mm->context.sigpage = addr;
up_fail:
up_write(&mm->mmap_sem);
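
The hint computed by sigpage_addr() above is simply a uniformly chosen page slot between the page-aligned top of the stack and TASK_SIZE. A stand-alone sketch of the same arithmetic, with made-up example addresses, is:

/* Stand-alone restatement of the sigpage placement-hint arithmetic.
 * The stack and TASK_SIZE values below are illustrative only.
 */
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

static unsigned long sigpage_hint(unsigned long start_stack,
				  unsigned long task_size,
				  unsigned int npages)
{
	unsigned long first = PAGE_ALIGN(start_stack);
	unsigned long last = task_size - (npages << PAGE_SHIFT);
	unsigned long slots;

	if (first > last)	/* no room after the stack */
		return 0;
	if (first == last)	/* just enough room */
		return first;

	slots = ((last - first) >> PAGE_SHIFT) + 1;
	return first + ((unsigned long)(rand() % slots) << PAGE_SHIFT);
}

int main(void)
{
	/* e.g. stack top near 0xbef00000 with a 3 GiB TASK_SIZE */
	printf("hint: %#lx\n", sigpage_hint(0xbef00000UL, 0xc0000000UL, 1));
	return 0;
}
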
diff --git a/arch/arm/kernel/return_address.c b/arch/arm/kernel/return_address.c
index fafedd86885d..98ea4b7eb406 100644
--- a/arch/arm/kernel/return_address.c
+++ b/arch/arm/kernel/return_address.c
@@ -59,15 +59,6 @@ void *return_address(unsigned int level)
#else /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) */
-#if defined(CONFIG_ARM_UNWIND)
-#warning "TODO: return_address should use unwind tables"
-#endif
-
-void *return_address(unsigned int level)
-{
- return NULL;
-}
-
#endif /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) / else */
EXPORT_SYMBOL_GPL(return_address);
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 84db893dedc2..c03106378b49 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -133,6 +133,7 @@ struct stack {
u32 irq[3];
u32 abt[3];
u32 und[3];
+ u32 fiq[3];
} ____cacheline_aligned;
#ifndef CONFIG_CPU_V7M
@@ -470,7 +471,10 @@ void notrace cpu_init(void)
"msr cpsr_c, %5\n\t"
"add r14, %0, %6\n\t"
"mov sp, r14\n\t"
- "msr cpsr_c, %7"
+ "msr cpsr_c, %7\n\t"
+ "add r14, %0, %8\n\t"
+ "mov sp, r14\n\t"
+ "msr cpsr_c, %9"
:
: "r" (stk),
PLC (PSR_F_BIT | PSR_I_BIT | IRQ_MODE),
@@ -479,6 +483,8 @@ void notrace cpu_init(void)
"I" (offsetof(struct stack, abt[0])),
PLC (PSR_F_BIT | PSR_I_BIT | UND_MODE),
"I" (offsetof(struct stack, und[0])),
+ PLC (PSR_F_BIT | PSR_I_BIT | FIQ_MODE),
+ "I" (offsetof(struct stack, fiq[0])),
PLC (PSR_F_BIT | PSR_I_BIT | SVC_MODE)
: "r14");
#endif
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 9388a3d479e1..39c74a2c3df9 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -95,6 +95,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
{
int ret;
+ if (!smp_ops.smp_boot_secondary)
+ return -ENOSYS;
+
/*
* We need to tell the secondary core where to find
* its stack and the page tables.
@@ -113,7 +116,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
/*
* Now bring the CPU into our world.
*/
- ret = boot_secondary(cpu, idle);
+ ret = smp_ops.smp_boot_secondary(cpu, idle);
if (ret == 0) {
/*
* CPU was successfully started, wait for it
@@ -142,13 +145,6 @@ void __init smp_init_cpus(void)
smp_ops.smp_init_cpus();
}
-int boot_secondary(unsigned int cpu, struct task_struct *idle)
-{
- if (smp_ops.smp_boot_secondary)
- return smp_ops.smp_boot_secondary(cpu, idle);
- return -ENOSYS;
-}
-
int platform_can_cpu_hotplug(void)
{
#ifdef CONFIG_HOTPLUG_CPU
@@ -650,7 +646,7 @@ void smp_send_stop(void)
udelay(1);
if (num_online_cpus() > 1)
- pr_warning("SMP: failed to stop secondary CPUs\n");
+ pr_warn("SMP: failed to stop secondary CPUs\n");
}
/*
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index a964c9f40f87..0c8b10801d36 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -25,6 +25,7 @@
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/sched.h>
+#include <linux/irq.h>
#include <linux/atomic.h>
#include <asm/cacheflush.h>
@@ -460,10 +461,29 @@ die_sig:
arm_notify_die("Oops - undefined instruction", regs, &info, 0, 6);
}
-asmlinkage void do_unexp_fiq (struct pt_regs *regs)
+/*
+ * Handle FIQ similarly to NMI on x86 systems.
+ *
+ * The runtime environment for NMIs is extremely restrictive
+ * (NMIs can pre-empt critical sections meaning almost all locking is
+ * forbidden) meaning this default FIQ handling must only be used in
+ * circumstances where non-maskability improves robustness, such as
+ * watchdog or debug logic.
+ *
+ * This handler is not appropriate for general purpose use in drivers or
+ * platform code and can be overridden using set_fiq_handler.
+ */
+asmlinkage void __exception_irq_entry handle_fiq_as_nmi(struct pt_regs *regs)
{
- printk("Hmm. Unexpected FIQ received, but trying to continue\n");
- printk("You may have a hardware problem...\n");
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ nmi_enter();
+
+ /* nop. FIQ handlers for special arch/arm features can be added here. */
+
+ nmi_exit();
+
+ set_irq_regs(old_regs);
}
/*
diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c
index a61a1dfbb0db..cbb85c5fabf9 100644
--- a/arch/arm/kernel/unwind.c
+++ b/arch/arm/kernel/unwind.c
@@ -157,7 +157,7 @@ static const struct unwind_idx *search_index(unsigned long addr,
if (likely(start->addr_offset <= addr_prel31))
return start;
else {
- pr_warning("unwind: Unknown symbol address %08lx\n", addr);
+ pr_warn("unwind: Unknown symbol address %08lx\n", addr);
return NULL;
}
}
@@ -225,7 +225,7 @@ static unsigned long unwind_get_byte(struct unwind_ctrl_block *ctrl)
unsigned long ret;
if (ctrl->entries <= 0) {
- pr_warning("unwind: Corrupt unwind table\n");
+ pr_warn("unwind: Corrupt unwind table\n");
return 0;
}
@@ -333,8 +333,8 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl)
insn = (insn << 8) | unwind_get_byte(ctrl);
mask = insn & 0x0fff;
if (mask == 0) {
- pr_warning("unwind: 'Refuse to unwind' instruction %04lx\n",
- insn);
+ pr_warn("unwind: 'Refuse to unwind' instruction %04lx\n",
+ insn);
return -URC_FAILURE;
}
@@ -357,8 +357,8 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl)
unsigned long mask = unwind_get_byte(ctrl);
if (mask == 0 || mask & 0xf0) {
- pr_warning("unwind: Spare encoding %04lx\n",
- (insn << 8) | mask);
+ pr_warn("unwind: Spare encoding %04lx\n",
+ (insn << 8) | mask);
return -URC_FAILURE;
}
@@ -370,7 +370,7 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl)
ctrl->vrs[SP] += 0x204 + (uleb128 << 2);
} else {
- pr_warning("unwind: Unhandled instruction %02lx\n", insn);
+ pr_warn("unwind: Unhandled instruction %02lx\n", insn);
return -URC_FAILURE;
}
@@ -403,7 +403,7 @@ int unwind_frame(struct stackframe *frame)
idx = unwind_find_idx(frame->pc);
if (!idx) {
- pr_warning("unwind: Index not found %08lx\n", frame->pc);
+ pr_warn("unwind: Index not found %08lx\n", frame->pc);
return -URC_FAILURE;
}
@@ -422,8 +422,8 @@ int unwind_frame(struct stackframe *frame)
/* only personality routine 0 supported in the index */
ctrl.insn = &idx->insn;
else {
- pr_warning("unwind: Unsupported personality routine %08lx in the index at %p\n",
- idx->insn, idx);
+ pr_warn("unwind: Unsupported personality routine %08lx in the index at %p\n",
+ idx->insn, idx);
return -URC_FAILURE;
}
@@ -435,8 +435,8 @@ int unwind_frame(struct stackframe *frame)
ctrl.byte = 1;
ctrl.entries = 1 + ((*ctrl.insn & 0x00ff0000) >> 16);
} else {
- pr_warning("unwind: Unsupported personality routine %08lx at %p\n",
- *ctrl.insn, ctrl.insn);
+ pr_warn("unwind: Unsupported personality routine %08lx at %p\n",
+ *ctrl.insn, ctrl.insn);
return -URC_FAILURE;
}
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 6f57cb94367f..8e95aa47457a 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -219,8 +219,8 @@ SECTIONS
__data_loc = ALIGN(4); /* location in binary */
. = PAGE_OFFSET + TEXT_OFFSET;
#else
- __init_end = .;
. = ALIGN(THREAD_SIZE);
+ __init_end = .;
__data_loc = .;
#endif
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index a99e0cdf8ba2..779605122f32 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -82,12 +82,12 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void)
/**
* kvm_arm_get_running_vcpus - get the per-CPU array of currently running vcpus.
*/
-struct kvm_vcpu __percpu **kvm_get_running_vcpus(void)
+struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void)
{
return &kvm_arm_running_vcpu;
}
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
{
return 0;
}
@@ -97,27 +97,16 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}
-void kvm_arch_hardware_disable(void *garbage)
-{
-}
-
int kvm_arch_hardware_setup(void)
{
return 0;
}
-void kvm_arch_hardware_unsetup(void)
-{
-}
-
void kvm_arch_check_processor_compat(void *rtn)
{
*(int *)rtn = 0;
}
-void kvm_arch_sync_events(struct kvm *kvm)
-{
-}
/**
* kvm_arch_init_vm - initializes a VM data structure
@@ -172,6 +161,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm->vcpus[i] = NULL;
}
}
+
+ kvm_vgic_destroy(kvm);
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -188,6 +179,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ONE_REG:
case KVM_CAP_ARM_PSCI:
case KVM_CAP_ARM_PSCI_0_2:
+ case KVM_CAP_READONLY_MEM:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -253,6 +245,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
kvm_mmu_free_memory_caches(vcpu);
kvm_timer_vcpu_terminate(vcpu);
+ kvm_vgic_vcpu_destroy(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu);
}
@@ -268,26 +261,15 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
- int ret;
-
/* Force users to call KVM_ARM_VCPU_INIT */
vcpu->arch.target = -1;
- /* Set up VGIC */
- ret = kvm_vgic_vcpu_init(vcpu);
- if (ret)
- return ret;
-
/* Set up the timer */
kvm_timer_vcpu_init(vcpu);
return 0;
}
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
-{
-}
-
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
vcpu->cpu = cpu;
@@ -428,9 +410,9 @@ static void update_vttbr(struct kvm *kvm)
/* update vttbr to be used with the new vmid */
pgd_phys = virt_to_phys(kvm->arch.pgd);
+ BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
- kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK;
- kvm->arch.vttbr |= vmid;
+ kvm->arch.vttbr = pgd_phys | vmid;
spin_unlock(&kvm_vmid_lock);
}
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 37a0fe1bb9bb..7928dbdf2102 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -791,7 +791,7 @@ static bool is_valid_cache(u32 val)
u32 level, ctype;
if (val >= CSSELR_MAX)
- return -ENOENT;
+ return false;
/* Bottom bit is Instruction or Data bit. Next 3 bits are level. */
level = (val >> 1);
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 813e49258690..cc0b78769bd8 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -163,7 +163,7 @@ static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
if (ret != 0)
- return ret;
+ return -EFAULT;
return kvm_arm_timer_set_reg(vcpu, reg->id, val);
}
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 16e7994bf347..eea03069161b 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -746,22 +746,29 @@ static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap)
return false;
}
+static bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
+{
+ if (kvm_vcpu_trap_is_iabt(vcpu))
+ return false;
+
+ return kvm_vcpu_dabt_iswrite(vcpu);
+}
+
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
- struct kvm_memory_slot *memslot,
+ struct kvm_memory_slot *memslot, unsigned long hva,
unsigned long fault_status)
{
int ret;
bool write_fault, writable, hugetlb = false, force_pte = false;
unsigned long mmu_seq;
gfn_t gfn = fault_ipa >> PAGE_SHIFT;
- unsigned long hva = gfn_to_hva(vcpu->kvm, gfn);
struct kvm *kvm = vcpu->kvm;
struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
struct vm_area_struct *vma;
pfn_t pfn;
pgprot_t mem_type = PAGE_S2;
- write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
+ write_fault = kvm_is_write_fault(vcpu);
if (fault_status == FSC_PERM && !write_fault) {
kvm_err("Unexpected L2 read permission error\n");
return -EFAULT;
@@ -863,7 +870,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
unsigned long fault_status;
phys_addr_t fault_ipa;
struct kvm_memory_slot *memslot;
- bool is_iabt;
+ unsigned long hva;
+ bool is_iabt, write_fault, writable;
gfn_t gfn;
int ret, idx;
@@ -874,17 +882,22 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
kvm_vcpu_get_hfar(vcpu), fault_ipa);
/* Check the stage-2 fault is trans. fault or write fault */
- fault_status = kvm_vcpu_trap_get_fault(vcpu);
+ fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
- kvm_err("Unsupported fault status: EC=%#x DFCS=%#lx\n",
- kvm_vcpu_trap_get_class(vcpu), fault_status);
+ kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
+ kvm_vcpu_trap_get_class(vcpu),
+ (unsigned long)kvm_vcpu_trap_get_fault(vcpu),
+ (unsigned long)kvm_vcpu_get_hsr(vcpu));
return -EFAULT;
}
idx = srcu_read_lock(&vcpu->kvm->srcu);
gfn = fault_ipa >> PAGE_SHIFT;
- if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+ memslot = gfn_to_memslot(vcpu->kvm, gfn);
+ hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
+ write_fault = kvm_is_write_fault(vcpu);
+ if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
if (is_iabt) {
/* Prefetch Abort on I/O address */
kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
@@ -892,13 +905,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
goto out_unlock;
}
- if (fault_status != FSC_FAULT) {
- kvm_err("Unsupported fault status on io memory: %#lx\n",
- fault_status);
- ret = -EFAULT;
- goto out_unlock;
- }
-
/*
* The IPA is reported as [MAX:12], so we need to
* complement it with the bottom 12 bits from the
@@ -910,9 +916,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
goto out_unlock;
}
- memslot = gfn_to_memslot(vcpu->kvm, gfn);
-
- ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status);
+ ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
if (ret == 0)
ret = 1;
out_unlock:
diff --git a/arch/arm/mach-at91/include/mach/at91_pio.h b/arch/arm/mach-at91/include/mach/at91_pio.h
index 732b11c37f1a..7b7366253ceb 100644
--- a/arch/arm/mach-at91/include/mach/at91_pio.h
+++ b/arch/arm/mach-at91/include/mach/at91_pio.h
@@ -71,4 +71,10 @@
#define ABCDSR_PERIPH_C 0x2
#define ABCDSR_PERIPH_D 0x3
+#define SAMA5D3_PIO_DRIVER1 0x118 /*PIO Driver 1 register offset*/
+#define SAMA5D3_PIO_DRIVER2 0x11C /*PIO Driver 2 register offset*/
+
+#define AT91SAM9X5_PIO_DRIVER1 0x114 /*PIO Driver 1 register offset*/
+#define AT91SAM9X5_PIO_DRIVER2 0x118 /*PIO Driver 2 register offset*/
+
#endif
diff --git a/arch/arm/mach-sa1100/Kconfig b/arch/arm/mach-sa1100/Kconfig
index 04f9784ff0ed..c6f6ed1cbed0 100644
--- a/arch/arm/mach-sa1100/Kconfig
+++ b/arch/arm/mach-sa1100/Kconfig
@@ -58,6 +58,7 @@ config SA1100_H3100
bool "Compaq iPAQ H3100"
select ARM_SA1110_CPUFREQ
select HTC_EGPIO
+ select MFD_IPAQ_MICRO
help
Say Y here if you intend to run this kernel on the Compaq iPAQ
H3100 handheld computer. Information about this machine and the
@@ -69,6 +70,7 @@ config SA1100_H3600
bool "Compaq iPAQ H3600/H3700"
select ARM_SA1110_CPUFREQ
select HTC_EGPIO
+ select MFD_IPAQ_MICRO
help
Say Y here if you intend to run this kernel on the Compaq iPAQ
H3600 handheld computer. Information about this machine and the
diff --git a/arch/arm/mach-sa1100/h3xxx.c b/arch/arm/mach-sa1100/h3xxx.c
index c79bf467fb7f..b1d4faa12f9a 100644
--- a/arch/arm/mach-sa1100/h3xxx.c
+++ b/arch/arm/mach-sa1100/h3xxx.c
@@ -25,6 +25,7 @@
#include <asm/mach/map.h>
#include <mach/h3xxx.h>
+#include <mach/irqs.h>
#include "generic.h"
@@ -244,9 +245,23 @@ static struct platform_device h3xxx_keys = {
},
};
+static struct resource h3xxx_micro_resources[] = {
+ DEFINE_RES_MEM(0x80010000, SZ_4K),
+ DEFINE_RES_MEM(0x80020000, SZ_4K),
+ DEFINE_RES_IRQ(IRQ_Ser1UART),
+};
+
+struct platform_device h3xxx_micro_asic = {
+ .name = "ipaq-h3xxx-micro",
+ .id = -1,
+ .resource = h3xxx_micro_resources,
+ .num_resources = ARRAY_SIZE(h3xxx_micro_resources),
+};
+
static struct platform_device *h3xxx_devices[] = {
&h3xxx_egpio,
&h3xxx_keys,
+ &h3xxx_micro_asic,
};
void __init h3xxx_mach_init(void)
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 5f2c988a06ac..55f9d6e0cc88 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -21,6 +21,7 @@
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
+#include <linux/log2.h>
#include <linux/io.h>
#include <linux/of.h>
#include <linux/of_address.h>
@@ -945,6 +946,98 @@ static int l2_wt_override;
* pass it though the device tree */
static u32 cache_id_part_number_from_dt;
+/**
+ * l2x0_cache_size_of_parse() - read cache size parameters from DT
+ * @np: the device tree node for the l2 cache
+ * @aux_val: pointer to machine-supplied auxiliary register value, to
+ * be augmented by the call (bits to be set to 1)
+ * @aux_mask: pointer to machine-supplied auxiliary register mask, to
+ * be augmented by the call (bits to be set to 0)
+ * @associativity: variable to return the calculated associativity in
+ * @max_way_size: the maximum size in bytes for the cache ways
+ */
+static void __init l2x0_cache_size_of_parse(const struct device_node *np,
+ u32 *aux_val, u32 *aux_mask,
+ u32 *associativity,
+ u32 max_way_size)
+{
+ u32 mask = 0, val = 0;
+ u32 cache_size = 0, sets = 0;
+ u32 way_size_bits = 1;
+ u32 way_size = 0;
+ u32 block_size = 0;
+ u32 line_size = 0;
+
+ of_property_read_u32(np, "cache-size", &cache_size);
+ of_property_read_u32(np, "cache-sets", &sets);
+ of_property_read_u32(np, "cache-block-size", &block_size);
+ of_property_read_u32(np, "cache-line-size", &line_size);
+
+ if (!cache_size || !sets)
+ return;
+
+ /* All these l2 caches have the same line = block size actually */
+ if (!line_size) {
+ if (block_size) {
+ /* If linesize is not given, it is equal to blocksize */
+ line_size = block_size;
+ } else {
+ /* Fall back to known size */
+ pr_warn("L2C OF: no cache block/line size given: "
+ "falling back to default size %d bytes\n",
+ CACHE_LINE_SIZE);
+ line_size = CACHE_LINE_SIZE;
+ }
+ }
+
+ if (line_size != CACHE_LINE_SIZE)
+ pr_warn("L2C OF: DT supplied line size %d bytes does "
+ "not match hardware line size of %d bytes\n",
+ line_size,
+ CACHE_LINE_SIZE);
+
+ /*
+ * Since:
+ * set size = cache size / sets
+ * ways = cache size / (sets * line size)
+ * way size = cache size / (cache size / (sets * line size))
+ * way size = sets * line size
+ * associativity = ways = cache size / way size
+ */
+ way_size = sets * line_size;
+ *associativity = cache_size / way_size;
+
+ if (way_size > max_way_size) {
+ pr_err("L2C OF: set size %dKB is too large\n", way_size);
+ return;
+ }
+
+ pr_info("L2C OF: override cache size: %d bytes (%dKB)\n",
+ cache_size, cache_size >> 10);
+ pr_info("L2C OF: override line size: %d bytes\n", line_size);
+ pr_info("L2C OF: override way size: %d bytes (%dKB)\n",
+ way_size, way_size >> 10);
+ pr_info("L2C OF: override associativity: %d\n", *associativity);
+
+ /*
+ * Calculates the bits 17:19 to set for way size:
+ * 512KB -> 6, 256KB -> 5, ... 16KB -> 1
+ */
+ way_size_bits = ilog2(way_size >> 10) - 3;
+ if (way_size_bits < 1 || way_size_bits > 6) {
+ pr_err("L2C OF: cache way size illegal: %dKB is not mapped\n",
+ way_size);
+ return;
+ }
+
+ mask |= L2C_AUX_CTRL_WAY_SIZE_MASK;
+ val |= (way_size_bits << L2C_AUX_CTRL_WAY_SIZE_SHIFT);
+
+ *aux_val &= ~mask;
+ *aux_val |= val;
+ *aux_mask &= ~mask;
+}
+
static void __init l2x0_of_parse(const struct device_node *np,
u32 *aux_val, u32 *aux_mask)
{
@@ -952,6 +1045,7 @@ static void __init l2x0_of_parse(const struct device_node *np,
u32 tag = 0;
u32 dirty = 0;
u32 val = 0, mask = 0;
+ u32 assoc;
of_property_read_u32(np, "arm,tag-latency", &tag);
if (tag) {
@@ -974,6 +1068,15 @@ static void __init l2x0_of_parse(const struct device_node *np,
val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT;
}
+ l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_256K);
+ if (assoc > 8) {
+ pr_err("l2x0 of: cache setting yield too high associativity\n");
+ pr_err("l2x0 of: %d calculated, max 8\n", assoc);
+ } else {
+ mask |= L2X0_AUX_CTRL_ASSOC_MASK;
+ val |= (assoc << L2X0_AUX_CTRL_ASSOC_SHIFT);
+ }
+
*aux_val &= ~mask;
*aux_val |= val;
*aux_mask &= ~mask;
@@ -1021,6 +1124,7 @@ static void __init l2c310_of_parse(const struct device_node *np,
u32 data[3] = { 0, 0, 0 };
u32 tag[3] = { 0, 0, 0 };
u32 filter[2] = { 0, 0 };
+ u32 assoc;
of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag));
if (tag[0] && tag[1] && tag[2])
@@ -1047,6 +1151,23 @@ static void __init l2c310_of_parse(const struct device_node *np,
writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L310_ADDR_FILTER_EN,
l2x0_base + L310_ADDR_FILTER_START);
}
+
+ l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_512K);
+ switch (assoc) {
+ case 16:
+ *aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK;
+ *aux_val |= L310_AUX_CTRL_ASSOCIATIVITY_16;
+ *aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK;
+ break;
+ case 8:
+ *aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK;
+ *aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK;
+ break;
+ default:
+ pr_err("PL310 OF: cache setting yield illegal associativity\n");
+ pr_err("PL310 OF: %d calculated, only 8 and 16 legal\n", assoc);
+ break;
+ }
}
static const struct l2c_init_data of_l2c310_data __initconst = {
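
As a sanity check of the arithmetic in l2x0_cache_size_of_parse() above (way size = sets * line size, associativity = cache size / way size, and the 16KB -> 1 ... 512KB -> 6 encoding of the auxiliary-control way-size field), here is a stand-alone sketch; the cache geometry used is illustrative, not taken from any particular SoC.

/* Stand-alone check of the L2 way-size arithmetic used above. */
#include <stdio.h>

static unsigned int ilog2u(unsigned int v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	unsigned int cache_size = 512 * 1024;	/* example: 512 KiB of L2 */
	unsigned int sets = 2048;
	unsigned int line_size = 32;		/* CACHE_LINE_SIZE */

	unsigned int way_size = sets * line_size;
	unsigned int assoc = cache_size / way_size;
	unsigned int way_size_bits = ilog2u(way_size >> 10) - 3;

	/* prints: way size 64 KiB, 8 ways, way-size field 3 */
	printf("way size %u KiB, %u ways, way-size field %u\n",
	       way_size >> 10, assoc, way_size_bits);
	return 0;
}
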
diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c
index c447ec70e868..e7a81cebbb2e 100644
--- a/arch/arm/mm/idmap.c
+++ b/arch/arm/mm/idmap.c
@@ -27,7 +27,7 @@ static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end,
if (pud_none_or_clear_bad(pud) || (pud_val(*pud) & L_PGD_SWAPPER)) {
pmd = pmd_alloc_one(&init_mm, addr);
if (!pmd) {
- pr_warning("Failed to allocate identity pmd.\n");
+ pr_warn("Failed to allocate identity pmd.\n");
return;
}
/*
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 659c75d808dc..9221645dd192 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -636,6 +636,11 @@ static int keep_initrd;
void free_initrd_mem(unsigned long start, unsigned long end)
{
if (!keep_initrd) {
+ if (start == initrd_start)
+ start = round_down(start, PAGE_SIZE);
+ if (end == initrd_end)
+ end = round_up(end, PAGE_SIZE);
+
poison_init_mem((void *)start, PAGE_ALIGN(end) - start);
free_reserved_area((void *)start, (void *)end, -1, "initrd");
}
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 8348ed6b2efe..9f98cec7fe1e 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -223,13 +223,13 @@ early_param("ecc", early_ecc);
static int __init early_cachepolicy(char *p)
{
- pr_warning("cachepolicy kernel parameter not supported without cp15\n");
+ pr_warn("cachepolicy kernel parameter not supported without cp15\n");
}
early_param("cachepolicy", early_cachepolicy);
static int __init noalign_setup(char *__unused)
{
- pr_warning("noalign kernel parameter not supported without cp15\n");
+ pr_warn("noalign kernel parameter not supported without cp15\n");
}
__setup("noalign", noalign_setup);
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index b5d67db20897..b3a947863ac7 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -570,7 +570,7 @@ __v7_ca15mp_proc_info:
__v7_b15mp_proc_info:
.long 0x420f00f0
.long 0xff0ffff0
- __v7_proc __v7_b15mp_setup, hwcaps = HWCAP_IDIV
+ __v7_proc __v7_b15mp_setup
.size __v7_b15mp_proc_info, . - __v7_b15mp_proc_info
/*
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index fd4e81a4e1ce..f0d3a2d85a5b 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -35,6 +35,7 @@ config ARM64
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_KGDB
select HAVE_ARCH_TRACEHOOK
+ select HAVE_BPF_JIT
select HAVE_C_RECORDMCOUNT
select HAVE_CC_STACKPROTECTOR
select HAVE_DEBUG_BUGVERBOSE
@@ -252,11 +253,11 @@ config SCHED_SMT
places. If unsure say N here.
config NR_CPUS
- int "Maximum number of CPUs (2-32)"
- range 2 32
+ int "Maximum number of CPUs (2-64)"
+ range 2 64
depends on SMP
# These have to remain sorted largest to smallest
- default "8"
+ default "64"
config HOTPLUG_CPU
bool "Support for hot-pluggable CPUs"
diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
index 4ee8e90b7a45..0a12933e50ed 100644
--- a/arch/arm64/Kconfig.debug
+++ b/arch/arm64/Kconfig.debug
@@ -43,4 +43,15 @@ config ARM64_RANDOMIZE_TEXT_OFFSET
of TEXT_OFFSET and platforms must not require a specific
value.
+config DEBUG_SET_MODULE_RONX
+ bool "Set loadable kernel module data as NX and text as RO"
+ depends on MODULES
+ help
+ This option helps catch unintended modifications to a loadable
+ kernel module's text and read-only data. It also prevents execution
+ of module data. Such protection may interfere with run-time code
+ patching and dynamic kernel tracing, and it may also protect
+ against certain classes of kernel exploits.
+ If in doubt, say "N".
+
endmenu
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 2df5e5daeebe..59c86b6b3052 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -47,6 +47,7 @@ endif
export TEXT_OFFSET GZFLAGS
core-y += arch/arm64/kernel/ arch/arm64/mm/
+core-$(CONFIG_NET) += arch/arm64/net/
core-$(CONFIG_KVM) += arch/arm64/kvm/
core-$(CONFIG_XEN) += arch/arm64/xen/
core-$(CONFIG_CRYPTO) += arch/arm64/crypto/
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index f2defe1c380c..689b6379188c 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -148,4 +148,8 @@ static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
{
}
+int set_memory_ro(unsigned long addr, int numpages);
+int set_memory_rw(unsigned long addr, int numpages);
+int set_memory_x(unsigned long addr, int numpages);
+int set_memory_nx(unsigned long addr, int numpages);
#endif
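
These four prototypes are what the new DEBUG_SET_MODULE_RONX support is built on; a hedged sketch of how a caller might flip a region's permissions (the wrappers are hypothetical and assume addr is page-aligned):

	/* Hypothetical callers, shown for illustration only. */
	static int protect_module_text(unsigned long addr, int nr_pages)
	{
		return set_memory_ro(addr, nr_pages);	/* text: read-only, still executable */
	}

	static int protect_module_data(unsigned long addr, int nr_pages)
	{
		return set_memory_nx(addr, nr_pages);	/* data: non-executable */
	}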
diff --git a/arch/arm64/include/asm/cachetype.h b/arch/arm64/include/asm/cachetype.h
index 7a2e0762cb40..4c631a0a3609 100644
--- a/arch/arm64/include/asm/cachetype.h
+++ b/arch/arm64/include/asm/cachetype.h
@@ -39,6 +39,26 @@
extern unsigned long __icache_flags;
+#define CCSIDR_EL1_LINESIZE_MASK 0x7
+#define CCSIDR_EL1_LINESIZE(x) ((x) & CCSIDR_EL1_LINESIZE_MASK)
+
+#define CCSIDR_EL1_NUMSETS_SHIFT 13
+#define CCSIDR_EL1_NUMSETS_MASK (0x7fff << CCSIDR_EL1_NUMSETS_SHIFT)
+#define CCSIDR_EL1_NUMSETS(x) \
+ (((x) & CCSIDR_EL1_NUMSETS_MASK) >> CCSIDR_EL1_NUMSETS_SHIFT)
+
+extern u64 __attribute_const__ icache_get_ccsidr(void);
+
+static inline int icache_get_linesize(void)
+{
+ return 16 << CCSIDR_EL1_LINESIZE(icache_get_ccsidr());
+}
+
+static inline int icache_get_numsets(void)
+{
+ return 1 + CCSIDR_EL1_NUMSETS(icache_get_ccsidr());
+}
+
/*
* Whilst the D-side always behaves as PIPT on AArch64, aliasing is
* permitted in the I-cache.
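
For reference, a worked decode of the two new accessors under an invented CCSIDR_EL1 value (only the field layout comes from the hunk above):

	/* Example value only. */
	u64 ccsidr = 0x201fe01a;
	int linesize = 16 << (ccsidr & 0x7);		/* 16 << 2  = 64-byte lines */
	int numsets  = 1 + ((ccsidr >> 13) & 0x7fff);	/* 0xff + 1 = 256 sets      */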
diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h
index d7b4b38a8e86..6f8e2ef9094a 100644
--- a/arch/arm64/include/asm/cpu_ops.h
+++ b/arch/arm64/include/asm/cpu_ops.h
@@ -28,6 +28,8 @@ struct device_node;
* enable-method property.
* @cpu_init: Reads any data necessary for a specific enable-method from the
* devicetree, for a given cpu node and proposed logical id.
+ * @cpu_init_idle: Reads any data necessary to initialize CPU idle states from
+ * devicetree, for a given cpu node and proposed logical id.
* @cpu_prepare: Early one-time preparation step for a cpu. If there is a
* mechanism for doing so, tests whether it is possible to boot
* the given CPU.
@@ -47,6 +49,7 @@ struct device_node;
struct cpu_operations {
const char *name;
int (*cpu_init)(struct device_node *, unsigned int);
+ int (*cpu_init_idle)(struct device_node *, unsigned int);
int (*cpu_prepare)(unsigned int);
int (*cpu_boot)(unsigned int);
void (*cpu_postboot)(void);
@@ -61,7 +64,7 @@ struct cpu_operations {
};
extern const struct cpu_operations *cpu_ops[NR_CPUS];
-extern int __init cpu_read_ops(struct device_node *dn, int cpu);
-extern void __init cpu_read_bootcpu_ops(void);
+int __init cpu_read_ops(struct device_node *dn, int cpu);
+void __init cpu_read_bootcpu_ops(void);
#endif /* ifndef __ASM_CPU_OPS_H */
diff --git a/arch/arm64/include/asm/cpuidle.h b/arch/arm64/include/asm/cpuidle.h
new file mode 100644
index 000000000000..b52a9932e2b1
--- /dev/null
+++ b/arch/arm64/include/asm/cpuidle.h
@@ -0,0 +1,13 @@
+#ifndef __ASM_CPUIDLE_H
+#define __ASM_CPUIDLE_H
+
+#ifdef CONFIG_CPU_IDLE
+extern int cpu_init_idle(unsigned int cpu);
+#else
+static inline int cpu_init_idle(unsigned int cpu)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
+#endif
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 7fb343779498..40ec68aa6870 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -48,11 +48,13 @@
/*
* #imm16 values used for BRK instruction generation
* Allowed values for kgdb are 0x400 - 0x7ff
+ * 0x100: for triggering a fault on purpose (reserved)
* 0x400: for dynamic BRK instruction
* 0x401: for compile time BRK instruction
*/
-#define KGDB_DYN_DGB_BRK_IMM 0x400
-#define KDBG_COMPILED_DBG_BRK_IMM 0x401
+#define FAULT_BRK_IMM 0x100
+#define KGDB_DYN_DBG_BRK_IMM 0x400
+#define KGDB_COMPILED_DBG_BRK_IMM 0x401
/*
* BRK instruction encoding
@@ -61,24 +63,30 @@
#define AARCH64_BREAK_MON 0xd4200000
/*
+ * BRK instruction for provoking a fault on purpose
+ * Unlike kgdb, an #imm16 value with no allocated handler is used, so executing it faults.
+ */
+#define AARCH64_BREAK_FAULT (AARCH64_BREAK_MON | (FAULT_BRK_IMM << 5))
+
+/*
* Extract byte from BRK instruction
*/
-#define KGDB_DYN_DGB_BRK_INS_BYTE(x) \
+#define KGDB_DYN_DBG_BRK_INS_BYTE(x) \
((((AARCH64_BREAK_MON) & 0xffe0001f) >> (x * 8)) & 0xff)
/*
* Extract byte from BRK #imm16
*/
-#define KGBD_DYN_DGB_BRK_IMM_BYTE(x) \
- (((((KGDB_DYN_DGB_BRK_IMM) & 0xffff) << 5) >> (x * 8)) & 0xff)
+#define KGBD_DYN_DBG_BRK_IMM_BYTE(x) \
+ (((((KGDB_DYN_DBG_BRK_IMM) & 0xffff) << 5) >> (x * 8)) & 0xff)
-#define KGDB_DYN_DGB_BRK_BYTE(x) \
- (KGDB_DYN_DGB_BRK_INS_BYTE(x) | KGBD_DYN_DGB_BRK_IMM_BYTE(x))
+#define KGDB_DYN_DBG_BRK_BYTE(x) \
+ (KGDB_DYN_DBG_BRK_INS_BYTE(x) | KGBD_DYN_DBG_BRK_IMM_BYTE(x))
-#define KGDB_DYN_BRK_INS_BYTE0 KGDB_DYN_DGB_BRK_BYTE(0)
-#define KGDB_DYN_BRK_INS_BYTE1 KGDB_DYN_DGB_BRK_BYTE(1)
-#define KGDB_DYN_BRK_INS_BYTE2 KGDB_DYN_DGB_BRK_BYTE(2)
-#define KGDB_DYN_BRK_INS_BYTE3 KGDB_DYN_DGB_BRK_BYTE(3)
+#define KGDB_DYN_BRK_INS_BYTE0 KGDB_DYN_DBG_BRK_BYTE(0)
+#define KGDB_DYN_BRK_INS_BYTE1 KGDB_DYN_DBG_BRK_BYTE(1)
+#define KGDB_DYN_BRK_INS_BYTE2 KGDB_DYN_DBG_BRK_BYTE(2)
+#define KGDB_DYN_BRK_INS_BYTE3 KGDB_DYN_DBG_BRK_BYTE(3)
#define CACHE_FLUSH_IS_SAFE 1
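
To make the renamed byte-extraction macros concrete, the dynamic kgdb breakpoint they describe is simply BRK #0x400 (both numbers below follow directly from this hunk):

	u32 brk = 0xd4200000 | (0x400 << 5);	/* = 0xd4208000, i.e. BRK #0x400 */
	/*
	 * Stored little-endian, the bytes are 0x00 0x80 0x20 0xd4 -
	 * exactly KGDB_DYN_BRK_INS_BYTE0..3 above.
	 */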
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index dc82e52acdb3..adeae3f6f0fc 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -52,6 +52,13 @@ static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
dev->archdata.dma_ops = ops;
}
+static inline int set_arch_dma_coherent_ops(struct device *dev)
+{
+ set_dma_ops(dev, &coherent_swiotlb_dma_ops);
+ return 0;
+}
+#define set_arch_dma_coherent_ops set_arch_dma_coherent_ops
+
#include <asm-generic/dma-mapping-common.h>
static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index dc1f73b13e74..56a9e63b6c33 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -2,6 +2,8 @@
* Copyright (C) 2013 Huawei Ltd.
* Author: Jiang Liu <liuj97@gmail.com>
*
+ * Copyright (C) 2014 Zi Shen Lim <zlim.lnx@gmail.com>
+ *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
@@ -64,12 +66,155 @@ enum aarch64_insn_imm_type {
AARCH64_INSN_IMM_14,
AARCH64_INSN_IMM_12,
AARCH64_INSN_IMM_9,
+ AARCH64_INSN_IMM_7,
+ AARCH64_INSN_IMM_6,
+ AARCH64_INSN_IMM_S,
+ AARCH64_INSN_IMM_R,
AARCH64_INSN_IMM_MAX
};
+enum aarch64_insn_register_type {
+ AARCH64_INSN_REGTYPE_RT,
+ AARCH64_INSN_REGTYPE_RN,
+ AARCH64_INSN_REGTYPE_RT2,
+ AARCH64_INSN_REGTYPE_RM,
+ AARCH64_INSN_REGTYPE_RD,
+ AARCH64_INSN_REGTYPE_RA,
+};
+
+enum aarch64_insn_register {
+ AARCH64_INSN_REG_0 = 0,
+ AARCH64_INSN_REG_1 = 1,
+ AARCH64_INSN_REG_2 = 2,
+ AARCH64_INSN_REG_3 = 3,
+ AARCH64_INSN_REG_4 = 4,
+ AARCH64_INSN_REG_5 = 5,
+ AARCH64_INSN_REG_6 = 6,
+ AARCH64_INSN_REG_7 = 7,
+ AARCH64_INSN_REG_8 = 8,
+ AARCH64_INSN_REG_9 = 9,
+ AARCH64_INSN_REG_10 = 10,
+ AARCH64_INSN_REG_11 = 11,
+ AARCH64_INSN_REG_12 = 12,
+ AARCH64_INSN_REG_13 = 13,
+ AARCH64_INSN_REG_14 = 14,
+ AARCH64_INSN_REG_15 = 15,
+ AARCH64_INSN_REG_16 = 16,
+ AARCH64_INSN_REG_17 = 17,
+ AARCH64_INSN_REG_18 = 18,
+ AARCH64_INSN_REG_19 = 19,
+ AARCH64_INSN_REG_20 = 20,
+ AARCH64_INSN_REG_21 = 21,
+ AARCH64_INSN_REG_22 = 22,
+ AARCH64_INSN_REG_23 = 23,
+ AARCH64_INSN_REG_24 = 24,
+ AARCH64_INSN_REG_25 = 25,
+ AARCH64_INSN_REG_26 = 26,
+ AARCH64_INSN_REG_27 = 27,
+ AARCH64_INSN_REG_28 = 28,
+ AARCH64_INSN_REG_29 = 29,
+ AARCH64_INSN_REG_FP = 29, /* Frame pointer */
+ AARCH64_INSN_REG_30 = 30,
+ AARCH64_INSN_REG_LR = 30, /* Link register */
+ AARCH64_INSN_REG_ZR = 31, /* Zero: as source register */
+ AARCH64_INSN_REG_SP = 31 /* Stack pointer: as load/store base reg */
+};
+
+enum aarch64_insn_variant {
+ AARCH64_INSN_VARIANT_32BIT,
+ AARCH64_INSN_VARIANT_64BIT
+};
+
+enum aarch64_insn_condition {
+ AARCH64_INSN_COND_EQ = 0x0, /* == */
+ AARCH64_INSN_COND_NE = 0x1, /* != */
+ AARCH64_INSN_COND_CS = 0x2, /* unsigned >= */
+ AARCH64_INSN_COND_CC = 0x3, /* unsigned < */
+ AARCH64_INSN_COND_MI = 0x4, /* < 0 */
+ AARCH64_INSN_COND_PL = 0x5, /* >= 0 */
+ AARCH64_INSN_COND_VS = 0x6, /* overflow */
+ AARCH64_INSN_COND_VC = 0x7, /* no overflow */
+ AARCH64_INSN_COND_HI = 0x8, /* unsigned > */
+ AARCH64_INSN_COND_LS = 0x9, /* unsigned <= */
+ AARCH64_INSN_COND_GE = 0xa, /* signed >= */
+ AARCH64_INSN_COND_LT = 0xb, /* signed < */
+ AARCH64_INSN_COND_GT = 0xc, /* signed > */
+ AARCH64_INSN_COND_LE = 0xd, /* signed <= */
+ AARCH64_INSN_COND_AL = 0xe, /* always */
+};
+
enum aarch64_insn_branch_type {
AARCH64_INSN_BRANCH_NOLINK,
AARCH64_INSN_BRANCH_LINK,
+ AARCH64_INSN_BRANCH_RETURN,
+ AARCH64_INSN_BRANCH_COMP_ZERO,
+ AARCH64_INSN_BRANCH_COMP_NONZERO,
+};
+
+enum aarch64_insn_size_type {
+ AARCH64_INSN_SIZE_8,
+ AARCH64_INSN_SIZE_16,
+ AARCH64_INSN_SIZE_32,
+ AARCH64_INSN_SIZE_64,
+};
+
+enum aarch64_insn_ldst_type {
+ AARCH64_INSN_LDST_LOAD_REG_OFFSET,
+ AARCH64_INSN_LDST_STORE_REG_OFFSET,
+ AARCH64_INSN_LDST_LOAD_PAIR_PRE_INDEX,
+ AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX,
+ AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX,
+ AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX,
+};
+
+enum aarch64_insn_adsb_type {
+ AARCH64_INSN_ADSB_ADD,
+ AARCH64_INSN_ADSB_SUB,
+ AARCH64_INSN_ADSB_ADD_SETFLAGS,
+ AARCH64_INSN_ADSB_SUB_SETFLAGS
+};
+
+enum aarch64_insn_movewide_type {
+ AARCH64_INSN_MOVEWIDE_ZERO,
+ AARCH64_INSN_MOVEWIDE_KEEP,
+ AARCH64_INSN_MOVEWIDE_INVERSE
+};
+
+enum aarch64_insn_bitfield_type {
+ AARCH64_INSN_BITFIELD_MOVE,
+ AARCH64_INSN_BITFIELD_MOVE_UNSIGNED,
+ AARCH64_INSN_BITFIELD_MOVE_SIGNED
+};
+
+enum aarch64_insn_data1_type {
+ AARCH64_INSN_DATA1_REVERSE_16,
+ AARCH64_INSN_DATA1_REVERSE_32,
+ AARCH64_INSN_DATA1_REVERSE_64,
+};
+
+enum aarch64_insn_data2_type {
+ AARCH64_INSN_DATA2_UDIV,
+ AARCH64_INSN_DATA2_SDIV,
+ AARCH64_INSN_DATA2_LSLV,
+ AARCH64_INSN_DATA2_LSRV,
+ AARCH64_INSN_DATA2_ASRV,
+ AARCH64_INSN_DATA2_RORV,
+};
+
+enum aarch64_insn_data3_type {
+ AARCH64_INSN_DATA3_MADD,
+ AARCH64_INSN_DATA3_MSUB,
+};
+
+enum aarch64_insn_logic_type {
+ AARCH64_INSN_LOGIC_AND,
+ AARCH64_INSN_LOGIC_BIC,
+ AARCH64_INSN_LOGIC_ORR,
+ AARCH64_INSN_LOGIC_ORN,
+ AARCH64_INSN_LOGIC_EOR,
+ AARCH64_INSN_LOGIC_EON,
+ AARCH64_INSN_LOGIC_AND_SETFLAGS,
+ AARCH64_INSN_LOGIC_BIC_SETFLAGS
};
#define __AARCH64_INSN_FUNCS(abbr, mask, val) \
@@ -78,13 +223,58 @@ static __always_inline bool aarch64_insn_is_##abbr(u32 code) \
static __always_inline u32 aarch64_insn_get_##abbr##_value(void) \
{ return (val); }
+__AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800)
+__AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800)
+__AARCH64_INSN_FUNCS(stp_post, 0x7FC00000, 0x28800000)
+__AARCH64_INSN_FUNCS(ldp_post, 0x7FC00000, 0x28C00000)
+__AARCH64_INSN_FUNCS(stp_pre, 0x7FC00000, 0x29800000)
+__AARCH64_INSN_FUNCS(ldp_pre, 0x7FC00000, 0x29C00000)
+__AARCH64_INSN_FUNCS(add_imm, 0x7F000000, 0x11000000)
+__AARCH64_INSN_FUNCS(adds_imm, 0x7F000000, 0x31000000)
+__AARCH64_INSN_FUNCS(sub_imm, 0x7F000000, 0x51000000)
+__AARCH64_INSN_FUNCS(subs_imm, 0x7F000000, 0x71000000)
+__AARCH64_INSN_FUNCS(movn, 0x7F800000, 0x12800000)
+__AARCH64_INSN_FUNCS(sbfm, 0x7F800000, 0x13000000)
+__AARCH64_INSN_FUNCS(bfm, 0x7F800000, 0x33000000)
+__AARCH64_INSN_FUNCS(movz, 0x7F800000, 0x52800000)
+__AARCH64_INSN_FUNCS(ubfm, 0x7F800000, 0x53000000)
+__AARCH64_INSN_FUNCS(movk, 0x7F800000, 0x72800000)
+__AARCH64_INSN_FUNCS(add, 0x7F200000, 0x0B000000)
+__AARCH64_INSN_FUNCS(adds, 0x7F200000, 0x2B000000)
+__AARCH64_INSN_FUNCS(sub, 0x7F200000, 0x4B000000)
+__AARCH64_INSN_FUNCS(subs, 0x7F200000, 0x6B000000)
+__AARCH64_INSN_FUNCS(madd, 0x7FE08000, 0x1B000000)
+__AARCH64_INSN_FUNCS(msub, 0x7FE08000, 0x1B008000)
+__AARCH64_INSN_FUNCS(udiv, 0x7FE0FC00, 0x1AC00800)
+__AARCH64_INSN_FUNCS(sdiv, 0x7FE0FC00, 0x1AC00C00)
+__AARCH64_INSN_FUNCS(lslv, 0x7FE0FC00, 0x1AC02000)
+__AARCH64_INSN_FUNCS(lsrv, 0x7FE0FC00, 0x1AC02400)
+__AARCH64_INSN_FUNCS(asrv, 0x7FE0FC00, 0x1AC02800)
+__AARCH64_INSN_FUNCS(rorv, 0x7FE0FC00, 0x1AC02C00)
+__AARCH64_INSN_FUNCS(rev16, 0x7FFFFC00, 0x5AC00400)
+__AARCH64_INSN_FUNCS(rev32, 0x7FFFFC00, 0x5AC00800)
+__AARCH64_INSN_FUNCS(rev64, 0x7FFFFC00, 0x5AC00C00)
+__AARCH64_INSN_FUNCS(and, 0x7F200000, 0x0A000000)
+__AARCH64_INSN_FUNCS(bic, 0x7F200000, 0x0A200000)
+__AARCH64_INSN_FUNCS(orr, 0x7F200000, 0x2A000000)
+__AARCH64_INSN_FUNCS(orn, 0x7F200000, 0x2A200000)
+__AARCH64_INSN_FUNCS(eor, 0x7F200000, 0x4A000000)
+__AARCH64_INSN_FUNCS(eon, 0x7F200000, 0x4A200000)
+__AARCH64_INSN_FUNCS(ands, 0x7F200000, 0x6A000000)
+__AARCH64_INSN_FUNCS(bics, 0x7F200000, 0x6A200000)
__AARCH64_INSN_FUNCS(b, 0xFC000000, 0x14000000)
__AARCH64_INSN_FUNCS(bl, 0xFC000000, 0x94000000)
+__AARCH64_INSN_FUNCS(cbz, 0xFE000000, 0x34000000)
+__AARCH64_INSN_FUNCS(cbnz, 0xFE000000, 0x35000000)
+__AARCH64_INSN_FUNCS(bcond, 0xFF000010, 0x54000000)
__AARCH64_INSN_FUNCS(svc, 0xFFE0001F, 0xD4000001)
__AARCH64_INSN_FUNCS(hvc, 0xFFE0001F, 0xD4000002)
__AARCH64_INSN_FUNCS(smc, 0xFFE0001F, 0xD4000003)
__AARCH64_INSN_FUNCS(brk, 0xFFE0001F, 0xD4200000)
__AARCH64_INSN_FUNCS(hint, 0xFFFFF01F, 0xD503201F)
+__AARCH64_INSN_FUNCS(br, 0xFFFFFC1F, 0xD61F0000)
+__AARCH64_INSN_FUNCS(blr, 0xFFFFFC1F, 0xD63F0000)
+__AARCH64_INSN_FUNCS(ret, 0xFFFFFC1F, 0xD65F0000)
#undef __AARCH64_INSN_FUNCS
@@ -97,8 +287,67 @@ u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
u32 insn, u64 imm);
u32 aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr,
enum aarch64_insn_branch_type type);
+u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr,
+ enum aarch64_insn_register reg,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_branch_type type);
+u32 aarch64_insn_gen_cond_branch_imm(unsigned long pc, unsigned long addr,
+ enum aarch64_insn_condition cond);
u32 aarch64_insn_gen_hint(enum aarch64_insn_hint_op op);
u32 aarch64_insn_gen_nop(void);
+u32 aarch64_insn_gen_branch_reg(enum aarch64_insn_register reg,
+ enum aarch64_insn_branch_type type);
+u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg,
+ enum aarch64_insn_register base,
+ enum aarch64_insn_register offset,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_ldst_type type);
+u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
+ enum aarch64_insn_register reg2,
+ enum aarch64_insn_register base,
+ int offset,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_ldst_type type);
+u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
+ enum aarch64_insn_register src,
+ int imm, enum aarch64_insn_variant variant,
+ enum aarch64_insn_adsb_type type);
+u32 aarch64_insn_gen_bitfield(enum aarch64_insn_register dst,
+ enum aarch64_insn_register src,
+ int immr, int imms,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_bitfield_type type);
+u32 aarch64_insn_gen_movewide(enum aarch64_insn_register dst,
+ int imm, int shift,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_movewide_type type);
+u32 aarch64_insn_gen_add_sub_shifted_reg(enum aarch64_insn_register dst,
+ enum aarch64_insn_register src,
+ enum aarch64_insn_register reg,
+ int shift,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_adsb_type type);
+u32 aarch64_insn_gen_data1(enum aarch64_insn_register dst,
+ enum aarch64_insn_register src,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_data1_type type);
+u32 aarch64_insn_gen_data2(enum aarch64_insn_register dst,
+ enum aarch64_insn_register src,
+ enum aarch64_insn_register reg,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_data2_type type);
+u32 aarch64_insn_gen_data3(enum aarch64_insn_register dst,
+ enum aarch64_insn_register src,
+ enum aarch64_insn_register reg1,
+ enum aarch64_insn_register reg2,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_data3_type type);
+u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst,
+ enum aarch64_insn_register src,
+ enum aarch64_insn_register reg,
+ int shift,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_logic_type type);
bool aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn);
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index e0ecdcf6632d..f771e8bcad4a 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -243,7 +243,7 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
* (PHYS_OFFSET and PHYS_MASK taken into account).
*/
#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
-extern int valid_phys_addr_range(unsigned long addr, size_t size);
+extern int valid_phys_addr_range(phys_addr_t addr, size_t size);
extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
extern int devmem_is_allowed(unsigned long pfn);
diff --git a/arch/arm64/include/asm/kgdb.h b/arch/arm64/include/asm/kgdb.h
index 3c8aafc1082f..f69f69c8120c 100644
--- a/arch/arm64/include/asm/kgdb.h
+++ b/arch/arm64/include/asm/kgdb.h
@@ -29,7 +29,7 @@
static inline void arch_kgdb_breakpoint(void)
{
- asm ("brk %0" : : "I" (KDBG_COMPILED_DBG_BRK_IMM));
+ asm ("brk %0" : : "I" (KGDB_COMPILED_DBG_BRK_IMM));
}
extern void kgdb_handle_bus_error(void);
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index cc83520459ed..7fd3e27e3ccc 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -122,6 +122,17 @@
#define VTCR_EL2_T0SZ_MASK 0x3f
#define VTCR_EL2_T0SZ_40B 24
+/*
+ * We configure the Stage-2 page tables to always restrict the IPA space to be
+ * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are
+ * not known to exist and will break with this configuration.
+ *
+ * Note that when using 4K pages, we concatenate two first level page tables
+ * together.
+ *
+ * The magic numbers used for VTTBR_X in this patch can be found in Tables
+ * D4-23 and D4-25 in ARM DDI 0487A.b.
+ */
#ifdef CONFIG_ARM64_64K_PAGES
/*
* Stage2 translation configuration:
@@ -149,7 +160,7 @@
#endif
#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_BADDR_MASK (((1LLU << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
#define VTTBR_VMID_SHIFT (48LLU)
#define VTTBR_VMID_MASK (0xffLLU << VTTBR_VMID_SHIFT)
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index fdc3e21abd8d..5674a55b5518 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -174,6 +174,11 @@ static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu)
static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
{
+ return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC;
+}
+
+static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
+{
return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE;
}
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index e10c45a578e3..2012c4ba8d67 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -22,6 +22,8 @@
#ifndef __ARM64_KVM_HOST_H__
#define __ARM64_KVM_HOST_H__
+#include <linux/types.h>
+#include <linux/kvm_types.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmio.h>
@@ -41,8 +43,7 @@
#define KVM_VCPU_MAX_FEATURES 3
-struct kvm_vcpu;
-int kvm_target_cpu(void);
+int __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
int kvm_arch_dev_ioctl_check_extension(long ext);
@@ -164,25 +165,23 @@ struct kvm_vcpu_stat {
u32 halt_wakeup;
};
-struct kvm_vcpu_init;
int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
const struct kvm_vcpu_init *init);
int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
-struct kvm_one_reg;
int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
#define KVM_ARCH_WANT_MMU_NOTIFIER
-struct kvm;
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
int kvm_unmap_hva_range(struct kvm *kvm,
unsigned long start, unsigned long end);
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
/* We do not have shadow page tables, hence the empty hooks */
-static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+static inline int kvm_age_hva(struct kvm *kvm, unsigned long start,
+ unsigned long end)
{
return 0;
}
@@ -192,8 +191,13 @@ static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
return 0;
}
+static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+ unsigned long address)
+{
+}
+
struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
-struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
+struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
u64 kvm_call_hyp(void *hypfn, ...);
@@ -244,4 +248,10 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic)
}
}
+static inline void kvm_arch_hardware_disable(void) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+
#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 8e138c7c53ac..a030d163840b 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -59,10 +59,9 @@
#define KERN_TO_HYP(kva) ((unsigned long)kva - PAGE_OFFSET + HYP_PAGE_OFFSET)
/*
- * Align KVM with the kernel's view of physical memory. Should be
- * 40bit IPA, with PGD being 8kB aligned in the 4KB page configuration.
+ * We currently only support a 40bit IPA.
*/
-#define KVM_PHYS_SHIFT PHYS_MASK_SHIFT
+#define KVM_PHYS_SHIFT (40)
#define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT)
#define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL)
@@ -93,19 +92,6 @@ void kvm_clear_hyp_idmap(void);
#define kvm_set_pte(ptep, pte) set_pte(ptep, pte)
#define kvm_set_pmd(pmdp, pmd) set_pmd(pmdp, pmd)
-static inline bool kvm_is_write_fault(unsigned long esr)
-{
- unsigned long esr_ec = esr >> ESR_EL2_EC_SHIFT;
-
- if (esr_ec == ESR_EL2_EC_IABT)
- return false;
-
- if ((esr & ESR_EL2_ISV) && !(esr & ESR_EL2_WNR))
- return false;
-
- return true;
-}
-
static inline void kvm_clean_pgd(pgd_t *pgd) {}
static inline void kvm_clean_pmd_entry(pmd_t *pmd) {}
static inline void kvm_clean_pte(pte_t *pte) {}
diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index 453a179469a3..5279e5733386 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -26,13 +26,13 @@ static inline void set_my_cpu_offset(unsigned long off)
static inline unsigned long __my_cpu_offset(void)
{
unsigned long off;
- register unsigned long *sp asm ("sp");
/*
* We want to allow caching the value, so avoid using volatile and
* instead use a fake stack read to hazard against barrier().
*/
- asm("mrs %0, tpidr_el1" : "=r" (off) : "Q" (*sp));
+ asm("mrs %0, tpidr_el1" : "=r" (off) :
+ "Q" (*(const unsigned long *)current_stack_pointer));
return off;
}
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index ffe1ba0506d1..d58e40cde88e 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -149,46 +149,51 @@ extern struct page *empty_zero_page;
#define pte_valid_not_user(pte) \
((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
-static inline pte_t pte_wrprotect(pte_t pte)
+static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot)
{
- pte_val(pte) &= ~PTE_WRITE;
+ pte_val(pte) &= ~pgprot_val(prot);
return pte;
}
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot)
{
- pte_val(pte) |= PTE_WRITE;
+ pte_val(pte) |= pgprot_val(prot);
return pte;
}
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+ return clear_pte_bit(pte, __pgprot(PTE_WRITE));
+}
+
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+ return set_pte_bit(pte, __pgprot(PTE_WRITE));
+}
+
static inline pte_t pte_mkclean(pte_t pte)
{
- pte_val(pte) &= ~PTE_DIRTY;
- return pte;
+ return clear_pte_bit(pte, __pgprot(PTE_DIRTY));
}
static inline pte_t pte_mkdirty(pte_t pte)
{
- pte_val(pte) |= PTE_DIRTY;
- return pte;
+ return set_pte_bit(pte, __pgprot(PTE_DIRTY));
}
static inline pte_t pte_mkold(pte_t pte)
{
- pte_val(pte) &= ~PTE_AF;
- return pte;
+ return clear_pte_bit(pte, __pgprot(PTE_AF));
}
static inline pte_t pte_mkyoung(pte_t pte)
{
- pte_val(pte) |= PTE_AF;
- return pte;
+ return set_pte_bit(pte, __pgprot(PTE_AF));
}
static inline pte_t pte_mkspecial(pte_t pte)
{
- pte_val(pte) |= PTE_SPECIAL;
- return pte;
+ return set_pte_bit(pte, __pgprot(PTE_SPECIAL));
}
static inline void set_pte(pte_t *ptep, pte_t pte)
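
The payoff of the new clear_pte_bit()/set_pte_bit() helpers is that further attribute accessors collapse to one-liners; a hedged example in the same style (this helper is hypothetical, not part of the patch, and assumes PTE_UXN is available as a pgprot bit):

	/* Hypothetical accessor, for illustration only. */
	static inline pte_t pte_mknexec(pte_t pte)
	{
		return set_pte_bit(pte, __pgprot(PTE_UXN));
	}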
diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h
index 0c657bb54597..9a8fd84f8fb2 100644
--- a/arch/arm64/include/asm/proc-fns.h
+++ b/arch/arm64/include/asm/proc-fns.h
@@ -32,6 +32,8 @@ extern void cpu_cache_off(void);
extern void cpu_do_idle(void);
extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
+void cpu_soft_restart(phys_addr_t cpu_reset,
+ unsigned long addr) __attribute__((noreturn));
extern void cpu_do_suspend(struct cpu_suspend_ctx *ptr);
extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr);
diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
index e9c149c042e0..456d67c1f0fa 100644
--- a/arch/arm64/include/asm/suspend.h
+++ b/arch/arm64/include/asm/suspend.h
@@ -21,6 +21,7 @@ struct sleep_save_sp {
phys_addr_t save_ptr_stash_phys;
};
+extern int __cpu_suspend(unsigned long arg, int (*fn)(unsigned long));
extern void cpu_resume(void);
extern int cpu_suspend(unsigned long);
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 45108d802f5e..459bf8e53208 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -69,14 +69,19 @@ struct thread_info {
#define init_stack (init_thread_union.stack)
/*
+ * how to get the current stack pointer from C
+ */
+register unsigned long current_stack_pointer asm ("sp");
+
+/*
* how to get the thread information struct from C
*/
static inline struct thread_info *current_thread_info(void) __attribute_const__;
static inline struct thread_info *current_thread_info(void)
{
- register unsigned long sp asm ("sp");
- return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
+ return (struct thread_info *)
+ (current_stack_pointer & ~(THREAD_SIZE - 1));
}
#define thread_saved_pc(tsk) \
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index e633ff8cdec8..8e38878c87c6 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -37,6 +37,7 @@
#define __KVM_HAVE_GUEST_DEBUG
#define __KVM_HAVE_IRQ_LINE
+#define __KVM_HAVE_READONLY_MEM
#define KVM_REG_SIZE(id) \
(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
@@ -159,6 +160,7 @@ struct kvm_arch_memory_slot {
#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
/* KVM_IRQ_LINE irq field index values */
#define KVM_ARM_IRQ_TYPE_SHIFT 24
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index df7ef8768fc2..6e9538c2d28a 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -26,6 +26,7 @@ arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o
arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
arm64-obj-$(CONFIG_ARM64_CPU_SUSPEND) += sleep.o suspend.o
+arm64-obj-$(CONFIG_CPU_IDLE) += cpuidle.o
arm64-obj-$(CONFIG_JUMP_LABEL) += jump_label.o
arm64-obj-$(CONFIG_KGDB) += kgdb.o
arm64-obj-$(CONFIG_EFI) += efi.o efi-stub.o efi-entry.o
diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
new file mode 100644
index 000000000000..19d17f51db37
--- /dev/null
+++ b/arch/arm64/kernel/cpuidle.c
@@ -0,0 +1,31 @@
+/*
+ * ARM64 CPU idle arch support
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/of.h>
+#include <linux/of_device.h>
+
+#include <asm/cpuidle.h>
+#include <asm/cpu_ops.h>
+
+int cpu_init_idle(unsigned int cpu)
+{
+ int ret = -EOPNOTSUPP;
+ struct device_node *cpu_node = of_cpu_device_node_get(cpu);
+
+ if (!cpu_node)
+ return -ENODEV;
+
+ if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_init_idle)
+ ret = cpu_ops[cpu]->cpu_init_idle(cpu_node, cpu);
+
+ of_node_put(cpu_node);
+ return ret;
+}
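
A hedged sketch of how a cpuidle driver's init path might consume this hook: walk the possible CPUs and treat a non-zero return as "no usable idle states for this CPU" (the function below is illustrative, not taken from an existing driver):

	/* Illustrative consumer only. */
	static int __init example_idle_probe(void)
	{
		int cpu, ret;

		for_each_possible_cpu(cpu) {
			ret = cpu_init_idle(cpu);
			if (ret) {
				pr_err("CPU%d: no idle states (%d)\n", cpu, ret);
				return ret;
			}
		}
		return 0;
	}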
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 177169623026..504fdaa8367e 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -20,8 +20,10 @@
#include <asm/cputype.h>
#include <linux/bitops.h>
+#include <linux/bug.h>
#include <linux/init.h>
#include <linux/kernel.h>
+#include <linux/preempt.h>
#include <linux/printk.h>
#include <linux/smp.h>
@@ -47,8 +49,18 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
unsigned int cpu = smp_processor_id();
u32 l1ip = CTR_L1IP(info->reg_ctr);
- if (l1ip != ICACHE_POLICY_PIPT)
- set_bit(ICACHEF_ALIASING, &__icache_flags);
+ if (l1ip != ICACHE_POLICY_PIPT) {
+ /*
+ * VIPT caches are non-aliasing if the VA always equals the PA
+ * in all bit positions that are covered by the index. This is
+ * the case if the size of a way (# of sets * line size) does
+ * not exceed PAGE_SIZE.
+ */
+ u32 waysize = icache_get_numsets() * icache_get_linesize();
+
+ if (l1ip != ICACHE_POLICY_VIPT || waysize > PAGE_SIZE)
+ set_bit(ICACHEF_ALIASING, &__icache_flags);
+ }
if (l1ip == ICACHE_POLICY_AIVIVT)
set_bit(ICACHEF_AIVIVT, &__icache_flags);
@@ -190,3 +202,15 @@ void __init cpuinfo_store_boot_cpu(void)
boot_cpu_data = *info;
}
+
+u64 __attribute_const__ icache_get_ccsidr(void)
+{
+ u64 ccsidr;
+
+ WARN_ON(preemptible());
+
+ /* Select L1 I-cache and read its size ID register */
+ asm("msr csselr_el1, %1; isb; mrs %0, ccsidr_el1"
+ : "=r"(ccsidr) : "r"(1L));
+ return ccsidr;
+}
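
Plugging icache_get_ccsidr() into the aliasing check earlier in this file: with 64-byte lines, a VIPT way crosses a 4K page once there are more than 64 sets (the numbers below are examples only):

	/* Example arithmetic, assuming PAGE_SIZE == 4K:        */
	/*  256 sets * 64-byte lines = 16KB way -> aliasing     */
	/*   64 sets * 64-byte lines =  4KB way -> non-aliasing */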
diff --git a/arch/arm64/kernel/efi-stub.c b/arch/arm64/kernel/efi-stub.c
index 1317fef8dde9..d27dd982ff26 100644
--- a/arch/arm64/kernel/efi-stub.c
+++ b/arch/arm64/kernel/efi-stub.c
@@ -28,20 +28,16 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table,
kernel_size = _edata - _text;
if (*image_addr != (dram_base + TEXT_OFFSET)) {
kernel_memsize = kernel_size + (_end - _edata);
- status = efi_relocate_kernel(sys_table, image_addr,
- kernel_size, kernel_memsize,
- dram_base + TEXT_OFFSET,
- PAGE_SIZE);
+ status = efi_low_alloc(sys_table, kernel_memsize + TEXT_OFFSET,
+ SZ_2M, reserve_addr);
if (status != EFI_SUCCESS) {
pr_efi_err(sys_table, "Failed to relocate kernel\n");
return status;
}
- if (*image_addr != (dram_base + TEXT_OFFSET)) {
- pr_efi_err(sys_table, "Failed to alloc kernel memory\n");
- efi_free(sys_table, kernel_memsize, *image_addr);
- return EFI_LOAD_ERROR;
- }
- *image_size = kernel_memsize;
+ memcpy((void *)*reserve_addr + TEXT_OFFSET, (void *)*image_addr,
+ kernel_size);
+ *image_addr = *reserve_addr + TEXT_OFFSET;
+ *reserve_size = kernel_memsize + TEXT_OFFSET;
}
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index f0b5e5120a87..726b910fe6ec 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -324,7 +324,6 @@ el1_dbg:
mrs x0, far_el1
mov x2, sp // struct pt_regs
bl do_debug_exception
- enable_dbg
kernel_exit 1
el1_inv:
// TODO: add support for undefined instructions in kernel mode
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 7924d73b6476..cf8556ae09d0 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -58,7 +58,8 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
u32 new;
pc = (unsigned long)&ftrace_call;
- new = aarch64_insn_gen_branch_imm(pc, (unsigned long)func, true);
+ new = aarch64_insn_gen_branch_imm(pc, (unsigned long)func,
+ AARCH64_INSN_BRANCH_LINK);
return ftrace_modify_code(pc, 0, new, false);
}
@@ -72,7 +73,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
u32 old, new;
old = aarch64_insn_gen_nop();
- new = aarch64_insn_gen_branch_imm(pc, addr, true);
+ new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
return ftrace_modify_code(pc, old, new, true);
}
@@ -86,7 +87,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
unsigned long pc = rec->ip;
u32 old, new;
- old = aarch64_insn_gen_branch_imm(pc, addr, true);
+ old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
new = aarch64_insn_gen_nop();
return ftrace_modify_code(pc, old, new, true);
@@ -154,7 +155,8 @@ static int ftrace_modify_graph_caller(bool enable)
u32 branch, nop;
branch = aarch64_insn_gen_branch_imm(pc,
- (unsigned long)ftrace_graph_caller, false);
+ (unsigned long)ftrace_graph_caller,
+ AARCH64_INSN_BRANCH_LINK);
nop = aarch64_insn_gen_nop();
if (enable)
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 873069056229..0a6e4f924df8 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -151,7 +151,7 @@ optional_header:
.short 0x20b // PE32+ format
.byte 0x02 // MajorLinkerVersion
.byte 0x14 // MinorLinkerVersion
- .long _edata - stext // SizeOfCode
+ .long _end - stext // SizeOfCode
.long 0 // SizeOfInitializedData
.long 0 // SizeOfUninitializedData
.long efi_stub_entry - efi_head // AddressOfEntryPoint
@@ -169,7 +169,7 @@ extra_header_fields:
.short 0 // MinorSubsystemVersion
.long 0 // Win32VersionValue
- .long _edata - efi_head // SizeOfImage
+ .long _end - efi_head // SizeOfImage
// Everything before the kernel image is considered part of the header
.long stext - efi_head // SizeOfHeaders
@@ -216,7 +216,7 @@ section_table:
.byte 0
.byte 0
.byte 0 // end of 0 padding of section name
- .long _edata - stext // VirtualSize
+ .long _end - stext // VirtualSize
.long stext - efi_head // VirtualAddress
.long _edata - stext // SizeOfRawData
.long stext - efi_head // PointerToRawData
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 92f36835486b..e007714ded04 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -2,6 +2,8 @@
* Copyright (C) 2013 Huawei Ltd.
* Author: Jiang Liu <liuj97@gmail.com>
*
+ * Copyright (C) 2014 Zi Shen Lim <zlim.lnx@gmail.com>
+ *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
@@ -20,9 +22,14 @@
#include <linux/smp.h>
#include <linux/stop_machine.h>
#include <linux/uaccess.h>
+
#include <asm/cacheflush.h>
+#include <asm/debug-monitors.h>
#include <asm/insn.h>
+#define AARCH64_INSN_SF_BIT BIT(31)
+#define AARCH64_INSN_N_BIT BIT(22)
+
static int aarch64_insn_encoding_class[] = {
AARCH64_INSN_CLS_UNKNOWN,
AARCH64_INSN_CLS_UNKNOWN,
@@ -251,6 +258,19 @@ u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
mask = BIT(9) - 1;
shift = 12;
break;
+ case AARCH64_INSN_IMM_7:
+ mask = BIT(7) - 1;
+ shift = 15;
+ break;
+ case AARCH64_INSN_IMM_6:
+ case AARCH64_INSN_IMM_S:
+ mask = BIT(6) - 1;
+ shift = 10;
+ break;
+ case AARCH64_INSN_IMM_R:
+ mask = BIT(6) - 1;
+ shift = 16;
+ break;
default:
pr_err("aarch64_insn_encode_immediate: unknown immediate encoding %d\n",
type);
@@ -264,10 +284,76 @@ u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
return insn;
}
-u32 __kprobes aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr,
- enum aarch64_insn_branch_type type)
+static u32 aarch64_insn_encode_register(enum aarch64_insn_register_type type,
+ u32 insn,
+ enum aarch64_insn_register reg)
+{
+ int shift;
+
+ if (reg < AARCH64_INSN_REG_0 || reg > AARCH64_INSN_REG_SP) {
+ pr_err("%s: unknown register encoding %d\n", __func__, reg);
+ return 0;
+ }
+
+ switch (type) {
+ case AARCH64_INSN_REGTYPE_RT:
+ case AARCH64_INSN_REGTYPE_RD:
+ shift = 0;
+ break;
+ case AARCH64_INSN_REGTYPE_RN:
+ shift = 5;
+ break;
+ case AARCH64_INSN_REGTYPE_RT2:
+ case AARCH64_INSN_REGTYPE_RA:
+ shift = 10;
+ break;
+ case AARCH64_INSN_REGTYPE_RM:
+ shift = 16;
+ break;
+ default:
+ pr_err("%s: unknown register type encoding %d\n", __func__,
+ type);
+ return 0;
+ }
+
+ insn &= ~(GENMASK(4, 0) << shift);
+ insn |= reg << shift;
+
+ return insn;
+}
+
+static u32 aarch64_insn_encode_ldst_size(enum aarch64_insn_size_type type,
+ u32 insn)
+{
+ u32 size;
+
+ switch (type) {
+ case AARCH64_INSN_SIZE_8:
+ size = 0;
+ break;
+ case AARCH64_INSN_SIZE_16:
+ size = 1;
+ break;
+ case AARCH64_INSN_SIZE_32:
+ size = 2;
+ break;
+ case AARCH64_INSN_SIZE_64:
+ size = 3;
+ break;
+ default:
+ pr_err("%s: unknown size encoding %d\n", __func__, type);
+ return 0;
+ }
+
+ insn &= ~GENMASK(31, 30);
+ insn |= size << 30;
+
+ return insn;
+}
+
+static inline long branch_imm_common(unsigned long pc, unsigned long addr,
+ long range)
{
- u32 insn;
long offset;
/*
@@ -276,23 +362,97 @@ u32 __kprobes aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr,
*/
BUG_ON((pc & 0x3) || (addr & 0x3));
+ offset = ((long)addr - (long)pc);
+ BUG_ON(offset < -range || offset >= range);
+
+ return offset;
+}
+
+u32 __kprobes aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr,
+ enum aarch64_insn_branch_type type)
+{
+ u32 insn;
+ long offset;
+
/*
* B/BL support [-128M, 128M) offset
* ARM64 virtual address arrangement guarantees all kernel and module
* texts are within +/-128M.
*/
- offset = ((long)addr - (long)pc);
- BUG_ON(offset < -SZ_128M || offset >= SZ_128M);
+ offset = branch_imm_common(pc, addr, SZ_128M);
- if (type == AARCH64_INSN_BRANCH_LINK)
+ switch (type) {
+ case AARCH64_INSN_BRANCH_LINK:
insn = aarch64_insn_get_bl_value();
- else
+ break;
+ case AARCH64_INSN_BRANCH_NOLINK:
insn = aarch64_insn_get_b_value();
+ break;
+ default:
+ BUG_ON(1);
+ return AARCH64_BREAK_FAULT;
+ }
return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_26, insn,
offset >> 2);
}
+u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr,
+ enum aarch64_insn_register reg,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_branch_type type)
+{
+ u32 insn;
+ long offset;
+
+ offset = branch_imm_common(pc, addr, SZ_1M);
+
+ switch (type) {
+ case AARCH64_INSN_BRANCH_COMP_ZERO:
+ insn = aarch64_insn_get_cbz_value();
+ break;
+ case AARCH64_INSN_BRANCH_COMP_NONZERO:
+ insn = aarch64_insn_get_cbnz_value();
+ break;
+ default:
+ BUG_ON(1);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ switch (variant) {
+ case AARCH64_INSN_VARIANT_32BIT:
+ break;
+ case AARCH64_INSN_VARIANT_64BIT:
+ insn |= AARCH64_INSN_SF_BIT;
+ break;
+ default:
+ BUG_ON(1);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, reg);
+
+ return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_19, insn,
+ offset >> 2);
+}
+
+u32 aarch64_insn_gen_cond_branch_imm(unsigned long pc, unsigned long addr,
+ enum aarch64_insn_condition cond)
+{
+ u32 insn;
+ long offset;
+
+ offset = branch_imm_common(pc, addr, SZ_1M);
+
+ insn = aarch64_insn_get_bcond_value();
+
+ BUG_ON(cond < AARCH64_INSN_COND_EQ || cond > AARCH64_INSN_COND_AL);
+ insn |= cond;
+
+ return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_19, insn,
+ offset >> 2);
+}
+
u32 __kprobes aarch64_insn_gen_hint(enum aarch64_insn_hint_op op)
{
return aarch64_insn_get_hint_value() | op;
@@ -302,3 +462,500 @@ u32 __kprobes aarch64_insn_gen_nop(void)
{
return aarch64_insn_gen_hint(AARCH64_INSN_HINT_NOP);
}
+
+u32 aarch64_insn_gen_branch_reg(enum aarch64_insn_register reg,
+ enum aarch64_insn_branch_type type)
+{
+ u32 insn;
+
+ switch (type) {
+ case AARCH64_INSN_BRANCH_NOLINK:
+ insn = aarch64_insn_get_br_value();
+ break;
+ case AARCH64_INSN_BRANCH_LINK:
+ insn = aarch64_insn_get_blr_value();
+ break;
+ case AARCH64_INSN_BRANCH_RETURN:
+ insn = aarch64_insn_get_ret_value();
+ break;
+ default:
+ BUG_ON(1);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, reg);
+}
+
+u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg,
+ enum aarch64_insn_register base,
+ enum aarch64_insn_register offset,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_ldst_type type)
+{
+ u32 insn;
+
+ switch (type) {
+ case AARCH64_INSN_LDST_LOAD_REG_OFFSET:
+ insn = aarch64_insn_get_ldr_reg_value();
+ break;
+ case AARCH64_INSN_LDST_STORE_REG_OFFSET:
+ insn = aarch64_insn_get_str_reg_value();
+ break;
+ default:
+ BUG_ON(1);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ insn = aarch64_insn_encode_ldst_size(size, insn);
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, reg);
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
+ base);
+
+ return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn,
+ offset);
+}
+
+u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
+ enum aarch64_insn_register reg2,
+ enum aarch64_insn_register base,
+ int offset,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_ldst_type type)
+{
+ u32 insn;
+ int shift;
+
+ switch (type) {
+ case AARCH64_INSN_LDST_LOAD_PAIR_PRE_INDEX:
+ insn = aarch64_insn_get_ldp_pre_value();
+ break;
+ case AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX:
+ insn = aarch64_insn_get_stp_pre_value();
+ break;
+ case AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX:
+ insn = aarch64_insn_get_ldp_post_value();
+ break;
+ case AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX:
+ insn = aarch64_insn_get_stp_post_value();
+ break;
+ default:
+ BUG_ON(1);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ switch (variant) {
+ case AARCH64_INSN_VARIANT_32BIT:
+ /* offset must be a multiple of 4 in the range [-256, 252] */
+ BUG_ON(offset & 0x3);
+ BUG_ON(offset < -256 || offset > 252);
+ shift = 2;
+ break;
+ case AARCH64_INSN_VARIANT_64BIT:
+ /* offset must be a multiple of 8 in the range [-512, 504] */
+ BUG_ON(offset & 0x7);
+ BUG_ON(offset < -512 || offset > 504);
+ shift = 3;
+ insn |= AARCH64_INSN_SF_BIT;
+ break;
+ default:
+ BUG_ON(1);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn,
+ reg1);
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT2, insn,
+ reg2);
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
+ base);
+
+ return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_7, insn,
+ offset >> shift);
+}
+
+u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
+ enum aarch64_insn_register src,
+ int imm, enum aarch64_insn_variant variant,
+ enum aarch64_insn_adsb_type type)
+{
+ u32 insn;
+
+ switch (type) {
+ case AARCH64_INSN_ADSB_ADD:
+ insn = aarch64_insn_get_add_imm_value();
+ break;
+ case AARCH64_INSN_ADSB_SUB:
+ insn = aarch64_insn_get_sub_imm_value();
+ break;
+ case AARCH64_INSN_ADSB_ADD_SETFLAGS:
+ insn = aarch64_insn_get_adds_imm_value();
+ break;
+ case AARCH64_INSN_ADSB_SUB_SETFLAGS:
+ insn = aarch64_insn_get_subs_imm_value();
+ break;
+ default:
+ BUG_ON(1);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ switch (variant) {
+ case AARCH64_INSN_VARIANT_32BIT:
+ break;
+ case AARCH64_INSN_VARIANT_64BIT:
+ insn |= AARCH64_INSN_SF_BIT;
+ break;
+ default:
+ BUG_ON(1);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ BUG_ON(imm & ~(SZ_4K - 1));
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
+
+ return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_12, insn, imm);
+}
+
+u32 aarch64_insn_gen_bitfield(enum aarch64_insn_register dst,
+ enum aarch64_insn_register src,
+ int immr, int imms,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_bitfield_type type)
+{
+ u32 insn;
+ u32 mask;
+
+ switch (type) {
+ case AARCH64_INSN_BITFIELD_MOVE:
+ insn = aarch64_insn_get_bfm_value();
+ break;
+ case AARCH64_INSN_BITFIELD_MOVE_UNSIGNED:
+ insn = aarch64_insn_get_ubfm_value();
+ break;
+ case AARCH64_INSN_BITFIELD_MOVE_SIGNED:
+ insn = aarch64_insn_get_sbfm_value();
+ break;
+ default:
+ BUG_ON(1);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ switch (variant) {
+ case AARCH64_INSN_VARIANT_32BIT:
+ mask = GENMASK(4, 0);
+ break;
+ case AARCH64_INSN_VARIANT_64BIT:
+ insn |= AARCH64_INSN_SF_BIT | AARCH64_INSN_N_BIT;
+ mask = GENMASK(5, 0);
+ break;
+ default:
+ BUG_ON(1);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ BUG_ON(immr & ~mask);
+ BUG_ON(imms & ~mask);
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
+
+ insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_R, insn, immr);
+
+ return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_S, insn, imms);
+}
+
+u32 aarch64_insn_gen_movewide(enum aarch64_insn_register dst,
+ int imm, int shift,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_movewide_type type)
+{
+ u32 insn;
+
+ switch (type) {
+ case AARCH64_INSN_MOVEWIDE_ZERO:
+ insn = aarch64_insn_get_movz_value();
+ break;
+ case AARCH64_INSN_MOVEWIDE_KEEP:
+ insn = aarch64_insn_get_movk_value();
+ break;
+ case AARCH64_INSN_MOVEWIDE_INVERSE:
+ insn = aarch64_insn_get_movn_value();
+ break;
+ default:
+ BUG_ON(1);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ BUG_ON(imm & ~(SZ_64K - 1));
+
+ switch (variant) {
+ case AARCH64_INSN_VARIANT_32BIT:
+ BUG_ON(shift != 0 && shift != 16);
+ break;
+ case AARCH64_INSN_VARIANT_64BIT:
+ insn |= AARCH64_INSN_SF_BIT;
+ BUG_ON(shift != 0 && shift != 16 && shift != 32 &&
+ shift != 48);
+ break;
+ default:
+ BUG_ON(1);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ insn |= (shift >> 4) << 21;
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
+
+ return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm);
+}
+
+u32 aarch64_insn_gen_add_sub_shifted_reg(enum aarch64_insn_register dst,
+ enum aarch64_insn_register src,
+ enum aarch64_insn_register reg,
+ int shift,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_adsb_type type)
+{
+ u32 insn;
+
+ switch (type) {
+ case AARCH64_INSN_ADSB_ADD:
+ insn = aarch64_insn_get_add_value();
+ break;
+ case AARCH64_INSN_ADSB_SUB:
+ insn = aarch64_insn_get_sub_value();
+ break;
+ case AARCH64_INSN_ADSB_ADD_SETFLAGS:
+ insn = aarch64_insn_get_adds_value();
+ break;
+ case AARCH64_INSN_ADSB_SUB_SETFLAGS:
+ insn = aarch64_insn_get_subs_value();
+ break;
+ default:
+ BUG_ON(1);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ switch (variant) {
+ case AARCH64_INSN_VARIANT_32BIT:
+ BUG_ON(shift & ~(SZ_32 - 1));
+ break;
+ case AARCH64_INSN_VARIANT_64BIT:
+ insn |= AARCH64_INSN_SF_BIT;
+ BUG_ON(shift & ~(SZ_64 - 1));
+ break;
+ default:
+ BUG_ON(1);
+ return AARCH64_BREAK_FAULT;
+ }
+
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, reg);
+
+ return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_6, insn, shift);
+}
+
+u32 aarch64_insn_gen_data1(enum aarch64_insn_register dst,
+ enum aarch64_insn_register src,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_data1_type type)
+{
+ u32 insn;
+
+ switch (type) {
+ case AARCH64_INSN_DATA1_REVERSE_16:
+ insn = aarch64_insn_get_rev16_value();
+ break;
+ case AARCH64_INSN_DATA1_REVERSE_32:
+ insn = aarch64_insn_get_rev32_value();
+ break;
+ case AARCH64_INSN_DATA1_REVERSE_64:
+ BUG_ON(variant != AARCH64_INSN_VARIANT_64BIT);
+ insn = aarch64_insn_get_rev64_value();
+ break;
+ default:
+ BUG_ON(1);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ switch (variant) {
+ case AARCH64_INSN_VARIANT_32BIT:
+ break;
+ case AARCH64_INSN_VARIANT_64BIT:
+ insn |= AARCH64_INSN_SF_BIT;
+ break;
+ default:
+ BUG_ON(1);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
+
+ return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
+}
+
+u32 aarch64_insn_gen_data2(enum aarch64_insn_register dst,
+ enum aarch64_insn_register src,
+ enum aarch64_insn_register reg,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_data2_type type)
+{
+ u32 insn;
+
+ switch (type) {
+ case AARCH64_INSN_DATA2_UDIV:
+ insn = aarch64_insn_get_udiv_value();
+ break;
+ case AARCH64_INSN_DATA2_SDIV:
+ insn = aarch64_insn_get_sdiv_value();
+ break;
+ case AARCH64_INSN_DATA2_LSLV:
+ insn = aarch64_insn_get_lslv_value();
+ break;
+ case AARCH64_INSN_DATA2_LSRV:
+ insn = aarch64_insn_get_lsrv_value();
+ break;
+ case AARCH64_INSN_DATA2_ASRV:
+ insn = aarch64_insn_get_asrv_value();
+ break;
+ case AARCH64_INSN_DATA2_RORV:
+ insn = aarch64_insn_get_rorv_value();
+ break;
+ default:
+ BUG_ON(1);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ switch (variant) {
+ case AARCH64_INSN_VARIANT_32BIT:
+ break;
+ case AARCH64_INSN_VARIANT_64BIT:
+ insn |= AARCH64_INSN_SF_BIT;
+ break;
+ default:
+ BUG_ON(1);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
+
+ return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, reg);
+}
+
+u32 aarch64_insn_gen_data3(enum aarch64_insn_register dst,
+ enum aarch64_insn_register src,
+ enum aarch64_insn_register reg1,
+ enum aarch64_insn_register reg2,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_data3_type type)
+{
+ u32 insn;
+
+ switch (type) {
+ case AARCH64_INSN_DATA3_MADD:
+ insn = aarch64_insn_get_madd_value();
+ break;
+ case AARCH64_INSN_DATA3_MSUB:
+ insn = aarch64_insn_get_msub_value();
+ break;
+ default:
+ BUG_ON(1);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ switch (variant) {
+ case AARCH64_INSN_VARIANT_32BIT:
+ break;
+ case AARCH64_INSN_VARIANT_64BIT:
+ insn |= AARCH64_INSN_SF_BIT;
+ break;
+ default:
+ BUG_ON(1);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RA, insn, src);
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
+ reg1);
+
+ return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn,
+ reg2);
+}
+
+u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst,
+ enum aarch64_insn_register src,
+ enum aarch64_insn_register reg,
+ int shift,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_logic_type type)
+{
+ u32 insn;
+
+ switch (type) {
+ case AARCH64_INSN_LOGIC_AND:
+ insn = aarch64_insn_get_and_value();
+ break;
+ case AARCH64_INSN_LOGIC_BIC:
+ insn = aarch64_insn_get_bic_value();
+ break;
+ case AARCH64_INSN_LOGIC_ORR:
+ insn = aarch64_insn_get_orr_value();
+ break;
+ case AARCH64_INSN_LOGIC_ORN:
+ insn = aarch64_insn_get_orn_value();
+ break;
+ case AARCH64_INSN_LOGIC_EOR:
+ insn = aarch64_insn_get_eor_value();
+ break;
+ case AARCH64_INSN_LOGIC_EON:
+ insn = aarch64_insn_get_eon_value();
+ break;
+ case AARCH64_INSN_LOGIC_AND_SETFLAGS:
+ insn = aarch64_insn_get_ands_value();
+ break;
+ case AARCH64_INSN_LOGIC_BIC_SETFLAGS:
+ insn = aarch64_insn_get_bics_value();
+ break;
+ default:
+ BUG_ON(1);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ switch (variant) {
+ case AARCH64_INSN_VARIANT_32BIT:
+ BUG_ON(shift & ~(SZ_32 - 1));
+ break;
+ case AARCH64_INSN_VARIANT_64BIT:
+ insn |= AARCH64_INSN_SF_BIT;
+ BUG_ON(shift & ~(SZ_64 - 1));
+ break;
+ default:
+ BUG_ON(1);
+ return AARCH64_BREAK_FAULT;
+ }
+
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, reg);
+
+ return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_6, insn, shift);
+}
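
A hedged sketch of how a consumer such as the new arm64 BPF JIT might drive these generators, emitting "add x0, x1, x2" followed by "ret" into a small buffer (the helper and the buffer handling are invented for illustration):

	/* Illustrative emitter only; not part of this patch. */
	static void emit_example(u32 *buf)
	{
		buf[0] = aarch64_insn_gen_add_sub_shifted_reg(AARCH64_INSN_REG_0,
							      AARCH64_INSN_REG_1,
							      AARCH64_INSN_REG_2,
							      0,
							      AARCH64_INSN_VARIANT_64BIT,
							      AARCH64_INSN_ADSB_ADD);
		buf[1] = aarch64_insn_gen_branch_reg(AARCH64_INSN_REG_LR,
						     AARCH64_INSN_BRANCH_RETURN);
	}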
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index 75c9cf1aafee..a0d10c55f307 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -235,13 +235,13 @@ static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr)
static struct break_hook kgdb_brkpt_hook = {
.esr_mask = 0xffffffff,
- .esr_val = DBG_ESR_VAL_BRK(KGDB_DYN_DGB_BRK_IMM),
+ .esr_val = DBG_ESR_VAL_BRK(KGDB_DYN_DBG_BRK_IMM),
.fn = kgdb_brk_fn
};
static struct break_hook kgdb_compiled_brkpt_hook = {
.esr_mask = 0xffffffff,
- .esr_val = DBG_ESR_VAL_BRK(KDBG_COMPILED_DBG_BRK_IMM),
+ .esr_val = DBG_ESR_VAL_BRK(KGDB_COMPILED_DBG_BRK_IMM),
.fn = kgdb_compiled_brk_fn
};
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index baf5afb7e6a0..aa29ecb4f800 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -1276,7 +1276,7 @@ arch_initcall(cpu_pmu_reset);
/*
* PMU platform driver and devicetree bindings.
*/
-static struct of_device_id armpmu_of_device_ids[] = {
+static const struct of_device_id armpmu_of_device_ids[] = {
{.compatible = "arm,armv8-pmuv3"},
{},
};
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 29d48690f2ac..89f41f7d27dd 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -57,36 +57,10 @@ unsigned long __stack_chk_guard __read_mostly;
EXPORT_SYMBOL(__stack_chk_guard);
#endif
-static void setup_restart(void)
-{
- /*
- * Tell the mm system that we are going to reboot -
- * we may need it to insert some 1:1 mappings so that
- * soft boot works.
- */
- setup_mm_for_reboot();
-
- /* Clean and invalidate caches */
- flush_cache_all();
-
- /* Turn D-cache off */
- cpu_cache_off();
-
- /* Push out any further dirty data, and ensure cache is empty */
- flush_cache_all();
-}
-
void soft_restart(unsigned long addr)
{
- typedef void (*phys_reset_t)(unsigned long);
- phys_reset_t phys_reset;
-
- setup_restart();
-
- /* Switch to the identity mapping */
- phys_reset = (phys_reset_t)virt_to_phys(cpu_reset);
- phys_reset(addr);
-
+ setup_mm_for_reboot();
+ cpu_soft_restart(virt_to_phys(cpu_reset), addr);
/* Should never get here */
BUG();
}
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index 553954771a67..866c1c821860 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -21,6 +21,7 @@
#include <linux/reboot.h>
#include <linux/pm.h>
#include <linux/delay.h>
+#include <linux/slab.h>
#include <uapi/linux/psci.h>
#include <asm/compiler.h>
@@ -28,6 +29,7 @@
#include <asm/errno.h>
#include <asm/psci.h>
#include <asm/smp_plat.h>
+#include <asm/suspend.h>
#include <asm/system_misc.h>
#define PSCI_POWER_STATE_TYPE_STANDBY 0
@@ -65,6 +67,8 @@ enum psci_function {
PSCI_FN_MAX,
};
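+/*
+ * Per-cpu array of PSCI power states parsed from the DT. Entry i backs
+ * CPUidle state i + 1; state 0 is plain WFI and needs no PSCI parameter.
+ */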
+static DEFINE_PER_CPU_READ_MOSTLY(struct psci_power_state *, psci_power_state);
+
static u32 psci_function_id[PSCI_FN_MAX];
static int psci_to_linux_errno(int errno)
@@ -93,6 +97,18 @@ static u32 psci_power_state_pack(struct psci_power_state state)
& PSCI_0_2_POWER_STATE_AFFL_MASK);
}
+static void psci_power_state_unpack(u32 power_state,
+ struct psci_power_state *state)
+{
+ state->id = (power_state & PSCI_0_2_POWER_STATE_ID_MASK) >>
+ PSCI_0_2_POWER_STATE_ID_SHIFT;
+ state->type = (power_state & PSCI_0_2_POWER_STATE_TYPE_MASK) >>
+ PSCI_0_2_POWER_STATE_TYPE_SHIFT;
+ state->affinity_level =
+ (power_state & PSCI_0_2_POWER_STATE_AFFL_MASK) >>
+ PSCI_0_2_POWER_STATE_AFFL_SHIFT;
+}
+
/*
* The following two functions are invoked via the invoke_psci_fn pointer
* and will not be inlined, allowing us to piggyback on the AAPCS.
@@ -199,6 +215,63 @@ static int psci_migrate_info_type(void)
return err;
}
+static int __maybe_unused cpu_psci_cpu_init_idle(struct device_node *cpu_node,
+ unsigned int cpu)
+{
+ int i, ret, count = 0;
+ struct psci_power_state *psci_states;
+ struct device_node *state_node;
+
+ /*
+ * If the PSCI cpu_suspend function hook has not been initialized,
+ * idle states must not be enabled, so bail out
+ */
+ if (!psci_ops.cpu_suspend)
+ return -EOPNOTSUPP;
+
+ /* Count idle states */
+ while ((state_node = of_parse_phandle(cpu_node, "cpu-idle-states",
+ count))) {
+ count++;
+ of_node_put(state_node);
+ }
+
+ if (!count)
+ return -ENODEV;
+
+ psci_states = kcalloc(count, sizeof(*psci_states), GFP_KERNEL);
+ if (!psci_states)
+ return -ENOMEM;
+
+ for (i = 0; i < count; i++) {
+ u32 psci_power_state;
+
+ state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i);
+
+ ret = of_property_read_u32(state_node,
+ "arm,psci-suspend-param",
+ &psci_power_state);
+ if (ret) {
+ pr_warn(" * %s missing arm,psci-suspend-param property\n",
+ state_node->full_name);
+ of_node_put(state_node);
+ goto free_mem;
+ }
+
+ of_node_put(state_node);
+ pr_debug("psci-power-state %#x index %d\n", psci_power_state,
+ i);
+ psci_power_state_unpack(psci_power_state, &psci_states[i]);
+ }
+ /* Idle states parsed correctly, initialize per-cpu pointer */
+ per_cpu(psci_power_state, cpu) = psci_states;
+ return 0;
+
+free_mem:
+ kfree(psci_states);
+ return ret;
+}
+
static int get_set_conduit_method(struct device_node *np)
{
const char *method;
@@ -436,8 +509,39 @@ static int cpu_psci_cpu_kill(unsigned int cpu)
#endif
#endif
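+/* Suspend finisher: enter the PSCI power state, resuming from cpu_resume(). */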
+static int psci_suspend_finisher(unsigned long index)
+{
+ struct psci_power_state *state = __get_cpu_var(psci_power_state);
+
+ return psci_ops.cpu_suspend(state[index - 1],
+ virt_to_phys(cpu_resume));
+}
+
+static int __maybe_unused cpu_psci_cpu_suspend(unsigned long index)
+{
+ int ret;
+ struct psci_power_state *state = __get_cpu_var(psci_power_state);
+ /*
+ * idle state index 0 corresponds to wfi, should never be called
+ * from the cpu_suspend operations
+ */
+ if (WARN_ON_ONCE(!index))
+ return -EINVAL;
+
+ if (state->type == PSCI_POWER_STATE_TYPE_STANDBY)
+ ret = psci_ops.cpu_suspend(state[index - 1], 0);
+ else
+ ret = __cpu_suspend(index, psci_suspend_finisher);
+
+ return ret;
+}
+
const struct cpu_operations cpu_psci_ops = {
.name = "psci",
+#ifdef CONFIG_CPU_IDLE
+ .cpu_init_idle = cpu_psci_cpu_init_idle,
+ .cpu_suspend = cpu_psci_cpu_suspend,
+#endif
#ifdef CONFIG_SMP
.cpu_init = cpu_psci_cpu_init,
.cpu_prepare = cpu_psci_cpu_prepare,
diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c
index 89102a6ffad5..6c4fd2810ecb 100644
--- a/arch/arm64/kernel/return_address.c
+++ b/arch/arm64/kernel/return_address.c
@@ -36,13 +36,12 @@ void *return_address(unsigned int level)
{
struct return_address_data data;
struct stackframe frame;
- register unsigned long current_sp asm ("sp");
data.level = level + 2;
data.addr = NULL;
frame.fp = (unsigned long)__builtin_frame_address(0);
- frame.sp = current_sp;
+ frame.sp = current_stack_pointer;
frame.pc = (unsigned long)return_address; /* dummy */
walk_stackframe(&frame, save_return_addr, &data);
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index edb146d01857..2437196cc5d4 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -365,11 +365,6 @@ u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
void __init setup_arch(char **cmdline_p)
{
- /*
- * Unmask asynchronous aborts early to catch possible system errors.
- */
- local_async_enable();
-
setup_processor();
setup_machine_fdt(__fdt_pointer);
@@ -385,6 +380,12 @@ void __init setup_arch(char **cmdline_p)
parse_early_param();
+ /*
+ * Unmask asynchronous aborts after bringing up possible earlycon.
+ * (so that possible System Errors are reported once we are able to report them)
+ */
+ local_async_enable();
+
efi_init();
arm64_memblock_init();
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index b1925729c692..a564b440416a 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -49,28 +49,39 @@
orr \dst, \dst, \mask // dst|=(aff3>>rs3)
.endm
/*
- * Save CPU state for a suspend. This saves callee registers, and allocates
- * space on the kernel stack to save the CPU specific registers + some
- * other data for resume.
+ * Save CPU state for a suspend and execute the suspend finisher.
+ * On success it will return 0 through cpu_resume - i.e. through a CPU
+ * soft/hard reboot from the reset vector.
+ * On failure it returns the suspend finisher return value, or forces
+ * -EOPNOTSUPP if the finisher erroneously returns 0 (the suspend finisher
+ * is not allowed to return; if it does, this must be considered a failure).
+ * It saves callee registers, and allocates space on the kernel stack
+ * to save the CPU specific registers + some other data for resume.
*
* x0 = suspend finisher argument
+ * x1 = suspend finisher function pointer
*/
-ENTRY(__cpu_suspend)
+ENTRY(__cpu_suspend_enter)
stp x29, lr, [sp, #-96]!
stp x19, x20, [sp,#16]
stp x21, x22, [sp,#32]
stp x23, x24, [sp,#48]
stp x25, x26, [sp,#64]
stp x27, x28, [sp,#80]
+ /*
+ * Stash suspend finisher and its argument in x20 and x19
+ */
+ mov x19, x0
+ mov x20, x1
mov x2, sp
sub sp, sp, #CPU_SUSPEND_SZ // allocate cpu_suspend_ctx
- mov x1, sp
+ mov x0, sp
/*
- * x1 now points to struct cpu_suspend_ctx allocated on the stack
+ * x0 now points to struct cpu_suspend_ctx allocated on the stack
*/
- str x2, [x1, #CPU_CTX_SP]
- ldr x2, =sleep_save_sp
- ldr x2, [x2, #SLEEP_SAVE_SP_VIRT]
+ str x2, [x0, #CPU_CTX_SP]
+ ldr x1, =sleep_save_sp
+ ldr x1, [x1, #SLEEP_SAVE_SP_VIRT]
#ifdef CONFIG_SMP
mrs x7, mpidr_el1
ldr x9, =mpidr_hash
@@ -82,11 +93,21 @@ ENTRY(__cpu_suspend)
ldp w3, w4, [x9, #MPIDR_HASH_SHIFTS]
ldp w5, w6, [x9, #(MPIDR_HASH_SHIFTS + 8)]
compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10
- add x2, x2, x8, lsl #3
+ add x1, x1, x8, lsl #3
#endif
- bl __cpu_suspend_finisher
+ bl __cpu_suspend_save
+ /*
+ * Grab suspend finisher in x20 and its argument in x19
+ */
+ mov x0, x19
+ mov x1, x20
+ /*
+ * We are ready for power down, fire off the suspend finisher
+ * in x1, with argument in x0
+ */
+ blr x1
/*
- * Never gets here, unless suspend fails.
+ * Never gets here, unless suspend finisher fails.
* Successful cpu_suspend should return from cpu_resume, returning
* through this code path is considered an error
* If the return value is set to 0 force x0 = -EOPNOTSUPP
@@ -103,7 +124,7 @@ ENTRY(__cpu_suspend)
ldp x27, x28, [sp, #80]
ldp x29, lr, [sp], #96
ret
-ENDPROC(__cpu_suspend)
+ENDPROC(__cpu_suspend_enter)
.ltorg
/*
diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
index 0347d38eea29..4f93c67e63de 100644
--- a/arch/arm64/kernel/smp_spin_table.c
+++ b/arch/arm64/kernel/smp_spin_table.c
@@ -20,6 +20,7 @@
#include <linux/init.h>
#include <linux/of.h>
#include <linux/smp.h>
+#include <linux/types.h>
#include <asm/cacheflush.h>
#include <asm/cpu_ops.h>
@@ -65,12 +66,21 @@ static int smp_spin_table_cpu_init(struct device_node *dn, unsigned int cpu)
static int smp_spin_table_cpu_prepare(unsigned int cpu)
{
- void **release_addr;
+ __le64 __iomem *release_addr;
if (!cpu_release_addr[cpu])
return -ENODEV;
- release_addr = __va(cpu_release_addr[cpu]);
+ /*
+ * The cpu-release-addr may or may not be inside the linear mapping.
+ * As ioremap_cache will either give us a new mapping or reuse the
+ * existing linear mapping, we can use it to cover both cases. In
+ * either case the memory will be MT_NORMAL.
+ */
+ release_addr = ioremap_cache(cpu_release_addr[cpu],
+ sizeof(*release_addr));
+ if (!release_addr)
+ return -ENOMEM;
/*
* We write the release address as LE regardless of the native
@@ -79,15 +89,17 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu)
 * boot-loader's endianness before jumping. This is mandated by
* the boot protocol.
*/
- release_addr[0] = (void *) cpu_to_le64(__pa(secondary_holding_pen));
-
- __flush_dcache_area(release_addr, sizeof(release_addr[0]));
+ writeq_relaxed(__pa(secondary_holding_pen), release_addr);
+ __flush_dcache_area((__force void *)release_addr,
+ sizeof(*release_addr));
/*
* Send an event to wake up the secondary CPU.
*/
sev();
+ iounmap(release_addr);
+
return 0;
}
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 55437ba1f5a4..407991bf79f5 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -111,10 +111,9 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
frame.sp = thread_saved_sp(tsk);
frame.pc = thread_saved_pc(tsk);
} else {
- register unsigned long current_sp asm("sp");
data.no_sched_functions = 0;
frame.fp = (unsigned long)__builtin_frame_address(0);
- frame.sp = current_sp;
+ frame.sp = current_stack_pointer;
frame.pc = (unsigned long)save_stack_trace_tsk;
}
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 55a99b9a97e0..13ad4dbb1615 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -9,22 +9,19 @@
#include <asm/suspend.h>
#include <asm/tlbflush.h>
-extern int __cpu_suspend(unsigned long);
+extern int __cpu_suspend_enter(unsigned long arg, int (*fn)(unsigned long));
/*
- * This is called by __cpu_suspend() to save the state, and do whatever
+ * This is called by __cpu_suspend_enter() to save the state, and do whatever
* flushing is required to ensure that when the CPU goes to sleep we have
* the necessary data available when the caches are not searched.
*
- * @arg: Argument to pass to suspend operations
- * @ptr: CPU context virtual address
- * @save_ptr: address of the location where the context physical address
- * must be saved
+ * ptr: CPU context virtual address
+ * save_ptr: address of the location where the context physical address
+ * must be saved
*/
-int __cpu_suspend_finisher(unsigned long arg, struct cpu_suspend_ctx *ptr,
- phys_addr_t *save_ptr)
+void notrace __cpu_suspend_save(struct cpu_suspend_ctx *ptr,
+ phys_addr_t *save_ptr)
{
- int cpu = smp_processor_id();
-
*save_ptr = virt_to_phys(ptr);
cpu_do_suspend(ptr);
@@ -35,8 +32,6 @@ int __cpu_suspend_finisher(unsigned long arg, struct cpu_suspend_ctx *ptr,
*/
__flush_dcache_area(ptr, sizeof(*ptr));
__flush_dcache_area(save_ptr, sizeof(*save_ptr));
-
- return cpu_ops[cpu]->cpu_suspend(arg);
}
/*
@@ -56,15 +51,15 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *))
}
/**
- * cpu_suspend
+ * cpu_suspend() - function to enter a low-power state
+ * @arg: argument to pass to CPU suspend operations
*
- * @arg: argument to pass to the finisher function
+ * Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU
+ * operations back-end error code otherwise.
*/
int cpu_suspend(unsigned long arg)
{
- struct mm_struct *mm = current->active_mm;
- int ret, cpu = smp_processor_id();
- unsigned long flags;
+ int cpu = smp_processor_id();
/*
* If cpu_ops have not been registered or suspend
@@ -72,6 +67,21 @@ int cpu_suspend(unsigned long arg)
*/
if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_suspend)
return -EOPNOTSUPP;
+ return cpu_ops[cpu]->cpu_suspend(arg);
+}
+
+/*
+ * __cpu_suspend
+ *
+ * arg: argument to pass to the finisher function
+ * fn: finisher function pointer
+ *
+ */
+int __cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
+{
+ struct mm_struct *mm = current->active_mm;
+ int ret;
+ unsigned long flags;
/*
* From this point debug exceptions are disabled to prevent
@@ -86,7 +96,7 @@ int cpu_suspend(unsigned long arg)
* page tables, so that the thread address space is properly
* set-up on function return.
*/
- ret = __cpu_suspend(arg);
+ ret = __cpu_suspend_enter(arg, fn);
if (ret == 0) {
cpu_switch_mm(mm->pgd, mm);
flush_tlb_all();
@@ -95,7 +105,7 @@ int cpu_suspend(unsigned long arg)
* Restore per-cpu offset before any kernel
* subsystem relying on it has a chance to run.
*/
- set_my_cpu_offset(per_cpu_offset(cpu));
+ set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
/*
* Restore HW breakpoint registers to sane values
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 02cd3f023e9a..de1b085e7963 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -132,7 +132,6 @@ static void dump_instr(const char *lvl, struct pt_regs *regs)
static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
{
struct stackframe frame;
- const register unsigned long current_sp asm ("sp");
pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
@@ -145,7 +144,7 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
frame.pc = regs->pc;
} else if (tsk == current) {
frame.fp = (unsigned long)__builtin_frame_address(0);
- frame.sp = current_sp;
+ frame.sp = current_stack_pointer;
frame.pc = (unsigned long)dump_backtrace;
} else {
/*
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 97f0c0429dfa..edf8715ba39b 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -97,9 +97,9 @@ SECTIONS
PERCPU_SECTION(64)
+ . = ALIGN(PAGE_SIZE);
__init_end = .;
- . = ALIGN(PAGE_SIZE);
_data = .;
_sdata = .;
RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE)
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 8d1ec2887a26..76794692c20b 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -174,7 +174,7 @@ static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
if (ret != 0)
- return ret;
+ return -EFAULT;
return kvm_arm_timer_set_reg(vcpu, reg->id, val);
}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 5805e7c4a4dd..4cc3b719208e 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1218,7 +1218,7 @@ static bool is_valid_cache(u32 val)
u32 level, ctype;
if (val >= CSSELR_MAX)
- return -ENOENT;
+ return false;
/* Bottom bit is Instruction or Data bit. Next 3 bits are level. */
level = (val >> 1);
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index 3ecb56c624d3..c56179ed2c09 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -1,5 +1,5 @@
obj-y := dma-mapping.o extable.o fault.o init.o \
cache.o copypage.o flush.o \
ioremap.o mmap.o pgd.o mmu.o \
- context.o proc.o
+ context.o proc.o pageattr.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 4164c5ace9f8..2c71077cacfd 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -22,11 +22,8 @@
#include <linux/slab.h>
#include <linux/dma-mapping.h>
#include <linux/dma-contiguous.h>
-#include <linux/of.h>
-#include <linux/platform_device.h>
#include <linux/vmalloc.h>
#include <linux/swiotlb.h>
-#include <linux/amba/bus.h>
#include <asm/cacheflush.h>
@@ -125,7 +122,7 @@ static void *__dma_alloc_noncoherent(struct device *dev, size_t size,
no_map:
__dma_free_coherent(dev, size, ptr, *dma_handle, attrs);
no_mem:
- *dma_handle = ~0;
+ *dma_handle = DMA_ERROR_CODE;
return NULL;
}
@@ -308,40 +305,12 @@ struct dma_map_ops coherent_swiotlb_dma_ops = {
};
EXPORT_SYMBOL(coherent_swiotlb_dma_ops);
-static int dma_bus_notifier(struct notifier_block *nb,
- unsigned long event, void *_dev)
-{
- struct device *dev = _dev;
-
- if (event != BUS_NOTIFY_ADD_DEVICE)
- return NOTIFY_DONE;
-
- if (of_property_read_bool(dev->of_node, "dma-coherent"))
- set_dma_ops(dev, &coherent_swiotlb_dma_ops);
-
- return NOTIFY_OK;
-}
-
-static struct notifier_block platform_bus_nb = {
- .notifier_call = dma_bus_notifier,
-};
-
-static struct notifier_block amba_bus_nb = {
- .notifier_call = dma_bus_notifier,
-};
-
extern int swiotlb_late_init_with_default_size(size_t default_size);
static int __init swiotlb_late_init(void)
{
size_t swiotlb_size = min(SZ_64M, MAX_ORDER_NR_PAGES << PAGE_SHIFT);
- /*
- * These must be registered before of_platform_populate().
- */
- bus_register_notifier(&platform_bus_type, &platform_bus_nb);
- bus_register_notifier(&amba_bustype, &amba_bus_nb);
-
dma_ops = &noncoherent_swiotlb_dma_ops;
return swiotlb_late_init_with_default_size(swiotlb_size);
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index a83061f37e43..494297c698ca 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -255,7 +255,7 @@ static void __init free_unused_memmap(void)
*/
void __init mem_init(void)
{
- max_mapnr = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map;
+ set_max_mapnr(pfn_to_page(max_pfn) - mem_map);
#ifndef CONFIG_SPARSEMEM_VMEMMAP
free_unused_memmap();
@@ -333,8 +333,14 @@ static int keep_initrd;
void free_initrd_mem(unsigned long start, unsigned long end)
{
- if (!keep_initrd)
+ if (!keep_initrd) {
+ if (start == initrd_start)
+ start = round_down(start, PAGE_SIZE);
+ if (end == initrd_end)
+ end = round_up(end, PAGE_SIZE);
+
free_reserved_area((void *)start, (void *)end, 0, "initrd");
+ }
}
static int __init keepinitrd_setup(char *__unused)
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index 8ed6cb1a900f..1d73662f00ff 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -102,7 +102,7 @@ EXPORT_SYMBOL_GPL(arch_pick_mmap_layout);
* You really shouldn't be using read() or write() on /dev/mem. This might go
* away in the future.
*/
-int valid_phys_addr_range(unsigned long addr, size_t size)
+int valid_phys_addr_range(phys_addr_t addr, size_t size)
{
if (addr < PHYS_OFFSET)
return 0;
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index c55567283cde..6894ef3e6234 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -94,7 +94,7 @@ static int __init early_cachepolicy(char *p)
*/
asm volatile(
" mrs %0, mair_el1\n"
- " bfi %0, %1, #%2, #8\n"
+ " bfi %0, %1, %2, #8\n"
" msr mair_el1, %0\n"
" isb\n"
: "=&r" (tmp)
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
new file mode 100644
index 000000000000..bb0ea94c4ba1
--- /dev/null
+++ b/arch/arm64/mm/pageattr.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2014, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+struct page_change_data {
+ pgprot_t set_mask;
+ pgprot_t clear_mask;
+};
+
+static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr,
+ void *data)
+{
+ struct page_change_data *cdata = data;
+ pte_t pte = *ptep;
+
+ pte = clear_pte_bit(pte, cdata->clear_mask);
+ pte = set_pte_bit(pte, cdata->set_mask);
+
+ set_pte(ptep, pte);
+ return 0;
+}
+
+static int change_memory_common(unsigned long addr, int numpages,
+ pgprot_t set_mask, pgprot_t clear_mask)
+{
+ unsigned long start = addr;
+ unsigned long size = PAGE_SIZE*numpages;
+ unsigned long end = start + size;
+ int ret;
+ struct page_change_data data;
+
+ if (!IS_ALIGNED(addr, PAGE_SIZE)) {
+ start &= PAGE_MASK;
+ end = start + size;
+ WARN_ON_ONCE(1);
+ }
+
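+ /* Only the module area is mapped at page granularity; reject other ranges. */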
+ if (!is_module_address(start) || !is_module_address(end - 1))
+ return -EINVAL;
+
+ data.set_mask = set_mask;
+ data.clear_mask = clear_mask;
+
+ ret = apply_to_page_range(&init_mm, start, size, change_page_range,
+ &data);
+
+ flush_tlb_kernel_range(start, end);
+ return ret;
+}
+
+int set_memory_ro(unsigned long addr, int numpages)
+{
+ return change_memory_common(addr, numpages,
+ __pgprot(PTE_RDONLY),
+ __pgprot(PTE_WRITE));
+}
+EXPORT_SYMBOL_GPL(set_memory_ro);
+
+int set_memory_rw(unsigned long addr, int numpages)
+{
+ return change_memory_common(addr, numpages,
+ __pgprot(PTE_WRITE),
+ __pgprot(PTE_RDONLY));
+}
+EXPORT_SYMBOL_GPL(set_memory_rw);
+
+int set_memory_nx(unsigned long addr, int numpages)
+{
+ return change_memory_common(addr, numpages,
+ __pgprot(PTE_PXN),
+ __pgprot(0));
+}
+EXPORT_SYMBOL_GPL(set_memory_nx);
+
+int set_memory_x(unsigned long addr, int numpages)
+{
+ return change_memory_common(addr, numpages,
+ __pgprot(0),
+ __pgprot(PTE_PXN));
+}
+EXPORT_SYMBOL_GPL(set_memory_x);
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 7736779c9809..4e778b13291b 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -76,6 +76,21 @@ ENTRY(cpu_reset)
ret x0
ENDPROC(cpu_reset)
+ENTRY(cpu_soft_restart)
+ /* Save address of cpu_reset() and reset address */
+ mov x19, x0
+ mov x20, x1
+
+ /* Turn D-cache off */
+ bl cpu_cache_off
+
+ /* Push out all dirty data, and ensure cache is empty */
+ bl flush_cache_all
+
+ mov x0, x20
+ ret x19
+ENDPROC(cpu_soft_restart)
+
/*
* cpu_do_idle()
*
diff --git a/arch/arm64/net/Makefile b/arch/arm64/net/Makefile
new file mode 100644
index 000000000000..da9763378284
--- /dev/null
+++ b/arch/arm64/net/Makefile
@@ -0,0 +1,4 @@
+#
+# ARM64 networking code
+#
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
new file mode 100644
index 000000000000..2134f7e6c288
--- /dev/null
+++ b/arch/arm64/net/bpf_jit.h
@@ -0,0 +1,169 @@
+/*
+ * BPF JIT compiler for ARM64
+ *
+ * Copyright (C) 2014 Zi Shen Lim <zlim.lnx@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _BPF_JIT_H
+#define _BPF_JIT_H
+
+#include <asm/insn.h>
+
+/* 5-bit Register Operand */
+#define A64_R(x) AARCH64_INSN_REG_##x
+#define A64_FP AARCH64_INSN_REG_FP
+#define A64_LR AARCH64_INSN_REG_LR
+#define A64_ZR AARCH64_INSN_REG_ZR
+#define A64_SP AARCH64_INSN_REG_SP
+
+#define A64_VARIANT(sf) \
+ ((sf) ? AARCH64_INSN_VARIANT_64BIT : AARCH64_INSN_VARIANT_32BIT)
+
+/* Compare & branch (immediate) */
+#define A64_COMP_BRANCH(sf, Rt, offset, type) \
+ aarch64_insn_gen_comp_branch_imm(0, offset, Rt, A64_VARIANT(sf), \
+ AARCH64_INSN_BRANCH_COMP_##type)
+#define A64_CBZ(sf, Rt, imm19) A64_COMP_BRANCH(sf, Rt, (imm19) << 2, ZERO)
+
+/* Conditional branch (immediate) */
+#define A64_COND_BRANCH(cond, offset) \
+ aarch64_insn_gen_cond_branch_imm(0, offset, cond)
+#define A64_COND_EQ AARCH64_INSN_COND_EQ /* == */
+#define A64_COND_NE AARCH64_INSN_COND_NE /* != */
+#define A64_COND_CS AARCH64_INSN_COND_CS /* unsigned >= */
+#define A64_COND_HI AARCH64_INSN_COND_HI /* unsigned > */
+#define A64_COND_GE AARCH64_INSN_COND_GE /* signed >= */
+#define A64_COND_GT AARCH64_INSN_COND_GT /* signed > */
+#define A64_B_(cond, imm19) A64_COND_BRANCH(cond, (imm19) << 2)
+
+/* Unconditional branch (immediate) */
+#define A64_BRANCH(offset, type) aarch64_insn_gen_branch_imm(0, offset, \
+ AARCH64_INSN_BRANCH_##type)
+#define A64_B(imm26) A64_BRANCH((imm26) << 2, NOLINK)
+#define A64_BL(imm26) A64_BRANCH((imm26) << 2, LINK)
+
+/* Unconditional branch (register) */
+#define A64_BLR(Rn) aarch64_insn_gen_branch_reg(Rn, AARCH64_INSN_BRANCH_LINK)
+#define A64_RET(Rn) aarch64_insn_gen_branch_reg(Rn, AARCH64_INSN_BRANCH_RETURN)
+
+/* Load/store register (register offset) */
+#define A64_LS_REG(Rt, Rn, Rm, size, type) \
+ aarch64_insn_gen_load_store_reg(Rt, Rn, Rm, \
+ AARCH64_INSN_SIZE_##size, \
+ AARCH64_INSN_LDST_##type##_REG_OFFSET)
+#define A64_STRB(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 8, STORE)
+#define A64_LDRB(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 8, LOAD)
+#define A64_STRH(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 16, STORE)
+#define A64_LDRH(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 16, LOAD)
+#define A64_STR32(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 32, STORE)
+#define A64_LDR32(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 32, LOAD)
+#define A64_STR64(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 64, STORE)
+#define A64_LDR64(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 64, LOAD)
+
+/* Load/store register pair */
+#define A64_LS_PAIR(Rt, Rt2, Rn, offset, ls, type) \
+ aarch64_insn_gen_load_store_pair(Rt, Rt2, Rn, offset, \
+ AARCH64_INSN_VARIANT_64BIT, \
+ AARCH64_INSN_LDST_##ls##_PAIR_##type)
+/* Rn -= 16; Rn[0] = Rt; Rn[8] = Rt2; */
+#define A64_PUSH(Rt, Rt2, Rn) A64_LS_PAIR(Rt, Rt2, Rn, -16, STORE, PRE_INDEX)
+/* Rt = Rn[0]; Rt2 = Rn[8]; Rn += 16; */
+#define A64_POP(Rt, Rt2, Rn) A64_LS_PAIR(Rt, Rt2, Rn, 16, LOAD, POST_INDEX)
+
+/* Add/subtract (immediate) */
+#define A64_ADDSUB_IMM(sf, Rd, Rn, imm12, type) \
+ aarch64_insn_gen_add_sub_imm(Rd, Rn, imm12, \
+ A64_VARIANT(sf), AARCH64_INSN_ADSB_##type)
+/* Rd = Rn OP imm12 */
+#define A64_ADD_I(sf, Rd, Rn, imm12) A64_ADDSUB_IMM(sf, Rd, Rn, imm12, ADD)
+#define A64_SUB_I(sf, Rd, Rn, imm12) A64_ADDSUB_IMM(sf, Rd, Rn, imm12, SUB)
+/* Rd = Rn */
+#define A64_MOV(sf, Rd, Rn) A64_ADD_I(sf, Rd, Rn, 0)
+
+/* Bitfield move */
+#define A64_BITFIELD(sf, Rd, Rn, immr, imms, type) \
+ aarch64_insn_gen_bitfield(Rd, Rn, immr, imms, \
+ A64_VARIANT(sf), AARCH64_INSN_BITFIELD_MOVE_##type)
+/* Signed, with sign replication to left and zeros to right */
+#define A64_SBFM(sf, Rd, Rn, ir, is) A64_BITFIELD(sf, Rd, Rn, ir, is, SIGNED)
+/* Unsigned, with zeros to left and right */
+#define A64_UBFM(sf, Rd, Rn, ir, is) A64_BITFIELD(sf, Rd, Rn, ir, is, UNSIGNED)
+
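+/*
+ * The shift-immediate aliases below are encodings of the bitfield moves,
+ * e.g. LSL #n is UBFM Rd, Rn, #(-n mod sz), #(sz - 1 - n).
+ */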
+/* Rd = Rn << shift */
+#define A64_LSL(sf, Rd, Rn, shift) ({ \
+ int sz = (sf) ? 64 : 32; \
+ A64_UBFM(sf, Rd, Rn, (unsigned)-(shift) % sz, sz - 1 - (shift)); \
+})
+/* Rd = Rn >> shift */
+#define A64_LSR(sf, Rd, Rn, shift) A64_UBFM(sf, Rd, Rn, shift, (sf) ? 63 : 31)
+/* Rd = Rn >> shift; signed */
+#define A64_ASR(sf, Rd, Rn, shift) A64_SBFM(sf, Rd, Rn, shift, (sf) ? 63 : 31)
+
+/* Move wide (immediate) */
+#define A64_MOVEW(sf, Rd, imm16, shift, type) \
+ aarch64_insn_gen_movewide(Rd, imm16, shift, \
+ A64_VARIANT(sf), AARCH64_INSN_MOVEWIDE_##type)
+/* Rd = Zeros (for MOVZ);
+ * Rd |= imm16 << shift (where shift is {0, 16, 32, 48});
+ * Rd = ~Rd; (for MOVN); */
+#define A64_MOVN(sf, Rd, imm16, shift) A64_MOVEW(sf, Rd, imm16, shift, INVERSE)
+#define A64_MOVZ(sf, Rd, imm16, shift) A64_MOVEW(sf, Rd, imm16, shift, ZERO)
+#define A64_MOVK(sf, Rd, imm16, shift) A64_MOVEW(sf, Rd, imm16, shift, KEEP)
+
+/* Add/subtract (shifted register) */
+#define A64_ADDSUB_SREG(sf, Rd, Rn, Rm, type) \
+ aarch64_insn_gen_add_sub_shifted_reg(Rd, Rn, Rm, 0, \
+ A64_VARIANT(sf), AARCH64_INSN_ADSB_##type)
+/* Rd = Rn OP Rm */
+#define A64_ADD(sf, Rd, Rn, Rm) A64_ADDSUB_SREG(sf, Rd, Rn, Rm, ADD)
+#define A64_SUB(sf, Rd, Rn, Rm) A64_ADDSUB_SREG(sf, Rd, Rn, Rm, SUB)
+#define A64_SUBS(sf, Rd, Rn, Rm) A64_ADDSUB_SREG(sf, Rd, Rn, Rm, SUB_SETFLAGS)
+/* Rd = -Rm */
+#define A64_NEG(sf, Rd, Rm) A64_SUB(sf, Rd, A64_ZR, Rm)
+/* Rn - Rm; set condition flags */
+#define A64_CMP(sf, Rn, Rm) A64_SUBS(sf, A64_ZR, Rn, Rm)
+
+/* Data-processing (1 source) */
+#define A64_DATA1(sf, Rd, Rn, type) aarch64_insn_gen_data1(Rd, Rn, \
+ A64_VARIANT(sf), AARCH64_INSN_DATA1_##type)
+/* Rd = BSWAPx(Rn) */
+#define A64_REV16(sf, Rd, Rn) A64_DATA1(sf, Rd, Rn, REVERSE_16)
+#define A64_REV32(sf, Rd, Rn) A64_DATA1(sf, Rd, Rn, REVERSE_32)
+#define A64_REV64(Rd, Rn) A64_DATA1(1, Rd, Rn, REVERSE_64)
+
+/* Data-processing (2 source) */
+/* Rd = Rn OP Rm */
+#define A64_UDIV(sf, Rd, Rn, Rm) aarch64_insn_gen_data2(Rd, Rn, Rm, \
+ A64_VARIANT(sf), AARCH64_INSN_DATA2_UDIV)
+
+/* Data-processing (3 source) */
+/* Rd = Ra + Rn * Rm */
+#define A64_MADD(sf, Rd, Ra, Rn, Rm) aarch64_insn_gen_data3(Rd, Ra, Rn, Rm, \
+ A64_VARIANT(sf), AARCH64_INSN_DATA3_MADD)
+/* Rd = Rn * Rm */
+#define A64_MUL(sf, Rd, Rn, Rm) A64_MADD(sf, Rd, A64_ZR, Rn, Rm)
+
+/* Logical (shifted register) */
+#define A64_LOGIC_SREG(sf, Rd, Rn, Rm, type) \
+ aarch64_insn_gen_logical_shifted_reg(Rd, Rn, Rm, 0, \
+ A64_VARIANT(sf), AARCH64_INSN_LOGIC_##type)
+/* Rd = Rn OP Rm */
+#define A64_AND(sf, Rd, Rn, Rm) A64_LOGIC_SREG(sf, Rd, Rn, Rm, AND)
+#define A64_ORR(sf, Rd, Rn, Rm) A64_LOGIC_SREG(sf, Rd, Rn, Rm, ORR)
+#define A64_EOR(sf, Rd, Rn, Rm) A64_LOGIC_SREG(sf, Rd, Rn, Rm, EOR)
+#define A64_ANDS(sf, Rd, Rn, Rm) A64_LOGIC_SREG(sf, Rd, Rn, Rm, AND_SETFLAGS)
+/* Rn & Rm; set condition flags */
+#define A64_TST(sf, Rn, Rm) A64_ANDS(sf, A64_ZR, Rn, Rm)
+
+#endif /* _BPF_JIT_H */
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
new file mode 100644
index 000000000000..7ae33545535b
--- /dev/null
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -0,0 +1,679 @@
+/*
+ * BPF JIT compiler for ARM64
+ *
+ * Copyright (C) 2014 Zi Shen Lim <zlim.lnx@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define pr_fmt(fmt) "bpf_jit: " fmt
+
+#include <linux/filter.h>
+#include <linux/moduleloader.h>
+#include <linux/printk.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <asm/byteorder.h>
+#include <asm/cacheflush.h>
+
+#include "bpf_jit.h"
+
+int bpf_jit_enable __read_mostly;
+
+#define TMP_REG_1 (MAX_BPF_REG + 0)
+#define TMP_REG_2 (MAX_BPF_REG + 1)
+
+/* Map BPF registers to A64 registers */
+static const int bpf2a64[] = {
+ /* return value from in-kernel function, and exit value from eBPF */
+ [BPF_REG_0] = A64_R(7),
+ /* arguments from eBPF program to in-kernel function */
+ [BPF_REG_1] = A64_R(0),
+ [BPF_REG_2] = A64_R(1),
+ [BPF_REG_3] = A64_R(2),
+ [BPF_REG_4] = A64_R(3),
+ [BPF_REG_5] = A64_R(4),
+ /* callee saved registers that in-kernel function will preserve */
+ [BPF_REG_6] = A64_R(19),
+ [BPF_REG_7] = A64_R(20),
+ [BPF_REG_8] = A64_R(21),
+ [BPF_REG_9] = A64_R(22),
+ /* read-only frame pointer to access stack */
+ [BPF_REG_FP] = A64_FP,
+ /* temporary register for internal BPF JIT */
+ [TMP_REG_1] = A64_R(23),
+ [TMP_REG_2] = A64_R(24),
+};
+
+struct jit_ctx {
+ const struct bpf_prog *prog;
+ int idx;
+ int tmp_used;
+ int body_offset;
+ int *offset;
+ u32 *image;
+};
+
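+/* Emit one A64 instruction; during the sizing pass (image == NULL) only count it. */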
+static inline void emit(const u32 insn, struct jit_ctx *ctx)
+{
+ if (ctx->image != NULL)
+ ctx->image[ctx->idx] = cpu_to_le32(insn);
+
+ ctx->idx++;
+}
+
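+/*
+ * Materialize a 64-bit immediate 16 bits at a time: MOVZ sets the low
+ * halfword and zeroes the rest, then MOVK patches in each remaining
+ * non-zero halfword.
+ */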
+static inline void emit_a64_mov_i64(const int reg, const u64 val,
+ struct jit_ctx *ctx)
+{
+ u64 tmp = val;
+ int shift = 0;
+
+ emit(A64_MOVZ(1, reg, tmp & 0xffff, shift), ctx);
+ tmp >>= 16;
+ shift += 16;
+ while (tmp) {
+ if (tmp & 0xffff)
+ emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
+ tmp >>= 16;
+ shift += 16;
+ }
+}
+
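+/*
+ * Materialize a 32-bit immediate (sign-extended in the 64-bit variant):
+ * values with bit 31 set are built with MOVN (move wide with NOT),
+ * others with MOVZ, plus a MOVK for the second halfword when needed.
+ */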
+static inline void emit_a64_mov_i(const int is64, const int reg,
+ const s32 val, struct jit_ctx *ctx)
+{
+ u16 hi = val >> 16;
+ u16 lo = val & 0xffff;
+
+ if (hi & 0x8000) {
+ if (hi == 0xffff) {
+ emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx);
+ } else {
+ emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx);
+ emit(A64_MOVK(is64, reg, lo, 0), ctx);
+ }
+ } else {
+ emit(A64_MOVZ(is64, reg, lo, 0), ctx);
+ if (hi)
+ emit(A64_MOVK(is64, reg, hi, 16), ctx);
+ }
+}
+
+static inline int bpf2a64_offset(int bpf_to, int bpf_from,
+ const struct jit_ctx *ctx)
+{
+ int to = ctx->offset[bpf_to + 1];
+ /* -1 to account for the Branch instruction */
+ int from = ctx->offset[bpf_from + 1] - 1;
+
+ return to - from;
+}
+
+static inline int epilogue_offset(const struct jit_ctx *ctx)
+{
+ int to = ctx->offset[ctx->prog->len - 1];
+ int from = ctx->idx - ctx->body_offset;
+
+ return to - from;
+}
+
+/* Stack must be multiples of 16B */
+#define STACK_ALIGN(sz) (((sz) + 15) & ~15)
+
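+/*
+ * Prologue: save the callee-saved registers backing BPF_REG_6..BPF_REG_9
+ * (plus the JIT temporaries when used), reserve the 16-byte aligned BPF
+ * stack area, point the read-only frame pointer at its base and clear
+ * the classic BPF A and X registers.
+ */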
+static void build_prologue(struct jit_ctx *ctx)
+{
+ const u8 r6 = bpf2a64[BPF_REG_6];
+ const u8 r7 = bpf2a64[BPF_REG_7];
+ const u8 r8 = bpf2a64[BPF_REG_8];
+ const u8 r9 = bpf2a64[BPF_REG_9];
+ const u8 fp = bpf2a64[BPF_REG_FP];
+ const u8 ra = bpf2a64[BPF_REG_A];
+ const u8 rx = bpf2a64[BPF_REG_X];
+ const u8 tmp1 = bpf2a64[TMP_REG_1];
+ const u8 tmp2 = bpf2a64[TMP_REG_2];
+ int stack_size = MAX_BPF_STACK;
+
+ stack_size += 4; /* extra for skb_copy_bits buffer */
+ stack_size = STACK_ALIGN(stack_size);
+
+ /* Save callee-saved register */
+ emit(A64_PUSH(r6, r7, A64_SP), ctx);
+ emit(A64_PUSH(r8, r9, A64_SP), ctx);
+ if (ctx->tmp_used)
+ emit(A64_PUSH(tmp1, tmp2, A64_SP), ctx);
+
+ /* Set up BPF stack */
+ emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx);
+
+ /* Set up frame pointer */
+ emit(A64_MOV(1, fp, A64_SP), ctx);
+
+ /* Clear registers A and X */
+ emit_a64_mov_i64(ra, 0, ctx);
+ emit_a64_mov_i64(rx, 0, ctx);
+}
+
+static void build_epilogue(struct jit_ctx *ctx)
+{
+ const u8 r0 = bpf2a64[BPF_REG_0];
+ const u8 r6 = bpf2a64[BPF_REG_6];
+ const u8 r7 = bpf2a64[BPF_REG_7];
+ const u8 r8 = bpf2a64[BPF_REG_8];
+ const u8 r9 = bpf2a64[BPF_REG_9];
+ const u8 fp = bpf2a64[BPF_REG_FP];
+ const u8 tmp1 = bpf2a64[TMP_REG_1];
+ const u8 tmp2 = bpf2a64[TMP_REG_2];
+ int stack_size = MAX_BPF_STACK;
+
+ stack_size += 4; /* extra for skb_copy_bits buffer */
+ stack_size = STACK_ALIGN(stack_size);
+
+ /* We're done with BPF stack */
+ emit(A64_ADD_I(1, A64_SP, A64_SP, stack_size), ctx);
+
+ /* Restore callee-saved register */
+ if (ctx->tmp_used)
+ emit(A64_POP(tmp1, tmp2, A64_SP), ctx);
+ emit(A64_POP(r8, r9, A64_SP), ctx);
+ emit(A64_POP(r6, r7, A64_SP), ctx);
+
+ /* Restore frame pointer */
+ emit(A64_MOV(1, fp, A64_SP), ctx);
+
+ /* Set return value */
+ emit(A64_MOV(1, A64_R(0), r0), ctx);
+
+ emit(A64_RET(A64_LR), ctx);
+}
+
+static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
+{
+ const u8 code = insn->code;
+ const u8 dst = bpf2a64[insn->dst_reg];
+ const u8 src = bpf2a64[insn->src_reg];
+ const u8 tmp = bpf2a64[TMP_REG_1];
+ const u8 tmp2 = bpf2a64[TMP_REG_2];
+ const s16 off = insn->off;
+ const s32 imm = insn->imm;
+ const int i = insn - ctx->prog->insnsi;
+ const bool is64 = BPF_CLASS(code) == BPF_ALU64;
+ u8 jmp_cond;
+ s32 jmp_offset;
+
+ switch (code) {
+ /* dst = src */
+ case BPF_ALU | BPF_MOV | BPF_X:
+ case BPF_ALU64 | BPF_MOV | BPF_X:
+ emit(A64_MOV(is64, dst, src), ctx);
+ break;
+ /* dst = dst OP src */
+ case BPF_ALU | BPF_ADD | BPF_X:
+ case BPF_ALU64 | BPF_ADD | BPF_X:
+ emit(A64_ADD(is64, dst, dst, src), ctx);
+ break;
+ case BPF_ALU | BPF_SUB | BPF_X:
+ case BPF_ALU64 | BPF_SUB | BPF_X:
+ emit(A64_SUB(is64, dst, dst, src), ctx);
+ break;
+ case BPF_ALU | BPF_AND | BPF_X:
+ case BPF_ALU64 | BPF_AND | BPF_X:
+ emit(A64_AND(is64, dst, dst, src), ctx);
+ break;
+ case BPF_ALU | BPF_OR | BPF_X:
+ case BPF_ALU64 | BPF_OR | BPF_X:
+ emit(A64_ORR(is64, dst, dst, src), ctx);
+ break;
+ case BPF_ALU | BPF_XOR | BPF_X:
+ case BPF_ALU64 | BPF_XOR | BPF_X:
+ emit(A64_EOR(is64, dst, dst, src), ctx);
+ break;
+ case BPF_ALU | BPF_MUL | BPF_X:
+ case BPF_ALU64 | BPF_MUL | BPF_X:
+ emit(A64_MUL(is64, dst, dst, src), ctx);
+ break;
+ case BPF_ALU | BPF_DIV | BPF_X:
+ case BPF_ALU64 | BPF_DIV | BPF_X:
+ emit(A64_UDIV(is64, dst, dst, src), ctx);
+ break;
+ case BPF_ALU | BPF_MOD | BPF_X:
+ case BPF_ALU64 | BPF_MOD | BPF_X:
+ ctx->tmp_used = 1;
+ emit(A64_UDIV(is64, tmp, dst, src), ctx);
+ emit(A64_MUL(is64, tmp, tmp, src), ctx);
+ emit(A64_SUB(is64, dst, dst, tmp), ctx);
+ break;
+ /* dst = -dst */
+ case BPF_ALU | BPF_NEG:
+ case BPF_ALU64 | BPF_NEG:
+ emit(A64_NEG(is64, dst, dst), ctx);
+ break;
+ /* dst = BSWAP##imm(dst) */
+ case BPF_ALU | BPF_END | BPF_FROM_LE:
+ case BPF_ALU | BPF_END | BPF_FROM_BE:
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ if (BPF_SRC(code) == BPF_FROM_BE)
+ break;
+#else /* !CONFIG_CPU_BIG_ENDIAN */
+ if (BPF_SRC(code) == BPF_FROM_LE)
+ break;
+#endif
+ switch (imm) {
+ case 16:
+ emit(A64_REV16(is64, dst, dst), ctx);
+ break;
+ case 32:
+ emit(A64_REV32(is64, dst, dst), ctx);
+ break;
+ case 64:
+ emit(A64_REV64(dst, dst), ctx);
+ break;
+ }
+ break;
+ /* dst = imm */
+ case BPF_ALU | BPF_MOV | BPF_K:
+ case BPF_ALU64 | BPF_MOV | BPF_K:
+ emit_a64_mov_i(is64, dst, imm, ctx);
+ break;
+ /* dst = dst OP imm */
+ case BPF_ALU | BPF_ADD | BPF_K:
+ case BPF_ALU64 | BPF_ADD | BPF_K:
+ ctx->tmp_used = 1;
+ emit_a64_mov_i(is64, tmp, imm, ctx);
+ emit(A64_ADD(is64, dst, dst, tmp), ctx);
+ break;
+ case BPF_ALU | BPF_SUB | BPF_K:
+ case BPF_ALU64 | BPF_SUB | BPF_K:
+ ctx->tmp_used = 1;
+ emit_a64_mov_i(is64, tmp, imm, ctx);
+ emit(A64_SUB(is64, dst, dst, tmp), ctx);
+ break;
+ case BPF_ALU | BPF_AND | BPF_K:
+ case BPF_ALU64 | BPF_AND | BPF_K:
+ ctx->tmp_used = 1;
+ emit_a64_mov_i(is64, tmp, imm, ctx);
+ emit(A64_AND(is64, dst, dst, tmp), ctx);
+ break;
+ case BPF_ALU | BPF_OR | BPF_K:
+ case BPF_ALU64 | BPF_OR | BPF_K:
+ ctx->tmp_used = 1;
+ emit_a64_mov_i(is64, tmp, imm, ctx);
+ emit(A64_ORR(is64, dst, dst, tmp), ctx);
+ break;
+ case BPF_ALU | BPF_XOR | BPF_K:
+ case BPF_ALU64 | BPF_XOR | BPF_K:
+ ctx->tmp_used = 1;
+ emit_a64_mov_i(is64, tmp, imm, ctx);
+ emit(A64_EOR(is64, dst, dst, tmp), ctx);
+ break;
+ case BPF_ALU | BPF_MUL | BPF_K:
+ case BPF_ALU64 | BPF_MUL | BPF_K:
+ ctx->tmp_used = 1;
+ emit_a64_mov_i(is64, tmp, imm, ctx);
+ emit(A64_MUL(is64, dst, dst, tmp), ctx);
+ break;
+ case BPF_ALU | BPF_DIV | BPF_K:
+ case BPF_ALU64 | BPF_DIV | BPF_K:
+ ctx->tmp_used = 1;
+ emit_a64_mov_i(is64, tmp, imm, ctx);
+ emit(A64_UDIV(is64, dst, dst, tmp), ctx);
+ break;
+ case BPF_ALU | BPF_MOD | BPF_K:
+ case BPF_ALU64 | BPF_MOD | BPF_K:
+ ctx->tmp_used = 1;
+ emit_a64_mov_i(is64, tmp2, imm, ctx);
+ emit(A64_UDIV(is64, tmp, dst, tmp2), ctx);
+ emit(A64_MUL(is64, tmp, tmp, tmp2), ctx);
+ emit(A64_SUB(is64, dst, dst, tmp), ctx);
+ break;
+ case BPF_ALU | BPF_LSH | BPF_K:
+ case BPF_ALU64 | BPF_LSH | BPF_K:
+ emit(A64_LSL(is64, dst, dst, imm), ctx);
+ break;
+ case BPF_ALU | BPF_RSH | BPF_K:
+ case BPF_ALU64 | BPF_RSH | BPF_K:
+ emit(A64_LSR(is64, dst, dst, imm), ctx);
+ break;
+ case BPF_ALU | BPF_ARSH | BPF_K:
+ case BPF_ALU64 | BPF_ARSH | BPF_K:
+ emit(A64_ASR(is64, dst, dst, imm), ctx);
+ break;
+
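+/* Reject jump offsets that do not fit the instruction's signed immediate field. */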
+#define check_imm(bits, imm) do { \
+ if ((((imm) > 0) && ((imm) >> (bits))) || \
+ (((imm) < 0) && (~(imm) >> (bits)))) { \
+ pr_info("[%2d] imm=%d(0x%x) out of range\n", \
+ i, imm, imm); \
+ return -EINVAL; \
+ } \
+} while (0)
+#define check_imm19(imm) check_imm(19, imm)
+#define check_imm26(imm) check_imm(26, imm)
+
+ /* JUMP off */
+ case BPF_JMP | BPF_JA:
+ jmp_offset = bpf2a64_offset(i + off, i, ctx);
+ check_imm26(jmp_offset);
+ emit(A64_B(jmp_offset), ctx);
+ break;
+ /* IF (dst COND src) JUMP off */
+ case BPF_JMP | BPF_JEQ | BPF_X:
+ case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JNE | BPF_X:
+ case BPF_JMP | BPF_JSGT | BPF_X:
+ case BPF_JMP | BPF_JSGE | BPF_X:
+ emit(A64_CMP(1, dst, src), ctx);
+emit_cond_jmp:
+ jmp_offset = bpf2a64_offset(i + off, i, ctx);
+ check_imm19(jmp_offset);
+ switch (BPF_OP(code)) {
+ case BPF_JEQ:
+ jmp_cond = A64_COND_EQ;
+ break;
+ case BPF_JGT:
+ jmp_cond = A64_COND_HI;
+ break;
+ case BPF_JGE:
+ jmp_cond = A64_COND_CS;
+ break;
+ case BPF_JNE:
+ jmp_cond = A64_COND_NE;
+ break;
+ case BPF_JSGT:
+ jmp_cond = A64_COND_GT;
+ break;
+ case BPF_JSGE:
+ jmp_cond = A64_COND_GE;
+ break;
+ default:
+ return -EFAULT;
+ }
+ emit(A64_B_(jmp_cond, jmp_offset), ctx);
+ break;
+ case BPF_JMP | BPF_JSET | BPF_X:
+ emit(A64_TST(1, dst, src), ctx);
+ goto emit_cond_jmp;
+ /* IF (dst COND imm) JUMP off */
+ case BPF_JMP | BPF_JEQ | BPF_K:
+ case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JNE | BPF_K:
+ case BPF_JMP | BPF_JSGT | BPF_K:
+ case BPF_JMP | BPF_JSGE | BPF_K:
+ ctx->tmp_used = 1;
+ emit_a64_mov_i(1, tmp, imm, ctx);
+ emit(A64_CMP(1, dst, tmp), ctx);
+ goto emit_cond_jmp;
+ case BPF_JMP | BPF_JSET | BPF_K:
+ ctx->tmp_used = 1;
+ emit_a64_mov_i(1, tmp, imm, ctx);
+ emit(A64_TST(1, dst, tmp), ctx);
+ goto emit_cond_jmp;
+ /* function call */
+ case BPF_JMP | BPF_CALL:
+ {
+ const u8 r0 = bpf2a64[BPF_REG_0];
+ const u64 func = (u64)__bpf_call_base + imm;
+
+ ctx->tmp_used = 1;
+ emit_a64_mov_i64(tmp, func, ctx);
+ emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
+ emit(A64_MOV(1, A64_FP, A64_SP), ctx);
+ emit(A64_BLR(tmp), ctx);
+ emit(A64_MOV(1, r0, A64_R(0)), ctx);
+ emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
+ break;
+ }
+ /* function return */
+ case BPF_JMP | BPF_EXIT:
+ if (i == ctx->prog->len - 1)
+ break;
+ jmp_offset = epilogue_offset(ctx);
+ check_imm26(jmp_offset);
+ emit(A64_B(jmp_offset), ctx);
+ break;
+
+ /* LDX: dst = *(size *)(src + off) */
+ case BPF_LDX | BPF_MEM | BPF_W:
+ case BPF_LDX | BPF_MEM | BPF_H:
+ case BPF_LDX | BPF_MEM | BPF_B:
+ case BPF_LDX | BPF_MEM | BPF_DW:
+ ctx->tmp_used = 1;
+ emit_a64_mov_i(1, tmp, off, ctx);
+ switch (BPF_SIZE(code)) {
+ case BPF_W:
+ emit(A64_LDR32(dst, src, tmp), ctx);
+ break;
+ case BPF_H:
+ emit(A64_LDRH(dst, src, tmp), ctx);
+ break;
+ case BPF_B:
+ emit(A64_LDRB(dst, src, tmp), ctx);
+ break;
+ case BPF_DW:
+ emit(A64_LDR64(dst, src, tmp), ctx);
+ break;
+ }
+ break;
+
+ /* ST: *(size *)(dst + off) = imm */
+ case BPF_ST | BPF_MEM | BPF_W:
+ case BPF_ST | BPF_MEM | BPF_H:
+ case BPF_ST | BPF_MEM | BPF_B:
+ case BPF_ST | BPF_MEM | BPF_DW:
+ goto notyet;
+
+ /* STX: *(size *)(dst + off) = src */
+ case BPF_STX | BPF_MEM | BPF_W:
+ case BPF_STX | BPF_MEM | BPF_H:
+ case BPF_STX | BPF_MEM | BPF_B:
+ case BPF_STX | BPF_MEM | BPF_DW:
+ ctx->tmp_used = 1;
+ emit_a64_mov_i(1, tmp, off, ctx);
+ switch (BPF_SIZE(code)) {
+ case BPF_W:
+ emit(A64_STR32(src, dst, tmp), ctx);
+ break;
+ case BPF_H:
+ emit(A64_STRH(src, dst, tmp), ctx);
+ break;
+ case BPF_B:
+ emit(A64_STRB(src, dst, tmp), ctx);
+ break;
+ case BPF_DW:
+ emit(A64_STR64(src, dst, tmp), ctx);
+ break;
+ }
+ break;
+ /* STX XADD: lock *(u32 *)(dst + off) += src */
+ case BPF_STX | BPF_XADD | BPF_W:
+ /* STX XADD: lock *(u64 *)(dst + off) += src */
+ case BPF_STX | BPF_XADD | BPF_DW:
+ goto notyet;
+
+ /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
+ case BPF_LD | BPF_ABS | BPF_W:
+ case BPF_LD | BPF_ABS | BPF_H:
+ case BPF_LD | BPF_ABS | BPF_B:
+ /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */
+ case BPF_LD | BPF_IND | BPF_W:
+ case BPF_LD | BPF_IND | BPF_H:
+ case BPF_LD | BPF_IND | BPF_B:
+ {
+ const u8 r0 = bpf2a64[BPF_REG_0]; /* r0 = return value */
+ const u8 r6 = bpf2a64[BPF_REG_6]; /* r6 = pointer to sk_buff */
+ const u8 fp = bpf2a64[BPF_REG_FP];
+ const u8 r1 = bpf2a64[BPF_REG_1]; /* r1: struct sk_buff *skb */
+ const u8 r2 = bpf2a64[BPF_REG_2]; /* r2: int k */
+ const u8 r3 = bpf2a64[BPF_REG_3]; /* r3: unsigned int size */
+ const u8 r4 = bpf2a64[BPF_REG_4]; /* r4: void *buffer */
+ const u8 r5 = bpf2a64[BPF_REG_5]; /* r5: void *(*func)(...) */
+ int size;
+
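+ /*
+ * Call bpf_load_pointer(skb, offset, size, buffer); a NULL return
+ * branches to the epilogue so the program returns 0, otherwise load
+ * the value and byte-swap it to host order where needed.
+ */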
+ emit(A64_MOV(1, r1, r6), ctx);
+ emit_a64_mov_i(0, r2, imm, ctx);
+ if (BPF_MODE(code) == BPF_IND)
+ emit(A64_ADD(0, r2, r2, src), ctx);
+ switch (BPF_SIZE(code)) {
+ case BPF_W:
+ size = 4;
+ break;
+ case BPF_H:
+ size = 2;
+ break;
+ case BPF_B:
+ size = 1;
+ break;
+ default:
+ return -EINVAL;
+ }
+ emit_a64_mov_i64(r3, size, ctx);
+ emit(A64_ADD_I(1, r4, fp, MAX_BPF_STACK), ctx);
+ emit_a64_mov_i64(r5, (unsigned long)bpf_load_pointer, ctx);
+ emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
+ emit(A64_MOV(1, A64_FP, A64_SP), ctx);
+ emit(A64_BLR(r5), ctx);
+ emit(A64_MOV(1, r0, A64_R(0)), ctx);
+ emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
+
+ jmp_offset = epilogue_offset(ctx);
+ check_imm19(jmp_offset);
+ emit(A64_CBZ(1, r0, jmp_offset), ctx);
+ emit(A64_MOV(1, r5, r0), ctx);
+ switch (BPF_SIZE(code)) {
+ case BPF_W:
+ emit(A64_LDR32(r0, r5, A64_ZR), ctx);
+#ifndef CONFIG_CPU_BIG_ENDIAN
+ emit(A64_REV32(0, r0, r0), ctx);
+#endif
+ break;
+ case BPF_H:
+ emit(A64_LDRH(r0, r5, A64_ZR), ctx);
+#ifndef CONFIG_CPU_BIG_ENDIAN
+ emit(A64_REV16(0, r0, r0), ctx);
+#endif
+ break;
+ case BPF_B:
+ emit(A64_LDRB(r0, r5, A64_ZR), ctx);
+ break;
+ }
+ break;
+ }
+notyet:
+ pr_info_once("*** NOT YET: opcode %02x ***\n", code);
+ return -EFAULT;
+
+ default:
+ pr_err_once("unknown opcode %02x\n", code);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int build_body(struct jit_ctx *ctx)
+{
+ const struct bpf_prog *prog = ctx->prog;
+ int i;
+
+ for (i = 0; i < prog->len; i++) {
+ const struct bpf_insn *insn = &prog->insnsi[i];
+ int ret;
+
+ if (ctx->image == NULL)
+ ctx->offset[i] = ctx->idx;
+
+ ret = build_insn(insn, ctx);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static inline void bpf_flush_icache(void *start, void *end)
+{
+ flush_icache_range((unsigned long)start, (unsigned long)end);
+}
+
+void bpf_jit_compile(struct bpf_prog *prog)
+{
+ /* Nothing to do here. We support Internal BPF. */
+}
+
+void bpf_int_jit_compile(struct bpf_prog *prog)
+{
+ struct jit_ctx ctx;
+ int image_size;
+
+ if (!bpf_jit_enable)
+ return;
+
+ if (!prog || !prog->len)
+ return;
+
+ memset(&ctx, 0, sizeof(ctx));
+ ctx.prog = prog;
+
+ ctx.offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL);
+ if (ctx.offset == NULL)
+ return;
+
+ /* 1. Initial fake pass to compute ctx->idx. */
+
+ /* Fake pass to fill in ctx->offset. */
+ if (build_body(&ctx))
+ goto out;
+
+ build_prologue(&ctx);
+
+ build_epilogue(&ctx);
+
+ /* Now we know the actual image size. */
+ image_size = sizeof(u32) * ctx.idx;
+ ctx.image = module_alloc(image_size);
+ if (unlikely(ctx.image == NULL))
+ goto out;
+
+ /* 2. Now, the actual pass. */
+
+ ctx.idx = 0;
+ build_prologue(&ctx);
+
+ ctx.body_offset = ctx.idx;
+ if (build_body(&ctx)) {
+ module_free(NULL, ctx.image);
+ goto out;
+ }
+
+ build_epilogue(&ctx);
+
+ /* And we're done. */
+ if (bpf_jit_enable > 1)
+ bpf_jit_dump(prog->len, image_size, 2, ctx.image);
+
+ bpf_flush_icache(ctx.image, ctx.image + ctx.idx);
+ prog->bpf_func = (void *)ctx.image;
+ prog->jited = 1;
+
+out:
+ kfree(ctx.offset);
+}
+
+void bpf_jit_free(struct bpf_prog *prog)
+{
+ if (prog->jited)
+ module_free(NULL, prog->bpf_func);
+
+ kfree(prog);
+}
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index ed30699cc635..af76634f8d98 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -671,7 +671,7 @@ config TICKSOURCE_CORETMR
default y
endmenu
-menu "Clock souce"
+menu "Clock source"
depends on GENERIC_CLOCKEVENTS
config CYCLES_CLOCKSOURCE
bool "CYCLES"
diff --git a/arch/blackfin/mach-common/ints-priority.c b/arch/blackfin/mach-common/ints-priority.c
index 1f94784eab6d..694619365265 100644
--- a/arch/blackfin/mach-common/ints-priority.c
+++ b/arch/blackfin/mach-common/ints-priority.c
@@ -455,7 +455,7 @@ void handle_sec_sci_fault(uint32_t gstat)
printk(KERN_DEBUG "sec ack err\n");
break;
default:
- printk(KERN_DEBUG "sec sci unknow err\n");
+ printk(KERN_DEBUG "sec sci unknown err\n");
}
}
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index db95f570705f..4729752b7256 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -234,9 +234,6 @@ struct kvm_vm_data {
#define KVM_REQ_PTC_G 32
#define KVM_REQ_RESUME 33
-struct kvm;
-struct kvm_vcpu;
-
struct kvm_mmio_req {
uint64_t addr; /* physical address */
uint64_t size; /* size in bytes */
@@ -595,6 +592,18 @@ void kvm_sal_emul(struct kvm_vcpu *vcpu);
struct kvm *kvm_arch_alloc_vm(void);
void kvm_arch_free_vm(struct kvm *kvm);
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_free_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
+static inline void kvm_arch_commit_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ enum kvm_mr_change change) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+
#endif /* __ASSEMBLY__*/
#endif
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 0729ba6acddf..ec6b9acb6bea 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -125,7 +125,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
static DEFINE_SPINLOCK(vp_lock);
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
{
long status;
long tmp_base;
@@ -160,7 +160,7 @@ int kvm_arch_hardware_enable(void *garbage)
return 0;
}
-void kvm_arch_hardware_disable(void *garbage)
+void kvm_arch_hardware_disable(void)
{
long status;
@@ -1364,10 +1364,6 @@ static void kvm_release_vm_pages(struct kvm *kvm)
}
}
-void kvm_arch_sync_events(struct kvm *kvm)
-{
-}
-
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_iommu_unmap_guest(kvm);
@@ -1376,10 +1372,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_release_vm_pages(kvm);
}
-void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
-{
-}
-
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
if (cpu != vcpu->cpu) {
@@ -1468,7 +1460,6 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
kfree(vcpu->arch.apic);
}
-
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -1551,21 +1542,12 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}
-void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
-{
-}
-
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
unsigned long npages)
{
return 0;
}
-void kvm_arch_memslots_updated(struct kvm *kvm)
-{
-}
-
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem,
@@ -1597,14 +1579,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
return 0;
}
-void kvm_arch_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old,
- enum kvm_mr_change change)
-{
- return;
-}
-
void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
kvm_flush_remote_tlbs(kvm);
@@ -1853,10 +1827,6 @@ int kvm_arch_hardware_setup(void)
return 0;
}
-void kvm_arch_hardware_unsetup(void)
-{
-}
-
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
{
return __apic_accept_irq(vcpu, irq->vector);
diff --git a/arch/m68k/Kconfig.devices b/arch/m68k/Kconfig.devices
index d163991c5717..2a74777667fd 100644
--- a/arch/m68k/Kconfig.devices
+++ b/arch/m68k/Kconfig.devices
@@ -73,7 +73,7 @@ config ATARI_ETHERNEC
ROM port. The driver works by polling instead of interrupts, so it
is quite slow.
- This driver also suppports the ethernet part of the NetUSBee ROM
+ This driver also supports the ethernet part of the NetUSBee ROM
port combined Ethernet/USB adapter.
To compile the actual ethernet driver, choose Y or M in for the NE2000
diff --git a/arch/mips/cavium-octeon/Kconfig b/arch/mips/cavium-octeon/Kconfig
index 602866657938..c370426a7322 100644
--- a/arch/mips/cavium-octeon/Kconfig
+++ b/arch/mips/cavium-octeon/Kconfig
@@ -1,7 +1,7 @@
if CPU_CAVIUM_OCTEON
config CAVIUM_CN63XXP1
- bool "Enable CN63XXP1 errata worarounds"
+ bool "Enable CN63XXP1 errata workarounds"
default "n"
help
The CN63XXP1 chip requires build time workarounds to
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 7a3fc67bd7f9..f2c249796ea8 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -96,11 +96,6 @@
#define CAUSEB_DC 27
#define CAUSEF_DC (_ULCAST_(1) << 27)
-struct kvm;
-struct kvm_run;
-struct kvm_vcpu;
-struct kvm_interrupt;
-
extern atomic_t kvm_mips_instance;
extern pfn_t(*kvm_mips_gfn_to_pfn) (struct kvm *kvm, gfn_t gfn);
extern void (*kvm_mips_release_pfn_clean) (pfn_t pfn);
@@ -767,5 +762,16 @@ extern int kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc,
extern void kvm_mips_dump_stats(struct kvm_vcpu *vcpu);
extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm);
+static inline void kvm_arch_hardware_disable(void) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_free_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *slot) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
#endif /* __MIPS_KVM_HOST_H__ */
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index cd7114147ae7..e3b21e51ff7e 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -77,24 +77,16 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
return 1;
}
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
{
return 0;
}
-void kvm_arch_hardware_disable(void *garbage)
-{
-}
-
int kvm_arch_hardware_setup(void)
{
return 0;
}
-void kvm_arch_hardware_unsetup(void)
-{
-}
-
void kvm_arch_check_processor_compat(void *rtn)
{
*(int *)rtn = 0;
@@ -163,10 +155,6 @@ void kvm_mips_free_vcpus(struct kvm *kvm)
mutex_unlock(&kvm->lock);
}
-void kvm_arch_sync_events(struct kvm *kvm)
-{
-}
-
static void kvm_mips_uninit_tlbs(void *arg)
{
/* Restore wired count */
@@ -194,21 +182,12 @@ long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl,
return -ENOIOCTLCMD;
}
-void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
-{
-}
-
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
unsigned long npages)
{
return 0;
}
-void kvm_arch_memslots_updated(struct kvm *kvm)
-{
-}
-
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem,
@@ -254,19 +233,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
}
}
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
-}
-
-void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
- struct kvm_memory_slot *slot)
-{
-}
-
-void kvm_arch_flush_shadow(struct kvm *kvm)
-{
-}
-
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
{
int err, size, offset;
@@ -998,10 +964,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
return 0;
}
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
-{
-}
-
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
struct kvm_translation *tr)
{
diff --git a/arch/mips/lantiq/falcon/sysctrl.c b/arch/mips/lantiq/falcon/sysctrl.c
index 8f1866d8124d..468ffa043607 100644
--- a/arch/mips/lantiq/falcon/sysctrl.c
+++ b/arch/mips/lantiq/falcon/sysctrl.c
@@ -221,7 +221,7 @@ void __init ltq_soc_init(void)
(request_mem_region(res_sys[2].start,
resource_size(&res_sys[2]),
res_sys[2].name) < 0))
- pr_err("Failed to request core reources");
+ pr_err("Failed to request core resources");
status_membase = ioremap_nocache(res_status.start,
resource_size(&res_status));
diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c
index 51804b10a036..2b15491de494 100644
--- a/arch/mips/lantiq/xway/sysctrl.c
+++ b/arch/mips/lantiq/xway/sysctrl.c
@@ -318,7 +318,7 @@ void __init ltq_soc_init(void)
res_cgu.name) < 0) ||
(request_mem_region(res_ebu.start, resource_size(&res_ebu),
res_ebu.name) < 0))
- pr_err("Failed to request core reources");
+ pr_err("Failed to request core resources");
pmu_membase = ioremap_nocache(res_pmu.start, resource_size(&res_pmu));
ltq_cgu_membase = ioremap_nocache(res_cgu.start,
diff --git a/arch/mips/pci/pci-lantiq.c b/arch/mips/pci/pci-lantiq.c
index cb1ef9984069..37fe8e7887e2 100644
--- a/arch/mips/pci/pci-lantiq.c
+++ b/arch/mips/pci/pci-lantiq.c
@@ -218,7 +218,7 @@ static int ltq_pci_probe(struct platform_device *pdev)
res_cfg = platform_get_resource(pdev, IORESOURCE_MEM, 0);
res_bridge = platform_get_resource(pdev, IORESOURCE_MEM, 1);
if (!res_cfg || !res_bridge) {
- dev_err(&pdev->dev, "missing memory reources\n");
+ dev_err(&pdev->dev, "missing memory resources\n");
return -EINVAL;
}
diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig
index a648de1b1096..4434b54e1d87 100644
--- a/arch/mn10300/Kconfig
+++ b/arch/mn10300/Kconfig
@@ -181,7 +181,7 @@ endmenu
config SMP
bool "Symmetric multi-processing support"
default y
- depends on MN10300_PROC_MN2WS0038 || MN10300_PROC_MN2WS0050
+ depends on MN10300_PROC_MN2WS0050
---help---
This enables support for systems with more than one CPU. If you have
a system with only one CPU, say N. If you have a system with more
diff --git a/arch/powerpc/boot/simpleboot.c b/arch/powerpc/boot/simpleboot.c
index 21cd48074ec8..9f8c678f0d9a 100644
--- a/arch/powerpc/boot/simpleboot.c
+++ b/arch/powerpc/boot/simpleboot.c
@@ -61,7 +61,7 @@ void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
if (*reg++ != 0)
fatal("Memory range is not based at address 0\n");
- /* get the memsize and trucate it to under 4G on 32 bit machines */
+ /* get the memsize and truncate it to under 4G on 32 bit machines */
memsize64 = 0;
for (i = 0; i < *ns; i++)
memsize64 = (memsize64 << 32) | *reg++;
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 465dfcb82c92..5bca220bbb60 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -53,17 +53,17 @@
#define BOOKE_INTERRUPT_DEBUG 15
/* E500 */
-#define BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL 32
-#define BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST 33
-/*
- * TODO: Unify 32-bit and 64-bit kernel exception handlers to use same defines
- */
-#define BOOKE_INTERRUPT_SPE_UNAVAIL BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL
-#define BOOKE_INTERRUPT_SPE_FP_DATA BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST
-#define BOOKE_INTERRUPT_ALTIVEC_UNAVAIL BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL
-#define BOOKE_INTERRUPT_ALTIVEC_ASSIST \
- BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST
+#ifdef CONFIG_SPE_POSSIBLE
+#define BOOKE_INTERRUPT_SPE_UNAVAIL 32
+#define BOOKE_INTERRUPT_SPE_FP_DATA 33
#define BOOKE_INTERRUPT_SPE_FP_ROUND 34
+#endif
+
+#ifdef CONFIG_PPC_E500MC
+#define BOOKE_INTERRUPT_ALTIVEC_UNAVAIL 32
+#define BOOKE_INTERRUPT_ALTIVEC_ASSIST 33
+#endif
+
#define BOOKE_INTERRUPT_PERFORMANCE_MONITOR 35
#define BOOKE_INTERRUPT_DOORBELL 36
#define BOOKE_INTERRUPT_DOORBELL_CRITICAL 37
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h
index f7aa5cc395c4..3286f0d6a86c 100644
--- a/arch/powerpc/include/asm/kvm_booke.h
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -23,15 +23,16 @@
#include <linux/types.h>
#include <linux/kvm_host.h>
-/* LPIDs we support with this build -- runtime limit may be lower */
+/*
+ * Number of available lpids. Only the low-order 6 bits of the LPID register are
+ * implemented on e500mc+ cores.
+ */
#define KVMPPC_NR_LPIDS 64
#define KVMPPC_INST_EHPRIV 0x7c00021c
#define EHPRIV_OC_SHIFT 11
/* "ehpriv 1" : ehpriv with OC = 1 is used for debug emulation */
#define EHPRIV_OC_DEBUG 1
-#define KVMPPC_INST_EHPRIV_DEBUG (KVMPPC_INST_EHPRIV | \
- (EHPRIV_OC_DEBUG << EHPRIV_OC_SHIFT))
static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
{
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 98d9dd50d063..047855619cc4 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -53,14 +53,18 @@
#define KVM_ARCH_WANT_MMU_NOTIFIER
-struct kvm;
extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
extern int kvm_unmap_hva_range(struct kvm *kvm,
unsigned long start, unsigned long end);
-extern int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+ unsigned long address)
+{
+}
+
#define HPTEG_CACHE_NUM (1 << 15)
#define HPTEG_HASH_BITS_PTE 13
#define HPTEG_HASH_BITS_PTE_LONG 12
@@ -76,10 +80,6 @@ extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
/* Physical Address Mask - allowed range of real mode RAM access */
#define KVM_PAM 0x0fffffffffffffffULL
-struct kvm;
-struct kvm_run;
-struct kvm_vcpu;
-
struct lppaca;
struct slb_shadow;
struct dtl_entry;
@@ -144,6 +144,7 @@ enum kvm_exit_types {
EMULATED_TLBWE_EXITS,
EMULATED_RFI_EXITS,
EMULATED_RFCI_EXITS,
+ EMULATED_RFDI_EXITS,
DEC_EXITS,
EXT_INTR_EXITS,
HALT_WAKEUP,
@@ -589,8 +590,6 @@ struct kvm_vcpu_arch {
u32 crit_save;
/* guest debug registers*/
struct debug_reg dbg_reg;
- /* hardware visible debug registers when in guest state */
- struct debug_reg shadow_dbg_reg;
#endif
gpa_t paddr_accessed;
gva_t vaddr_accessed;
@@ -612,7 +611,6 @@ struct kvm_vcpu_arch {
u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
struct hrtimer dec_timer;
- struct tasklet_struct tasklet;
u64 dec_jiffies;
u64 dec_expires;
unsigned long pending_exceptions;
@@ -687,4 +685,12 @@ struct kvm_vcpu_arch {
#define __KVM_HAVE_ARCH_WQP
#define __KVM_HAVE_CREATE_DEVICE
+static inline void kvm_arch_hardware_disable(void) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_exit(void) {}
+
#endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index fb86a2299d8a..a6dcdb6d13c1 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -38,6 +38,12 @@
#include <asm/paca.h>
#endif
+/*
+ * KVMPPC_INST_SW_BREAKPOINT is the debug instruction
+ * used to support software breakpoints.
+ */
+#define KVMPPC_INST_SW_BREAKPOINT 0x00dddd00
+
enum emulation_result {
EMULATE_DONE, /* no further processing */
EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */
@@ -89,7 +95,7 @@ extern int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu);
extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
-extern void kvmppc_decrementer_func(unsigned long data);
+extern void kvmppc_decrementer_func(struct kvm_vcpu *vcpu);
extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu);
extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu);
@@ -206,6 +212,9 @@ extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server,
extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq);
extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq);
+void kvmppc_core_dequeue_debug(struct kvm_vcpu *vcpu);
+void kvmppc_core_queue_debug(struct kvm_vcpu *vcpu);
+
union kvmppc_one_reg {
u32 wval;
u64 dval;
@@ -243,7 +252,7 @@ struct kvmppc_ops {
int (*unmap_hva)(struct kvm *kvm, unsigned long hva);
int (*unmap_hva_range)(struct kvm *kvm, unsigned long start,
unsigned long end);
- int (*age_hva)(struct kvm *kvm, unsigned long hva);
+ int (*age_hva)(struct kvm *kvm, unsigned long start, unsigned long end);
int (*test_age_hva)(struct kvm *kvm, unsigned long hva);
void (*set_spte_hva)(struct kvm *kvm, unsigned long hva, pte_t pte);
void (*mmu_destroy)(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 1d653308a33c..16547efa2d5a 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -319,6 +319,8 @@
* DBSR bits which have conflicting definitions on true Book E versus IBM 40x.
*/
#ifdef CONFIG_BOOKE
+#define DBSR_IDE 0x80000000 /* Imprecise Debug Event */
+#define DBSR_MRR 0x30000000 /* Most Recent Reset */
#define DBSR_IC 0x08000000 /* Instruction Completion */
#define DBSR_BT 0x04000000 /* Branch Taken */
#define DBSR_IRPT 0x02000000 /* Exception Debug Event */
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index e0e49dbb145d..ab4d4732c492 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -476,6 +476,11 @@ struct kvm_get_htab_header {
/* FP and vector status/control registers */
#define KVM_REG_PPC_FPSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x80)
+/*
+ * VSCR register is documented as a 32-bit register in the ISA, but it can
+ * only be accessed via a vector register. Expose VSCR as a 32-bit register
+ * even though the kernel represents it as a 128-bit vector.
+ */
#define KVM_REG_PPC_VSCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x81)
/* Virtual processor areas */
@@ -557,6 +562,7 @@ struct kvm_get_htab_header {
#define KVM_REG_PPC_DABRX (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb8)
#define KVM_REG_PPC_WORT (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb9)
#define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba)
+#define KVM_REG_PPC_DBSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbb)
/* Transactional Memory checkpointed state:
* This is all GPRs, all VSX regs and a subset of SPRs
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index 4f1393d20079..dddba3e94260 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -91,6 +91,7 @@ _GLOBAL(setup_altivec_idle)
blr
+#ifdef CONFIG_PPC_E500MC
_GLOBAL(__setup_cpu_e6500)
mflr r6
#ifdef CONFIG_PPC64
@@ -107,14 +108,20 @@ _GLOBAL(__setup_cpu_e6500)
bl __setup_cpu_e5500
mtlr r6
blr
+#endif /* CONFIG_PPC_E500MC */
#ifdef CONFIG_PPC32
+#ifdef CONFIG_E200
_GLOBAL(__setup_cpu_e200)
/* enable dedicated debug exception handling resources (Debug APU) */
mfspr r3,SPRN_HID0
ori r3,r3,HID0_DAPUEN@l
mtspr SPRN_HID0,r3
b __setup_e200_ivors
+#endif /* CONFIG_E200 */
+
+#ifdef CONFIG_E500
+#ifndef CONFIG_PPC_E500MC
_GLOBAL(__setup_cpu_e500v1)
_GLOBAL(__setup_cpu_e500v2)
mflr r4
@@ -129,6 +136,7 @@ _GLOBAL(__setup_cpu_e500v2)
#endif
mtlr r4
blr
+#else /* CONFIG_PPC_E500MC */
_GLOBAL(__setup_cpu_e500mc)
_GLOBAL(__setup_cpu_e5500)
mflr r5
@@ -159,7 +167,9 @@ _GLOBAL(__setup_cpu_e5500)
2:
mtlr r5
blr
-#endif
+#endif /* CONFIG_PPC_E500MC */
+#endif /* CONFIG_E500 */
+#endif /* CONFIG_PPC32 */
#ifdef CONFIG_PPC_BOOK3E_64
_GLOBAL(__restore_cpu_e6500)
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 9b6dcaaec1a3..808405906336 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -1961,6 +1961,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
#endif /* CONFIG_PPC32 */
#ifdef CONFIG_E500
#ifdef CONFIG_PPC32
+#ifndef CONFIG_PPC_E500MC
{ /* e500 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x80200000,
@@ -2000,6 +2001,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_e500,
.platform = "ppc8548",
},
+#else
{ /* e500mc */
.pvr_mask = 0xffff0000,
.pvr_value = 0x80230000,
@@ -2018,7 +2020,9 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_e500mc,
.platform = "ppce500mc",
},
+#endif /* CONFIG_PPC_E500MC */
#endif /* CONFIG_PPC32 */
+#ifdef CONFIG_PPC_E500MC
{ /* e5500 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x80240000,
@@ -2062,6 +2066,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_e500mc,
.platform = "ppce6500",
},
+#endif /* CONFIG_PPC_E500MC */
#ifdef CONFIG_PPC32
{ /* default match */
.pvr_mask = 0x00000000,
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index bb9cac6c8051..3e68d1c69718 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -635,7 +635,7 @@ interrupt_end_book3e:
/* Altivec Unavailable Interrupt */
START_EXCEPTION(altivec_unavailable);
- NORMAL_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL,
+ NORMAL_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_ALTIVEC_UNAVAIL,
PROLOG_ADDITION_NONE)
/* we can probably do a shorter exception entry for that one... */
EXCEPTION_COMMON(0x200)
@@ -658,7 +658,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
/* AltiVec Assist */
START_EXCEPTION(altivec_assist);
NORMAL_EXCEPTION_PROLOG(0x220,
- BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST,
+ BOOKE_INTERRUPT_ALTIVEC_ASSIST,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x220)
INTS_DISABLE
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index b497188a94a1..fffd1f96bb1d 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -613,34 +613,36 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
mfspr r10, SPRN_SPRG_RSCRATCH0
b InstructionStorage
+/* Define SPE handlers for e200 and e500v2 */
#ifdef CONFIG_SPE
/* SPE Unavailable */
START_EXCEPTION(SPEUnavailable)
- NORMAL_EXCEPTION_PROLOG(SPE_ALTIVEC_UNAVAIL)
+ NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL)
beq 1f
bl load_up_spe
b fast_exception_return
1: addi r3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_EE_LITE(0x2010, KernelSPE)
-#else
- EXCEPTION(0x2020, SPE_ALTIVEC_UNAVAIL, SPEUnavailable, \
+#elif defined(CONFIG_SPE_POSSIBLE)
+ EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \
unknown_exception, EXC_XFER_EE)
-#endif /* CONFIG_SPE */
+#endif /* CONFIG_SPE_POSSIBLE */
/* SPE Floating Point Data */
#ifdef CONFIG_SPE
- EXCEPTION(0x2030, SPE_FP_DATA_ALTIVEC_ASSIST, SPEFloatingPointData,
+ EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData,
SPEFloatingPointException, EXC_XFER_EE)
/* SPE Floating Point Round */
EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
SPEFloatingPointRoundException, EXC_XFER_EE)
-#else
- EXCEPTION(0x2040, SPE_FP_DATA_ALTIVEC_ASSIST, SPEFloatingPointData,
+#elif defined(CONFIG_SPE_POSSIBLE)
+ EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData,
unknown_exception, EXC_XFER_EE)
EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
unknown_exception, EXC_XFER_EE)
-#endif /* CONFIG_SPE */
+#endif /* CONFIG_SPE_POSSIBLE */
+
/* Performance Monitor */
EXCEPTION(0x2060, PERFORMANCE_MONITOR, PerformanceMonitor, \
@@ -947,6 +949,7 @@ get_phys_addr:
* Global functions
*/
+#ifdef CONFIG_E200
/* Adjust or setup IVORs for e200 */
_GLOBAL(__setup_e200_ivors)
li r3,DebugDebug@l
@@ -959,7 +962,10 @@ _GLOBAL(__setup_e200_ivors)
mtspr SPRN_IVOR34,r3
sync
blr
+#endif
+#ifdef CONFIG_E500
+#ifndef CONFIG_PPC_E500MC
/* Adjust or setup IVORs for e500v1/v2 */
_GLOBAL(__setup_e500_ivors)
li r3,DebugCrit@l
@@ -974,7 +980,7 @@ _GLOBAL(__setup_e500_ivors)
mtspr SPRN_IVOR35,r3
sync
blr
-
+#else
/* Adjust or setup IVORs for e500mc */
_GLOBAL(__setup_e500mc_ivors)
li r3,DebugDebug@l
@@ -1000,6 +1006,8 @@ _GLOBAL(__setup_ehv_ivors)
mtspr SPRN_IVOR41,r3
sync
blr
+#endif /* CONFIG_PPC_E500MC */
+#endif /* CONFIG_E500 */
#ifdef CONFIG_SPE
/*
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index dd03f6b299ba..b32db4b95361 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -535,174 +535,111 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
return -ENOTSUPP;
}
-int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
{
- int r;
- union kvmppc_one_reg val;
- int size;
+ int r = 0;
long int i;
- size = one_reg_size(reg->id);
- if (size > sizeof(val))
- return -EINVAL;
-
- r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val);
+ r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, id, val);
if (r == -EINVAL) {
r = 0;
- switch (reg->id) {
+ switch (id) {
case KVM_REG_PPC_DAR:
- val = get_reg_val(reg->id, kvmppc_get_dar(vcpu));
+ *val = get_reg_val(id, kvmppc_get_dar(vcpu));
break;
case KVM_REG_PPC_DSISR:
- val = get_reg_val(reg->id, kvmppc_get_dsisr(vcpu));
+ *val = get_reg_val(id, kvmppc_get_dsisr(vcpu));
break;
case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
- i = reg->id - KVM_REG_PPC_FPR0;
- val = get_reg_val(reg->id, VCPU_FPR(vcpu, i));
+ i = id - KVM_REG_PPC_FPR0;
+ *val = get_reg_val(id, VCPU_FPR(vcpu, i));
break;
case KVM_REG_PPC_FPSCR:
- val = get_reg_val(reg->id, vcpu->arch.fp.fpscr);
- break;
-#ifdef CONFIG_ALTIVEC
- case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
- if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
- r = -ENXIO;
- break;
- }
- val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
- break;
- case KVM_REG_PPC_VSCR:
- if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
- r = -ENXIO;
- break;
- }
- val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
+ *val = get_reg_val(id, vcpu->arch.fp.fpscr);
break;
- case KVM_REG_PPC_VRSAVE:
- val = get_reg_val(reg->id, vcpu->arch.vrsave);
- break;
-#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_VSX
case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
if (cpu_has_feature(CPU_FTR_VSX)) {
- long int i = reg->id - KVM_REG_PPC_VSR0;
- val.vsxval[0] = vcpu->arch.fp.fpr[i][0];
- val.vsxval[1] = vcpu->arch.fp.fpr[i][1];
+ i = id - KVM_REG_PPC_VSR0;
+ val->vsxval[0] = vcpu->arch.fp.fpr[i][0];
+ val->vsxval[1] = vcpu->arch.fp.fpr[i][1];
} else {
r = -ENXIO;
}
break;
#endif /* CONFIG_VSX */
- case KVM_REG_PPC_DEBUG_INST: {
- u32 opcode = INS_TW;
- r = copy_to_user((u32 __user *)(long)reg->addr,
- &opcode, sizeof(u32));
+ case KVM_REG_PPC_DEBUG_INST:
+ *val = get_reg_val(id, INS_TW);
break;
- }
#ifdef CONFIG_KVM_XICS
case KVM_REG_PPC_ICP_STATE:
if (!vcpu->arch.icp) {
r = -ENXIO;
break;
}
- val = get_reg_val(reg->id, kvmppc_xics_get_icp(vcpu));
+ *val = get_reg_val(id, kvmppc_xics_get_icp(vcpu));
break;
#endif /* CONFIG_KVM_XICS */
case KVM_REG_PPC_FSCR:
- val = get_reg_val(reg->id, vcpu->arch.fscr);
+ *val = get_reg_val(id, vcpu->arch.fscr);
break;
case KVM_REG_PPC_TAR:
- val = get_reg_val(reg->id, vcpu->arch.tar);
+ *val = get_reg_val(id, vcpu->arch.tar);
break;
case KVM_REG_PPC_EBBHR:
- val = get_reg_val(reg->id, vcpu->arch.ebbhr);
+ *val = get_reg_val(id, vcpu->arch.ebbhr);
break;
case KVM_REG_PPC_EBBRR:
- val = get_reg_val(reg->id, vcpu->arch.ebbrr);
+ *val = get_reg_val(id, vcpu->arch.ebbrr);
break;
case KVM_REG_PPC_BESCR:
- val = get_reg_val(reg->id, vcpu->arch.bescr);
+ *val = get_reg_val(id, vcpu->arch.bescr);
break;
case KVM_REG_PPC_VTB:
- val = get_reg_val(reg->id, vcpu->arch.vtb);
+ *val = get_reg_val(id, vcpu->arch.vtb);
break;
case KVM_REG_PPC_IC:
- val = get_reg_val(reg->id, vcpu->arch.ic);
+ *val = get_reg_val(id, vcpu->arch.ic);
break;
default:
r = -EINVAL;
break;
}
}
- if (r)
- return r;
-
- if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
- r = -EFAULT;
return r;
}
-int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
{
- int r;
- union kvmppc_one_reg val;
- int size;
+ int r = 0;
long int i;
- size = one_reg_size(reg->id);
- if (size > sizeof(val))
- return -EINVAL;
-
- if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
- return -EFAULT;
-
- r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val);
+ r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, id, val);
if (r == -EINVAL) {
r = 0;
- switch (reg->id) {
+ switch (id) {
case KVM_REG_PPC_DAR:
- kvmppc_set_dar(vcpu, set_reg_val(reg->id, val));
+ kvmppc_set_dar(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_DSISR:
- kvmppc_set_dsisr(vcpu, set_reg_val(reg->id, val));
+ kvmppc_set_dsisr(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
- i = reg->id - KVM_REG_PPC_FPR0;
- VCPU_FPR(vcpu, i) = set_reg_val(reg->id, val);
+ i = id - KVM_REG_PPC_FPR0;
+ VCPU_FPR(vcpu, i) = set_reg_val(id, *val);
break;
case KVM_REG_PPC_FPSCR:
- vcpu->arch.fp.fpscr = set_reg_val(reg->id, val);
- break;
-#ifdef CONFIG_ALTIVEC
- case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
- if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
- r = -ENXIO;
- break;
- }
- vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
- break;
- case KVM_REG_PPC_VSCR:
- if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
- r = -ENXIO;
- break;
- }
- vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
- break;
- case KVM_REG_PPC_VRSAVE:
- if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
- r = -ENXIO;
- break;
- }
- vcpu->arch.vrsave = set_reg_val(reg->id, val);
+ vcpu->arch.fp.fpscr = set_reg_val(id, *val);
break;
-#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_VSX
case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
if (cpu_has_feature(CPU_FTR_VSX)) {
- long int i = reg->id - KVM_REG_PPC_VSR0;
- vcpu->arch.fp.fpr[i][0] = val.vsxval[0];
- vcpu->arch.fp.fpr[i][1] = val.vsxval[1];
+ i = id - KVM_REG_PPC_VSR0;
+ vcpu->arch.fp.fpr[i][0] = val->vsxval[0];
+ vcpu->arch.fp.fpr[i][1] = val->vsxval[1];
} else {
r = -ENXIO;
}
@@ -715,29 +652,29 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
break;
}
r = kvmppc_xics_set_icp(vcpu,
- set_reg_val(reg->id, val));
+ set_reg_val(id, *val));
break;
#endif /* CONFIG_KVM_XICS */
case KVM_REG_PPC_FSCR:
- vcpu->arch.fscr = set_reg_val(reg->id, val);
+ vcpu->arch.fscr = set_reg_val(id, *val);
break;
case KVM_REG_PPC_TAR:
- vcpu->arch.tar = set_reg_val(reg->id, val);
+ vcpu->arch.tar = set_reg_val(id, *val);
break;
case KVM_REG_PPC_EBBHR:
- vcpu->arch.ebbhr = set_reg_val(reg->id, val);
+ vcpu->arch.ebbhr = set_reg_val(id, *val);
break;
case KVM_REG_PPC_EBBRR:
- vcpu->arch.ebbrr = set_reg_val(reg->id, val);
+ vcpu->arch.ebbrr = set_reg_val(id, *val);
break;
case KVM_REG_PPC_BESCR:
- vcpu->arch.bescr = set_reg_val(reg->id, val);
+ vcpu->arch.bescr = set_reg_val(id, *val);
break;
case KVM_REG_PPC_VTB:
- vcpu->arch.vtb = set_reg_val(reg->id, val);
+ vcpu->arch.vtb = set_reg_val(id, *val);
break;
case KVM_REG_PPC_IC:
- vcpu->arch.ic = set_reg_val(reg->id, val);
+ vcpu->arch.ic = set_reg_val(id, *val);
break;
default:
r = -EINVAL;
@@ -778,13 +715,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
- return -EINVAL;
+ vcpu->guest_debug = dbg->control;
+ return 0;
}
-void kvmppc_decrementer_func(unsigned long data)
+void kvmppc_decrementer_func(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
-
kvmppc_core_queue_dec(vcpu);
kvm_vcpu_kick(vcpu);
}
@@ -851,9 +787,9 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end);
}
-int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
{
- return kvm->arch.kvm_ops->age_hva(kvm, hva);
+ return kvm->arch.kvm_ops->age_hva(kvm, start, end);
}
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
index 4bf956cf94d6..d2b3ec088b8c 100644
--- a/arch/powerpc/kvm/book3s.h
+++ b/arch/powerpc/kvm/book3s.h
@@ -17,7 +17,8 @@ extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva);
extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start,
unsigned long end);
-extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva);
+extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long start,
+ unsigned long end);
extern int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva);
extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 79294c4c5015..d40770248b6a 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1002,11 +1002,11 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
return ret;
}
-int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva)
+int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end)
{
if (!kvm->arch.using_mmu_notifiers)
return 0;
- return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
+ return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp);
}
static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 27cced9c7249..e63587d30b70 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -725,6 +725,30 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd)
return kvmppc_hcall_impl_hv_realmode(cmd);
}
+static int kvmppc_emulate_debug_inst(struct kvm_run *run,
+ struct kvm_vcpu *vcpu)
+{
+ u32 last_inst;
+
+ if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) !=
+ EMULATE_DONE) {
+ /*
+ * Fetch failed, so return to guest and
+ * try executing it again.
+ */
+ return RESUME_GUEST;
+ }
+
+ if (last_inst == KVMPPC_INST_SW_BREAKPOINT) {
+ run->exit_reason = KVM_EXIT_DEBUG;
+ run->debug.arch.address = kvmppc_get_pc(vcpu);
+ return RESUME_HOST;
+ } else {
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+ return RESUME_GUEST;
+ }
+}
+
static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
struct task_struct *tsk)
{
@@ -807,12 +831,18 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
/*
* This occurs if the guest executes an illegal instruction.
- * We just generate a program interrupt to the guest, since
- * we don't emulate any guest instructions at this stage.
+ * If guest debug is disabled, generate a program interrupt
+ * to the guest. If guest debug is enabled, we need to check
+ * whether the instruction is a software breakpoint instruction
+ * and return to the guest or the host accordingly.
*/
case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
- kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
- r = RESUME_GUEST;
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
+ r = kvmppc_emulate_debug_inst(run, vcpu);
+ } else {
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+ r = RESUME_GUEST;
+ }
break;
/*
* This occurs if the guest (kernel or userspace), does something that
@@ -856,7 +886,9 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu,
{
int i, j;
- kvmppc_set_pvr_hv(vcpu, sregs->pvr);
+ /* Only accept the same PVR as the host's, since we can't spoof it */
+ if (sregs->pvr != vcpu->arch.pvr)
+ return -EINVAL;
j = 0;
for (i = 0; i < vcpu->arch.slb_nr; i++) {
@@ -922,6 +954,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
long int i;
switch (id) {
+ case KVM_REG_PPC_DEBUG_INST:
+ *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT);
+ break;
case KVM_REG_PPC_HIOR:
*val = get_reg_val(id, 0);
break;
@@ -1489,7 +1524,7 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
static int kvmppc_grab_hwthread(int cpu)
{
struct paca_struct *tpaca;
- long timeout = 1000;
+ long timeout = 10000;
tpaca = &paca[cpu];
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index b9615ba5b083..4fdc27c80f4c 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -163,6 +163,12 @@ void __init kvm_cma_reserve(void)
unsigned long align_size;
struct memblock_region *reg;
phys_addr_t selected_size = 0;
+
+ /*
+ * We need CMA reservation only when we are in HV mode
+ */
+ if (!cpu_has_feature(CPU_FTR_HVMODE))
+ return;
/*
* We cannot use memblock_phys_mem_size() here, because
* memblock_analyze() has not been called yet.
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index f0c4db7704c3..edb2ccdbb2ba 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -355,6 +355,7 @@ kvmppc_hv_entry:
* MSR = ~IR|DR
* R13 = PACA
* R1 = host R1
+ * R2 = TOC
* all other volatile GPRS = free
*/
mflr r0
@@ -503,7 +504,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
toc_tlbie_lock:
.tc native_tlbie_lock[TC],native_tlbie_lock
.previous
- ld r3,toc_tlbie_lock@toc(2)
+ ld r3,toc_tlbie_lock@toc(r2)
#ifdef __BIG_ENDIAN__
lwz r8,PACA_LOCK_TOKEN(r13)
#else
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index faffb27badd9..cf2eb16846d1 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -295,7 +295,8 @@ static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start,
return 0;
}
-static int kvm_age_hva_pr(struct kvm *kvm, unsigned long hva)
+static int kvm_age_hva_pr(struct kvm *kvm, unsigned long start,
+ unsigned long end)
{
/* XXX could be more clever ;) */
return 0;
@@ -1319,6 +1320,9 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
int r = 0;
switch (id) {
+ case KVM_REG_PPC_DEBUG_INST:
+ *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT);
+ break;
case KVM_REG_PPC_HIOR:
*val = get_reg_val(id, to_book3s(vcpu)->hior);
break;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index b4c89fa6f109..9b55dec2d6cc 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -124,6 +124,40 @@ static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu)
}
#endif
+/*
+ * Load up guest vcpu FP state if it's needed.
+ * It also sets MSR_FP in the thread so that the host knows
+ * we're holding the FPU, and the host can then help save the
+ * guest vcpu FP state if other threads need to use the FPU.
+ * This simulates an FP unavailable fault.
+ *
+ * It must be called with preemption disabled.
+ */
+static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_PPC_FPU
+ if (!(current->thread.regs->msr & MSR_FP)) {
+ enable_kernel_fp();
+ load_fp_state(&vcpu->arch.fp);
+ current->thread.fp_save_area = &vcpu->arch.fp;
+ current->thread.regs->msr |= MSR_FP;
+ }
+#endif
+}
+
+/*
+ * Save guest vcpu FP state into the thread.
+ * It must be called with preemption disabled.
+ */
+static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_PPC_FPU
+ if (current->thread.regs->msr & MSR_FP)
+ giveup_fpu(current);
+ current->thread.fp_save_area = NULL;
+#endif
+}
+
static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
{
#if defined(CONFIG_PPC_FPU) && !defined(CONFIG_KVM_BOOKE_HV)
@@ -134,6 +168,40 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
#endif
}
+/*
+ * Simulate an AltiVec unavailable fault to load guest state
+ * from the thread into the AltiVec unit.
+ * It must be called with preemption disabled.
+ */
+static inline void kvmppc_load_guest_altivec(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_ALTIVEC
+ if (cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ if (!(current->thread.regs->msr & MSR_VEC)) {
+ enable_kernel_altivec();
+ load_vr_state(&vcpu->arch.vr);
+ current->thread.vr_save_area = &vcpu->arch.vr;
+ current->thread.regs->msr |= MSR_VEC;
+ }
+ }
+#endif
+}
+
+/*
+ * Save guest vcpu AltiVec state into the thread.
+ * It must be called with preemption disabled.
+ */
+static inline void kvmppc_save_guest_altivec(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_ALTIVEC
+ if (cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ if (current->thread.regs->msr & MSR_VEC)
+ giveup_altivec(current);
+ current->thread.vr_save_area = NULL;
+ }
+#endif
+}
+
static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu)
{
/* Synchronize guest's desire to get debug interrupts into shadow MSR */
@@ -267,6 +335,16 @@ static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu)
clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions);
}
+void kvmppc_core_queue_debug(struct kvm_vcpu *vcpu)
+{
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DEBUG);
+}
+
+void kvmppc_core_dequeue_debug(struct kvm_vcpu *vcpu)
+{
+ clear_bit(BOOKE_IRQPRIO_DEBUG, &vcpu->arch.pending_exceptions);
+}
+
static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
{
kvmppc_set_srr0(vcpu, srr0);
@@ -341,9 +419,15 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
case BOOKE_IRQPRIO_ITLB_MISS:
case BOOKE_IRQPRIO_SYSCALL:
case BOOKE_IRQPRIO_FP_UNAVAIL:
+#ifdef CONFIG_SPE_POSSIBLE
case BOOKE_IRQPRIO_SPE_UNAVAIL:
case BOOKE_IRQPRIO_SPE_FP_DATA:
case BOOKE_IRQPRIO_SPE_FP_ROUND:
+#endif
+#ifdef CONFIG_ALTIVEC
+ case BOOKE_IRQPRIO_ALTIVEC_UNAVAIL:
+ case BOOKE_IRQPRIO_ALTIVEC_ASSIST:
+#endif
case BOOKE_IRQPRIO_AP_UNAVAIL:
allowed = 1;
msr_mask = MSR_CE | MSR_ME | MSR_DE;
@@ -377,7 +461,11 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
allowed = vcpu->arch.shared->msr & MSR_DE;
allowed = allowed && !crit;
msr_mask = MSR_ME;
- int_class = INT_CLASS_CRIT;
+ if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
+ int_class = INT_CLASS_DBG;
+ else
+ int_class = INT_CLASS_CRIT;
+
break;
}
@@ -654,20 +742,27 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
/*
* Since we can't trap on MSR_FP in GS-mode, we consider the guest
- * as always using the FPU. Kernel usage of FP (via
- * enable_kernel_fp()) in this thread must not occur while
- * vcpu->fpu_active is set.
+ * as always using the FPU.
*/
- vcpu->fpu_active = 1;
-
kvmppc_load_guest_fp(vcpu);
#endif
+#ifdef CONFIG_ALTIVEC
+ /* Save userspace AltiVec state in stack */
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ enable_kernel_altivec();
+ /*
+ * Since we can't trap on MSR_VEC in GS-mode, we consider the guest
+ * as always using the AltiVec.
+ */
+ kvmppc_load_guest_altivec(vcpu);
+#endif
+
/* Switch to guest debug context */
- debug = vcpu->arch.shadow_dbg_reg;
+ debug = vcpu->arch.dbg_reg;
switch_booke_debug_regs(&debug);
debug = current->thread.debug;
- current->thread.debug = vcpu->arch.shadow_dbg_reg;
+ current->thread.debug = vcpu->arch.dbg_reg;
vcpu->arch.pgdir = current->mm->pgd;
kvmppc_fix_ee_before_entry();
@@ -683,8 +778,10 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
#ifdef CONFIG_PPC_FPU
kvmppc_save_guest_fp(vcpu);
+#endif
- vcpu->fpu_active = 0;
+#ifdef CONFIG_ALTIVEC
+ kvmppc_save_guest_altivec(vcpu);
#endif
out:
@@ -728,9 +825,36 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
- struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
+ struct debug_reg *dbg_reg = &(vcpu->arch.dbg_reg);
u32 dbsr = vcpu->arch.dbsr;
+ if (vcpu->guest_debug == 0) {
+ /*
+ * Debug resources belong to Guest.
+ * Imprecise debug event is not injected
+ */
+ if (dbsr & DBSR_IDE) {
+ dbsr &= ~DBSR_IDE;
+ if (!dbsr)
+ return RESUME_GUEST;
+ }
+
+ if (dbsr && (vcpu->arch.shared->msr & MSR_DE) &&
+ (vcpu->arch.dbg_reg.dbcr0 & DBCR0_IDM))
+ kvmppc_core_queue_debug(vcpu);
+
+ /* Inject a program interrupt if trap debug is not allowed */
+ if ((dbsr & DBSR_TIE) && !(vcpu->arch.shared->msr & MSR_DE))
+ kvmppc_core_queue_program(vcpu, ESR_PTR);
+
+ return RESUME_GUEST;
+ }
+
+ /*
+ * Debug resource owned by userspace.
+ * Clear guest dbsr (vcpu->arch.dbsr)
+ */
+ vcpu->arch.dbsr = 0;
run->debug.arch.status = 0;
run->debug.arch.address = vcpu->arch.pc;
@@ -868,7 +992,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
case BOOKE_INTERRUPT_DATA_STORAGE:
case BOOKE_INTERRUPT_DTLB_MISS:
case BOOKE_INTERRUPT_HV_PRIV:
- emulated = kvmppc_get_last_inst(vcpu, false, &last_inst);
+ emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
+ break;
+ case BOOKE_INTERRUPT_PROGRAM:
+ /* SW breakpoints arrive as illegal instructions on HV */
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
+ emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
break;
default:
break;
@@ -947,6 +1076,18 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case BOOKE_INTERRUPT_PROGRAM:
+ if ((vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) &&
+ (last_inst == KVMPPC_INST_SW_BREAKPOINT)) {
+ /*
+ * We are here because of an SW breakpoint instruction,
+ * so let's return to the host to handle it.
+ */
+ r = kvmppc_handle_debug(run, vcpu);
+ run->exit_reason = KVM_EXIT_DEBUG;
+ kvmppc_account_exit(vcpu, DEBUG_EXITS);
+ break;
+ }
+
if (vcpu->arch.shared->msr & (MSR_PR | MSR_GS)) {
/*
* Program traps generated by user-level software must
@@ -991,7 +1132,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND);
r = RESUME_GUEST;
break;
-#else
+#elif defined(CONFIG_SPE_POSSIBLE)
case BOOKE_INTERRUPT_SPE_UNAVAIL:
/*
* Guest wants SPE, but host kernel doesn't support it. Send
@@ -1012,6 +1153,22 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
run->hw.hardware_exit_reason = exit_nr;
r = RESUME_HOST;
break;
+#endif /* CONFIG_SPE_POSSIBLE */
+
+/*
+ * On cores with Vector category, KVM is loaded only if CONFIG_ALTIVEC,
+ * see kvmppc_core_check_processor_compat().
+ */
+#ifdef CONFIG_ALTIVEC
+ case BOOKE_INTERRUPT_ALTIVEC_UNAVAIL:
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_UNAVAIL);
+ r = RESUME_GUEST;
+ break;
+
+ case BOOKE_INTERRUPT_ALTIVEC_ASSIST:
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_ASSIST);
+ r = RESUME_GUEST;
+ break;
#endif
case BOOKE_INTERRUPT_DATA_STORAGE:
@@ -1188,6 +1345,8 @@ out:
else {
/* interrupts now hard-disabled */
kvmppc_fix_ee_before_entry();
+ kvmppc_load_guest_fp(vcpu);
+ kvmppc_load_guest_altivec(vcpu);
}
}
@@ -1243,6 +1402,11 @@ int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func,
(unsigned long)vcpu);
+ /*
+ * Clear DBSR.MRR to avoid a guest debug interrupt, as
+ * this is of host interest.
+ */
+ mtspr(SPRN_DBSR, DBSR_MRR);
return 0;
}
@@ -1457,144 +1621,125 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
}
-int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
{
int r = 0;
- union kvmppc_one_reg val;
- int size;
-
- size = one_reg_size(reg->id);
- if (size > sizeof(val))
- return -EINVAL;
- switch (reg->id) {
+ switch (id) {
case KVM_REG_PPC_IAC1:
- val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac1);
+ *val = get_reg_val(id, vcpu->arch.dbg_reg.iac1);
break;
case KVM_REG_PPC_IAC2:
- val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac2);
+ *val = get_reg_val(id, vcpu->arch.dbg_reg.iac2);
break;
#if CONFIG_PPC_ADV_DEBUG_IACS > 2
case KVM_REG_PPC_IAC3:
- val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac3);
+ *val = get_reg_val(id, vcpu->arch.dbg_reg.iac3);
break;
case KVM_REG_PPC_IAC4:
- val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac4);
+ *val = get_reg_val(id, vcpu->arch.dbg_reg.iac4);
break;
#endif
case KVM_REG_PPC_DAC1:
- val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac1);
+ *val = get_reg_val(id, vcpu->arch.dbg_reg.dac1);
break;
case KVM_REG_PPC_DAC2:
- val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac2);
+ *val = get_reg_val(id, vcpu->arch.dbg_reg.dac2);
break;
case KVM_REG_PPC_EPR: {
u32 epr = kvmppc_get_epr(vcpu);
- val = get_reg_val(reg->id, epr);
+ *val = get_reg_val(id, epr);
break;
}
#if defined(CONFIG_64BIT)
case KVM_REG_PPC_EPCR:
- val = get_reg_val(reg->id, vcpu->arch.epcr);
+ *val = get_reg_val(id, vcpu->arch.epcr);
break;
#endif
case KVM_REG_PPC_TCR:
- val = get_reg_val(reg->id, vcpu->arch.tcr);
+ *val = get_reg_val(id, vcpu->arch.tcr);
break;
case KVM_REG_PPC_TSR:
- val = get_reg_val(reg->id, vcpu->arch.tsr);
+ *val = get_reg_val(id, vcpu->arch.tsr);
break;
case KVM_REG_PPC_DEBUG_INST:
- val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV_DEBUG);
+ *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT);
break;
case KVM_REG_PPC_VRSAVE:
- val = get_reg_val(reg->id, vcpu->arch.vrsave);
+ *val = get_reg_val(id, vcpu->arch.vrsave);
break;
default:
- r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val);
+ r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, id, val);
break;
}
- if (r)
- return r;
-
- if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
- r = -EFAULT;
-
return r;
}
-int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
{
int r = 0;
- union kvmppc_one_reg val;
- int size;
- size = one_reg_size(reg->id);
- if (size > sizeof(val))
- return -EINVAL;
-
- if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
- return -EFAULT;
-
- switch (reg->id) {
+ switch (id) {
case KVM_REG_PPC_IAC1:
- vcpu->arch.dbg_reg.iac1 = set_reg_val(reg->id, val);
+ vcpu->arch.dbg_reg.iac1 = set_reg_val(id, *val);
break;
case KVM_REG_PPC_IAC2:
- vcpu->arch.dbg_reg.iac2 = set_reg_val(reg->id, val);
+ vcpu->arch.dbg_reg.iac2 = set_reg_val(id, *val);
break;
#if CONFIG_PPC_ADV_DEBUG_IACS > 2
case KVM_REG_PPC_IAC3:
- vcpu->arch.dbg_reg.iac3 = set_reg_val(reg->id, val);
+ vcpu->arch.dbg_reg.iac3 = set_reg_val(id, *val);
break;
case KVM_REG_PPC_IAC4:
- vcpu->arch.dbg_reg.iac4 = set_reg_val(reg->id, val);
+ vcpu->arch.dbg_reg.iac4 = set_reg_val(id, *val);
break;
#endif
case KVM_REG_PPC_DAC1:
- vcpu->arch.dbg_reg.dac1 = set_reg_val(reg->id, val);
+ vcpu->arch.dbg_reg.dac1 = set_reg_val(id, *val);
break;
case KVM_REG_PPC_DAC2:
- vcpu->arch.dbg_reg.dac2 = set_reg_val(reg->id, val);
+ vcpu->arch.dbg_reg.dac2 = set_reg_val(id, *val);
break;
case KVM_REG_PPC_EPR: {
- u32 new_epr = set_reg_val(reg->id, val);
+ u32 new_epr = set_reg_val(id, *val);
kvmppc_set_epr(vcpu, new_epr);
break;
}
#if defined(CONFIG_64BIT)
case KVM_REG_PPC_EPCR: {
- u32 new_epcr = set_reg_val(reg->id, val);
+ u32 new_epcr = set_reg_val(id, *val);
kvmppc_set_epcr(vcpu, new_epcr);
break;
}
#endif
case KVM_REG_PPC_OR_TSR: {
- u32 tsr_bits = set_reg_val(reg->id, val);
+ u32 tsr_bits = set_reg_val(id, *val);
kvmppc_set_tsr_bits(vcpu, tsr_bits);
break;
}
case KVM_REG_PPC_CLEAR_TSR: {
- u32 tsr_bits = set_reg_val(reg->id, val);
+ u32 tsr_bits = set_reg_val(id, *val);
kvmppc_clr_tsr_bits(vcpu, tsr_bits);
break;
}
case KVM_REG_PPC_TSR: {
- u32 tsr = set_reg_val(reg->id, val);
+ u32 tsr = set_reg_val(id, *val);
kvmppc_set_tsr(vcpu, tsr);
break;
}
case KVM_REG_PPC_TCR: {
- u32 tcr = set_reg_val(reg->id, val);
+ u32 tcr = set_reg_val(id, *val);
kvmppc_set_tcr(vcpu, tcr);
break;
}
case KVM_REG_PPC_VRSAVE:
- vcpu->arch.vrsave = set_reg_val(reg->id, val);
+ vcpu->arch.vrsave = set_reg_val(id, *val);
break;
default:
- r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val);
+ r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, id, val);
break;
}
@@ -1694,10 +1839,8 @@ void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
update_timer_ints(vcpu);
}
-void kvmppc_decrementer_func(unsigned long data)
+void kvmppc_decrementer_func(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
-
if (vcpu->arch.tcr & TCR_ARE) {
vcpu->arch.dec = vcpu->arch.decar;
kvmppc_emulate_dec(vcpu);
@@ -1842,7 +1985,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
int n, b = 0, w = 0;
if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
- vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
+ vcpu->arch.dbg_reg.dbcr0 = 0;
vcpu->guest_debug = 0;
kvm_guest_protect_msr(vcpu, MSR_DE, false);
return 0;
@@ -1850,15 +1993,13 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
kvm_guest_protect_msr(vcpu, MSR_DE, true);
vcpu->guest_debug = dbg->control;
- vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
- /* Set DBCR0_EDM in guest visible DBCR0 register. */
- vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM;
+ vcpu->arch.dbg_reg.dbcr0 = 0;
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
- vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
+ vcpu->arch.dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
/* Code below handles only HW breakpoints */
- dbg_reg = &(vcpu->arch.shadow_dbg_reg);
+ dbg_reg = &(vcpu->arch.dbg_reg);
#ifdef CONFIG_KVM_BOOKE_HV
/*
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index f753543c56fa..22ba08ea68e9 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -32,9 +32,15 @@
#define BOOKE_IRQPRIO_ALIGNMENT 2
#define BOOKE_IRQPRIO_PROGRAM 3
#define BOOKE_IRQPRIO_FP_UNAVAIL 4
+#ifdef CONFIG_SPE_POSSIBLE
#define BOOKE_IRQPRIO_SPE_UNAVAIL 5
#define BOOKE_IRQPRIO_SPE_FP_DATA 6
#define BOOKE_IRQPRIO_SPE_FP_ROUND 7
+#endif
+#ifdef CONFIG_PPC_E500MC
+#define BOOKE_IRQPRIO_ALTIVEC_UNAVAIL 5
+#define BOOKE_IRQPRIO_ALTIVEC_ASSIST 6
+#endif
#define BOOKE_IRQPRIO_SYSCALL 8
#define BOOKE_IRQPRIO_AP_UNAVAIL 9
#define BOOKE_IRQPRIO_DTLB_MISS 10
@@ -116,40 +122,6 @@ extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn,
extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn,
ulong *spr_val);
-/*
- * Load up guest vcpu FP state if it's needed.
- * It also set the MSR_FP in thread so that host know
- * we're holding FPU, and then host can help to save
- * guest vcpu FP state if other threads require to use FPU.
- * This simulates an FP unavailable fault.
- *
- * It requires to be called with preemption disabled.
- */
-static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_PPC_FPU
- if (vcpu->fpu_active && !(current->thread.regs->msr & MSR_FP)) {
- enable_kernel_fp();
- load_fp_state(&vcpu->arch.fp);
- current->thread.fp_save_area = &vcpu->arch.fp;
- current->thread.regs->msr |= MSR_FP;
- }
-#endif
-}
-
-/*
- * Save guest vcpu FP state into thread.
- * It requires to be called with preemption disabled.
- */
-static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_PPC_FPU
- if (vcpu->fpu_active && (current->thread.regs->msr & MSR_FP))
- giveup_fpu(current);
- current->thread.fp_save_area = NULL;
-#endif
-}
-
static inline void kvmppc_clear_dbsr(void)
{
mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index 28c158881d23..a82f64502de1 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -25,6 +25,7 @@
#define OP_19_XOP_RFI 50
#define OP_19_XOP_RFCI 51
+#define OP_19_XOP_RFDI 39
#define OP_31_XOP_MFMSR 83
#define OP_31_XOP_WRTEE 131
@@ -37,6 +38,12 @@ static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1);
}
+static void kvmppc_emul_rfdi(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.pc = vcpu->arch.dsrr0;
+ kvmppc_set_msr(vcpu, vcpu->arch.dsrr1);
+}
+
static void kvmppc_emul_rfci(struct kvm_vcpu *vcpu)
{
vcpu->arch.pc = vcpu->arch.csrr0;
@@ -65,6 +72,12 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
*advance = 0;
break;
+ case OP_19_XOP_RFDI:
+ kvmppc_emul_rfdi(vcpu);
+ kvmppc_set_exit_type(vcpu, EMULATED_RFDI_EXITS);
+ *advance = 0;
+ break;
+
default:
emulated = EMULATE_FAIL;
break;
@@ -118,6 +131,7 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
{
int emulated = EMULATE_DONE;
+ bool debug_inst = false;
switch (sprn) {
case SPRN_DEAR:
@@ -132,14 +146,128 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
case SPRN_CSRR1:
vcpu->arch.csrr1 = spr_val;
break;
+ case SPRN_DSRR0:
+ vcpu->arch.dsrr0 = spr_val;
+ break;
+ case SPRN_DSRR1:
+ vcpu->arch.dsrr1 = spr_val;
+ break;
+ case SPRN_IAC1:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
+ debug_inst = true;
+ vcpu->arch.dbg_reg.iac1 = spr_val;
+ break;
+ case SPRN_IAC2:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
+ debug_inst = true;
+ vcpu->arch.dbg_reg.iac2 = spr_val;
+ break;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+ case SPRN_IAC3:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
+ debug_inst = true;
+ vcpu->arch.dbg_reg.iac3 = spr_val;
+ break;
+ case SPRN_IAC4:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
+ debug_inst = true;
+ vcpu->arch.dbg_reg.iac4 = spr_val;
+ break;
+#endif
+ case SPRN_DAC1:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
+ debug_inst = true;
+ vcpu->arch.dbg_reg.dac1 = spr_val;
+ break;
+ case SPRN_DAC2:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
+ debug_inst = true;
+ vcpu->arch.dbg_reg.dac2 = spr_val;
+ break;
case SPRN_DBCR0:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
+ debug_inst = true;
+ spr_val &= (DBCR0_IDM | DBCR0_IC | DBCR0_BT | DBCR0_TIE |
+ DBCR0_IAC1 | DBCR0_IAC2 | DBCR0_IAC3 | DBCR0_IAC4 |
+ DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W);
+
vcpu->arch.dbg_reg.dbcr0 = spr_val;
break;
case SPRN_DBCR1:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
+ debug_inst = true;
vcpu->arch.dbg_reg.dbcr1 = spr_val;
break;
+ case SPRN_DBCR2:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
+ debug_inst = true;
+ vcpu->arch.dbg_reg.dbcr2 = spr_val;
+ break;
case SPRN_DBSR:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
vcpu->arch.dbsr &= ~spr_val;
+ if (!(vcpu->arch.dbsr & ~DBSR_IDE))
+ kvmppc_core_dequeue_debug(vcpu);
break;
case SPRN_TSR:
kvmppc_clr_tsr_bits(vcpu, spr_val);
@@ -252,6 +380,10 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
emulated = EMULATE_FAIL;
}
+ if (debug_inst) {
+ current->thread.debug = vcpu->arch.dbg_reg;
+ switch_booke_debug_regs(&vcpu->arch.dbg_reg);
+ }
return emulated;
}
@@ -278,12 +410,43 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
case SPRN_CSRR1:
*spr_val = vcpu->arch.csrr1;
break;
+ case SPRN_DSRR0:
+ *spr_val = vcpu->arch.dsrr0;
+ break;
+ case SPRN_DSRR1:
+ *spr_val = vcpu->arch.dsrr1;
+ break;
+ case SPRN_IAC1:
+ *spr_val = vcpu->arch.dbg_reg.iac1;
+ break;
+ case SPRN_IAC2:
+ *spr_val = vcpu->arch.dbg_reg.iac2;
+ break;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+ case SPRN_IAC3:
+ *spr_val = vcpu->arch.dbg_reg.iac3;
+ break;
+ case SPRN_IAC4:
+ *spr_val = vcpu->arch.dbg_reg.iac4;
+ break;
+#endif
+ case SPRN_DAC1:
+ *spr_val = vcpu->arch.dbg_reg.dac1;
+ break;
+ case SPRN_DAC2:
+ *spr_val = vcpu->arch.dbg_reg.dac2;
+ break;
case SPRN_DBCR0:
*spr_val = vcpu->arch.dbg_reg.dbcr0;
+ if (vcpu->guest_debug)
+ *spr_val = *spr_val | DBCR0_EDM;
break;
case SPRN_DBCR1:
*spr_val = vcpu->arch.dbg_reg.dbcr1;
break;
+ case SPRN_DBCR2:
+ *spr_val = vcpu->arch.dbg_reg.dbcr2;
+ break;
case SPRN_DBSR:
*spr_val = vcpu->arch.dbsr;
break;
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
index e9fa56a911fd..81bd8a07aa51 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -238,7 +238,7 @@ kvm_handler BOOKE_INTERRUPT_EXTERNAL, EX_PARAMS(GEN), \
kvm_handler BOOKE_INTERRUPT_ALIGNMENT, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1,(NEED_DEAR | NEED_ESR)
kvm_handler BOOKE_INTERRUPT_PROGRAM, EX_PARAMS(GEN), \
- SPRN_SRR0, SPRN_SRR1,NEED_ESR
+ SPRN_SRR0, SPRN_SRR1, (NEED_ESR | NEED_EMU)
kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, EX_PARAMS(GEN), \
@@ -256,11 +256,9 @@ kvm_handler BOOKE_INTERRUPT_DTLB_MISS, EX_PARAMS_TLB, \
SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
kvm_handler BOOKE_INTERRUPT_ITLB_MISS, EX_PARAMS_TLB, \
SPRN_SRR0, SPRN_SRR1, 0
-kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, EX_PARAMS(GEN), \
+kvm_handler BOOKE_INTERRUPT_ALTIVEC_UNAVAIL, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1, 0
-kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, EX_PARAMS(GEN), \
- SPRN_SRR0, SPRN_SRR1, 0
-kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, EX_PARAMS(GEN), \
+kvm_handler BOOKE_INTERRUPT_ALTIVEC_ASSIST, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1, 0
@@ -350,7 +348,7 @@ kvm_handler BOOKE_INTERRUPT_INST_STORAGE, SPRN_SRR0, SPRN_SRR1, NEED_ESR
kvm_handler BOOKE_INTERRUPT_EXTERNAL, SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_ALIGNMENT, \
SPRN_SRR0, SPRN_SRR1, (NEED_DEAR | NEED_ESR)
-kvm_handler BOOKE_INTERRUPT_PROGRAM, SPRN_SRR0, SPRN_SRR1, NEED_ESR
+kvm_handler BOOKE_INTERRUPT_PROGRAM, SPRN_SRR0, SPRN_SRR1, (NEED_ESR | NEED_EMU)
kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_SYSCALL, SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0
@@ -361,9 +359,6 @@ kvm_lvl_handler BOOKE_INTERRUPT_WATCHDOG, \
kvm_handler BOOKE_INTERRUPT_DTLB_MISS, \
SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
kvm_handler BOOKE_INTERRUPT_ITLB_MISS, SPRN_SRR0, SPRN_SRR1, 0
-kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0
-kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, SPRN_SRR0, SPRN_SRR1, 0
-kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_DOORBELL, SPRN_SRR0, SPRN_SRR1, 0
kvm_lvl_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, \
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index a326178bdea5..72920bed3ac6 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -22,6 +22,7 @@
#include <linux/kvm_host.h>
#include <asm/mmu-book3e.h>
#include <asm/tlb.h>
+#include <asm/cputhreads.h>
enum vcpu_ftr {
VCPU_FTR_MMU_V2
@@ -289,6 +290,25 @@ void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500);
#define kvmppc_e500_get_tlb_stid(vcpu, gtlbe) get_tlb_tid(gtlbe)
#define get_tlbmiss_tid(vcpu) get_cur_pid(vcpu)
#define get_tlb_sts(gtlbe) (gtlbe->mas1 & MAS1_TS)
+
+/*
+ * These functions should be called with preemption disabled
+ * and the returned value is valid only in that context
+ */
+static inline int get_thread_specific_lpid(int vm_lpid)
+{
+ int vcpu_lpid = vm_lpid;
+
+ if (threads_per_core == 2)
+ vcpu_lpid |= smp_processor_id() & 1;
+
+ return vcpu_lpid;
+}
+
+static inline int get_lpid(struct kvm_vcpu *vcpu)
+{
+ return get_thread_specific_lpid(vcpu->kvm->arch.lpid);
+}
#else
unsigned int kvmppc_e500_get_tlb_stid(struct kvm_vcpu *vcpu,
struct kvm_book3e_206_tlb_entry *gtlbe);
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index c99c40e9182a..ce7291c79f6c 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -259,6 +259,7 @@ int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_va
break;
/* extra exceptions */
+#ifdef CONFIG_SPE_POSSIBLE
case SPRN_IVOR32:
vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = spr_val;
break;
@@ -268,6 +269,15 @@ int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_va
case SPRN_IVOR34:
vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = spr_val;
break;
+#endif
+#ifdef CONFIG_ALTIVEC
+ case SPRN_IVOR32:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_UNAVAIL] = spr_val;
+ break;
+ case SPRN_IVOR33:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_ASSIST] = spr_val;
+ break;
+#endif
case SPRN_IVOR35:
vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val;
break;
@@ -381,6 +391,7 @@ int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_v
break;
/* extra exceptions */
+#ifdef CONFIG_SPE_POSSIBLE
case SPRN_IVOR32:
*spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
break;
@@ -390,6 +401,15 @@ int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_v
case SPRN_IVOR34:
*spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
break;
+#endif
+#ifdef CONFIG_ALTIVEC
+ case SPRN_IVOR32:
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_UNAVAIL];
+ break;
+ case SPRN_IVOR33:
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_ASSIST];
+ break;
+#endif
case SPRN_IVOR35:
*spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
break;
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 08f14bb57897..769778f855b0 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -69,7 +69,8 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
* writing shadow tlb entry to host TLB
*/
static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe,
- uint32_t mas0)
+ uint32_t mas0,
+ uint32_t lpid)
{
unsigned long flags;
@@ -80,7 +81,7 @@ static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe,
mtspr(SPRN_MAS3, (u32)stlbe->mas7_3);
mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32));
#ifdef CONFIG_KVM_BOOKE_HV
- mtspr(SPRN_MAS8, stlbe->mas8);
+ mtspr(SPRN_MAS8, MAS8_TGS | get_thread_specific_lpid(lpid));
#endif
asm volatile("isync; tlbwe" : : : "memory");
@@ -129,11 +130,12 @@ static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
if (tlbsel == 0) {
mas0 = get_host_mas0(stlbe->mas2);
- __write_host_tlbe(stlbe, mas0);
+ __write_host_tlbe(stlbe, mas0, vcpu_e500->vcpu.kvm->arch.lpid);
} else {
__write_host_tlbe(stlbe,
MAS0_TLBSEL(1) |
- MAS0_ESEL(to_htlb1_esel(sesel)));
+ MAS0_ESEL(to_htlb1_esel(sesel)),
+ vcpu_e500->vcpu.kvm->arch.lpid);
}
}
@@ -176,7 +178,7 @@ void kvmppc_map_magic(struct kvm_vcpu *vcpu)
MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR;
magic.mas8 = 0;
- __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index));
+ __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index), 0);
preempt_enable();
}
#endif
@@ -317,10 +319,6 @@ static void kvmppc_e500_setup_stlbe(
stlbe->mas2 = (gvaddr & MAS2_EPN) | (ref->flags & E500_TLB_MAS2_ATTR);
stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
-
-#ifdef CONFIG_KVM_BOOKE_HV
- stlbe->mas8 = MAS8_TGS | vcpu->kvm->arch.lpid;
-#endif
}
static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
@@ -633,7 +631,7 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type,
local_irq_save(flags);
mtspr(SPRN_MAS6, (vcpu->arch.pid << MAS6_SPID_SHIFT) | addr_space);
- mtspr(SPRN_MAS5, MAS5_SGS | vcpu->kvm->arch.lpid);
+ mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(vcpu));
asm volatile("tlbsx 0, %[geaddr]\n" : :
[geaddr] "r" (geaddr));
mtspr(SPRN_MAS5, 0);
@@ -732,7 +730,7 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
return 0;
}
-int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
{
/* XXX could be more clever ;) */
return 0;
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index 164bad2a19bf..2fdc8722e324 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -48,10 +48,11 @@ void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type)
return;
}
-
- tag = PPC_DBELL_LPID(vcpu->kvm->arch.lpid) | vcpu->vcpu_id;
+ preempt_disable();
+ tag = PPC_DBELL_LPID(get_lpid(vcpu)) | vcpu->vcpu_id;
mb();
ppc_msgsnd(dbell_type, 0, tag);
+ preempt_enable();
}
/* gtlbe must not be mapped by more than one host tlb entry */
@@ -60,12 +61,11 @@ void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
{
unsigned int tid, ts;
gva_t eaddr;
- u32 val, lpid;
+ u32 val;
unsigned long flags;
ts = get_tlb_ts(gtlbe);
tid = get_tlb_tid(gtlbe);
- lpid = vcpu_e500->vcpu.kvm->arch.lpid;
/* We search the host TLB to invalidate its shadow TLB entry */
val = (tid << 16) | ts;
@@ -74,7 +74,7 @@ void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
local_irq_save(flags);
mtspr(SPRN_MAS6, val);
- mtspr(SPRN_MAS5, MAS5_SGS | lpid);
+ mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(&vcpu_e500->vcpu));
asm volatile("tlbsx 0, %[eaddr]\n" : : [eaddr] "r" (eaddr));
val = mfspr(SPRN_MAS1);
@@ -95,7 +95,7 @@ void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500)
unsigned long flags;
local_irq_save(flags);
- mtspr(SPRN_MAS5, MAS5_SGS | vcpu_e500->vcpu.kvm->arch.lpid);
+ mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(&vcpu_e500->vcpu));
asm volatile("tlbilxlpid");
mtspr(SPRN_MAS5, 0);
local_irq_restore(flags);
@@ -110,6 +110,7 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
{
}
+/* We use two lpids per VM */
static DEFINE_PER_CPU(struct kvm_vcpu *[KVMPPC_NR_LPIDS], last_vcpu_of_lpid);
static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu)
@@ -118,10 +119,12 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu)
kvmppc_booke_vcpu_load(vcpu, cpu);
- mtspr(SPRN_LPID, vcpu->kvm->arch.lpid);
+ mtspr(SPRN_LPID, get_lpid(vcpu));
mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr);
mtspr(SPRN_GPIR, vcpu->vcpu_id);
mtspr(SPRN_MSRP, vcpu->arch.shadow_msrp);
+ vcpu->arch.eplc = EPC_EGS | (get_lpid(vcpu) << EPC_ELPID_SHIFT);
+ vcpu->arch.epsc = vcpu->arch.eplc;
mtspr(SPRN_EPLC, vcpu->arch.eplc);
mtspr(SPRN_EPSC, vcpu->arch.epsc);
@@ -141,12 +144,10 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu)
mtspr(SPRN_GESR, vcpu->arch.shared->esr);
if (vcpu->arch.oldpir != mfspr(SPRN_PIR) ||
- __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] != vcpu) {
+ __get_cpu_var(last_vcpu_of_lpid)[get_lpid(vcpu)] != vcpu) {
kvmppc_e500_tlbil_all(vcpu_e500);
- __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] = vcpu;
+ __get_cpu_var(last_vcpu_of_lpid)[get_lpid(vcpu)] = vcpu;
}
-
- kvmppc_load_guest_fp(vcpu);
}
static void kvmppc_core_vcpu_put_e500mc(struct kvm_vcpu *vcpu)
@@ -179,6 +180,16 @@ int kvmppc_core_check_processor_compat(void)
r = 0;
else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0)
r = 0;
+#ifdef CONFIG_ALTIVEC
+ /*
+ * Since guests have the privilege to enable AltiVec, we need AltiVec
+ * support in the host to save/restore their context.
+ * Don't use CPU_FTR_ALTIVEC to identify cores with an AltiVec unit,
+ * because it's cleared in the absence of CONFIG_ALTIVEC!
+ */
+ else if (strcmp(cur_cpu_spec->cpu_name, "e6500") == 0)
+ r = 0;
+#endif
else
r = -ENOTSUPP;
@@ -194,9 +205,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
#ifdef CONFIG_64BIT
vcpu->arch.shadow_epcr |= SPRN_EPCR_ICM;
#endif
- vcpu->arch.shadow_msrp = MSRP_UCLEP | MSRP_DEP | MSRP_PMMP;
- vcpu->arch.eplc = EPC_EGS | (vcpu->kvm->arch.lpid << EPC_ELPID_SHIFT);
- vcpu->arch.epsc = vcpu->arch.eplc;
+ vcpu->arch.shadow_msrp = MSRP_UCLEP | MSRP_PMMP;
vcpu->arch.pvr = mfspr(SPRN_PVR);
vcpu_e500->svr = mfspr(SPRN_SVR);
@@ -356,13 +365,26 @@ static int kvmppc_core_init_vm_e500mc(struct kvm *kvm)
if (lpid < 0)
return lpid;
+ /*
+ * Use two lpids per VM on cores with two threads, like e6500. Use
+ * even numbers to speed up the vcpu lpid computation with consecutive lpids
+ * per VM: vm1 will use lpids 2 and 3, vm2 lpids 4 and 5, and so on.
+ */
+ if (threads_per_core == 2)
+ lpid <<= 1;
+
kvm->arch.lpid = lpid;
return 0;
}
static void kvmppc_core_destroy_vm_e500mc(struct kvm *kvm)
{
- kvmppc_free_lpid(kvm->arch.lpid);
+ int lpid = kvm->arch.lpid;
+
+ if (threads_per_core == 2)
+ lpid >>= 1;
+
+ kvmppc_free_lpid(lpid);
}
static struct kvmppc_ops kvm_ops_e500mc = {
@@ -390,7 +412,13 @@ static int __init kvmppc_e500mc_init(void)
if (r)
goto err_out;
- kvmppc_init_lpid(64);
+ /*
+ * Use two lpids per VM on dual-threaded processors like e6500
+ * to work around the lack of a tlb write conditional instruction.
+ * Expose half the number of available hardware lpids to the lpid
+ * allocator.
+ */
+ kvmppc_init_lpid(KVMPPC_NR_LPIDS/threads_per_core);
kvmppc_claim_lpid(0); /* host */
r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
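
For reference, the per-thread lpid mapping implemented by get_thread_specific_lpid() is small enough to sketch on its own. The helper below is only an illustration (the names are made up), assuming the VM was allocated an even base lpid as done in kvmppc_core_init_vm_e500mc():

/* Illustrative sketch of the lpid scheme used above: each VM owns a pair
 * of consecutive lpids starting at an even number, and the low bit picks
 * the hardware thread the vcpu currently runs on.
 */
static int example_thread_lpid(int vm_base_lpid, int thread_id, int threads_per_core)
{
        int lpid = vm_base_lpid;

        if (threads_per_core == 2)
                lpid |= thread_id & 1;  /* thread 0 -> even lpid, thread 1 -> odd */

        return lpid;
}
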
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index e96b50d0bdab..5cc2e7af3a7b 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -219,7 +219,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
/* this default type might be overwritten by subcategories */
kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
- emulated = kvmppc_get_last_inst(vcpu, false, &inst);
+ emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst);
if (emulated != EMULATE_DONE)
return emulated;
@@ -274,6 +274,21 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
}
break;
+ case 0:
+ /*
+ * Instruction with primary opcode 0. According to the PowerISA,
+ * these are illegal instructions.
+ */
+ if (inst == KVMPPC_INST_SW_BREAKPOINT) {
+ run->exit_reason = KVM_EXIT_DEBUG;
+ run->debug.arch.address = kvmppc_get_pc(vcpu);
+ emulated = EMULATE_EXIT_USER;
+ advance = 0;
+ } else
+ emulated = EMULATE_FAIL;
+
+ break;
+
default:
emulated = EMULATE_FAIL;
}
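
The new case 0 above keys on the PowerPC primary opcode, i.e. the six most significant bits of the instruction word. A one-line illustration (the helper name is made up):

/* Illustrative: the primary opcode is bits 0-5 of the 32-bit instruction,
 * so an all-zero primary opcode means (inst >> 26) == 0, which is where
 * KVMPPC_INST_SW_BREAKPOINT is recognized above.
 */
static inline unsigned int example_primary_opcode(unsigned int inst)
{
        return inst >> 26;
}
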
diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
index 0de4ffa175a9..6d3c0ee1d744 100644
--- a/arch/powerpc/kvm/emulate_loadstore.c
+++ b/arch/powerpc/kvm/emulate_loadstore.c
@@ -58,7 +58,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
/* this default type might be overwritten by subcategories */
kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
- emulated = kvmppc_get_last_inst(vcpu, false, &inst);
+ emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst);
if (emulated != EMULATE_DONE)
return emulated;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 4c79284b58be..c1f8f53cd312 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -294,7 +294,7 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
u32 last_inst;
- kvmppc_get_last_inst(vcpu, false, &last_inst);
+ kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
/* XXX Deliver Program interrupt to guest. */
pr_emerg("%s: emulation failed (%08x)\n", __func__, last_inst);
r = RESUME_HOST;
@@ -384,24 +384,16 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
}
EXPORT_SYMBOL_GPL(kvmppc_ld);
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
{
return 0;
}
-void kvm_arch_hardware_disable(void *garbage)
-{
-}
-
int kvm_arch_hardware_setup(void)
{
return 0;
}
-void kvm_arch_hardware_unsetup(void)
-{
-}
-
void kvm_arch_check_processor_compat(void *rtn)
{
*(int *)rtn = kvmppc_core_check_processor_compat();
@@ -462,10 +454,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
module_put(kvm->arch.kvm_ops->owner);
}
-void kvm_arch_sync_events(struct kvm *kvm)
-{
-}
-
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r;
@@ -608,10 +596,6 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
return kvmppc_core_create_memslot(kvm, slot, npages);
}
-void kvm_arch_memslots_updated(struct kvm *kvm)
-{
-}
-
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem,
@@ -628,10 +612,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
kvmppc_core_commit_memory_region(kvm, mem, old);
}
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
-}
-
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot)
{
@@ -658,7 +638,6 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
/* Make sure we're not using the vcpu anymore */
hrtimer_cancel(&vcpu->arch.dec_timer);
- tasklet_kill(&vcpu->arch.tasklet);
kvmppc_remove_vcpu_debugfs(vcpu);
@@ -684,16 +663,12 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
return kvmppc_core_pending_dec(vcpu);
}
-/*
- * low level hrtimer wake routine. Because this runs in hardirq context
- * we schedule a tasklet to do the real work.
- */
enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
{
struct kvm_vcpu *vcpu;
vcpu = container_of(timer, struct kvm_vcpu, arch.dec_timer);
- tasklet_schedule(&vcpu->arch.tasklet);
+ kvmppc_decrementer_func(vcpu);
return HRTIMER_NORESTART;
}
@@ -703,7 +678,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
int ret;
hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
- tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu);
vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
vcpu->arch.dec_expires = ~(u64)0;
@@ -927,6 +901,103 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
EXPORT_SYMBOL_GPL(kvmppc_handle_store);
+int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+ int r = 0;
+ union kvmppc_one_reg val;
+ int size;
+
+ size = one_reg_size(reg->id);
+ if (size > sizeof(val))
+ return -EINVAL;
+
+ r = kvmppc_get_one_reg(vcpu, reg->id, &val);
+ if (r == -EINVAL) {
+ r = 0;
+ switch (reg->id) {
+#ifdef CONFIG_ALTIVEC
+ case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
+ break;
+ case KVM_REG_PPC_VSCR:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
+ break;
+ case KVM_REG_PPC_VRSAVE:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ vcpu->arch.vrsave = set_reg_val(reg->id, val);
+ break;
+#endif /* CONFIG_ALTIVEC */
+ default:
+ r = -EINVAL;
+ break;
+ }
+ }
+
+ if (r)
+ return r;
+
+ if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
+ r = -EFAULT;
+
+ return r;
+}
+
+int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+ int r;
+ union kvmppc_one_reg val;
+ int size;
+
+ size = one_reg_size(reg->id);
+ if (size > sizeof(val))
+ return -EINVAL;
+
+ if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
+ return -EFAULT;
+
+ r = kvmppc_set_one_reg(vcpu, reg->id, &val);
+ if (r == -EINVAL) {
+ r = 0;
+ switch (reg->id) {
+#ifdef CONFIG_ALTIVEC
+ case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
+ break;
+ case KVM_REG_PPC_VSCR:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
+ break;
+ case KVM_REG_PPC_VRSAVE:
+ val = get_reg_val(reg->id, vcpu->arch.vrsave);
+ break;
+#endif /* CONFIG_ALTIVEC */
+ default:
+ r = -EINVAL;
+ break;
+ }
+ }
+
+ return r;
+}
+
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
int r;
@@ -1343,9 +1414,4 @@ int kvm_arch_init(void *opaque)
return 0;
}
-void kvm_arch_exit(void)
-{
-
-}
-
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr);
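
The generic get/set one-reg handlers added above sit behind the standard KVM_GET_ONE_REG / KVM_SET_ONE_REG ioctls. A minimal user-space sketch of reading VRSAVE through that path (vcpu_fd is assumed to be an open vcpu file descriptor, error handling omitted; not part of the patch):

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Illustrative user-space sketch: read VRSAVE via the one-reg interface. */
static int example_get_vrsave(int vcpu_fd, __u64 *value)
{
        struct kvm_one_reg reg = {
                .id   = KVM_REG_PPC_VRSAVE,
                .addr = (__u64)(unsigned long)value,
        };

        return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}
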
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index e8bc40869cbd..7d9ee3d8c618 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -303,9 +303,13 @@ config PPC_ICSWX_USE_SIGILL
If in doubt, say N here.
+config SPE_POSSIBLE
+ def_bool y
+ depends on E200 || (E500 && !PPC_E500MC)
+
config SPE
bool "SPE Support"
- depends on E200 || (E500 && !PPC_E500MC)
+ depends on SPE_POSSIBLE
default y
---help---
This option enables kernel support for the Signal Processing
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index b44eec3e8dbd..4b005ae5dc4b 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -322,7 +322,7 @@ static void opal_handle_message(void)
/* check for errors. */
if (ret) {
- pr_warning("%s: Failed to retrive opal message, err=%lld\n",
+ pr_warning("%s: Failed to retrieve opal message, err=%lld\n",
__func__, ret);
return;
}
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 17ee193960a0..34064f50945e 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -126,7 +126,7 @@ static int pseries_remove_mem_node(struct device_node *np)
return 0;
/*
- * Find the bae address and size of the memblock
+ * Find the base address and size of the memblock
*/
regs = of_get_property(np, "reg", NULL);
if (!regs)
@@ -200,7 +200,7 @@ static int pseries_update_drconf_memory(struct of_prop_reconfig *pr)
/* The first int of the property is the number of lmb's described
* by the property. This is followed by an array of of_drconf_cell
- * entries. Get the niumber of entries and skip to the array of
+ * entries. Get the number of entries and skip to the array of
* of_drconf_cell's.
*/
entries = be32_to_cpu(*p++);
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 773bef7614d8..2175f911a73a 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -13,8 +13,11 @@
#ifndef ASM_KVM_HOST_H
#define ASM_KVM_HOST_H
+
+#include <linux/types.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
+#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <asm/debug.h>
@@ -154,7 +157,9 @@ struct kvm_s390_sie_block {
__u8 armid; /* 0x00e3 */
__u8 reservede4[4]; /* 0x00e4 */
__u64 tecmc; /* 0x00e8 */
- __u8 reservedf0[16]; /* 0x00f0 */
+ __u8 reservedf0[12]; /* 0x00f0 */
+#define CRYCB_FORMAT1 0x00000001
+ __u32 crycbd; /* 0x00fc */
__u64 gcr[16]; /* 0x0100 */
__u64 gbea; /* 0x0180 */
__u8 reserved188[24]; /* 0x0188 */
@@ -187,6 +192,7 @@ struct kvm_vcpu_stat {
u32 exit_stop_request;
u32 exit_validity;
u32 exit_instruction;
+ u32 halt_wakeup;
u32 instruction_lctl;
u32 instruction_lctlg;
u32 instruction_stctl;
@@ -407,6 +413,15 @@ struct s390_io_adapter {
#define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8)
#define MAX_S390_ADAPTER_MAPS 256
+struct kvm_s390_crypto {
+ struct kvm_s390_crypto_cb *crycb;
+ __u32 crycbd;
+};
+
+struct kvm_s390_crypto_cb {
+ __u8 reserved00[128]; /* 0x0000 */
+};
+
struct kvm_arch{
struct sca_block *sca;
debug_info_t *dbf;
@@ -420,6 +435,7 @@ struct kvm_arch{
struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
wait_queue_head_t ipte_wq;
spinlock_t start_stop_lock;
+ struct kvm_s390_crypto crypto;
};
#define KVM_HVA_ERR_BAD (-1UL)
@@ -431,8 +447,6 @@ static inline bool kvm_is_error_hva(unsigned long addr)
}
#define ASYNC_PF_PER_VCPU 64
-struct kvm_vcpu;
-struct kvm_async_pf;
struct kvm_arch_async_pf {
unsigned long pfault_token;
};
@@ -450,4 +464,18 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
extern int sie64a(struct kvm_s390_sie_block *, u64 *);
extern char sie_exit;
+
+static inline void kvm_arch_hardware_disable(void) {}
+static inline void kvm_arch_check_processor_compat(void *rtn) {}
+static inline void kvm_arch_exit(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_free_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *slot) {}
+
#endif
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 9e18a61d3df3..d39a31c3cdf2 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -18,9 +18,9 @@
unsigned long *crst_table_alloc(struct mm_struct *);
void crst_table_free(struct mm_struct *, unsigned long *);
-unsigned long *page_table_alloc(struct mm_struct *, unsigned long);
+unsigned long *page_table_alloc(struct mm_struct *);
void page_table_free(struct mm_struct *, unsigned long *);
-void page_table_free_rcu(struct mmu_gather *, unsigned long *);
+void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long,
bool init_skey);
@@ -145,8 +145,8 @@ static inline void pmd_populate(struct mm_struct *mm,
/*
* page table entry allocation/free routines.
*/
-#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr))
-#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr))
+#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
+#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
#define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
#define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 5efb2fe186e7..b7054356cc98 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -30,6 +30,7 @@
#include <linux/sched.h>
#include <linux/mm_types.h>
#include <linux/page-flags.h>
+#include <linux/radix-tree.h>
#include <asm/bug.h>
#include <asm/page.h>
@@ -789,82 +790,67 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
/**
* struct gmap_struct - guest address space
+ * @crst_list: list of all crst tables used in the guest address space
* @mm: pointer to the parent mm_struct
+ * @guest_to_host: radix tree with guest to host address translation
+ * @host_to_guest: radix tree with pointer to segment table entries
+ * @guest_table_lock: spinlock to protect all entries in the guest page table
* @table: pointer to the page directory
* @asce: address space control element for gmap page table
- * @crst_list: list of all crst tables used in the guest address space
* @pfault_enabled: defines if pfaults are applicable for the guest
*/
struct gmap {
struct list_head list;
+ struct list_head crst_list;
struct mm_struct *mm;
+ struct radix_tree_root guest_to_host;
+ struct radix_tree_root host_to_guest;
+ spinlock_t guest_table_lock;
unsigned long *table;
unsigned long asce;
+ unsigned long asce_end;
void *private;
- struct list_head crst_list;
bool pfault_enabled;
};
/**
- * struct gmap_rmap - reverse mapping for segment table entries
- * @gmap: pointer to the gmap_struct
- * @entry: pointer to a segment table entry
- * @vmaddr: virtual address in the guest address space
- */
-struct gmap_rmap {
- struct list_head list;
- struct gmap *gmap;
- unsigned long *entry;
- unsigned long vmaddr;
-};
-
-/**
- * struct gmap_pgtable - gmap information attached to a page table
- * @vmaddr: address of the 1MB segment in the process virtual memory
- * @mapper: list of segment table entries mapping a page table
- */
-struct gmap_pgtable {
- unsigned long vmaddr;
- struct list_head mapper;
-};
-
-/**
* struct gmap_notifier - notify function block for page invalidation
* @notifier_call: address of callback function
*/
struct gmap_notifier {
struct list_head list;
- void (*notifier_call)(struct gmap *gmap, unsigned long address);
+ void (*notifier_call)(struct gmap *gmap, unsigned long gaddr);
};
-struct gmap *gmap_alloc(struct mm_struct *mm);
+struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit);
void gmap_free(struct gmap *gmap);
void gmap_enable(struct gmap *gmap);
void gmap_disable(struct gmap *gmap);
int gmap_map_segment(struct gmap *gmap, unsigned long from,
unsigned long to, unsigned long len);
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
-unsigned long __gmap_translate(unsigned long address, struct gmap *);
-unsigned long gmap_translate(unsigned long address, struct gmap *);
-unsigned long __gmap_fault(unsigned long address, struct gmap *);
-unsigned long gmap_fault(unsigned long address, struct gmap *);
-void gmap_discard(unsigned long from, unsigned long to, struct gmap *);
-void __gmap_zap(unsigned long address, struct gmap *);
+unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
+unsigned long gmap_translate(struct gmap *, unsigned long gaddr);
+int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
+int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags);
+void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
+void __gmap_zap(struct gmap *, unsigned long gaddr);
bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *);
void gmap_register_ipte_notifier(struct gmap_notifier *);
void gmap_unregister_ipte_notifier(struct gmap_notifier *);
int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len);
-void gmap_do_ipte_notify(struct mm_struct *, pte_t *);
+void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *);
static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
+ unsigned long addr,
pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
if (pgste_val(pgste) & PGSTE_IN_BIT) {
pgste_val(pgste) &= ~PGSTE_IN_BIT;
- gmap_do_ipte_notify(mm, ptep);
+ gmap_do_ipte_notify(mm, addr, ptep);
}
#endif
return pgste;
@@ -1110,7 +1096,7 @@ static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
pgste_val(pgste) &= ~PGSTE_UC_BIT;
pte = *ptep;
if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
- pgste = pgste_ipte_notify(mm, ptep, pgste);
+ pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
__ptep_ipte(addr, ptep);
if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
pte_val(pte) |= _PAGE_PROTECT;
@@ -1132,7 +1118,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
if (mm_has_pgste(vma->vm_mm)) {
pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste);
+ pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste);
}
oldpte = pte = *ptep;
@@ -1179,7 +1165,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(mm, ptep, pgste);
+ pgste = pgste_ipte_notify(mm, address, ptep, pgste);
}
pte = *ptep;
@@ -1203,7 +1189,7 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
- pgste_ipte_notify(mm, ptep, pgste);
+ pgste_ipte_notify(mm, address, ptep, pgste);
}
pte = *ptep;
@@ -1240,7 +1226,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
if (mm_has_pgste(vma->vm_mm)) {
pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste);
+ pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
}
pte = *ptep;
@@ -1274,7 +1260,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
if (!full && mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(mm, ptep, pgste);
+ pgste = pgste_ipte_notify(mm, address, ptep, pgste);
}
pte = *ptep;
@@ -1299,7 +1285,7 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
if (pte_write(pte)) {
if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(mm, ptep, pgste);
+ pgste = pgste_ipte_notify(mm, address, ptep, pgste);
}
ptep_flush_lazy(mm, address, ptep);
@@ -1325,7 +1311,7 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
return 0;
if (mm_has_pgste(vma->vm_mm)) {
pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste);
+ pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
}
ptep_flush_direct(vma->vm_mm, address, ptep);
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index a25f09fbaf36..572c59949004 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -105,7 +105,7 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
unsigned long address)
{
- page_table_free_rcu(tlb, (unsigned long *) pte);
+ page_table_free_rcu(tlb, (unsigned long *) pte, address);
}
/*
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 0fc26430a1e5..48eda3ab4944 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -111,12 +111,22 @@ struct kvm_guest_debug_arch {
#define KVM_SYNC_GPRS (1UL << 1)
#define KVM_SYNC_ACRS (1UL << 2)
#define KVM_SYNC_CRS (1UL << 3)
+#define KVM_SYNC_ARCH0 (1UL << 4)
+#define KVM_SYNC_PFAULT (1UL << 5)
/* definition of registers in kvm_run */
struct kvm_sync_regs {
__u64 prefix; /* prefix register */
__u64 gprs[16]; /* general purpose registers */
__u32 acrs[16]; /* access registers */
__u64 crs[16]; /* control registers */
+ __u64 todpr; /* tod programmable register [ARCH0] */
+ __u64 cputm; /* cpu timer [ARCH0] */
+ __u64 ckc; /* clock comparator [ARCH0] */
+ __u64 pp; /* program parameter [ARCH0] */
+ __u64 gbea; /* guest breaking-event address [ARCH0] */
+ __u64 pft; /* pfault token [PFAULT] */
+ __u64 pfs; /* pfault select [PFAULT] */
+ __u64 pfc; /* pfault compare [PFAULT] */
};
#define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 59bd8f991b98..9254afff250c 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -28,22 +28,32 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096;
- if (start & ~PAGE_MASK || end & ~PAGE_MASK || start > end
+ if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end
|| start < 2 * PAGE_SIZE)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end);
vcpu->stat.diagnose_10++;
- /* we checked for start > end above */
- if (end < prefix || start >= prefix + 2 * PAGE_SIZE) {
- gmap_discard(start, end, vcpu->arch.gmap);
+ /*
+ * We checked for start >= end above, so let's check for the
+ * fast path (no prefix swap page involved).
+ */
+ if (end <= prefix || start >= prefix + 2 * PAGE_SIZE) {
+ gmap_discard(vcpu->arch.gmap, start, end);
} else {
- if (start < prefix)
- gmap_discard(start, prefix, vcpu->arch.gmap);
- if (end >= prefix)
- gmap_discard(prefix + 2 * PAGE_SIZE,
- end, vcpu->arch.gmap);
+ /*
+ * This is the slow path. gmap_discard checks its start/end
+ * arguments, so let's split this into before-prefix, prefix, and
+ * after-prefix ranges and let gmap_discard turn some of these
+ * calls into NOPs.
+ */
+ gmap_discard(vcpu->arch.gmap, start, prefix);
+ if (start <= prefix)
+ gmap_discard(vcpu->arch.gmap, 0, 4096);
+ if (end > prefix + 4096)
+ gmap_discard(vcpu->arch.gmap, 4096, 8192);
+ gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end);
}
return 0;
}
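
To make the prefix splitting above concrete, here is an illustrative trace with made-up addresses (4 KiB pages, prefix area at 0x10000, request start = 0x8000, end = 0x20000):

/* gmap_discard(gmap, 0x8000, 0x10000)  -> guest pages below the prefix
 * gmap_discard(gmap, 0, 4096)          -> absolute page backing prefix page 0
 * gmap_discard(gmap, 4096, 8192)       -> absolute page backing prefix page 1
 * gmap_discard(gmap, 0x12000, 0x20000) -> guest pages above the prefix
 */
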
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 4653ac6e182b..0f961a1c64b3 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -254,8 +254,7 @@ static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
new = old = ACCESS_ONCE(*ic);
new.k = 0;
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
- if (!ipte_lock_count)
- wake_up(&vcpu->kvm->arch.ipte_wq);
+ wake_up(&vcpu->kvm->arch.ipte_wq);
out:
mutex_unlock(&ipte_mutex);
}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index f4c819bfc193..a39838457f01 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -26,8 +26,9 @@
#define IOINT_SSID_MASK 0x00030000
#define IOINT_CSSID_MASK 0x03fc0000
#define IOINT_AI_MASK 0x04000000
+#define PFAULT_INIT 0x0600
-static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu);
+static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu);
static int is_ioint(u64 type)
{
@@ -76,7 +77,7 @@ static u64 int_word_to_isc_bits(u32 int_word)
return (0x80 >> isc) << 24;
}
-static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
+static int __must_check __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
struct kvm_s390_interrupt_info *inti)
{
switch (inti->type) {
@@ -85,6 +86,7 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
return 0;
if (vcpu->arch.sie_block->gcr[0] & 0x2000ul)
return 1;
+ return 0;
case KVM_S390_INT_EMERGENCY:
if (psw_extint_disabled(vcpu))
return 0;
@@ -205,11 +207,30 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
}
}
-static int __deliver_prog_irq(struct kvm_vcpu *vcpu,
- struct kvm_s390_pgm_info *pgm_info)
+static u16 get_ilc(struct kvm_vcpu *vcpu)
{
const unsigned short table[] = { 2, 4, 4, 6 };
+
+ switch (vcpu->arch.sie_block->icptcode) {
+ case ICPT_INST:
+ case ICPT_INSTPROGI:
+ case ICPT_OPEREXC:
+ case ICPT_PARTEXEC:
+ case ICPT_IOINST:
+ /* last instruction only stored for these icptcodes */
+ return table[vcpu->arch.sie_block->ipa >> 14];
+ case ICPT_PROGI:
+ return vcpu->arch.sie_block->pgmilc;
+ default:
+ return 0;
+ }
+}
+
+static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu,
+ struct kvm_s390_pgm_info *pgm_info)
+{
int rc = 0;
+ u16 ilc = get_ilc(vcpu);
switch (pgm_info->code & ~PGM_PER) {
case PGM_AFX_TRANSLATION:
@@ -276,25 +297,7 @@ static int __deliver_prog_irq(struct kvm_vcpu *vcpu,
(u8 *) __LC_PER_ACCESS_ID);
}
- switch (vcpu->arch.sie_block->icptcode) {
- case ICPT_INST:
- case ICPT_INSTPROGI:
- case ICPT_OPEREXC:
- case ICPT_PARTEXEC:
- case ICPT_IOINST:
- /* last instruction only stored for these icptcodes */
- rc |= put_guest_lc(vcpu, table[vcpu->arch.sie_block->ipa >> 14],
- (u16 *) __LC_PGM_ILC);
- break;
- case ICPT_PROGI:
- rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->pgmilc,
- (u16 *) __LC_PGM_ILC);
- break;
- default:
- rc |= put_guest_lc(vcpu, 0,
- (u16 *) __LC_PGM_ILC);
- }
-
+ rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC);
rc |= put_guest_lc(vcpu, pgm_info->code,
(u16 *)__LC_PGM_INT_CODE);
rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
@@ -305,7 +308,7 @@ static int __deliver_prog_irq(struct kvm_vcpu *vcpu,
return rc;
}
-static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
+static int __must_check __do_deliver_interrupt(struct kvm_vcpu *vcpu,
struct kvm_s390_interrupt_info *inti)
{
const unsigned short table[] = { 2, 4, 4, 6 };
@@ -343,7 +346,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
case KVM_S390_INT_CLOCK_COMP:
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
inti->ext.ext_params, 0);
- deliver_ckc_interrupt(vcpu);
+ rc = deliver_ckc_interrupt(vcpu);
break;
case KVM_S390_INT_CPU_TIMER:
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
@@ -376,8 +379,9 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
case KVM_S390_INT_PFAULT_INIT:
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
inti->ext.ext_params2);
- rc = put_guest_lc(vcpu, 0x2603, (u16 *) __LC_EXT_INT_CODE);
- rc |= put_guest_lc(vcpu, 0x0600, (u16 *) __LC_EXT_CPU_ADDR);
+ rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
+ (u16 *) __LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR);
rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
@@ -501,14 +505,11 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
default:
BUG();
}
- if (rc) {
- printk("kvm: The guest lowcore is not mapped during interrupt "
- "delivery, killing userspace\n");
- do_exit(SIGKILL);
- }
+
+ return rc;
}
-static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
+static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
{
int rc;
@@ -518,11 +519,7 @@ static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
&vcpu->arch.sie_block->gpsw,
sizeof(psw_t));
- if (rc) {
- printk("kvm: The guest lowcore is not mapped during interrupt "
- "delivery, killing userspace\n");
- do_exit(SIGKILL);
- }
+ return rc;
}
/* Check whether SIGP interpretation facility has an external call pending */
@@ -629,6 +626,7 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
*/
vcpu->preempted = true;
wake_up_interruptible(&vcpu->wq);
+ vcpu->stat.halt_wakeup++;
}
}
@@ -661,12 +659,13 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
&vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl);
}
-void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
+int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
struct kvm_s390_interrupt_info *n, *inti = NULL;
int deliver;
+ int rc = 0;
__reset_intercept_indicators(vcpu);
if (atomic_read(&li->active)) {
@@ -685,16 +684,16 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
atomic_set(&li->active, 0);
spin_unlock(&li->lock);
if (deliver) {
- __do_deliver_interrupt(vcpu, inti);
+ rc = __do_deliver_interrupt(vcpu, inti);
kfree(inti);
}
- } while (deliver);
+ } while (!rc && deliver);
}
- if (kvm_cpu_has_pending_timer(vcpu))
- deliver_ckc_interrupt(vcpu);
+ if (!rc && kvm_cpu_has_pending_timer(vcpu))
+ rc = deliver_ckc_interrupt(vcpu);
- if (atomic_read(&fi->active)) {
+ if (!rc && atomic_read(&fi->active)) {
do {
deliver = 0;
spin_lock(&fi->lock);
@@ -711,67 +710,13 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
atomic_set(&fi->active, 0);
spin_unlock(&fi->lock);
if (deliver) {
- __do_deliver_interrupt(vcpu, inti);
- kfree(inti);
- }
- } while (deliver);
- }
-}
-
-void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu)
-{
- struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
- struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
- struct kvm_s390_interrupt_info *n, *inti = NULL;
- int deliver;
-
- __reset_intercept_indicators(vcpu);
- if (atomic_read(&li->active)) {
- do {
- deliver = 0;
- spin_lock(&li->lock);
- list_for_each_entry_safe(inti, n, &li->list, list) {
- if ((inti->type == KVM_S390_MCHK) &&
- __interrupt_is_deliverable(vcpu, inti)) {
- list_del(&inti->list);
- deliver = 1;
- break;
- }
- __set_intercept_indicator(vcpu, inti);
- }
- if (list_empty(&li->list))
- atomic_set(&li->active, 0);
- spin_unlock(&li->lock);
- if (deliver) {
- __do_deliver_interrupt(vcpu, inti);
+ rc = __do_deliver_interrupt(vcpu, inti);
kfree(inti);
}
- } while (deliver);
+ } while (!rc && deliver);
}
- if (atomic_read(&fi->active)) {
- do {
- deliver = 0;
- spin_lock(&fi->lock);
- list_for_each_entry_safe(inti, n, &fi->list, list) {
- if ((inti->type == KVM_S390_MCHK) &&
- __interrupt_is_deliverable(vcpu, inti)) {
- list_del(&inti->list);
- fi->irq_count--;
- deliver = 1;
- break;
- }
- __set_intercept_indicator(vcpu, inti);
- }
- if (list_empty(&fi->list))
- atomic_set(&fi->active, 0);
- spin_unlock(&fi->lock);
- if (deliver) {
- __do_deliver_interrupt(vcpu, inti);
- kfree(inti);
- }
- } while (deliver);
- }
+ return rc;
}
int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
@@ -1048,7 +993,6 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, s390int->type, s390int->parm,
s390int->parm64, 2);
- mutex_lock(&vcpu->kvm->lock);
li = &vcpu->arch.local_int;
spin_lock(&li->lock);
if (inti->type == KVM_S390_PROGRAM_INT)
@@ -1060,7 +1004,6 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
li->action_bits |= ACTION_STOP_ON_STOP;
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
spin_unlock(&li->lock);
- mutex_unlock(&vcpu->kvm->lock);
kvm_s390_vcpu_wakeup(vcpu);
return 0;
}
@@ -1300,7 +1243,7 @@ static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr)
}
INIT_LIST_HEAD(&map->list);
map->guest_addr = addr;
- map->addr = gmap_translate(addr, kvm->arch.gmap);
+ map->addr = gmap_translate(kvm->arch.gmap, addr);
if (map->addr == -EFAULT) {
ret = -EFAULT;
goto out;
@@ -1410,7 +1353,6 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
r = enqueue_floating_irq(dev, attr);
break;
case KVM_DEV_FLIC_CLEAR_IRQS:
- r = 0;
kvm_s390_clear_float_irqs(dev->kvm);
break;
case KVM_DEV_FLIC_APF_ENABLE:
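
The { 2, 4, 4, 6 } table used by get_ilc() above encodes the s390 rule that the two most significant bits of an instruction's first halfword determine its length, and ipa >> 14 extracts exactly those bits. A few illustrative values:

/* ipa = 0x0700 -> top bits 00 -> table[0] = 2-byte instruction (e.g. BCR)
 * ipa = 0xb200 -> top bits 10 -> table[2] = 4-byte instruction (e.g. RRE format)
 * ipa = 0xe300 -> top bits 11 -> table[3] = 6-byte instruction (e.g. RXY format)
 */
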
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 81b0e11521e4..55aade49b6d1 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -50,6 +50,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "exit_instruction", VCPU_STAT(exit_instruction) },
{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
+ { "halt_wakeup", VCPU_STAT(halt_wakeup) },
{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
@@ -100,16 +101,12 @@ int test_vfacility(unsigned long nr)
}
/* Section: not file related */
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
{
/* every s390 is virtualization enabled ;-) */
return 0;
}
-void kvm_arch_hardware_disable(void *garbage)
-{
-}
-
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
int kvm_arch_hardware_setup(void)
@@ -124,17 +121,10 @@ void kvm_arch_hardware_unsetup(void)
gmap_unregister_ipte_notifier(&gmap_notifier);
}
-void kvm_arch_check_processor_compat(void *rtn)
-{
-}
-
int kvm_arch_init(void *opaque)
{
- return 0;
-}
-
-void kvm_arch_exit(void)
-{
+ /* Register floating interrupt controller interface. */
+ return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}
/* Section: device related */
@@ -404,6 +394,22 @@ long kvm_arch_vm_ioctl(struct file *filp,
return r;
}
+static int kvm_s390_crypto_init(struct kvm *kvm)
+{
+ if (!test_vfacility(76))
+ return 0;
+
+ kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
+ GFP_KERNEL | GFP_DMA);
+ if (!kvm->arch.crypto.crycb)
+ return -ENOMEM;
+
+ kvm->arch.crypto.crycbd = (__u32) (unsigned long) kvm->arch.crypto.crycb |
+ CRYCB_FORMAT1;
+
+ return 0;
+}
+
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
int rc;
@@ -441,6 +447,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (!kvm->arch.dbf)
goto out_nodbf;
+ if (kvm_s390_crypto_init(kvm) < 0)
+ goto out_crypto;
+
spin_lock_init(&kvm->arch.float_int.lock);
INIT_LIST_HEAD(&kvm->arch.float_int.list);
init_waitqueue_head(&kvm->arch.ipte_wq);
@@ -451,7 +460,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (type & KVM_VM_S390_UCONTROL) {
kvm->arch.gmap = NULL;
} else {
- kvm->arch.gmap = gmap_alloc(current->mm);
+ kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
if (!kvm->arch.gmap)
goto out_nogmap;
kvm->arch.gmap->private = kvm;
@@ -465,6 +474,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
return 0;
out_nogmap:
+ kfree(kvm->arch.crypto.crycb);
+out_crypto:
debug_unregister(kvm->arch.dbf);
out_nodbf:
free_page((unsigned long)(kvm->arch.sca));
@@ -514,15 +525,12 @@ static void kvm_free_vcpus(struct kvm *kvm)
mutex_unlock(&kvm->lock);
}
-void kvm_arch_sync_events(struct kvm *kvm)
-{
-}
-
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_free_vcpus(kvm);
free_page((unsigned long)(kvm->arch.sca));
debug_unregister(kvm->arch.dbf);
+ kfree(kvm->arch.crypto.crycb);
if (!kvm_is_ucontrol(kvm))
gmap_free(kvm->arch.gmap);
kvm_s390_destroy_adapters(kvm);
@@ -535,7 +543,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
kvm_clear_async_pf_completion_queue(vcpu);
if (kvm_is_ucontrol(vcpu->kvm)) {
- vcpu->arch.gmap = gmap_alloc(current->mm);
+ vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
if (!vcpu->arch.gmap)
return -ENOMEM;
vcpu->arch.gmap->private = vcpu->kvm;
@@ -546,15 +554,12 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
KVM_SYNC_GPRS |
KVM_SYNC_ACRS |
- KVM_SYNC_CRS;
+ KVM_SYNC_CRS |
+ KVM_SYNC_ARCH0 |
+ KVM_SYNC_PFAULT;
return 0;
}
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
-{
- /* Nothing todo */
-}
-
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
@@ -607,6 +612,14 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
return 0;
}
+static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
+{
+ if (!test_vfacility(76))
+ return;
+
+ vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
+}
+
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
free_page(vcpu->arch.sie_block->cbrlo);
@@ -653,6 +666,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
get_cpu_id(&vcpu->arch.cpu_id);
vcpu->arch.cpu_id.version = 0xff;
+
+ kvm_s390_vcpu_crypto_setup(vcpu);
+
return rc;
}
@@ -1049,6 +1065,11 @@ retry:
goto retry;
}
+ if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
+ vcpu->arch.sie_block->ihcpu = 0xffff;
+ goto retry;
+ }
+
if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
if (!ibs_enabled(vcpu)) {
trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
@@ -1085,18 +1106,8 @@ retry:
*/
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
- struct mm_struct *mm = current->mm;
- hva_t hva;
- long rc;
-
- hva = gmap_fault(gpa, vcpu->arch.gmap);
- if (IS_ERR_VALUE(hva))
- return (long)hva;
- down_read(&mm->mmap_sem);
- rc = get_user_pages(current, mm, hva, 1, writable, 0, NULL, NULL);
- up_read(&mm->mmap_sem);
-
- return rc < 0 ? rc : 0;
+ return gmap_fault(vcpu->arch.gmap, gpa,
+ writable ? FAULT_FLAG_WRITE : 0);
}
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
@@ -1191,8 +1202,11 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
if (test_cpu_flag(CIF_MCCK_PENDING))
s390_handle_mcck();
- if (!kvm_is_ucontrol(vcpu->kvm))
- kvm_s390_deliver_pending_interrupts(vcpu);
+ if (!kvm_is_ucontrol(vcpu->kvm)) {
+ rc = kvm_s390_deliver_pending_interrupts(vcpu);
+ if (rc)
+ return rc;
+ }
rc = kvm_s390_handle_requests(vcpu);
if (rc)
@@ -1296,6 +1310,48 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
return rc;
}
+static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
+ vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
+ kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
+ memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
+ /* some control register changes require a tlb flush */
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ }
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
+ vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
+ vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
+ vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
+ vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
+ vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
+ }
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
+ vcpu->arch.pfault_token = kvm_run->s.regs.pft;
+ vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
+ vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
+ }
+ kvm_run->kvm_dirty_regs = 0;
+}
+
+static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
+ kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
+ kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
+ memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
+ kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
+ kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
+ kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
+ kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
+ kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
+ kvm_run->s.regs.pft = vcpu->arch.pfault_token;
+ kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
+ kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
+}
+
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
int rc;
@@ -1317,17 +1373,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return -EINVAL;
}
- vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
- vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
- if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) {
- kvm_run->kvm_dirty_regs &= ~KVM_SYNC_PREFIX;
- kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
- }
- if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
- kvm_run->kvm_dirty_regs &= ~KVM_SYNC_CRS;
- memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
- kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
- }
+ sync_regs(vcpu, kvm_run);
might_fault();
rc = __vcpu_run(vcpu);
@@ -1357,10 +1403,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
rc = 0;
}
- kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
- kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
- kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
- memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
+ store_regs(vcpu, kvm_run);
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
@@ -1489,7 +1532,7 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
* Another VCPU might have used IBS while we were offline.
* Let's play safe and flush the VCPU at startup.
*/
- vcpu->arch.sie_block->ihcpu = 0xffff;
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
spin_unlock(&vcpu->kvm->arch.start_stop_lock);
return;
}
@@ -1644,9 +1687,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
#endif
case KVM_S390_VCPU_FAULT: {
- r = gmap_fault(arg, vcpu->arch.gmap);
- if (!IS_ERR_VALUE(r))
- r = 0;
+ r = gmap_fault(vcpu->arch.gmap, arg, 0);
break;
}
case KVM_ENABLE_CAP:
@@ -1677,21 +1718,12 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}
-void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
-{
-}
-
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
unsigned long npages)
{
return 0;
}
-void kvm_arch_memslots_updated(struct kvm *kvm)
-{
-}
-
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
@@ -1737,15 +1769,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
return;
}
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
-}
-
-void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
- struct kvm_memory_slot *slot)
-{
-}
-
static int __init kvm_s390_init(void)
{
int ret;
@@ -1764,7 +1787,7 @@ static int __init kvm_s390_init(void)
return -ENOMEM;
}
memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16);
- vfacilities[0] &= 0xff82fff3f4fc2000UL;
+ vfacilities[0] &= 0xff82fffbf47c2000UL;
vfacilities[1] &= 0x005c000000000000UL;
return 0;
}
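
With sync_regs()/store_regs() factored out and the new KVM_SYNC_ARCH0 / KVM_SYNC_PFAULT flags advertised in kvm_valid_regs, user space can pass this state through the mmap'ed kvm_run area instead of separate ioctls. A minimal sketch (run points at the vcpu's mmap'ed struct kvm_run; names are only illustrative, not part of the patch):

/* Illustrative user-space sketch of the register synchronization protocol. */
run->s.regs.ckc = new_clock_comparator;   /* updated value to sync in */
run->kvm_dirty_regs |= KVM_SYNC_ARCH0;    /* mark it dirty for sync_regs() */
ioctl(vcpu_fd, KVM_RUN, 0);
/* after KVM_RUN, run->s.regs.* holds the guest state written by store_regs() */
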
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 3862fa2cefe0..244d02303182 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -70,7 +70,7 @@ static inline u32 kvm_s390_get_prefix(struct kvm_vcpu *vcpu)
static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
{
vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT;
- vcpu->arch.sie_block->ihcpu = 0xffff;
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
}
@@ -138,8 +138,7 @@ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
-void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
-void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu);
+int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu);
void kvm_s390_clear_float_irqs(struct kvm *kvm);
int __must_check kvm_s390_inject_vm(struct kvm *kvm,
@@ -228,6 +227,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int psw_extint_disabled(struct kvm_vcpu *vcpu);
void kvm_s390_destroy_adapters(struct kvm *kvm);
int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu);
+extern struct kvm_device_ops kvm_flic_ops;
/* implemented in guestdbg.c */
void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index f89c1cd67751..72bb2dd8b9cd 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -352,13 +352,6 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
return 0;
}
-static void handle_new_psw(struct kvm_vcpu *vcpu)
-{
- /* Check whether the new psw is enabled for machine checks. */
- if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_MCHECK)
- kvm_s390_deliver_pending_machine_checks(vcpu);
-}
-
#define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA)
#define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL
#define PSW_ADDR_24 0x0000000000ffffffUL
@@ -405,7 +398,6 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE;
if (!is_valid_psw(gpsw))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- handle_new_psw(vcpu);
return 0;
}
@@ -427,7 +419,6 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->gpsw = new_psw;
if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- handle_new_psw(vcpu);
return 0;
}
@@ -738,7 +729,7 @@ static int handle_essa(struct kvm_vcpu *vcpu)
/* invalid entry */
break;
/* try to free backing */
- __gmap_zap(cbrle, gmap);
+ __gmap_zap(gmap, cbrle);
}
up_read(&gmap->mm->mmap_sem);
if (i < entries)
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 3f3b35403d0a..a2b81d6ce8a5 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -442,18 +442,15 @@ static inline int do_exception(struct pt_regs *regs, int access)
down_read(&mm->mmap_sem);
#ifdef CONFIG_PGSTE
- gmap = (struct gmap *)
- ((current->flags & PF_VCPU) ? S390_lowcore.gmap : 0);
+ gmap = (current->flags & PF_VCPU) ?
+ (struct gmap *) S390_lowcore.gmap : NULL;
if (gmap) {
- address = __gmap_fault(address, gmap);
+ current->thread.gmap_addr = address;
+ address = __gmap_translate(gmap, address);
if (address == -EFAULT) {
fault = VM_FAULT_BADMAP;
goto out_up;
}
- if (address == -ENOMEM) {
- fault = VM_FAULT_OOM;
- goto out_up;
- }
if (gmap->pfault_enabled)
flags |= FAULT_FLAG_RETRY_NOWAIT;
}
@@ -530,6 +527,20 @@ retry:
goto retry;
}
}
+#ifdef CONFIG_PGSTE
+ if (gmap) {
+ address = __gmap_link(gmap, current->thread.gmap_addr,
+ address);
+ if (address == -EFAULT) {
+ fault = VM_FAULT_BADMAP;
+ goto out_up;
+ }
+ if (address == -ENOMEM) {
+ fault = VM_FAULT_OOM;
+ goto out_up;
+ }
+ }
+#endif
fault = 0;
out_up:
up_read(&mm->mmap_sem);
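
The reworked fault path above splits guest fault resolution into a translation step before the ordinary mm fault handling and a linking step afterwards. A simplified sketch of the flow (not the actual handler; locking and retry handling omitted):

/* Simplified sketch of the two-phase gmap fault handling above. */
current->thread.gmap_addr = gaddr;                /* remember the guest address */
vmaddr = __gmap_translate(gmap, gaddr);           /* guest -> host virtual address */
if (vmaddr == -EFAULT)
        return VM_FAULT_BADMAP;
fault = handle_mm_fault(mm, vma, vmaddr, flags);  /* fault in the host page */
if (!(fault & VM_FAULT_ERROR))
        rc = __gmap_link(gmap, gaddr, vmaddr);    /* wire it into the guest table */
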
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 5404a6261db9..296b61a4af59 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -145,30 +145,56 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
/**
* gmap_alloc - allocate a guest address space
* @mm: pointer to the parent mm_struct
+ * @limit: maximum size of the gmap address space
*
* Returns a guest address space structure.
*/
-struct gmap *gmap_alloc(struct mm_struct *mm)
+struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
{
struct gmap *gmap;
struct page *page;
unsigned long *table;
-
+ unsigned long etype, atype;
+
+ if (limit < (1UL << 31)) {
+ limit = (1UL << 31) - 1;
+ atype = _ASCE_TYPE_SEGMENT;
+ etype = _SEGMENT_ENTRY_EMPTY;
+ } else if (limit < (1UL << 42)) {
+ limit = (1UL << 42) - 1;
+ atype = _ASCE_TYPE_REGION3;
+ etype = _REGION3_ENTRY_EMPTY;
+ } else if (limit < (1UL << 53)) {
+ limit = (1UL << 53) - 1;
+ atype = _ASCE_TYPE_REGION2;
+ etype = _REGION2_ENTRY_EMPTY;
+ } else {
+ limit = -1UL;
+ atype = _ASCE_TYPE_REGION1;
+ etype = _REGION1_ENTRY_EMPTY;
+ }
gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
if (!gmap)
goto out;
INIT_LIST_HEAD(&gmap->crst_list);
+ INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL);
+ INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
+ spin_lock_init(&gmap->guest_table_lock);
gmap->mm = mm;
page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
if (!page)
goto out_free;
+ page->index = 0;
list_add(&page->lru, &gmap->crst_list);
table = (unsigned long *) page_to_phys(page);
- crst_table_init(table, _REGION1_ENTRY_EMPTY);
+ crst_table_init(table, etype);
gmap->table = table;
- gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS | __pa(table);
+ gmap->asce = atype | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | __pa(table);
+ gmap->asce_end = limit;
+ down_write(&mm->mmap_sem);
list_add(&gmap->list, &mm->context.gmap_list);
+ up_write(&mm->mmap_sem);
return gmap;
out_free:
@@ -178,36 +204,38 @@ out:
}
EXPORT_SYMBOL_GPL(gmap_alloc);
-static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
-{
- struct gmap_pgtable *mp;
- struct gmap_rmap *rmap;
- struct page *page;
-
- if (*table & _SEGMENT_ENTRY_INVALID)
- return 0;
- page = pfn_to_page(*table >> PAGE_SHIFT);
- mp = (struct gmap_pgtable *) page->index;
- list_for_each_entry(rmap, &mp->mapper, list) {
- if (rmap->entry != table)
- continue;
- list_del(&rmap->list);
- kfree(rmap);
- break;
- }
- *table = mp->vmaddr | _SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_PROTECT;
- return 1;
-}
-
static void gmap_flush_tlb(struct gmap *gmap)
{
if (MACHINE_HAS_IDTE)
- __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table |
- _ASCE_TYPE_REGION1);
+ __tlb_flush_asce(gmap->mm, gmap->asce);
else
__tlb_flush_global();
}
+static void gmap_radix_tree_free(struct radix_tree_root *root)
+{
+ struct radix_tree_iter iter;
+ unsigned long indices[16];
+ unsigned long index;
+ void **slot;
+ int i, nr;
+
+ /* A radix tree is freed by deleting all of its entries */
+ index = 0;
+ do {
+ nr = 0;
+ radix_tree_for_each_slot(slot, root, &iter, index) {
+ indices[nr] = iter.index;
+ if (++nr == 16)
+ break;
+ }
+ for (i = 0; i < nr; i++) {
+ index = indices[i];
+ radix_tree_delete(root, index);
+ }
+ } while (nr > 0);
+}
+
/**
* gmap_free - free a guest address space
* @gmap: pointer to the guest address space structure
@@ -215,31 +243,21 @@ static void gmap_flush_tlb(struct gmap *gmap)
void gmap_free(struct gmap *gmap)
{
struct page *page, *next;
- unsigned long *table;
- int i;
-
/* Flush tlb. */
if (MACHINE_HAS_IDTE)
- __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table |
- _ASCE_TYPE_REGION1);
+ __tlb_flush_asce(gmap->mm, gmap->asce);
else
__tlb_flush_global();
/* Free all segment & region tables. */
- down_read(&gmap->mm->mmap_sem);
- spin_lock(&gmap->mm->page_table_lock);
- list_for_each_entry_safe(page, next, &gmap->crst_list, lru) {
- table = (unsigned long *) page_to_phys(page);
- if ((*table & _REGION_ENTRY_TYPE_MASK) == 0)
- /* Remove gmap rmap structures for segment table. */
- for (i = 0; i < PTRS_PER_PMD; i++, table++)
- gmap_unlink_segment(gmap, table);
+ list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
__free_pages(page, ALLOC_ORDER);
- }
- spin_unlock(&gmap->mm->page_table_lock);
- up_read(&gmap->mm->mmap_sem);
+ gmap_radix_tree_free(&gmap->guest_to_host);
+ gmap_radix_tree_free(&gmap->host_to_guest);
+ down_write(&gmap->mm->mmap_sem);
list_del(&gmap->list);
+ up_write(&gmap->mm->mmap_sem);
kfree(gmap);
}
EXPORT_SYMBOL_GPL(gmap_free);
@@ -267,42 +285,97 @@ EXPORT_SYMBOL_GPL(gmap_disable);
/*
* gmap_alloc_table is assumed to be called with mmap_sem held
*/
-static int gmap_alloc_table(struct gmap *gmap,
- unsigned long *table, unsigned long init)
- __releases(&gmap->mm->page_table_lock)
- __acquires(&gmap->mm->page_table_lock)
+static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
+ unsigned long init, unsigned long gaddr)
{
struct page *page;
unsigned long *new;
/* since we don't free the gmap table until gmap_free we can unlock */
- spin_unlock(&gmap->mm->page_table_lock);
page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
- spin_lock(&gmap->mm->page_table_lock);
if (!page)
return -ENOMEM;
new = (unsigned long *) page_to_phys(page);
crst_table_init(new, init);
+ spin_lock(&gmap->mm->page_table_lock);
if (*table & _REGION_ENTRY_INVALID) {
list_add(&page->lru, &gmap->crst_list);
*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
(*table & _REGION_ENTRY_TYPE_MASK);
- } else
+ page->index = gaddr;
+ page = NULL;
+ }
+ spin_unlock(&gmap->mm->page_table_lock);
+ if (page)
__free_pages(page, ALLOC_ORDER);
return 0;
}
/**
+ * __gmap_segment_gaddr - find virtual address from segment pointer
+ * @entry: pointer to a segment table entry in the guest address space
+ *
+ * Returns the virtual address in the guest address space for the segment
+ */
+static unsigned long __gmap_segment_gaddr(unsigned long *entry)
+{
+ struct page *page;
+ unsigned long offset;
+
+ offset = (unsigned long) entry / sizeof(unsigned long);
+ offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
+ page = pmd_to_page((pmd_t *) entry);
+ return page->index + offset;
+}
+
+/**
+ * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
+ * @gmap: pointer to the guest address space structure
+ * @vmaddr: address in the host process address space
+ *
+ * Returns 1 if a TLB flush is required
+ */
+static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
+{
+ unsigned long *entry;
+ int flush = 0;
+
+ spin_lock(&gmap->guest_table_lock);
+ entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
+ if (entry) {
+ flush = (*entry != _SEGMENT_ENTRY_INVALID);
+ *entry = _SEGMENT_ENTRY_INVALID;
+ }
+ spin_unlock(&gmap->guest_table_lock);
+ return flush;
+}
+
+/**
+ * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
+ * @gmap: pointer to the guest address space structure
+ * @gaddr: address in the guest address space
+ *
+ * Returns 1 if a TLB flush is required
+ */
+static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
+{
+ unsigned long vmaddr;
+
+ vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
+ gaddr >> PMD_SHIFT);
+ return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
+}
+
+/**
* gmap_unmap_segment - unmap segment from the guest address space
* @gmap: pointer to the guest address space structure
- * @addr: address in the guest address space
+ * @to: address in the guest address space
* @len: length of the memory area to unmap
*
* Returns 0 if the unmap succeeded, -EINVAL if not.
*/
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
{
- unsigned long *table;
unsigned long off;
int flush;
@@ -312,31 +385,10 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
return -EINVAL;
flush = 0;
- down_read(&gmap->mm->mmap_sem);
- spin_lock(&gmap->mm->page_table_lock);
- for (off = 0; off < len; off += PMD_SIZE) {
- /* Walk the guest addr space page table */
- table = gmap->table + (((to + off) >> 53) & 0x7ff);
- if (*table & _REGION_ENTRY_INVALID)
- goto out;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + (((to + off) >> 42) & 0x7ff);
- if (*table & _REGION_ENTRY_INVALID)
- goto out;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + (((to + off) >> 31) & 0x7ff);
- if (*table & _REGION_ENTRY_INVALID)
- goto out;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + (((to + off) >> 20) & 0x7ff);
-
- /* Clear segment table entry in guest address space. */
- flush |= gmap_unlink_segment(gmap, table);
- *table = _SEGMENT_ENTRY_INVALID;
- }
-out:
- spin_unlock(&gmap->mm->page_table_lock);
- up_read(&gmap->mm->mmap_sem);
+ down_write(&gmap->mm->mmap_sem);
+ for (off = 0; off < len; off += PMD_SIZE)
+ flush |= __gmap_unmap_by_gaddr(gmap, to + off);
+ up_write(&gmap->mm->mmap_sem);
if (flush)
gmap_flush_tlb(gmap);
return 0;
@@ -348,87 +400,47 @@ EXPORT_SYMBOL_GPL(gmap_unmap_segment);
* @gmap: pointer to the guest address space structure
* @from: source address in the parent address space
* @to: target address in the guest address space
+ * @len: length of the memory area to map
*
* Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
*/
int gmap_map_segment(struct gmap *gmap, unsigned long from,
unsigned long to, unsigned long len)
{
- unsigned long *table;
unsigned long off;
int flush;
if ((from | to | len) & (PMD_SIZE - 1))
return -EINVAL;
- if (len == 0 || from + len > TASK_MAX_SIZE ||
- from + len < from || to + len < to)
+ if (len == 0 || from + len < from || to + len < to ||
+ from + len > TASK_MAX_SIZE || to + len > gmap->asce_end)
return -EINVAL;
flush = 0;
- down_read(&gmap->mm->mmap_sem);
- spin_lock(&gmap->mm->page_table_lock);
+ down_write(&gmap->mm->mmap_sem);
for (off = 0; off < len; off += PMD_SIZE) {
- /* Walk the gmap address space page table */
- table = gmap->table + (((to + off) >> 53) & 0x7ff);
- if ((*table & _REGION_ENTRY_INVALID) &&
- gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY))
- goto out_unmap;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + (((to + off) >> 42) & 0x7ff);
- if ((*table & _REGION_ENTRY_INVALID) &&
- gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY))
- goto out_unmap;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + (((to + off) >> 31) & 0x7ff);
- if ((*table & _REGION_ENTRY_INVALID) &&
- gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY))
- goto out_unmap;
- table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN);
- table = table + (((to + off) >> 20) & 0x7ff);
-
- /* Store 'from' address in an invalid segment table entry. */
- flush |= gmap_unlink_segment(gmap, table);
- *table = (from + off) | (_SEGMENT_ENTRY_INVALID |
- _SEGMENT_ENTRY_PROTECT);
+ /* Remove old translation */
+ flush |= __gmap_unmap_by_gaddr(gmap, to + off);
+ /* Store new translation */
+ if (radix_tree_insert(&gmap->guest_to_host,
+ (to + off) >> PMD_SHIFT,
+ (void *) from + off))
+ break;
}
- spin_unlock(&gmap->mm->page_table_lock);
- up_read(&gmap->mm->mmap_sem);
+ up_write(&gmap->mm->mmap_sem);
if (flush)
gmap_flush_tlb(gmap);
- return 0;
-
-out_unmap:
- spin_unlock(&gmap->mm->page_table_lock);
- up_read(&gmap->mm->mmap_sem);
+ if (off >= len)
+ return 0;
gmap_unmap_segment(gmap, to, len);
return -ENOMEM;
}
EXPORT_SYMBOL_GPL(gmap_map_segment);
-static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap)
-{
- unsigned long *table;
-
- table = gmap->table + ((address >> 53) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INVALID))
- return ERR_PTR(-EFAULT);
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + ((address >> 42) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INVALID))
- return ERR_PTR(-EFAULT);
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + ((address >> 31) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INVALID))
- return ERR_PTR(-EFAULT);
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + ((address >> 20) & 0x7ff);
- return table;
-}
-
/**
* __gmap_translate - translate a guest address to a user space address
- * @address: guest address
* @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
*
* Returns user space address which corresponds to the guest address or
* -EFAULT if no such mapping exists.
@@ -436,168 +448,161 @@ static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap)
* The mmap_sem of the mm that belongs to the address space must be held
* when this function gets called.
*/
-unsigned long __gmap_translate(unsigned long address, struct gmap *gmap)
+unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
- unsigned long *segment_ptr, vmaddr, segment;
- struct gmap_pgtable *mp;
- struct page *page;
+ unsigned long vmaddr;
- current->thread.gmap_addr = address;
- segment_ptr = gmap_table_walk(address, gmap);
- if (IS_ERR(segment_ptr))
- return PTR_ERR(segment_ptr);
- /* Convert the gmap address to an mm address. */
- segment = *segment_ptr;
- if (!(segment & _SEGMENT_ENTRY_INVALID)) {
- page = pfn_to_page(segment >> PAGE_SHIFT);
- mp = (struct gmap_pgtable *) page->index;
- return mp->vmaddr | (address & ~PMD_MASK);
- } else if (segment & _SEGMENT_ENTRY_PROTECT) {
- vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
- return vmaddr | (address & ~PMD_MASK);
- }
- return -EFAULT;
+ vmaddr = (unsigned long)
+ radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
+ return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
}
EXPORT_SYMBOL_GPL(__gmap_translate);
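For illustration, a hedged sketch of the new lookup; the addresses are invented, and PMD_SHIFT == 20 reflects the usual s390 1 MB segment size:

	unsigned long vmaddr;

	/* Sketch only: for gaddr = 0x12345678 the guest_to_host tree is
	 * indexed with gaddr >> PMD_SHIFT = 0x123, and the low 20 bits
	 * (0x45678) remain the offset within the 1 MB segment.  If index
	 * 0x123 maps to the host segment base 0x3fe00000 the result is
	 * 0x3fe45678; a missing index yields -EFAULT. */
	down_read(&gmap->mm->mmap_sem);
	vmaddr = __gmap_translate(gmap, 0x12345678UL);
	up_read(&gmap->mm->mmap_sem);
	if (IS_ERR_VALUE(vmaddr))
		return -EFAULT;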
/**
* gmap_translate - translate a guest address to a user space address
- * @address: guest address
* @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
*
* Returns user space address which corresponds to the guest address or
* -EFAULT if no such mapping exists.
* This function does not establish potentially missing page table entries.
*/
-unsigned long gmap_translate(unsigned long address, struct gmap *gmap)
+unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
unsigned long rc;
down_read(&gmap->mm->mmap_sem);
- rc = __gmap_translate(address, gmap);
+ rc = __gmap_translate(gmap, gaddr);
up_read(&gmap->mm->mmap_sem);
return rc;
}
EXPORT_SYMBOL_GPL(gmap_translate);
-static int gmap_connect_pgtable(unsigned long address, unsigned long segment,
- unsigned long *segment_ptr, struct gmap *gmap)
+/**
+ * gmap_unlink - disconnect a page table from the gmap shadow tables
+ * @mm: pointer to the parent mm_struct
+ * @table: pointer to the host page table
+ * @vmaddr: vm address associated with the host page table
+ */
+static void gmap_unlink(struct mm_struct *mm, unsigned long *table,
+ unsigned long vmaddr)
+{
+ struct gmap *gmap;
+ int flush;
+
+ list_for_each_entry(gmap, &mm->context.gmap_list, list) {
+ flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
+ if (flush)
+ gmap_flush_tlb(gmap);
+ }
+}
+
+/**
+ * __gmap_link - set up shadow page tables to connect a host address to a guest address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ * @vmaddr: vm address
+ *
+ * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
+ * if the vm address is already mapped to a different guest segment.
+ * The mmap_sem of the mm that belongs to the address space must be held
+ * when this function gets called.
+ */
+int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
{
- unsigned long vmaddr;
- struct vm_area_struct *vma;
- struct gmap_pgtable *mp;
- struct gmap_rmap *rmap;
struct mm_struct *mm;
- struct page *page;
+ unsigned long *table;
+ spinlock_t *ptl;
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
+ int rc;
- mm = gmap->mm;
- vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
- vma = find_vma(mm, vmaddr);
- if (!vma || vma->vm_start > vmaddr)
- return -EFAULT;
+ /* Create higher level tables in the gmap page table */
+ table = gmap->table;
+ if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
+ table += (gaddr >> 53) & 0x7ff;
+ if ((*table & _REGION_ENTRY_INVALID) &&
+ gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
+ gaddr & 0xffe0000000000000))
+ return -ENOMEM;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ }
+ if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
+ table += (gaddr >> 42) & 0x7ff;
+ if ((*table & _REGION_ENTRY_INVALID) &&
+ gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
+ gaddr & 0xfffffc0000000000))
+ return -ENOMEM;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ }
+ if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
+ table += (gaddr >> 31) & 0x7ff;
+ if ((*table & _REGION_ENTRY_INVALID) &&
+ gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
+ gaddr & 0xffffffff80000000))
+ return -ENOMEM;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ }
+ table += (gaddr >> 20) & 0x7ff;
/* Walk the parent mm page table */
+ mm = gmap->mm;
pgd = pgd_offset(mm, vmaddr);
- pud = pud_alloc(mm, pgd, vmaddr);
- if (!pud)
- return -ENOMEM;
- pmd = pmd_alloc(mm, pud, vmaddr);
- if (!pmd)
- return -ENOMEM;
- if (!pmd_present(*pmd) &&
- __pte_alloc(mm, vma, pmd, vmaddr))
- return -ENOMEM;
+ VM_BUG_ON(pgd_none(*pgd));
+ pud = pud_offset(pgd, vmaddr);
+ VM_BUG_ON(pud_none(*pud));
+ pmd = pmd_offset(pud, vmaddr);
+ VM_BUG_ON(pmd_none(*pmd));
/* large pmds cannot yet be handled */
if (pmd_large(*pmd))
return -EFAULT;
- /* pmd now points to a valid segment table entry. */
- rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
- if (!rmap)
- return -ENOMEM;
/* Link gmap segment table entry location to page table. */
- page = pmd_page(*pmd);
- mp = (struct gmap_pgtable *) page->index;
- rmap->gmap = gmap;
- rmap->entry = segment_ptr;
- rmap->vmaddr = address & PMD_MASK;
- spin_lock(&mm->page_table_lock);
- if (*segment_ptr == segment) {
- list_add(&rmap->list, &mp->mapper);
- /* Set gmap segment table entry to page table. */
- *segment_ptr = pmd_val(*pmd) & PAGE_MASK;
- rmap = NULL;
- }
- spin_unlock(&mm->page_table_lock);
- kfree(rmap);
- return 0;
-}
-
-static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table)
-{
- struct gmap_rmap *rmap, *next;
- struct gmap_pgtable *mp;
- struct page *page;
- int flush;
-
- flush = 0;
- spin_lock(&mm->page_table_lock);
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- mp = (struct gmap_pgtable *) page->index;
- list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
- *rmap->entry = mp->vmaddr | (_SEGMENT_ENTRY_INVALID |
- _SEGMENT_ENTRY_PROTECT);
- list_del(&rmap->list);
- kfree(rmap);
- flush = 1;
- }
- spin_unlock(&mm->page_table_lock);
- if (flush)
- __tlb_flush_global();
+ rc = radix_tree_preload(GFP_KERNEL);
+ if (rc)
+ return rc;
+ ptl = pmd_lock(mm, pmd);
+ spin_lock(&gmap->guest_table_lock);
+ if (*table == _SEGMENT_ENTRY_INVALID) {
+ rc = radix_tree_insert(&gmap->host_to_guest,
+ vmaddr >> PMD_SHIFT, table);
+ if (!rc)
+ *table = pmd_val(*pmd);
+ } else
+ rc = 0;
+ spin_unlock(&gmap->guest_table_lock);
+ spin_unlock(ptl);
+ radix_tree_preload_end();
+ return rc;
}
-/*
- * this function is assumed to be called with mmap_sem held
+/**
+ * gmap_fault - resolve a fault on a guest address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ * @fault_flags: flags to pass down to handle_mm_fault()
+ *
+ * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
+ * if no gmap mapping exists for the guest address or the backing host
+ * page could not be faulted in.
*/
-unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
+int gmap_fault(struct gmap *gmap, unsigned long gaddr,
+ unsigned int fault_flags)
{
- unsigned long *segment_ptr, segment;
- struct gmap_pgtable *mp;
- struct page *page;
+ unsigned long vmaddr;
int rc;
- current->thread.gmap_addr = address;
- segment_ptr = gmap_table_walk(address, gmap);
- if (IS_ERR(segment_ptr))
- return -EFAULT;
- /* Convert the gmap address to an mm address. */
- while (1) {
- segment = *segment_ptr;
- if (!(segment & _SEGMENT_ENTRY_INVALID)) {
- /* Page table is present */
- page = pfn_to_page(segment >> PAGE_SHIFT);
- mp = (struct gmap_pgtable *) page->index;
- return mp->vmaddr | (address & ~PMD_MASK);
- }
- if (!(segment & _SEGMENT_ENTRY_PROTECT))
- /* Nothing mapped in the gmap address space. */
- break;
- rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap);
- if (rc)
- return rc;
- }
- return -EFAULT;
-}
-
-unsigned long gmap_fault(unsigned long address, struct gmap *gmap)
-{
- unsigned long rc;
-
down_read(&gmap->mm->mmap_sem);
- rc = __gmap_fault(address, gmap);
+ vmaddr = __gmap_translate(gmap, gaddr);
+ if (IS_ERR_VALUE(vmaddr)) {
+ rc = vmaddr;
+ goto out_up;
+ }
+ if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags)) {
+ rc = -EFAULT;
+ goto out_up;
+ }
+ rc = __gmap_link(gmap, gaddr, vmaddr);
+out_up:
up_read(&gmap->mm->mmap_sem);
-
return rc;
}
EXPORT_SYMBOL_GPL(gmap_fault);
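A short usage sketch; the write-fault flag and the surrounding error handling are assumptions for illustration, since gmap_fault() now bundles the translate, fixup_user_fault() and __gmap_link() steps:

	int rc;

	/* Sketch only: fault in the host page backing guest address gaddr
	 * for a write access and connect it to the shadow page tables. */
	rc = gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE);
	if (rc)
		return rc;	/* -EFAULT: no usable mapping, -ENOMEM: allocation failed */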
@@ -617,17 +622,24 @@ static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm)
free_swap_and_cache(entry);
}
-/**
- * The mm->mmap_sem lock must be held
+/*
+ * this function is assumed to be called with mmap_sem held
*/
-static void gmap_zap_unused(struct mm_struct *mm, unsigned long address)
+void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
{
- unsigned long ptev, pgstev;
+ unsigned long vmaddr, ptev, pgstev;
+ pte_t *ptep, pte;
spinlock_t *ptl;
pgste_t pgste;
- pte_t *ptep, pte;
- ptep = get_locked_pte(mm, address, &ptl);
+ /* Find the vm address for the guest address */
+ vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
+ gaddr >> PMD_SHIFT);
+ if (!vmaddr)
+ return;
+ vmaddr |= gaddr & ~PMD_MASK;
+ /* Get pointer to the page table entry */
+ ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
if (unlikely(!ptep))
return;
pte = *ptep;
@@ -639,87 +651,34 @@ static void gmap_zap_unused(struct mm_struct *mm, unsigned long address)
ptev = pte_val(pte);
if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) ||
((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) {
- gmap_zap_swap_entry(pte_to_swp_entry(pte), mm);
- pte_clear(mm, address, ptep);
+ gmap_zap_swap_entry(pte_to_swp_entry(pte), gmap->mm);
+ pte_clear(gmap->mm, vmaddr, ptep);
}
pgste_set_unlock(ptep, pgste);
out_pte:
pte_unmap_unlock(*ptep, ptl);
}
-
-/*
- * this function is assumed to be called with mmap_sem held
- */
-void __gmap_zap(unsigned long address, struct gmap *gmap)
-{
- unsigned long *table, *segment_ptr;
- unsigned long segment, pgstev, ptev;
- struct gmap_pgtable *mp;
- struct page *page;
-
- segment_ptr = gmap_table_walk(address, gmap);
- if (IS_ERR(segment_ptr))
- return;
- segment = *segment_ptr;
- if (segment & _SEGMENT_ENTRY_INVALID)
- return;
- page = pfn_to_page(segment >> PAGE_SHIFT);
- mp = (struct gmap_pgtable *) page->index;
- address = mp->vmaddr | (address & ~PMD_MASK);
- /* Page table is present */
- table = (unsigned long *)(segment & _SEGMENT_ENTRY_ORIGIN);
- table = table + ((address >> 12) & 0xff);
- pgstev = table[PTRS_PER_PTE];
- ptev = table[0];
- /* quick check, checked again with locks held */
- if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) ||
- ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID)))
- gmap_zap_unused(gmap->mm, address);
-}
EXPORT_SYMBOL_GPL(__gmap_zap);
-void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap)
+void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
{
-
- unsigned long *table, address, size;
+ unsigned long gaddr, vmaddr, size;
struct vm_area_struct *vma;
- struct gmap_pgtable *mp;
- struct page *page;
down_read(&gmap->mm->mmap_sem);
- address = from;
- while (address < to) {
- /* Walk the gmap address space page table */
- table = gmap->table + ((address >> 53) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INVALID)) {
- address = (address + PMD_SIZE) & PMD_MASK;
- continue;
- }
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + ((address >> 42) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INVALID)) {
- address = (address + PMD_SIZE) & PMD_MASK;
+ for (gaddr = from; gaddr < to;
+ gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
+ /* Find the vm address for the guest address */
+ vmaddr = (unsigned long)
+ radix_tree_lookup(&gmap->guest_to_host,
+ gaddr >> PMD_SHIFT);
+ if (!vmaddr)
continue;
- }
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + ((address >> 31) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INVALID)) {
- address = (address + PMD_SIZE) & PMD_MASK;
- continue;
- }
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + ((address >> 20) & 0x7ff);
- if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) {
- address = (address + PMD_SIZE) & PMD_MASK;
- continue;
- }
- page = pfn_to_page(*table >> PAGE_SHIFT);
- mp = (struct gmap_pgtable *) page->index;
- vma = find_vma(gmap->mm, mp->vmaddr);
- size = min(to - address, PMD_SIZE - (address & ~PMD_MASK));
- zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK),
- size, NULL);
- address = (address + PMD_SIZE) & PMD_MASK;
+ vmaddr |= gaddr & ~PMD_MASK;
+ /* Find vma in the parent mm */
+ vma = find_vma(gmap->mm, vmaddr);
+ size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
+ zap_page_range(vma, vmaddr, size, NULL);
}
up_read(&gmap->mm->mmap_sem);
}
@@ -755,7 +714,7 @@ EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
/**
* gmap_ipte_notify - mark a range of ptes for invalidation notification
* @gmap: pointer to guest mapping meta data structure
- * @start: virtual address in the guest address space
+ * @gaddr: virtual address in the guest address space
* @len: size of area
*
* Returns 0 if for each page in the given range a gmap mapping exists and
@@ -763,7 +722,7 @@ EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
* for one or more pages -EFAULT is returned. If no memory could be allocated
* -ENOMEM is returned. This function establishes missing page table entries.
*/
-int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
+int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
{
unsigned long addr;
spinlock_t *ptl;
@@ -771,12 +730,12 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
pgste_t pgste;
int rc = 0;
- if ((start & ~PAGE_MASK) || (len & ~PAGE_MASK))
+ if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK))
return -EINVAL;
down_read(&gmap->mm->mmap_sem);
while (len) {
/* Convert gmap address and connect the page tables */
- addr = __gmap_fault(start, gmap);
+ addr = __gmap_translate(gmap, gaddr);
if (IS_ERR_VALUE(addr)) {
rc = addr;
break;
@@ -786,6 +745,9 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
rc = -EFAULT;
break;
}
+ rc = __gmap_link(gmap, gaddr, addr);
+ if (rc)
+ break;
/* Walk the process page table, lock and get pte pointer */
ptep = get_locked_pte(gmap->mm, addr, &ptl);
if (unlikely(!ptep))
@@ -796,7 +758,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
pgste = pgste_get_lock(ptep);
pgste_val(pgste) |= PGSTE_IN_BIT;
pgste_set_unlock(ptep, pgste);
- start += PAGE_SIZE;
+ gaddr += PAGE_SIZE;
len -= PAGE_SIZE;
}
spin_unlock(ptl);
@@ -809,28 +771,30 @@ EXPORT_SYMBOL_GPL(gmap_ipte_notify);
/**
* gmap_do_ipte_notify - call all invalidation callbacks for a specific pte.
* @mm: pointer to the process mm_struct
+ * @vmaddr: virtual address in the process address space
* @pte: pointer to the page table entry
*
* This function is assumed to be called with the page table lock held
* for the pte to notify.
*/
-void gmap_do_ipte_notify(struct mm_struct *mm, pte_t *pte)
+void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
{
- unsigned long segment_offset;
+ unsigned long offset, gaddr;
+ unsigned long *table;
struct gmap_notifier *nb;
- struct gmap_pgtable *mp;
- struct gmap_rmap *rmap;
- struct page *page;
+ struct gmap *gmap;
- segment_offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
- segment_offset = segment_offset * (4096 / sizeof(pte_t));
- page = pfn_to_page(__pa(pte) >> PAGE_SHIFT);
- mp = (struct gmap_pgtable *) page->index;
+ offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
+ offset = offset * (4096 / sizeof(pte_t));
spin_lock(&gmap_notifier_lock);
- list_for_each_entry(rmap, &mp->mapper, list) {
+ list_for_each_entry(gmap, &mm->context.gmap_list, list) {
+ table = radix_tree_lookup(&gmap->host_to_guest,
+ vmaddr >> PMD_SHIFT);
+ if (!table)
+ continue;
+ gaddr = __gmap_segment_gaddr(table) + offset;
list_for_each_entry(nb, &gmap_notifier_list, list)
- nb->notifier_call(rmap->gmap,
- rmap->vmaddr + segment_offset);
+ nb->notifier_call(gmap, gaddr);
}
spin_unlock(&gmap_notifier_lock);
}
@@ -841,29 +805,18 @@ static inline int page_table_with_pgste(struct page *page)
return atomic_read(&page->_mapcount) == 0;
}
-static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
- unsigned long vmaddr)
+static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
{
struct page *page;
unsigned long *table;
- struct gmap_pgtable *mp;
page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
if (!page)
return NULL;
- mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT);
- if (!mp) {
- __free_page(page);
- return NULL;
- }
if (!pgtable_page_ctor(page)) {
- kfree(mp);
__free_page(page);
return NULL;
}
- mp->vmaddr = vmaddr & PMD_MASK;
- INIT_LIST_HEAD(&mp->mapper);
- page->index = (unsigned long) mp;
atomic_set(&page->_mapcount, 0);
table = (unsigned long *) page_to_phys(page);
clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
@@ -874,14 +827,10 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
static inline void page_table_free_pgste(unsigned long *table)
{
struct page *page;
- struct gmap_pgtable *mp;
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- mp = (struct gmap_pgtable *) page->index;
- BUG_ON(!list_empty(&mp->mapper));
pgtable_page_dtor(page);
atomic_set(&page->_mapcount, -1);
- kfree(mp);
__free_page(page);
}
@@ -994,13 +943,13 @@ retry:
}
if (!(pte_val(*ptep) & _PAGE_INVALID) &&
(pte_val(*ptep) & _PAGE_PROTECT)) {
- pte_unmap_unlock(*ptep, ptl);
- if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) {
- up_read(&mm->mmap_sem);
- return -EFAULT;
- }
- goto retry;
+ pte_unmap_unlock(*ptep, ptl);
+ if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) {
+ up_read(&mm->mmap_sem);
+ return -EFAULT;
}
+ goto retry;
+ }
new = old = pgste_get_lock(ptep);
pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
@@ -1038,8 +987,7 @@ static inline int page_table_with_pgste(struct page *page)
return 0;
}
-static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
- unsigned long vmaddr)
+static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
{
return NULL;
}
@@ -1053,8 +1001,8 @@ static inline void page_table_free_pgste(unsigned long *table)
{
}
-static inline void gmap_disconnect_pgtable(struct mm_struct *mm,
- unsigned long *table)
+static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table,
+ unsigned long vmaddr)
{
}
@@ -1074,14 +1022,14 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
/*
* page table entry allocation/free routines.
*/
-unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr)
+unsigned long *page_table_alloc(struct mm_struct *mm)
{
unsigned long *uninitialized_var(table);
struct page *uninitialized_var(page);
unsigned int mask, bit;
if (mm_has_pgste(mm))
- return page_table_alloc_pgste(mm, vmaddr);
+ return page_table_alloc_pgste(mm);
/* Allocate fragments of a 4K page as 1K/2K page table */
spin_lock_bh(&mm->context.list_lock);
mask = FRAG_MASK;
@@ -1123,10 +1071,8 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
unsigned int bit, mask;
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- if (page_table_with_pgste(page)) {
- gmap_disconnect_pgtable(mm, table);
+ if (page_table_with_pgste(page))
return page_table_free_pgste(table);
- }
/* Free 1K/2K page table fragment of a 4K page */
bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
spin_lock_bh(&mm->context.list_lock);
@@ -1158,7 +1104,8 @@ static void __page_table_free_rcu(void *table, unsigned bit)
}
}
-void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
+void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
+ unsigned long vmaddr)
{
struct mm_struct *mm;
struct page *page;
@@ -1167,7 +1114,7 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
mm = tlb->mm;
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
if (page_table_with_pgste(page)) {
- gmap_disconnect_pgtable(mm, table);
+ gmap_unlink(mm, table, vmaddr);
table = (unsigned long *) (__pa(table) | FRAG_MASK);
tlb_remove_table(tlb, table);
return;
@@ -1303,7 +1250,7 @@ again:
if (page_table_with_pgste(page))
continue;
/* Allocate new page table with pgstes */
- new = page_table_alloc_pgste(mm, addr);
+ new = page_table_alloc_pgste(mm);
if (!new)
return -ENOMEM;
@@ -1318,7 +1265,7 @@ again:
/* Establish new table */
pmd_populate(mm, pmd, (pte_t *) new);
/* Free old table with rcu, there might be a walker! */
- page_table_free_rcu(tlb, table);
+ page_table_free_rcu(tlb, table, addr);
new = NULL;
}
spin_unlock(ptl);
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index fe9012a49aa5..fdbd7888cb07 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -65,7 +65,7 @@ static pte_t __ref *vmem_pte_alloc(unsigned long address)
pte_t *pte;
if (slab_is_available())
- pte = (pte_t *) page_table_alloc(&init_mm, address);
+ pte = (pte_t *) page_table_alloc(&init_mm);
else
pte = alloc_bootmem_align(PTRS_PER_PTE * sizeof(pte_t),
PTRS_PER_PTE * sizeof(pte_t));
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 7fcd492adbfc..7cca41842a9e 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -134,6 +134,7 @@ config TILEGX
select HAVE_KPROBES
select HAVE_KRETPROBES
select HAVE_ARCH_KGDB
+ select ARCH_SUPPORTS_ATOMIC_RMW
config TILEPRO
def_bool !TILEGX
diff --git a/arch/tile/gxio/mpipe.c b/arch/tile/gxio/mpipe.c
index 5301a9ffbae1..320ff5e6e61e 100644
--- a/arch/tile/gxio/mpipe.c
+++ b/arch/tile/gxio/mpipe.c
@@ -29,6 +29,32 @@
/* HACK: Avoid pointless "shadow" warnings. */
#define link link_shadow
+/**
+ * strscpy - Copy a C-string into a sized buffer, but only if it fits
+ * @dest: Where to copy the string to
+ * @src: Where to copy the string from
+ * @size: size of destination buffer
+ *
+ * Use this routine to avoid copying too-long strings.
+ * The routine returns the total number of bytes copied
+ * (including the trailing NUL) or zero if the buffer wasn't
+ * big enough. To ensure that programmers pay attention
+ * to the return code, the destination has a single NUL
+ * written at the front (if size is non-zero) when the
+ * buffer is not big enough.
+ */
+static size_t strscpy(char *dest, const char *src, size_t size)
+{
+ size_t len = strnlen(src, size) + 1;
+ if (len > size) {
+ if (size)
+ dest[0] = '\0';
+ return 0;
+ }
+ memcpy(dest, src, len);
+ return len;
+}
+
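The zero return is what the converted call sites below key off; a minimal usage sketch, with a 32-byte buffer as an arbitrary example:

	char buf[32];

	/* Sketch only: a return of 0 means link_name did not fit, so the
	 * caller rejects the over-long name instead of silently truncating
	 * it, which is what the replaced strncpy() calls effectively did. */
	if (strscpy(buf, link_name, sizeof(buf)) == 0)
		return GXIO_ERR_NO_DEVICE;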
int gxio_mpipe_init(gxio_mpipe_context_t *context, unsigned int mpipe_index)
{
char file[32];
@@ -511,8 +537,8 @@ int gxio_mpipe_link_instance(const char *link_name)
if (!context)
return GXIO_ERR_NO_DEVICE;
- strncpy(name.name, link_name, sizeof(name.name));
- name.name[GXIO_MPIPE_LINK_NAME_LEN - 1] = '\0';
+ if (strscpy(name.name, link_name, sizeof(name.name)) == 0)
+ return GXIO_ERR_NO_DEVICE;
return gxio_mpipe_info_instance_aux(context, name);
}
@@ -529,7 +555,8 @@ int gxio_mpipe_link_enumerate_mac(int idx, char *link_name, uint8_t *link_mac)
rv = gxio_mpipe_info_enumerate_aux(context, idx, &name, &mac);
if (rv >= 0) {
- strncpy(link_name, name.name, sizeof(name.name));
+ if (strscpy(link_name, name.name, sizeof(name.name)) == 0)
+ return GXIO_ERR_INVAL_MEMORY_SIZE;
memcpy(link_mac, mac.mac, sizeof(mac.mac));
}
@@ -545,8 +572,8 @@ int gxio_mpipe_link_open(gxio_mpipe_link_t *link,
_gxio_mpipe_link_name_t name;
int rv;
- strncpy(name.name, link_name, sizeof(name.name));
- name.name[GXIO_MPIPE_LINK_NAME_LEN - 1] = '\0';
+ if (strscpy(name.name, link_name, sizeof(name.name)) == 0)
+ return GXIO_ERR_NO_DEVICE;
rv = gxio_mpipe_link_open_aux(context, name, flags);
if (rv < 0)
diff --git a/arch/tile/include/asm/sections.h b/arch/tile/include/asm/sections.h
index 5d5d3b739a6b..86a746243dc8 100644
--- a/arch/tile/include/asm/sections.h
+++ b/arch/tile/include/asm/sections.h
@@ -19,9 +19,6 @@
#include <asm-generic/sections.h>
-/* Text and data are at different areas in the kernel VA space. */
-extern char _sinitdata[], _einitdata[];
-
/* Write-once data is writable only till the end of initialization. */
extern char __w1data_begin[], __w1data_end[];
diff --git a/arch/tile/include/asm/vdso.h b/arch/tile/include/asm/vdso.h
index 9f6a78d665fa..9b069692153f 100644
--- a/arch/tile/include/asm/vdso.h
+++ b/arch/tile/include/asm/vdso.h
@@ -15,6 +15,7 @@
#ifndef __TILE_VDSO_H__
#define __TILE_VDSO_H__
+#include <linux/seqlock.h>
#include <linux/types.h>
/*
@@ -26,15 +27,20 @@
*/
struct vdso_data {
- __u64 tz_update_count; /* Timezone atomicity ctr */
- __u64 tb_update_count; /* Timebase atomicity ctr */
- __u64 xtime_tod_stamp; /* TOD clock for xtime */
- __u64 xtime_clock_sec; /* Kernel time second */
- __u64 xtime_clock_nsec; /* Kernel time nanosecond */
- __u64 wtom_clock_sec; /* Wall to monotonic clock second */
- __u64 wtom_clock_nsec; /* Wall to monotonic clock nanosecond */
+ seqcount_t tz_seq; /* Timezone seqlock */
+ seqcount_t tb_seq; /* Timebase seqlock */
+ __u64 cycle_last; /* TOD clock for xtime */
+ __u64 mask; /* Cycle mask */
__u32 mult; /* Cycle to nanosecond multiplier */
__u32 shift; /* Cycle to nanosecond divisor (power of two) */
+ __u64 wall_time_sec;
+ __u64 wall_time_snsec;
+ __u64 monotonic_time_sec;
+ __u64 monotonic_time_snsec;
+ __u64 wall_time_coarse_sec;
+ __u64 wall_time_coarse_nsec;
+ __u64 monotonic_time_coarse_sec;
+ __u64 monotonic_time_coarse_nsec;
__u32 tz_minuteswest; /* Minutes west of Greenwich */
__u32 tz_dsttime; /* Type of dst correction */
};
diff --git a/arch/tile/include/uapi/arch/sim_def.h b/arch/tile/include/uapi/arch/sim_def.h
index 4b44a2b6a09a..1c069537ae41 100644
--- a/arch/tile/include/uapi/arch/sim_def.h
+++ b/arch/tile/include/uapi/arch/sim_def.h
@@ -360,19 +360,19 @@
* @{
*/
-/** Use with with SIM_PROFILER_CHIP_xxx to control the memory controllers. */
+/** Use with SIM_PROFILER_CHIP_xxx to control the memory controllers. */
#define SIM_CHIP_MEMCTL 0x001
-/** Use with with SIM_PROFILER_CHIP_xxx to control the XAUI interface. */
+/** Use with SIM_PROFILER_CHIP_xxx to control the XAUI interface. */
#define SIM_CHIP_XAUI 0x002
-/** Use with with SIM_PROFILER_CHIP_xxx to control the PCIe interface. */
+/** Use with SIM_PROFILER_CHIP_xxx to control the PCIe interface. */
#define SIM_CHIP_PCIE 0x004
-/** Use with with SIM_PROFILER_CHIP_xxx to control the MPIPE interface. */
+/** Use with SIM_PROFILER_CHIP_xxx to control the MPIPE interface. */
#define SIM_CHIP_MPIPE 0x008
-/** Use with with SIM_PROFILER_CHIP_xxx to control the TRIO interface. */
+/** Use with SIM_PROFILER_CHIP_xxx to control the TRIO interface. */
#define SIM_CHIP_TRIO 0x010
/** Reference all chip devices. */
diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c
index d8fbc289e680..c1b362277fb7 100644
--- a/arch/tile/kernel/time.c
+++ b/arch/tile/kernel/time.c
@@ -249,33 +249,52 @@ cycles_t ns2cycles(unsigned long nsecs)
void update_vsyscall_tz(void)
{
- /* Userspace gettimeofday will spin while this value is odd. */
- ++vdso_data->tz_update_count;
- smp_wmb();
+ write_seqcount_begin(&vdso_data->tz_seq);
vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
vdso_data->tz_dsttime = sys_tz.tz_dsttime;
- smp_wmb();
- ++vdso_data->tz_update_count;
+ write_seqcount_end(&vdso_data->tz_seq);
}
void update_vsyscall(struct timekeeper *tk)
{
- struct timespec *wtm = &tk->wall_to_monotonic;
- struct clocksource *clock = tk->tkr.clock;
-
- if (clock != &cycle_counter_cs)
+ if (tk->tkr.clock != &cycle_counter_cs)
return;
- /* Userspace gettimeofday will spin while this value is odd. */
- ++vdso_data->tb_update_count;
- smp_wmb();
- vdso_data->xtime_tod_stamp = tk->tkr.cycle_last;
- vdso_data->xtime_clock_sec = tk->xtime_sec;
- vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec;
- vdso_data->wtom_clock_sec = wtm->tv_sec;
- vdso_data->wtom_clock_nsec = wtm->tv_nsec;
- vdso_data->mult = tk->tkr.mult;
- vdso_data->shift = tk->tkr.shift;
- smp_wmb();
- ++vdso_data->tb_update_count;
+ write_seqcount_begin(&vdso_data->tb_seq);
+
+ vdso_data->cycle_last = tk->tkr.cycle_last;
+ vdso_data->mask = tk->tkr.mask;
+ vdso_data->mult = tk->tkr.mult;
+ vdso_data->shift = tk->tkr.shift;
+
+ vdso_data->wall_time_sec = tk->xtime_sec;
+ vdso_data->wall_time_snsec = tk->tkr.xtime_nsec;
+
+ vdso_data->monotonic_time_sec = tk->xtime_sec
+ + tk->wall_to_monotonic.tv_sec;
+ vdso_data->monotonic_time_snsec = tk->tkr.xtime_nsec
+ + ((u64)tk->wall_to_monotonic.tv_nsec
+ << tk->tkr.shift);
+ while (vdso_data->monotonic_time_snsec >=
+ (((u64)NSEC_PER_SEC) << tk->tkr.shift)) {
+ vdso_data->monotonic_time_snsec -=
+ ((u64)NSEC_PER_SEC) << tk->tkr.shift;
+ vdso_data->monotonic_time_sec++;
+ }
+
+ vdso_data->wall_time_coarse_sec = tk->xtime_sec;
+ vdso_data->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >>
+ tk->tkr.shift);
+
+ vdso_data->monotonic_time_coarse_sec =
+ vdso_data->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
+ vdso_data->monotonic_time_coarse_nsec =
+ vdso_data->wall_time_coarse_nsec + tk->wall_to_monotonic.tv_nsec;
+
+ while (vdso_data->monotonic_time_coarse_nsec >= NSEC_PER_SEC) {
+ vdso_data->monotonic_time_coarse_nsec -= NSEC_PER_SEC;
+ vdso_data->monotonic_time_coarse_sec++;
+ }
+
+ write_seqcount_end(&vdso_data->tb_seq);
}
diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c
index f3ceb6308e42..86900ccd4977 100644
--- a/arch/tile/kernel/traps.c
+++ b/arch/tile/kernel/traps.c
@@ -277,7 +277,7 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
if (fixup_exception(regs)) /* ILL_TRANS or UNALIGN_DATA */
return;
if (fault_num >= 0 &&
- fault_num < sizeof(int_name)/sizeof(int_name[0]) &&
+ fault_num < ARRAY_SIZE(int_name) &&
int_name[fault_num] != NULL)
name = int_name[fault_num];
else
diff --git a/arch/tile/kernel/vdso/vdso.lds.S b/arch/tile/kernel/vdso/vdso.lds.S
index 041cd6c39c83..731529f3f06f 100644
--- a/arch/tile/kernel/vdso/vdso.lds.S
+++ b/arch/tile/kernel/vdso/vdso.lds.S
@@ -82,6 +82,8 @@ VERSION
__vdso_rt_sigreturn;
__vdso_gettimeofday;
gettimeofday;
+ __vdso_clock_gettime;
+ clock_gettime;
local:*;
};
}
diff --git a/arch/tile/kernel/vdso/vgettimeofday.c b/arch/tile/kernel/vdso/vgettimeofday.c
index e933fb9fbf5c..8bb21eda07d8 100644
--- a/arch/tile/kernel/vdso/vgettimeofday.c
+++ b/arch/tile/kernel/vdso/vgettimeofday.c
@@ -15,6 +15,7 @@
#define VDSO_BUILD /* avoid some shift warnings for -m32 in <asm/page.h> */
#include <linux/time.h>
#include <asm/timex.h>
+#include <asm/unistd.h>
#include <asm/vdso.h>
#if CHIP_HAS_SPLIT_CYCLE()
@@ -35,6 +36,11 @@ static inline cycles_t get_cycles_inline(void)
#define get_cycles get_cycles_inline
#endif
+struct syscall_return_value {
+ long value;
+ long error;
+};
+
/*
* Find out the vDSO data page address in the process address space.
*/
@@ -50,59 +56,143 @@ inline unsigned long get_datapage(void)
return ret;
}
-int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+static inline u64 vgetsns(struct vdso_data *vdso)
+{
+ return ((get_cycles() - vdso->cycle_last) & vdso->mask) * vdso->mult;
+}
+
+static inline int do_realtime(struct vdso_data *vdso, struct timespec *ts)
+{
+ unsigned count;
+ u64 ns;
+
+ do {
+ count = read_seqcount_begin(&vdso->tb_seq);
+ ts->tv_sec = vdso->wall_time_sec;
+ ns = vdso->wall_time_snsec;
+ ns += vgetsns(vdso);
+ ns >>= vdso->shift;
+ } while (unlikely(read_seqcount_retry(&vdso->tb_seq, count)));
+
+ ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+ ts->tv_nsec = ns;
+
+ return 0;
+}
+
+static inline int do_monotonic(struct vdso_data *vdso, struct timespec *ts)
+{
+ unsigned count;
+ u64 ns;
+
+ do {
+ count = read_seqcount_begin(&vdso->tb_seq);
+ ts->tv_sec = vdso->monotonic_time_sec;
+ ns = vdso->monotonic_time_snsec;
+ ns += vgetsns(vdso);
+ ns >>= vdso->shift;
+ } while (unlikely(read_seqcount_retry(&vdso->tb_seq, count)));
+
+ ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+ ts->tv_nsec = ns;
+
+ return 0;
+}
+
+static inline int do_realtime_coarse(struct vdso_data *vdso,
+ struct timespec *ts)
+{
+ unsigned count;
+
+ do {
+ count = read_seqcount_begin(&vdso->tb_seq);
+ ts->tv_sec = vdso->wall_time_coarse_sec;
+ ts->tv_nsec = vdso->wall_time_coarse_nsec;
+ } while (unlikely(read_seqcount_retry(&vdso->tb_seq, count)));
+
+ return 0;
+}
+
+static inline int do_monotonic_coarse(struct vdso_data *vdso,
+ struct timespec *ts)
{
- cycles_t cycles;
- unsigned long count, sec, ns;
- volatile struct vdso_data *vdso_data;
+ unsigned count;
+
+ do {
+ count = read_seqcount_begin(&vdso->tb_seq);
+ ts->tv_sec = vdso->monotonic_time_coarse_sec;
+ ts->tv_nsec = vdso->monotonic_time_coarse_nsec;
+ } while (unlikely(read_seqcount_retry(&vdso->tb_seq, count)));
+
+ return 0;
+}
+
+struct syscall_return_value __vdso_gettimeofday(struct timeval *tv,
+ struct timezone *tz)
+{
+ struct syscall_return_value ret = { 0, 0 };
+ unsigned count;
+ struct vdso_data *vdso = (struct vdso_data *)get_datapage();
- vdso_data = (struct vdso_data *)get_datapage();
/* The use of the timezone is obsolete, normally tz is NULL. */
if (unlikely(tz != NULL)) {
- while (1) {
- /* Spin until the update finish. */
- count = vdso_data->tz_update_count;
- if (count & 1)
- continue;
-
- tz->tz_minuteswest = vdso_data->tz_minuteswest;
- tz->tz_dsttime = vdso_data->tz_dsttime;
-
- /* Check whether updated, read again if so. */
- if (count == vdso_data->tz_update_count)
- break;
- }
+ do {
+ count = read_seqcount_begin(&vdso->tz_seq);
+ tz->tz_minuteswest = vdso->tz_minuteswest;
+ tz->tz_dsttime = vdso->tz_dsttime;
+ } while (unlikely(read_seqcount_retry(&vdso->tz_seq, count)));
}
if (unlikely(tv == NULL))
- return 0;
-
- while (1) {
- /* Spin until the update finish. */
- count = vdso_data->tb_update_count;
- if (count & 1)
- continue;
-
- sec = vdso_data->xtime_clock_sec;
- cycles = get_cycles() - vdso_data->xtime_tod_stamp;
- ns = (cycles * vdso_data->mult) + vdso_data->xtime_clock_nsec;
- ns >>= vdso_data->shift;
-
- if (ns >= NSEC_PER_SEC) {
- ns -= NSEC_PER_SEC;
- sec += 1;
- }
-
- /* Check whether updated, read again if so. */
- if (count == vdso_data->tb_update_count)
- break;
- }
+ return ret;
- tv->tv_sec = sec;
- tv->tv_usec = ns / 1000;
+ do_realtime(vdso, (struct timespec *)tv);
+ tv->tv_usec /= 1000;
- return 0;
+ return ret;
}
int gettimeofday(struct timeval *tv, struct timezone *tz)
__attribute__((weak, alias("__vdso_gettimeofday")));
+
+static struct syscall_return_value vdso_fallback_gettime(long clock,
+ struct timespec *ts)
+{
+ struct syscall_return_value ret;
+ __asm__ __volatile__ (
+ "swint1"
+ : "=R00" (ret.value), "=R01" (ret.error)
+ : "R10" (__NR_clock_gettime), "R00" (clock), "R01" (ts)
+ : "r2", "r3", "r4", "r5", "r6", "r7",
+ "r8", "r9", "r11", "r12", "r13", "r14", "r15",
+ "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
+ "r24", "r25", "r26", "r27", "r28", "r29", "memory");
+ return ret;
+}
+
+struct syscall_return_value __vdso_clock_gettime(clockid_t clock,
+ struct timespec *ts)
+{
+ struct vdso_data *vdso = (struct vdso_data *)get_datapage();
+ struct syscall_return_value ret = { 0, 0 };
+
+ switch (clock) {
+ case CLOCK_REALTIME:
+ do_realtime(vdso, ts);
+ return ret;
+ case CLOCK_MONOTONIC:
+ do_monotonic(vdso, ts);
+ return ret;
+ case CLOCK_REALTIME_COARSE:
+ do_realtime_coarse(vdso, ts);
+ return ret;
+ case CLOCK_MONOTONIC_COARSE:
+ do_monotonic_coarse(vdso, ts);
+ return ret;
+ default:
+ return vdso_fallback_gettime(clock, ts);
+ }
+}
+
+int clock_gettime(clockid_t clock, struct timespec *ts)
+ __attribute__((weak, alias("__vdso_clock_gettime")));
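With __vdso_clock_gettime exported (see the vdso.lds.S hunk above), an ordinary userspace call can be served without entering the kernel for the four handled clock ids; a hedged sketch, assuming the C library resolves clock_gettime() through the vDSO:

#include <time.h>

int main(void)
{
	struct timespec ts;

	/* Sketch only: CLOCK_MONOTONIC is handled in userspace by
	 * __vdso_clock_gettime(); an unrecognized clock id falls back to
	 * the swint1 syscall path via vdso_fallback_gettime(). */
	return clock_gettime(CLOCK_MONOTONIC, &ts);
}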
diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S
index f1819423ffc9..0e059a0101ea 100644
--- a/arch/tile/kernel/vmlinux.lds.S
+++ b/arch/tile/kernel/vmlinux.lds.S
@@ -66,11 +66,9 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
__init_begin = .;
- VMLINUX_SYMBOL(_sinitdata) = .;
INIT_DATA_SECTION(16) :data =0
PERCPU_SECTION(L2_CACHE_BYTES)
. = ALIGN(PAGE_SIZE);
- VMLINUX_SYMBOL(_einitdata) = .;
__init_end = .;
_sdata = .; /* Start of data section */
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index bfb3127b4df9..a092e393bd20 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -254,8 +254,8 @@ static pgprot_t __init init_pgprot(ulong address)
* Everything else that isn't data or bss is heap, so mark it
* with the initial heap home (hash-for-home, or this cpu). This
* includes any addresses after the loaded image and any address before
- * _einitdata, since we already captured the case of text before
- * _sinittext, and __pa(einittext) is approximately __pa(sinitdata).
+ * __init_end, since we already captured the case of text before
+ * _sinittext, and __pa(einittext) is approximately __pa(__init_begin).
*
* All the LOWMEM pages that we mark this way will get their
* struct page homecache properly marked later, in set_page_homes().
@@ -263,7 +263,7 @@ static pgprot_t __init init_pgprot(ulong address)
* homes, but with a zero free_time we don't have to actually
* do a flush action the first time we use them, either.
*/
- if (address >= (ulong) _end || address < (ulong) _einitdata)
+ if (address >= (ulong) _end || address < (ulong) __init_end)
return construct_pgprot(PAGE_KERNEL, initial_heap_home());
/* Use hash-for-home if requested for data/bss. */
@@ -632,7 +632,7 @@ int devmem_is_allowed(unsigned long pagenr)
{
return pagenr < kaddr_to_pfn(_end) &&
!(pagenr >= kaddr_to_pfn(&init_thread_union) ||
- pagenr < kaddr_to_pfn(_einitdata)) &&
+ pagenr < kaddr_to_pfn(__init_end)) &&
!(pagenr >= kaddr_to_pfn(_sinittext) ||
pagenr <= kaddr_to_pfn(_einittext-1));
}
@@ -975,8 +975,8 @@ void free_initmem(void)
/* Free the data pages that we won't use again after init. */
free_init_pages("unused kernel data",
- (unsigned long)_sinitdata,
- (unsigned long)_einitdata);
+ (unsigned long)__init_begin,
+ (unsigned long)__init_end);
/*
* Free the pages mapped from 0xc0000000 that correspond to code
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 36327438caf0..e4b1f431c7ed 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -137,6 +137,7 @@ config X86
select HAVE_ACPI_APEI if ACPI
select HAVE_ACPI_APEI_NMI if ACPI
select ACPI_LEGACY_TABLES_LOOKUP if ACPI
+ select X86_FEATURE_NAMES if PROC_FS
config INSTRUCTION_DECODER
def_bool y
@@ -314,6 +315,17 @@ config SMP
If you don't know what to do here, say N.
+config X86_FEATURE_NAMES
+ bool "Processor feature human-readable names" if EMBEDDED
+ default y
+ ---help---
+ This option compiles in a table of x86 feature bits and corresponding
+ names. This is required to support /proc/cpuinfo and a few kernel
+ messages. You can disable this to save space, at the expense of
+ making those few kernel messages show numeric feature bits instead.
+
+ If in doubt, say Y.
+
config X86_X2APIC
bool "Support x2apic"
depends on X86_LOCAL_APIC && X86_64 && IRQ_REMAP
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 60087ca37679..5692d6ac0f18 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -253,12 +253,6 @@ archclean:
$(Q)$(MAKE) $(clean)=arch/x86/tools
$(Q)$(MAKE) $(clean)=arch/x86/purgatory
-PHONY += kvmconfig
-kvmconfig:
- $(if $(wildcard $(objtree)/.config),, $(error You need an existing .config for this target))
- $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m -O $(objtree) $(objtree)/.config $(srctree)/arch/x86/configs/kvm_guest.config
- $(Q)yes "" | $(MAKE) -f $(srctree)/Makefile oldconfig
-
define archhelp
echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)'
echo ' install - Install kernel using'
@@ -272,5 +266,4 @@ define archhelp
echo ' bzdisk/fdimage*/isoimage also accept:'
echo ' FDARGS="..." arguments for the booted kernel'
echo ' FDINITRD=file initrd for the booted kernel'
- echo ' kvmconfig - Enable additional options for guest kernel support'
endef
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index dbe8dd2fe247..5b016e2498f3 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -35,19 +35,22 @@ setup-y += video-vesa.o
setup-y += video-bios.o
targets += $(setup-y)
-hostprogs-y := mkcpustr tools/build
+hostprogs-y := tools/build
+hostprogs-$(CONFIG_X86_FEATURE_NAMES) += mkcpustr
HOST_EXTRACFLAGS += -I$(srctree)/tools/include \
-include include/generated/autoconf.h \
-D__EXPORTED_HEADERS__
+ifdef CONFIG_X86_FEATURE_NAMES
$(obj)/cpu.o: $(obj)/cpustr.h
quiet_cmd_cpustr = CPUSTR $@
cmd_cpustr = $(obj)/mkcpustr > $@
-targets += cpustr.h
+targets += cpustr.h
$(obj)/cpustr.h: $(obj)/mkcpustr FORCE
$(call if_changed,cpustr)
+endif
# ---------------------------------------------------------------------------
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 0fcd9133790c..704f58aa79cd 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -26,17 +26,18 @@ LDFLAGS_vmlinux := -T
hostprogs-y := mkpiggy
HOST_EXTRACFLAGS += -I$(srctree)/tools/include
-VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
- $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \
- $(obj)/piggy.o $(obj)/cpuflags.o $(obj)/aslr.o
+vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
+ $(obj)/string.o $(obj)/cmdline.o \
+ $(obj)/piggy.o $(obj)/cpuflags.o
+
+vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o
+vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/aslr.o
$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
-ifeq ($(CONFIG_EFI_STUB), y)
- VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o
-endif
+vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o
-$(obj)/vmlinux: $(VMLINUX_OBJS) FORCE
+$(obj)/vmlinux: $(vmlinux-objs-y) FORCE
$(call if_changed,ld)
@:
@@ -44,7 +45,7 @@ OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
$(obj)/vmlinux.bin: vmlinux FORCE
$(call if_changed,objcopy)
-targets += $(patsubst $(obj)/%,%,$(VMLINUX_OBJS)) vmlinux.bin.all vmlinux.relocs
+targets += $(patsubst $(obj)/%,%,$(vmlinux-objs-y)) vmlinux.bin.all vmlinux.relocs
CMD_RELOCS = arch/x86/tools/relocs
quiet_cmd_relocs = RELOCS $@
diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index d39189ba7f8e..7c68808edeb7 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -1,6 +1,5 @@
#include "misc.h"
-#ifdef CONFIG_RANDOMIZE_BASE
#include <asm/msr.h>
#include <asm/archrandom.h>
#include <asm/e820.h>
@@ -335,5 +334,3 @@ unsigned char *choose_kernel_location(unsigned char *input,
out:
return (unsigned char *)choice;
}
-
-#endif /* CONFIG_RANDOMIZE_BASE */
diff --git a/arch/x86/boot/compressed/early_serial_console.c b/arch/x86/boot/compressed/early_serial_console.c
index d3d003cb5481..261e81fb9582 100644
--- a/arch/x86/boot/compressed/early_serial_console.c
+++ b/arch/x86/boot/compressed/early_serial_console.c
@@ -1,9 +1,5 @@
#include "misc.h"
-#ifdef CONFIG_EARLY_PRINTK
-
int early_serial_base;
#include "../early_serial_console.c"
-
-#endif
diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c
index 6ec6bb6e9957..29207f69ae8c 100644
--- a/arch/x86/boot/cpu.c
+++ b/arch/x86/boot/cpu.c
@@ -16,7 +16,9 @@
*/
#include "boot.h"
+#ifdef CONFIG_X86_FEATURE_NAMES
#include "cpustr.h"
+#endif
static char *cpu_name(int level)
{
@@ -32,11 +34,48 @@ static char *cpu_name(int level)
}
}
+static void show_cap_strs(u32 *err_flags)
+{
+ int i, j;
+#ifdef CONFIG_X86_FEATURE_NAMES
+ const unsigned char *msg_strs = (const unsigned char *)x86_cap_strs;
+ for (i = 0; i < NCAPINTS; i++) {
+ u32 e = err_flags[i];
+ for (j = 0; j < 32; j++) {
+ if (msg_strs[0] < i ||
+ (msg_strs[0] == i && msg_strs[1] < j)) {
+ /* Skip to the next string */
+ msg_strs += 2;
+ while (*msg_strs++)
+ ;
+ }
+ if (e & 1) {
+ if (msg_strs[0] == i &&
+ msg_strs[1] == j &&
+ msg_strs[2])
+ printf("%s ", msg_strs+2);
+ else
+ printf("%d:%d ", i, j);
+ }
+ e >>= 1;
+ }
+ }
+#else
+ for (i = 0; i < NCAPINTS; i++) {
+ u32 e = err_flags[i];
+ for (j = 0; j < 32; j++) {
+ if (e & 1)
+ printf("%d:%d ", i, j);
+ e >>= 1;
+ }
+ }
+#endif
+}
+
int validate_cpu(void)
{
u32 *err_flags;
int cpu_level, req_level;
- const unsigned char *msg_strs;
check_cpu(&cpu_level, &req_level, &err_flags);
@@ -49,34 +88,9 @@ int validate_cpu(void)
}
if (err_flags) {
- int i, j;
puts("This kernel requires the following features "
"not present on the CPU:\n");
-
- msg_strs = (const unsigned char *)x86_cap_strs;
-
- for (i = 0; i < NCAPINTS; i++) {
- u32 e = err_flags[i];
-
- for (j = 0; j < 32; j++) {
- if (msg_strs[0] < i ||
- (msg_strs[0] == i && msg_strs[1] < j)) {
- /* Skip to the next string */
- msg_strs += 2;
- while (*msg_strs++)
- ;
- }
- if (e & 1) {
- if (msg_strs[0] == i &&
- msg_strs[1] == j &&
- msg_strs[2])
- printf("%s ", msg_strs+2);
- else
- printf("%d:%d ", i, j);
- }
- e >>= 1;
- }
- }
+ show_cap_strs(err_flags);
putchar('\n');
return -1;
} else {
diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config
new file mode 100644
index 000000000000..4e2ecfa23c15
--- /dev/null
+++ b/arch/x86/configs/tiny.config
@@ -0,0 +1 @@
+CONFIG_NOHIGHMEM=y
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index bb9b258d60e7..094292a63e74 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -202,6 +202,7 @@
#define X86_FEATURE_DECODEASSISTS ( 8*32+12) /* AMD Decode Assists support */
#define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */
#define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */
+#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
@@ -250,8 +251,15 @@
#include <asm/asm.h>
#include <linux/bitops.h>
+#ifdef CONFIG_X86_FEATURE_NAMES
extern const char * const x86_cap_flags[NCAPINTS*32];
extern const char * const x86_power_flags[32];
+#define X86_CAP_FMT "%s"
+#define x86_cap_flag(flag) x86_cap_flags[flag]
+#else
+#define X86_CAP_FMT "%d:%d"
+#define x86_cap_flag(flag) ((flag) >> 5), ((flag) & 31)
+#endif
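A hedged sketch of how a kernel message might use the new macro pair so it builds identically with and without CONFIG_X86_FEATURE_NAMES; the warning text itself is invented:

	/* Sketch only: expands to "%s" plus the flag name when feature names
	 * are built in, and to "%d:%d" plus word:bit when they are left out. */
	pr_warn("missing CPU feature " X86_CAP_FMT "\n",
		x86_cap_flag(X86_FEATURE_VMMCALL));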
/*
* In order to save room, we index into this array by doing
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7c492ed9087b..7d603a71ab3a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -99,10 +99,6 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
#define ASYNC_PF_PER_VCPU 64
-struct kvm_vcpu;
-struct kvm;
-struct kvm_async_pf;
-
enum kvm_reg {
VCPU_REGS_RAX = 0,
VCPU_REGS_RCX = 1,
@@ -266,7 +262,8 @@ struct kvm_mmu {
struct x86_exception *fault);
gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
struct x86_exception *exception);
- gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access);
+ gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
+ struct x86_exception *exception);
int (*sync_page)(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp);
void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva);
@@ -481,6 +478,7 @@ struct kvm_vcpu_arch {
u64 mmio_gva;
unsigned access;
gfn_t mmio_gfn;
+ u64 mmio_gen;
struct kvm_pmu pmu;
@@ -576,11 +574,10 @@ struct kvm_arch {
struct kvm_apic_map *apic_map;
unsigned int tss_addr;
- struct page *apic_access_page;
+ bool apic_access_page_done;
gpa_t wall_clock;
- struct page *ept_identity_pagetable;
bool ept_identity_pagetable_done;
gpa_t ept_identity_map_addr;
@@ -665,8 +662,8 @@ struct msr_data {
struct kvm_x86_ops {
int (*cpu_has_kvm_support)(void); /* __init */
int (*disabled_by_bios)(void); /* __init */
- int (*hardware_enable)(void *dummy);
- void (*hardware_disable)(void *dummy);
+ int (*hardware_enable)(void);
+ void (*hardware_disable)(void);
void (*check_processor_compatibility)(void *rtn);
int (*hardware_setup)(void); /* __init */
void (*hardware_unsetup)(void); /* __exit */
@@ -710,7 +707,6 @@ struct kvm_x86_ops {
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
- void (*fpu_activate)(struct kvm_vcpu *vcpu);
void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
void (*tlb_flush)(struct kvm_vcpu *vcpu);
@@ -740,6 +736,7 @@ struct kvm_x86_ops {
void (*hwapic_isr_update)(struct kvm *kvm, int isr);
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
+ void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
@@ -772,6 +769,8 @@ struct kvm_x86_ops {
bool (*mpx_supported)(void);
int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
+
+ void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
};
struct kvm_arch_async_pf {
@@ -895,7 +894,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
gfn_t gfn, void *data, int offset, int len,
u32 access);
-void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
static inline int __kvm_irq_line_state(unsigned long *irq_state,
@@ -917,7 +915,6 @@ void kvm_inject_nmi(struct kvm_vcpu *vcpu);
int fx_init(struct kvm_vcpu *vcpu);
-void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
const u8 *new, int bytes);
int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
@@ -926,7 +923,8 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
-gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access);
+gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
+ struct x86_exception *exception);
gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception);
gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
@@ -946,7 +944,8 @@ void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu);
void kvm_enable_tdp(void);
void kvm_disable_tdp(void);
-static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
+static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
+ struct x86_exception *exception)
{
return gpa;
}
@@ -1037,7 +1036,7 @@ asmlinkage void kvm_spurious_fault(void);
#define KVM_ARCH_WANT_MMU_NOTIFIER
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
-int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
@@ -1046,6 +1045,9 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
void kvm_vcpu_reset(struct kvm_vcpu *vcpu);
+void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
+void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+ unsigned long address);
void kvm_define_shared_msr(unsigned index, u32 msr);
void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index c7678e43465b..e62cf897f781 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -2,6 +2,7 @@
#define _ASM_X86_KVM_PARA_H
#include <asm/processor.h>
+#include <asm/alternative.h>
#include <uapi/asm/kvm_para.h>
extern void kvmclock_init(void);
@@ -16,10 +17,15 @@ static inline bool kvm_check_and_clear_guest_paused(void)
}
#endif /* CONFIG_KVM_GUEST */
-/* This instruction is vmcall. On non-VT architectures, it will generate a
- * trap that we will then rewrite to the appropriate instruction.
+#ifdef CONFIG_DEBUG_RODATA
+#define KVM_HYPERCALL \
+ ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL)
+#else
+/* On AMD processors, vmcall will generate a trap that we will
+ * then rewrite to the appropriate instruction.
*/
#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1"
+#endif
/* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall
* instruction. The hypervisor may replace it with something else but only the
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 7fd54f09b011..77dcab277710 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -13,10 +13,13 @@ nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_common.o := $(nostackp)
obj-y := intel_cacheinfo.o scattered.o topology.o
-obj-y += proc.o capflags.o powerflags.o common.o
+obj-y += common.o
obj-y += rdrand.o
obj-y += match.o
+obj-$(CONFIG_PROC_FS) += proc.o
+obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
+
obj-$(CONFIG_X86_32) += bugs.o
obj-$(CONFIG_X86_64) += bugs_64.o
@@ -48,6 +51,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o
obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o hypervisor.o mshyperv.o
+ifdef CONFIG_X86_FEATURE_NAMES
quiet_cmd_mkcapflags = MKCAP $@
cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@
@@ -56,3 +60,4 @@ cpufeature = $(src)/../../include/asm/cpufeature.h
targets += capflags.c
$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE
$(call if_changed,mkcapflags)
+endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 60e5497681f5..813d29d00a17 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -525,6 +525,13 @@ static void early_init_amd(struct cpuinfo_x86 *c)
}
#endif
+ /*
+ * This is only needed to tell the kernel whether to use VMCALL
+ * and VMMCALL. VMMCALL is never executed except under virt, so
+ * we can set it unconditionally.
+ */
+ set_cpu_cap(c, X86_FEATURE_VMMCALL);
+
/* F16h erratum 793, CVE-2013-6885 */
if (c->x86 == 0x16 && c->x86_model <= 0xf)
msr_set_bit(MSR_AMD64_LS_CFG, 15);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index e4ab2b42bd6f..c649f236e288 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -346,8 +346,8 @@ static void filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
continue;
printk(KERN_WARNING
- "CPU: CPU feature %s disabled, no CPUID level 0x%x\n",
- x86_cap_flags[df->feature], df->level);
+ "CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n",
+ x86_cap_flag(df->feature), df->level);
}
}
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 38a0afe83c6b..976e3a57f9ea 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -53,14 +53,14 @@ u64 kvm_supported_xcr0(void)
return xcr0;
}
-void kvm_update_cpuid(struct kvm_vcpu *vcpu)
+int kvm_update_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
struct kvm_lapic *apic = vcpu->arch.apic;
best = kvm_find_cpuid_entry(vcpu, 1, 0);
if (!best)
- return;
+ return 0;
/* Update OSXSAVE bit */
if (cpu_has_xsave && best->function == 0x1) {
@@ -88,7 +88,17 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu)
xstate_required_size(vcpu->arch.xcr0);
}
+ /*
+ * The existing code assumes virtual address is 48-bit in the canonical
+ * address checks; exit if it is ever changed.
+ */
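+	/* CPUID 0x80000008 EAX[15:8] reports the guest's virtual address width. */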
+ best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+ if (best && ((best->eax & 0xff00) >> 8) != 48 &&
+ ((best->eax & 0xff00) >> 8) != 0)
+ return -EINVAL;
+
kvm_pmu_cpuid_update(vcpu);
+ return 0;
}
static int is_efer_nx(void)
@@ -112,8 +122,8 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
break;
}
}
- if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) {
- entry->edx &= ~(1 << 20);
+ if (entry && (entry->edx & bit(X86_FEATURE_NX)) && !is_efer_nx()) {
+ entry->edx &= ~bit(X86_FEATURE_NX);
printk(KERN_INFO "kvm: guest NX capability removed\n");
}
}
@@ -151,10 +161,9 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
}
vcpu->arch.cpuid_nent = cpuid->nent;
cpuid_fix_nx_cap(vcpu);
- r = 0;
kvm_apic_set_version(vcpu);
kvm_x86_ops->cpuid_update(vcpu);
- kvm_update_cpuid(vcpu);
+ r = kvm_update_cpuid(vcpu);
out_free:
vfree(cpuid_entries);
@@ -178,9 +187,7 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
vcpu->arch.cpuid_nent = cpuid->nent;
kvm_apic_set_version(vcpu);
kvm_x86_ops->cpuid_update(vcpu);
- kvm_update_cpuid(vcpu);
- return 0;
-
+ r = kvm_update_cpuid(vcpu);
out:
return r;
}
@@ -767,6 +774,12 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
if (!best)
best = check_cpuid_limit(vcpu, function, index);
+ /*
+ * Perfmon not yet supported for L2 guest.
+ */
+ if (is_guest_mode(vcpu) && function == 0xa)
+ best = NULL;
+
if (best) {
*eax = best->eax;
*ebx = best->ebx;
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index a5380590ab0e..4452eedfaedd 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -3,7 +3,7 @@
#include "x86.h"
-void kvm_update_cpuid(struct kvm_vcpu *vcpu);
+int kvm_update_cpuid(struct kvm_vcpu *vcpu);
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
u32 function, u32 index);
int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
@@ -88,6 +88,14 @@ static inline bool guest_cpuid_has_x2apic(struct kvm_vcpu *vcpu)
return best && (best->ecx & bit(X86_FEATURE_X2APIC));
}
+static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 0, 0);
+ return best && best->ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx;
+}
+
static inline bool guest_cpuid_has_gbpages(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 03954f7900f5..a46207a05835 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -527,6 +527,7 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
u32 error, bool valid)
{
+ WARN_ON(vec > 0x1f);
ctxt->exception.vector = vec;
ctxt->exception.error_code = error;
ctxt->exception.error_code_valid = valid;
@@ -1468,7 +1469,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
return ret;
err_code = selector & 0xfffc;
- err_vec = GP_VECTOR;
+ err_vec = in_task_switch ? TS_VECTOR : GP_VECTOR;
/* can't load system descriptor into segment selector */
if (seg <= VCPU_SREG_GS && !seg_desc.s)
@@ -1503,6 +1504,15 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
if (rpl > cpl || dpl != cpl)
goto exception;
}
+ /* in long-mode d/b must be clear if l is set */
+ if (seg_desc.d && seg_desc.l) {
+ u64 efer = 0;
+
+ ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+ if (efer & EFER_LMA)
+ goto exception;
+ }
+
/* CS(RPL) <- CPL */
selector = (selector & 0xfffc) | cpl;
break;
@@ -1549,8 +1559,7 @@ load:
ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
return X86EMUL_CONTINUE;
exception:
- emulate_exception(ctxt, err_vec, err_code, true);
- return X86EMUL_PROPAGATE_FAULT;
+ return emulate_exception(ctxt, err_vec, err_code, true);
}
static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
@@ -2723,8 +2732,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
if (!next_tss_desc.p ||
((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
desc_limit < 0x2b)) {
- emulate_ts(ctxt, tss_selector & 0xfffc);
- return X86EMUL_PROPAGATE_FAULT;
+ return emulate_ts(ctxt, tss_selector & 0xfffc);
}
if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
@@ -3016,7 +3024,7 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt)
ctxt->dst.val = swab64(ctxt->src.val);
break;
default:
- return X86EMUL_PROPAGATE_FAULT;
+ BUG();
}
return X86EMUL_CONTINUE;
}
@@ -3140,12 +3148,8 @@ static int em_clts(struct x86_emulate_ctxt *ctxt)
static int em_vmcall(struct x86_emulate_ctxt *ctxt)
{
- int rc;
-
- if (ctxt->modrm_mod != 3 || ctxt->modrm_rm != 1)
- return X86EMUL_UNHANDLEABLE;
+ int rc = ctxt->ops->fix_hypercall(ctxt);
- rc = ctxt->ops->fix_hypercall(ctxt);
if (rc != X86EMUL_CONTINUE)
return rc;
@@ -3563,6 +3567,12 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
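+/* 0f 01 with mod == 3 and reg == 0: only rm == 1 (vmcall) is emulated. */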
+static const struct opcode group7_rm0[] = {
+ N,
+ I(SrcNone | Priv | EmulateOnUD, em_vmcall),
+ N, N, N, N, N, N,
+};
+
static const struct opcode group7_rm1[] = {
DI(SrcNone | Priv, monitor),
DI(SrcNone | Priv, mwait),
@@ -3656,7 +3666,7 @@ static const struct group_dual group7 = { {
II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
}, {
- I(SrcNone | Priv | EmulateOnUD, em_vmcall),
+ EXT(0, group7_rm0),
EXT(0, group7_rm1),
N, EXT(0, group7_rm3),
II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
@@ -3687,14 +3697,18 @@ static const struct gprefix pfx_0f_6f_0f_7f = {
I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
};
-static const struct gprefix pfx_vmovntpx = {
- I(0, em_mov), N, N, N,
+static const struct gprefix pfx_0f_2b = {
+ I(0, em_mov), I(0, em_mov), N, N,
};
static const struct gprefix pfx_0f_28_0f_29 = {
I(Aligned, em_mov), I(Aligned, em_mov), N, N,
};
+static const struct gprefix pfx_0f_e7 = {
+ N, I(Sse, em_mov), N, N,
+};
+
static const struct escape escape_d9 = { {
N, N, N, N, N, N, N, I(DstMem, em_fnstcw),
}, {
@@ -3901,7 +3915,7 @@ static const struct opcode twobyte_table[256] = {
N, N, N, N,
GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
- N, GP(ModRM | DstMem | SrcReg | Sse | Mov | Aligned, &pfx_vmovntpx),
+ N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b),
N, N, N, N,
/* 0x30 - 0x3F */
II(ImplicitOps | Priv, em_wrmsr, wrmsr),
@@ -3965,7 +3979,8 @@ static const struct opcode twobyte_table[256] = {
/* 0xD0 - 0xDF */
N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
/* 0xE0 - 0xEF */
- N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
+ N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7),
+ N, N, N, N, N, N, N, N,
/* 0xF0 - 0xFF */
N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
};
@@ -4829,8 +4844,10 @@ writeback:
ctxt->eip = ctxt->_eip;
done:
- if (rc == X86EMUL_PROPAGATE_FAULT)
+ if (rc == X86EMUL_PROPAGATE_FAULT) {
+ WARN_ON(ctxt->exception.vector > 0x1f);
ctxt->have_exception = true;
+ }
if (rc == X86EMUL_INTERCEPTED)
return EMULATION_INTERCEPTED;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 08e8a899e005..b8345dd41b25 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -112,17 +112,6 @@ static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
struct static_key_deferred apic_hw_disabled __read_mostly;
struct static_key_deferred apic_sw_disabled __read_mostly;
-static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
-{
- if ((kvm_apic_get_reg(apic, APIC_SPIV) ^ val) & APIC_SPIV_APIC_ENABLED) {
- if (val & APIC_SPIV_APIC_ENABLED)
- static_key_slow_dec_deferred(&apic_sw_disabled);
- else
- static_key_slow_inc(&apic_sw_disabled.key);
- }
- apic_set_reg(apic, APIC_SPIV, val);
-}
-
static inline int apic_enabled(struct kvm_lapic *apic)
{
return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
@@ -210,6 +199,20 @@ out:
kvm_vcpu_request_scan_ioapic(kvm);
}
+static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
+{
+ u32 prev = kvm_apic_get_reg(apic, APIC_SPIV);
+
+ apic_set_reg(apic, APIC_SPIV, val);
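+	/* Software-enabling the APIC also requires the APIC map to be recalculated. */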
+ if ((prev ^ val) & APIC_SPIV_APIC_ENABLED) {
+ if (val & APIC_SPIV_APIC_ENABLED) {
+ static_key_slow_dec_deferred(&apic_sw_disabled);
+ recalculate_apic_map(apic->vcpu->kvm);
+ } else
+ static_key_slow_inc(&apic_sw_disabled.key);
+ }
+}
+
static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
{
apic_set_reg(apic, APIC_ID, id << 24);
@@ -706,6 +709,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
int result = 0;
struct kvm_vcpu *vcpu = apic->vcpu;
+ trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
+ trig_mode, vector);
switch (delivery_mode) {
case APIC_DM_LOWEST:
vcpu->arch.apic_arb_prio++;
@@ -727,8 +732,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
}
- trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
- trig_mode, vector, false);
break;
case APIC_DM_REMRD:
@@ -1352,6 +1355,9 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
return;
hrtimer_cancel(&apic->lapic_timer.timer);
+ /* Inject here so clearing tscdeadline won't override new value */
+ if (apic_has_pending_timer(vcpu))
+ kvm_inject_apic_timer_irqs(vcpu);
apic->lapic_timer.tscdeadline = data;
start_apic_timer(apic);
}
@@ -1639,6 +1645,8 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
if (atomic_read(&apic->lapic_timer.pending) > 0) {
kvm_apic_local_deliver(apic, APIC_LVTT);
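+		/* The tsc-deadline MSR architecturally reads as zero once the timer has fired. */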
+ if (apic_lvtt_tscdeadline(apic))
+ apic->lapic_timer.tscdeadline = 0;
atomic_set(&apic->lapic_timer.pending, 0);
}
}
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 931467881da7..3201e93ebd07 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -199,16 +199,20 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask)
EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
/*
- * spte bits of bit 3 ~ bit 11 are used as low 9 bits of generation number,
- * the bits of bits 52 ~ bit 61 are used as high 10 bits of generation
- * number.
+ * the low bit of the generation number is always presumed to be zero.
+ * This disables mmio caching during memslot updates. The concept is
+ * similar to a seqcount but instead of retrying the access we just punt
+ * and ignore the cache.
+ *
+ * spte bits 3-11 are used as bits 1-9 of the generation number,
+ * the bits 52-61 are used as bits 10-19 of the generation number.
*/
-#define MMIO_SPTE_GEN_LOW_SHIFT 3
+#define MMIO_SPTE_GEN_LOW_SHIFT 2
#define MMIO_SPTE_GEN_HIGH_SHIFT 52
-#define MMIO_GEN_SHIFT 19
-#define MMIO_GEN_LOW_SHIFT 9
-#define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 1)
+#define MMIO_GEN_SHIFT 20
+#define MMIO_GEN_LOW_SHIFT 10
+#define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 2)
#define MMIO_GEN_MASK ((1 << MMIO_GEN_SHIFT) - 1)
#define MMIO_MAX_GEN ((1 << MMIO_GEN_SHIFT) - 1)
@@ -236,12 +240,7 @@ static unsigned int get_mmio_spte_generation(u64 spte)
static unsigned int kvm_current_mmio_generation(struct kvm *kvm)
{
- /*
- * Init kvm generation close to MMIO_MAX_GEN to easily test the
- * code of handling generation number wrap-around.
- */
- return (kvm_memslots(kvm)->generation +
- MMIO_MAX_GEN - 150) & MMIO_GEN_MASK;
+ return kvm_memslots(kvm)->generation & MMIO_GEN_MASK;
}
static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
@@ -296,11 +295,6 @@ static bool check_mmio_spte(struct kvm *kvm, u64 spte)
return likely(kvm_gen == spte_gen);
}
-static inline u64 rsvd_bits(int s, int e)
-{
- return ((1ULL << (e - s + 1)) - 1) << s;
-}
-
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
u64 dirty_mask, u64 nx_mask, u64 x_mask)
{
@@ -1180,7 +1174,7 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
* Write-protect on the specified @sptep, @pt_protect indicates whether
* spte write-protection is caused by protecting shadow page table.
*
- * Note: write protection is difference between drity logging and spte
+ * Note: write protection is different between dirty logging and spte
* protection:
* - for dirty logging, the spte can be set to writable at anytime if
* its dirty bitmap is properly set.
@@ -1268,7 +1262,8 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn)
}
static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
- struct kvm_memory_slot *slot, unsigned long data)
+ struct kvm_memory_slot *slot, gfn_t gfn, int level,
+ unsigned long data)
{
u64 *sptep;
struct rmap_iterator iter;
@@ -1276,7 +1271,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
while ((sptep = rmap_get_first(*rmapp, &iter))) {
BUG_ON(!(*sptep & PT_PRESENT_MASK));
- rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", sptep, *sptep);
+ rmap_printk("kvm_rmap_unmap_hva: spte %p %llx gfn %llx (%d)\n",
+ sptep, *sptep, gfn, level);
drop_spte(kvm, sptep);
need_tlb_flush = 1;
@@ -1286,7 +1282,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
}
static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
- struct kvm_memory_slot *slot, unsigned long data)
+ struct kvm_memory_slot *slot, gfn_t gfn, int level,
+ unsigned long data)
{
u64 *sptep;
struct rmap_iterator iter;
@@ -1300,7 +1297,8 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
BUG_ON(!is_shadow_present_pte(*sptep));
- rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", sptep, *sptep);
+ rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n",
+ sptep, *sptep, gfn, level);
need_flush = 1;
@@ -1334,6 +1332,8 @@ static int kvm_handle_hva_range(struct kvm *kvm,
int (*handler)(struct kvm *kvm,
unsigned long *rmapp,
struct kvm_memory_slot *slot,
+ gfn_t gfn,
+ int level,
unsigned long data))
{
int j;
@@ -1363,6 +1363,7 @@ static int kvm_handle_hva_range(struct kvm *kvm,
j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) {
unsigned long idx, idx_end;
unsigned long *rmapp;
+ gfn_t gfn = gfn_start;
/*
* {idx(page_j) | page_j intersects with
@@ -1373,8 +1374,10 @@ static int kvm_handle_hva_range(struct kvm *kvm,
rmapp = __gfn_to_rmap(gfn_start, j, memslot);
- for (; idx <= idx_end; ++idx)
- ret |= handler(kvm, rmapp++, memslot, data);
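+			/* Each rmap entry at level j covers 1 << KVM_HPAGE_GFN_SHIFT(j) gfns. */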
+ for (; idx <= idx_end;
+ ++idx, gfn += (1UL << KVM_HPAGE_GFN_SHIFT(j)))
+ ret |= handler(kvm, rmapp++, memslot,
+ gfn, j, data);
}
}
@@ -1385,6 +1388,7 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
unsigned long data,
int (*handler)(struct kvm *kvm, unsigned long *rmapp,
struct kvm_memory_slot *slot,
+ gfn_t gfn, int level,
unsigned long data))
{
return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
@@ -1406,24 +1410,14 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
}
static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
- struct kvm_memory_slot *slot, unsigned long data)
+ struct kvm_memory_slot *slot, gfn_t gfn, int level,
+ unsigned long data)
{
u64 *sptep;
struct rmap_iterator uninitialized_var(iter);
int young = 0;
- /*
- * In case of absence of EPT Access and Dirty Bits supports,
- * emulate the accessed bit for EPT, by checking if this page has
- * an EPT mapping, and clearing it if it does. On the next access,
- * a new EPT mapping will be established.
- * This has some overhead, but not as much as the cost of swapping
- * out actively used pages or breaking up actively used hugepages.
- */
- if (!shadow_accessed_mask) {
- young = kvm_unmap_rmapp(kvm, rmapp, slot, data);
- goto out;
- }
+ BUG_ON(!shadow_accessed_mask);
for (sptep = rmap_get_first(*rmapp, &iter); sptep;
sptep = rmap_get_next(&iter)) {
@@ -1435,14 +1429,13 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
(unsigned long *)sptep);
}
}
-out:
- /* @data has hva passed to kvm_age_hva(). */
- trace_kvm_age_page(data, slot, young);
+ trace_kvm_age_page(gfn, level, slot, young);
return young;
}
static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
- struct kvm_memory_slot *slot, unsigned long data)
+ struct kvm_memory_slot *slot, gfn_t gfn,
+ int level, unsigned long data)
{
u64 *sptep;
struct rmap_iterator iter;
@@ -1480,13 +1473,33 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
- kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0);
+ kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, gfn, sp->role.level, 0);
kvm_flush_remote_tlbs(vcpu->kvm);
}
-int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
{
- return kvm_handle_hva(kvm, hva, hva, kvm_age_rmapp);
+ /*
+ * In case of absence of EPT Access and Dirty Bits supports,
+ * emulate the accessed bit for EPT, by checking if this page has
+ * an EPT mapping, and clearing it if it does. On the next access,
+ * a new EPT mapping will be established.
+ * This has some overhead, but not as much as the cost of swapping
+ * out actively used pages or breaking up actively used hugepages.
+ */
+ if (!shadow_accessed_mask) {
+ /*
+ * We are holding the kvm->mmu_lock, and we are blowing up
+ * shadow PTEs. MMU notifier consumers need to be kept at bay.
+ * This is correct as long as we don't decouple the mmu_lock
+ * protected regions (like invalidate_range_start|end does).
+ */
+ kvm->mmu_notifier_seq++;
+ return kvm_handle_hva_range(kvm, start, end, 0,
+ kvm_unmap_rmapp);
+ }
+
+ return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp);
}
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
@@ -1749,7 +1762,7 @@ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
return 1;
}
- kvm_mmu_flush_tlb(vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
return 0;
}
@@ -1802,7 +1815,7 @@ static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
if (flush)
- kvm_mmu_flush_tlb(vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
struct mmu_page_path {
@@ -2536,7 +2549,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
true, host_writable)) {
if (write_fault)
*emulate = 1;
- kvm_mmu_flush_tlb(vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
if (unlikely(is_mmio_spte(*sptep) && emulate))
@@ -3163,7 +3176,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
return;
- vcpu_clear_mmio_info(vcpu, ~0ul);
+ vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
hpa_t root = vcpu->arch.mmu.root_hpa;
@@ -3206,7 +3219,7 @@ static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr,
{
if (exception)
exception->error_code = 0;
- return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access);
+ return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access, exception);
}
static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct)
@@ -3450,13 +3463,6 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
context->nx = false;
}
-void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
-{
- ++vcpu->stat.tlb_flush;
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
-}
-EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb);
-
void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu)
{
mmu_free_roots(vcpu);
@@ -3518,6 +3524,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
int maxphyaddr = cpuid_maxphyaddr(vcpu);
u64 exb_bit_rsvd = 0;
u64 gbpages_bit_rsvd = 0;
+ u64 nonleaf_bit8_rsvd = 0;
context->bad_mt_xwr = 0;
@@ -3525,6 +3532,14 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
exb_bit_rsvd = rsvd_bits(63, 63);
if (!guest_cpuid_has_gbpages(vcpu))
gbpages_bit_rsvd = rsvd_bits(7, 7);
+
+ /*
+ * Non-leaf PML4Es and PDPEs reserve bit 8 (which would be the G bit for
+ * leaf entries) on AMD CPUs only.
+ */
+ if (guest_cpuid_is_amd(vcpu))
+ nonleaf_bit8_rsvd = rsvd_bits(8, 8);
+
switch (context->root_level) {
case PT32_ROOT_LEVEL:
/* no rsvd bits for 2 level 4K page table entries */
@@ -3559,9 +3574,9 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
break;
case PT64_ROOT_LEVEL:
context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
- rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 7);
+ nonleaf_bit8_rsvd | rsvd_bits(7, 7) | rsvd_bits(maxphyaddr, 51);
context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
- gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51);
+ nonleaf_bit8_rsvd | gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51);
context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
rsvd_bits(maxphyaddr, 51);
context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
@@ -3962,7 +3977,7 @@ static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page,
if (remote_flush)
kvm_flush_remote_tlbs(vcpu->kvm);
else if (local_flush)
- kvm_mmu_flush_tlb(vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
@@ -4223,7 +4238,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
{
vcpu->arch.mmu.invlpg(vcpu, gva);
- kvm_mmu_flush_tlb(vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
++vcpu->stat.invlpg;
}
EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
@@ -4433,7 +4448,7 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm)
* The very rare case: if the generation-number is round,
* zap all shadow pages.
*/
- if (unlikely(kvm_current_mmio_generation(kvm) >= MMIO_MAX_GEN)) {
+ if (unlikely(kvm_current_mmio_generation(kvm) == 0)) {
printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n");
kvm_mmu_invalidate_zap_all_pages(kvm);
}
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index b982112d2ca5..bde8ee725754 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -56,6 +56,11 @@
#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
+static inline u64 rsvd_bits(int s, int e)
+{
+ return ((1ULL << (e - s + 1)) - 1) << s;
+}
+
int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 410776528265..806d58e3c320 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -298,8 +298,7 @@ retry_walk:
}
#endif
walker->max_level = walker->level;
- ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
- (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0);
+ ASSERT(!is_long_mode(vcpu) && is_pae(vcpu));
accessed_dirty = PT_GUEST_ACCESSED_MASK;
pt_access = pte_access = ACC_ALL;
@@ -321,9 +320,22 @@ retry_walk:
walker->pte_gpa[walker->level - 1] = pte_gpa;
real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn),
- PFERR_USER_MASK|PFERR_WRITE_MASK);
+ PFERR_USER_MASK|PFERR_WRITE_MASK,
+ &walker->fault);
+
+ /*
+	 * FIXME: This can happen if emulation (e.g. of an INS/OUTS
+ * instruction) triggers a nested page fault. The exit
+ * qualification / exit info field will incorrectly have
+ * "guest page access" as the nested page fault's cause,
+ * instead of "guest page structure access". To fix this,
+ * the x86_exception struct should be augmented with enough
+ * information to fix the exit_qualification or exit_info_1
+ * fields.
+ */
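+		/* On failure translate_gpa() has already filled walker->fault; just fail the walk. */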
if (unlikely(real_gfn == UNMAPPED_GVA))
- goto error;
+ return 0;
+
real_gfn = gpa_to_gfn(real_gfn);
host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn,
@@ -364,7 +376,7 @@ retry_walk:
if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36())
gfn += pse36_gfn_delta(pte);
- real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access);
+ real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access, &walker->fault);
if (real_gpa == UNMAPPED_GVA)
return 0;
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 3dd6accb64ec..8e6b7d869d2f 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -15,6 +15,7 @@
#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
+#include <asm/perf_event.h>
#include "x86.h"
#include "cpuid.h"
#include "lapic.h"
@@ -463,7 +464,8 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu)
{
struct kvm_pmu *pmu = &vcpu->arch.pmu;
struct kvm_cpuid_entry2 *entry;
- unsigned bitmap_len;
+ union cpuid10_eax eax;
+ union cpuid10_edx edx;
pmu->nr_arch_gp_counters = 0;
pmu->nr_arch_fixed_counters = 0;
@@ -475,25 +477,27 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu)
entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
if (!entry)
return;
+ eax.full = entry->eax;
+ edx.full = entry->edx;
- pmu->version = entry->eax & 0xff;
+ pmu->version = eax.split.version_id;
if (!pmu->version)
return;
- pmu->nr_arch_gp_counters = min((int)(entry->eax >> 8) & 0xff,
- INTEL_PMC_MAX_GENERIC);
- pmu->counter_bitmask[KVM_PMC_GP] =
- ((u64)1 << ((entry->eax >> 16) & 0xff)) - 1;
- bitmap_len = (entry->eax >> 24) & 0xff;
- pmu->available_event_types = ~entry->ebx & ((1ull << bitmap_len) - 1);
+ pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters,
+ INTEL_PMC_MAX_GENERIC);
+ pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1;
+ pmu->available_event_types = ~entry->ebx &
+ ((1ull << eax.split.mask_length) - 1);
if (pmu->version == 1) {
pmu->nr_arch_fixed_counters = 0;
} else {
- pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f),
+ pmu->nr_arch_fixed_counters =
+ min_t(int, edx.split.num_counters_fixed,
INTEL_PMC_MAX_FIXED);
pmu->counter_bitmask[KVM_PMC_FIXED] =
- ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1;
+ ((u64)1 << edx.split.bit_width_fixed) - 1;
}
pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ddf742768ecf..f7f6a4a157a6 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -622,7 +622,7 @@ static int has_svm(void)
return 1;
}
-static void svm_hardware_disable(void *garbage)
+static void svm_hardware_disable(void)
{
/* Make sure we clean up behind us */
if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
@@ -633,7 +633,7 @@ static void svm_hardware_disable(void *garbage)
amd_pmu_disable_virt();
}
-static int svm_hardware_enable(void *garbage)
+static int svm_hardware_enable(void)
{
struct svm_cpu_data *sd;
@@ -1257,7 +1257,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
svm->asid_generation = 0;
init_vmcb(svm);
- svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
+ svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
+ MSR_IA32_APICBASE_ENABLE;
if (kvm_vcpu_is_bsp(&svm->vcpu))
svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
@@ -1974,10 +1975,26 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
{
struct vcpu_svm *svm = to_svm(vcpu);
- svm->vmcb->control.exit_code = SVM_EXIT_NPF;
- svm->vmcb->control.exit_code_hi = 0;
- svm->vmcb->control.exit_info_1 = fault->error_code;
- svm->vmcb->control.exit_info_2 = fault->address;
+ if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) {
+ /*
+ * TODO: track the cause of the nested page fault, and
+ * correctly fill in the high bits of exit_info_1.
+ */
+ svm->vmcb->control.exit_code = SVM_EXIT_NPF;
+ svm->vmcb->control.exit_code_hi = 0;
+ svm->vmcb->control.exit_info_1 = (1ULL << 32);
+ svm->vmcb->control.exit_info_2 = fault->address;
+ }
+
+ svm->vmcb->control.exit_info_1 &= ~0xffffffffULL;
+ svm->vmcb->control.exit_info_1 |= fault->error_code;
+
+ /*
+ * The present bit is always zero for page structure faults on real
+ * hardware.
+ */
+ if (svm->vmcb->control.exit_info_1 & (2ULL << 32))
+ svm->vmcb->control.exit_info_1 &= ~1;
nested_svm_vmexit(svm);
}
@@ -3031,7 +3048,7 @@ static int cr8_write_interception(struct vcpu_svm *svm)
return 0;
}
-u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
+static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
{
struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu));
return vmcb->control.tsc_offset +
@@ -4305,6 +4322,10 @@ static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
local_irq_enable();
}
+static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
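+	/* Nothing to do: SVM does not (yet) adjust its pause filter count dynamically. */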
+}
+
static struct kvm_x86_ops svm_x86_ops = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -4349,7 +4370,6 @@ static struct kvm_x86_ops svm_x86_ops = {
.cache_reg = svm_cache_reg,
.get_rflags = svm_get_rflags,
.set_rflags = svm_set_rflags,
- .fpu_activate = svm_fpu_activate,
.fpu_deactivate = svm_fpu_deactivate,
.tlb_flush = svm_flush_tlb,
@@ -4406,6 +4426,8 @@ static struct kvm_x86_ops svm_x86_ops = {
.check_intercept = svm_check_intercept,
.handle_external_intr = svm_handle_external_intr,
+
+ .sched_in = svm_sched_in,
};
static int __init svm_init(void)
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index e850a7d332be..6b06ab8748dd 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -415,15 +415,14 @@ TRACE_EVENT(kvm_apic_ipi,
);
TRACE_EVENT(kvm_apic_accept_irq,
- TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec, bool coalesced),
- TP_ARGS(apicid, dm, tm, vec, coalesced),
+ TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec),
+ TP_ARGS(apicid, dm, tm, vec),
TP_STRUCT__entry(
__field( __u32, apicid )
__field( __u16, dm )
__field( __u8, tm )
__field( __u8, vec )
- __field( bool, coalesced )
),
TP_fast_assign(
@@ -431,14 +430,12 @@ TRACE_EVENT(kvm_apic_accept_irq,
__entry->dm = dm;
__entry->tm = tm;
__entry->vec = vec;
- __entry->coalesced = coalesced;
),
- TP_printk("apicid %x vec %u (%s|%s)%s",
+ TP_printk("apicid %x vec %u (%s|%s)",
__entry->apicid, __entry->vec,
__print_symbolic((__entry->dm >> 8 & 0x7), kvm_deliver_mode),
- __entry->tm ? "level" : "edge",
- __entry->coalesced ? " (coalesced)" : "")
+ __entry->tm ? "level" : "edge")
);
TRACE_EVENT(kvm_eoi,
@@ -850,6 +847,36 @@ TRACE_EVENT(kvm_track_tsc,
#endif /* CONFIG_X86_64 */
+TRACE_EVENT(kvm_ple_window,
+ TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old),
+ TP_ARGS(grow, vcpu_id, new, old),
+
+ TP_STRUCT__entry(
+ __field( bool, grow )
+ __field( unsigned int, vcpu_id )
+ __field( int, new )
+ __field( int, old )
+ ),
+
+ TP_fast_assign(
+ __entry->grow = grow;
+ __entry->vcpu_id = vcpu_id;
+ __entry->new = new;
+ __entry->old = old;
+ ),
+
+ TP_printk("vcpu %u: ple_window %d (%s %d)",
+ __entry->vcpu_id,
+ __entry->new,
+ __entry->grow ? "grow" : "shrink",
+ __entry->old)
+);
+
+#define trace_kvm_ple_window_grow(vcpu_id, new, old) \
+ trace_kvm_ple_window(true, vcpu_id, new, old)
+#define trace_kvm_ple_window_shrink(vcpu_id, new, old) \
+ trace_kvm_ple_window(false, vcpu_id, new, old)
+
#endif /* _TRACE_KVM_H */
#undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bfe11cf124a1..04fa1b8298c8 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -125,14 +125,32 @@ module_param(nested, bool, S_IRUGO);
* Time is measured based on a counter that runs at the same rate as the TSC,
* refer SDM volume 3b section 21.6.13 & 22.1.3.
*/
-#define KVM_VMX_DEFAULT_PLE_GAP 128
-#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
+#define KVM_VMX_DEFAULT_PLE_GAP 128
+#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
+#define KVM_VMX_DEFAULT_PLE_WINDOW_GROW 2
+#define KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK 0
+#define KVM_VMX_DEFAULT_PLE_WINDOW_MAX \
+ INT_MAX / KVM_VMX_DEFAULT_PLE_WINDOW_GROW
+
static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
module_param(ple_gap, int, S_IRUGO);
static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
module_param(ple_window, int, S_IRUGO);
+/* Default doubles per-vcpu window every exit. */
+static int ple_window_grow = KVM_VMX_DEFAULT_PLE_WINDOW_GROW;
+module_param(ple_window_grow, int, S_IRUGO);
+
+/* Default resets per-vcpu window every exit to ple_window. */
+static int ple_window_shrink = KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK;
+module_param(ple_window_shrink, int, S_IRUGO);
+
+/* Default is to compute the maximum so we can never overflow. */
+static int ple_window_actual_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
+static int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
+module_param(ple_window_max, int, S_IRUGO);
+
extern const ulong vmx_return;
#define NR_AUTOLOAD_MSRS 8
@@ -379,6 +397,7 @@ struct nested_vmx {
* we must keep them pinned while L2 runs.
*/
struct page *apic_access_page;
+ struct page *virtual_apic_page;
u64 msr_ia32_feature_control;
struct hrtimer preemption_timer;
@@ -484,6 +503,10 @@ struct vcpu_vmx {
/* Support for a guest hypervisor (nested VMX) */
struct nested_vmx nested;
+
+ /* Dynamic PLE window. */
+ int ple_window;
+ bool ple_window_dirty;
};
enum segment_cache_field {
@@ -533,6 +556,7 @@ static int max_shadow_read_only_fields =
ARRAY_SIZE(shadow_read_only_fields);
static unsigned long shadow_read_write_fields[] = {
+ TPR_THRESHOLD,
GUEST_RIP,
GUEST_RSP,
GUEST_CR0,
@@ -743,6 +767,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var);
static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu);
static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
+static int alloc_identity_pagetable(struct kvm *kvm);
static DEFINE_PER_CPU(struct vmcs *, vmxarea);
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -2135,7 +2160,7 @@ static u64 guest_read_tsc(void)
* Like guest_read_tsc, but always returns L1's notion of the timestamp
* counter, even if a nested guest (L2) is currently running.
*/
-u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
+static u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
{
u64 tsc_offset;
@@ -2330,7 +2355,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING |
CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING |
- CPU_BASED_PAUSE_EXITING |
+ CPU_BASED_PAUSE_EXITING | CPU_BASED_TPR_SHADOW |
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
/*
* We can allow some features even when not supported by the
@@ -2601,6 +2626,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_CR_PAT:
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
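+			/* Reject PAT values that contain reserved memory types. */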
+ if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
+ return 1;
vmcs_write64(GUEST_IA32_PAT, data);
vcpu->arch.pat = data;
break;
@@ -2704,7 +2731,7 @@ static void kvm_cpu_vmxon(u64 addr)
: "memory", "cc");
}
-static int hardware_enable(void *garbage)
+static int hardware_enable(void)
{
int cpu = raw_smp_processor_id();
u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
@@ -2768,7 +2795,7 @@ static void kvm_cpu_vmxoff(void)
asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
}
-static void hardware_disable(void *garbage)
+static void hardware_disable(void)
{
if (vmm_exclusive) {
vmclear_local_loaded_vmcss();
@@ -3107,9 +3134,17 @@ static __init int hardware_setup(void)
if (!cpu_has_vmx_unrestricted_guest())
enable_unrestricted_guest = 0;
- if (!cpu_has_vmx_flexpriority())
+ if (!cpu_has_vmx_flexpriority()) {
flexpriority_enabled = 0;
+ /*
+ * set_apic_access_page_addr() is used to reload apic access
+ * page upon invalidation. No need to do anything if the
+ * processor does not have the APIC_ACCESS_ADDR VMCS field.
+ */
+ kvm_x86_ops->set_apic_access_page_addr = NULL;
+ }
+
if (!cpu_has_vmx_tpr_shadow())
kvm_x86_ops->update_cr8_intercept = NULL;
@@ -3905,7 +3940,7 @@ static int init_rmode_tss(struct kvm *kvm)
{
gfn_t fn;
u16 data = 0;
- int r, idx, ret = 0;
+ int idx, r;
idx = srcu_read_lock(&kvm->srcu);
fn = kvm->arch.tss_addr >> PAGE_SHIFT;
@@ -3927,32 +3962,32 @@ static int init_rmode_tss(struct kvm *kvm)
r = kvm_write_guest_page(kvm, fn, &data,
RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1,
sizeof(u8));
- if (r < 0)
- goto out;
-
- ret = 1;
out:
srcu_read_unlock(&kvm->srcu, idx);
- return ret;
+ return r;
}
static int init_rmode_identity_map(struct kvm *kvm)
{
- int i, idx, r, ret;
+ int i, idx, r = 0;
pfn_t identity_map_pfn;
u32 tmp;
if (!enable_ept)
- return 1;
- if (unlikely(!kvm->arch.ept_identity_pagetable)) {
- printk(KERN_ERR "EPT: identity-mapping pagetable "
- "haven't been allocated!\n");
return 0;
- }
+
+ /* Protect kvm->arch.ept_identity_pagetable_done. */
+ mutex_lock(&kvm->slots_lock);
+
if (likely(kvm->arch.ept_identity_pagetable_done))
- return 1;
- ret = 0;
+ goto out2;
+
identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT;
+
+ r = alloc_identity_pagetable(kvm);
+ if (r < 0)
+ goto out2;
+
idx = srcu_read_lock(&kvm->srcu);
r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
if (r < 0)
@@ -3967,10 +4002,13 @@ static int init_rmode_identity_map(struct kvm *kvm)
goto out;
}
kvm->arch.ept_identity_pagetable_done = true;
- ret = 1;
+
out:
srcu_read_unlock(&kvm->srcu, idx);
- return ret;
+
+out2:
+ mutex_unlock(&kvm->slots_lock);
+ return r;
}
static void seg_setup(int seg)
@@ -3995,23 +4033,28 @@ static int alloc_apic_access_page(struct kvm *kvm)
int r = 0;
mutex_lock(&kvm->slots_lock);
- if (kvm->arch.apic_access_page)
+ if (kvm->arch.apic_access_page_done)
goto out;
kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
kvm_userspace_mem.flags = 0;
- kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL;
+ kvm_userspace_mem.guest_phys_addr = APIC_DEFAULT_PHYS_BASE;
kvm_userspace_mem.memory_size = PAGE_SIZE;
r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
if (r)
goto out;
- page = gfn_to_page(kvm, 0xfee00);
+ page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
if (is_error_page(page)) {
r = -EFAULT;
goto out;
}
- kvm->arch.apic_access_page = page;
+ /*
+ * Do not pin the page in memory, so that memory hot-unplug
+ * is able to migrate it.
+ */
+ put_page(page);
+ kvm->arch.apic_access_page_done = true;
out:
mutex_unlock(&kvm->slots_lock);
return r;
@@ -4019,31 +4062,20 @@ out:
static int alloc_identity_pagetable(struct kvm *kvm)
{
- struct page *page;
+ /* Called with kvm->slots_lock held. */
+
struct kvm_userspace_memory_region kvm_userspace_mem;
int r = 0;
- mutex_lock(&kvm->slots_lock);
- if (kvm->arch.ept_identity_pagetable)
- goto out;
+ BUG_ON(kvm->arch.ept_identity_pagetable_done);
+
kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
kvm_userspace_mem.flags = 0;
kvm_userspace_mem.guest_phys_addr =
kvm->arch.ept_identity_map_addr;
kvm_userspace_mem.memory_size = PAGE_SIZE;
r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
- if (r)
- goto out;
-
- page = gfn_to_page(kvm, kvm->arch.ept_identity_map_addr >> PAGE_SHIFT);
- if (is_error_page(page)) {
- r = -EFAULT;
- goto out;
- }
- kvm->arch.ept_identity_pagetable = page;
-out:
- mutex_unlock(&kvm->slots_lock);
return r;
}
@@ -4402,7 +4434,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
if (ple_gap) {
vmcs_write32(PLE_GAP, ple_gap);
- vmcs_write32(PLE_WINDOW, ple_window);
+ vmx->ple_window = ple_window;
+ vmx->ple_window_dirty = true;
}
vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
@@ -4477,7 +4510,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
kvm_set_cr8(&vmx->vcpu, 0);
- apic_base_msr.data = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
+ apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE;
if (kvm_vcpu_is_bsp(&vmx->vcpu))
apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
apic_base_msr.host_initiated = true;
@@ -4537,9 +4570,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmcs_write32(TPR_THRESHOLD, 0);
}
- if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
- vmcs_write64(APIC_ACCESS_ADDR,
- page_to_phys(vmx->vcpu.kvm->arch.apic_access_page));
+ kvm_vcpu_reload_apic_access_page(vcpu);
if (vmx_vm_has_apicv(vcpu->kvm))
memset(&vmx->pi_desc, 0, sizeof(struct pi_desc));
@@ -4729,10 +4760,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
if (ret)
return ret;
kvm->arch.tss_addr = addr;
- if (!init_rmode_tss(kvm))
- return -ENOMEM;
-
- return 0;
+ return init_rmode_tss(kvm);
}
static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
@@ -5521,17 +5549,18 @@ static u64 ept_rsvd_mask(u64 spte, int level)
for (i = 51; i > boot_cpu_data.x86_phys_bits; i--)
mask |= (1ULL << i);
- if (level > 2)
+ if (level == 4)
/* bits 7:3 reserved */
mask |= 0xf8;
- else if (level == 2) {
- if (spte & (1ULL << 7))
- /* 2MB ref, bits 20:12 reserved */
- mask |= 0x1ff000;
- else
- /* bits 6:3 reserved */
- mask |= 0x78;
- }
+ else if (spte & (1ULL << 7))
+ /*
+ * 1GB/2MB page, bits 29:12 or 20:12 reserved respectively,
+ * level == 1 if the hypervisor is using the ignored bit 7.
+ */
+ mask |= (PAGE_SIZE << ((level - 1) * 9)) - PAGE_SIZE;
+ else if (level > 1)
+ /* bits 6:3 reserved */
+ mask |= 0x78;
return mask;
}
@@ -5561,7 +5590,8 @@ static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte,
WARN_ON(1);
}
- if (level == 1 || (level == 2 && (spte & (1ULL << 7)))) {
+ /* bits 5:3 are _not_ reserved for large page or leaf page */
+ if ((rsvd_bits & 0x38) == 0) {
u64 ept_mem_type = (spte & 0x38) >> 3;
if (ept_mem_type == 2 || ept_mem_type == 3 ||
@@ -5676,12 +5706,85 @@ out:
return ret;
}
+static int __grow_ple_window(int val)
+{
+ if (ple_window_grow < 1)
+ return ple_window;
+
+ val = min(val, ple_window_actual_max);
+
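+	/* A grow value smaller than ple_window acts as a multiplier, larger values as an increment. */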
+ if (ple_window_grow < ple_window)
+ val *= ple_window_grow;
+ else
+ val += ple_window_grow;
+
+ return val;
+}
+
+static int __shrink_ple_window(int val, int modifier, int minimum)
+{
+ if (modifier < 1)
+ return ple_window;
+
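+	/* Mirror of __grow_ple_window: small modifiers divide, larger ones are subtracted. */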
+ if (modifier < ple_window)
+ val /= modifier;
+ else
+ val -= modifier;
+
+ return max(val, minimum);
+}
+
+static void grow_ple_window(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ int old = vmx->ple_window;
+
+ vmx->ple_window = __grow_ple_window(old);
+
+ if (vmx->ple_window != old)
+ vmx->ple_window_dirty = true;
+
+ trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old);
+}
+
+static void shrink_ple_window(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ int old = vmx->ple_window;
+
+ vmx->ple_window = __shrink_ple_window(old,
+ ple_window_shrink, ple_window);
+
+ if (vmx->ple_window != old)
+ vmx->ple_window_dirty = true;
+
+ trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old);
+}
+
+/*
+ * ple_window_actual_max is computed to be one grow_ple_window() below
+ * ple_window_max. (See __grow_ple_window for the reason.)
+ * This prevents overflows, because ple_window_max is int.
+ * ple_window_max is effectively rounded down to a multiple of ple_window_grow
+ * in this process.
+ * ple_window_max is also prevented from setting vmx->ple_window < ple_window.
+ */
+static void update_ple_window_actual_max(void)
+{
+ ple_window_actual_max =
+ __shrink_ple_window(max(ple_window_max, ple_window),
+ ple_window_grow, INT_MIN);
+}
+
/*
* Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
* exiting, so only get here on cpu with PAUSE-Loop-Exiting.
*/
static int handle_pause(struct kvm_vcpu *vcpu)
{
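+	/* Grow this vcpu's window on every PLE exit so a spinning vcpu exits less and less often. */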
+ if (ple_gap)
+ grow_ple_window(vcpu);
+
skip_emulated_instruction(vcpu);
kvm_vcpu_on_spin(vcpu);
@@ -6146,7 +6249,11 @@ static void free_nested(struct vcpu_vmx *vmx)
/* Unpin physical memory we referred to in current vmcs02 */
if (vmx->nested.apic_access_page) {
nested_release_page(vmx->nested.apic_access_page);
- vmx->nested.apic_access_page = 0;
+ vmx->nested.apic_access_page = NULL;
+ }
+ if (vmx->nested.virtual_apic_page) {
+ nested_release_page(vmx->nested.virtual_apic_page);
+ vmx->nested.virtual_apic_page = NULL;
}
nested_free_all_saved_vmcss(vmx);
@@ -6617,7 +6724,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
switch (type) {
case VMX_EPT_EXTENT_GLOBAL:
kvm_mmu_sync_roots(vcpu);
- kvm_mmu_flush_tlb(vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
nested_vmx_succeed(vcpu);
break;
default:
@@ -6892,6 +6999,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
case EXIT_REASON_TASK_SWITCH:
return 1;
case EXIT_REASON_CPUID:
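+		/* Leaf 0xa (arch perfmon) is handled by L0 and hidden from L2; see kvm_cpuid(). */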
+ if (kvm_register_read(vcpu, VCPU_REGS_RAX) == 0xa)
+ return 0;
return 1;
case EXIT_REASON_HLT:
return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING);
@@ -6936,7 +7045,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
case EXIT_REASON_MCE_DURING_VMENTRY:
return 0;
case EXIT_REASON_TPR_BELOW_THRESHOLD:
- return 1;
+ return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
case EXIT_REASON_APIC_ACCESS:
return nested_cpu_has2(vmcs12,
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
@@ -7057,6 +7166,12 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
{
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+
+ if (is_guest_mode(vcpu) &&
+ nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
+ return;
+
if (irr == -1 || tpr < irr) {
vmcs_write32(TPR_THRESHOLD, 0);
return;
@@ -7094,6 +7209,29 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
vmx_set_msr_bitmap(vcpu);
}
+static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ /*
+ * Currently we do not handle the nested case where L2 has an
+ * APIC access page of its own; that page is still pinned.
+ * Hence, we skip the case where the VCPU is in guest mode _and_
+ * L1 prepared an APIC access page for L2.
+ *
+ * For the case where L1 and L2 share the same APIC access page
+ * (flexpriority=Y but SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES clear
+ * in the vmcs12), this function will only update either the vmcs01
+ * or the vmcs02. If the former, the vmcs02 will be updated by
+ * prepare_vmcs02. If the latter, the vmcs01 will be updated in
+ * the next L2->L1 exit.
+ */
+ if (!is_guest_mode(vcpu) ||
+ !nested_cpu_has2(vmx->nested.current_vmcs12,
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
+ vmcs_write64(APIC_ACCESS_ADDR, hpa);
+}
+
static void vmx_hwapic_isr_update(struct kvm *kvm, int isr)
{
u16 status;
@@ -7387,6 +7525,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (vmx->emulation_required)
return;
+ if (vmx->ple_window_dirty) {
+ vmx->ple_window_dirty = false;
+ vmcs_write32(PLE_WINDOW, vmx->ple_window);
+ }
+
if (vmx->nested.sync_shadow_vmcs) {
copy_vmcs12_to_shadow(vmx);
vmx->nested.sync_shadow_vmcs = false;
@@ -7642,10 +7785,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
if (!kvm->arch.ept_identity_map_addr)
kvm->arch.ept_identity_map_addr =
VMX_EPT_IDENTITY_PAGETABLE_ADDR;
- err = -ENOMEM;
- if (alloc_identity_pagetable(kvm) != 0)
- goto free_vmcs;
- if (!init_rmode_identity_map(kvm))
+ err = init_rmode_identity_map(kvm);
+ if (err)
goto free_vmcs;
}
@@ -7824,6 +7965,55 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
kvm_inject_page_fault(vcpu, fault);
}
+static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
+ /* TODO: Also verify bits beyond physical address width are 0 */
+ if (!PAGE_ALIGNED(vmcs12->apic_access_addr))
+ return false;
+
+ /*
+ * Translate L1 physical address to host physical
+ * address for vmcs02. Keep the page pinned, so this
+ * physical address remains valid. We keep a reference
+ * to it so we can release it later.
+ */
+ if (vmx->nested.apic_access_page) /* shouldn't happen */
+ nested_release_page(vmx->nested.apic_access_page);
+ vmx->nested.apic_access_page =
+ nested_get_page(vcpu, vmcs12->apic_access_addr);
+ }
+
+ if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
+ /* TODO: Also verify bits beyond physical address width are 0 */
+ if (!PAGE_ALIGNED(vmcs12->virtual_apic_page_addr))
+ return false;
+
+ if (vmx->nested.virtual_apic_page) /* shouldn't happen */
+ nested_release_page(vmx->nested.virtual_apic_page);
+ vmx->nested.virtual_apic_page =
+ nested_get_page(vcpu, vmcs12->virtual_apic_page_addr);
+
+ /*
+ * Failing the vm entry is _not_ what the processor does
+ * but it's basically the only possibility we have.
+ * We could still enter the guest if CR8 load exits are
+ * enabled, CR8 store exits are enabled, and virtualize APIC
+ * access is disabled; in this case the processor would never
+ * use the TPR shadow and we could simply clear the bit from
+ * the execution control. But such a configuration is useless,
+ * so let's keep the code simple.
+ */
+ if (!vmx->nested.virtual_apic_page)
+ return false;
+ }
+
+ return true;
+}
+
static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
{
u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value;
@@ -7849,7 +8039,7 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
/*
* prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
* L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
- * with L0's requirements for its guest (a.k.a. vmsc01), so we can run the L2
+ * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
* guest in a way that will both be appropriate to L1's requests, and our
* needs. In addition to modifying the active vmcs (which is vmcs02), this
* function also has additional necessary side-effects, like setting various
@@ -7970,16 +8160,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) {
/*
- * Translate L1 physical address to host physical
- * address for vmcs02. Keep the page pinned, so this
- * physical address remains valid. We keep a reference
- * to it so we can release it later.
- */
- if (vmx->nested.apic_access_page) /* shouldn't happen */
- nested_release_page(vmx->nested.apic_access_page);
- vmx->nested.apic_access_page =
- nested_get_page(vcpu, vmcs12->apic_access_addr);
- /*
* If translation failed, no matter: This feature asks
* to exit when accessing the given address, and if it
* can never be accessed, this feature won't do
@@ -7994,8 +8174,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
} else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) {
exec_control |=
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
- vmcs_write64(APIC_ACCESS_ADDR,
- page_to_phys(vcpu->kvm->arch.apic_access_page));
+ kvm_vcpu_reload_apic_access_page(vcpu);
}
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
@@ -8024,6 +8203,13 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
exec_control &= ~CPU_BASED_TPR_SHADOW;
exec_control |= vmcs12->cpu_based_vm_exec_control;
+
+ if (exec_control & CPU_BASED_TPR_SHADOW) {
+ vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
+ page_to_phys(vmx->nested.virtual_apic_page));
+ vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
+ }
+
/*
* Merging of IO and MSR bitmaps not currently supported.
* Rather, exit every time.
@@ -8185,8 +8371,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
return 1;
}
- if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
- !PAGE_ALIGNED(vmcs12->apic_access_addr)) {
+ if (!nested_get_vmcs12_pages(vcpu, vmcs12)) {
/*TODO: Also verify bits beyond physical address width are 0*/
nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
return 1;
@@ -8790,10 +8975,20 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
/* Unpin physical memory we referred to in vmcs02 */
if (vmx->nested.apic_access_page) {
nested_release_page(vmx->nested.apic_access_page);
- vmx->nested.apic_access_page = 0;
+ vmx->nested.apic_access_page = NULL;
+ }
+ if (vmx->nested.virtual_apic_page) {
+ nested_release_page(vmx->nested.virtual_apic_page);
+ vmx->nested.virtual_apic_page = NULL;
}
/*
+ * While we were running in L2, the mmu_notifier-driven reload only updated
+ * the page's hpa for the L2 vmcs. Reload it for L1 before re-entering L1.
+ */
+ kvm_vcpu_reload_apic_access_page(vcpu);
+
+ /*
* Exiting from L2 to L1, we're now back to L1 which thinks it just
* finished a VMLAUNCH or VMRESUME instruction, so we need to set the
* success or failure flag accordingly.
@@ -8846,6 +9041,12 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
return X86EMUL_CONTINUE;
}
+static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+ if (ple_gap)
+ shrink_ple_window(vcpu);
+}
+
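vmx_sched_in() is the consumer of the new dynamic PLE-window logic: the window grows on PLE exits and shrinks when a vCPU is scheduled back in, with ple_window_dirty deferring the VMCS write to vmx_vcpu_run() (see the hunk near the top of this file's diff). The real patch scales by the ple_window_grow/ple_window_shrink module parameters and clamps via update_ple_window_actual_max(); the sketch below uses a simple halving policy purely for illustration:

static void shrink_ple_window(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	int old = vmx->ple_window;

	/* Illustrative policy only: never shrink below the base ple_window. */
	vmx->ple_window = max(old / 2, ple_window);

	if (vmx->ple_window != old)
		vmx->ple_window_dirty = true;	/* written back in vmx_vcpu_run() */
}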
static struct kvm_x86_ops vmx_x86_ops = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,
@@ -8890,7 +9091,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
.cache_reg = vmx_cache_reg,
.get_rflags = vmx_get_rflags,
.set_rflags = vmx_set_rflags,
- .fpu_activate = vmx_fpu_activate,
.fpu_deactivate = vmx_fpu_deactivate,
.tlb_flush = vmx_flush_tlb,
@@ -8913,6 +9113,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.enable_irq_window = enable_irq_window,
.update_cr8_intercept = update_cr8_intercept,
.set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
+ .set_apic_access_page_addr = vmx_set_apic_access_page_addr,
.vm_has_apicv = vmx_vm_has_apicv,
.load_eoi_exitmap = vmx_load_eoi_exitmap,
.hwapic_irr_update = vmx_hwapic_irr_update,
@@ -8951,6 +9152,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
.mpx_supported = vmx_mpx_supported,
.check_nested_events = vmx_check_nested_events,
+
+ .sched_in = vmx_sched_in,
};
static int __init vmx_init(void)
@@ -9065,6 +9268,8 @@ static int __init vmx_init(void)
} else
kvm_disable_tdp();
+ update_ple_window_actual_max();
+
return 0;
out7:
@@ -9098,7 +9303,7 @@ static void __exit vmx_exit(void)
free_page((unsigned long)vmx_vmread_bitmap);
#ifdef CONFIG_KEXEC
- rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL);
+ RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
synchronize_rcu();
#endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8f1e22d3b286..5430e4b0af29 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -246,7 +246,7 @@ void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
}
EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
-static void drop_user_return_notifiers(void *ignore)
+static void drop_user_return_notifiers(void)
{
unsigned int cpu = smp_processor_id();
struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
@@ -408,12 +408,14 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
}
EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
-void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
+static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
{
if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
else
vcpu->arch.mmu.inject_page_fault(vcpu, fault);
+
+ return fault->nested_page_fault;
}
void kvm_inject_nmi(struct kvm_vcpu *vcpu)
@@ -457,11 +459,12 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
gfn_t ngfn, void *data, int offset, int len,
u32 access)
{
+ struct x86_exception exception;
gfn_t real_gfn;
gpa_t ngpa;
ngpa = gfn_to_gpa(ngfn);
- real_gfn = mmu->translate_gpa(vcpu, ngpa, access);
+ real_gfn = mmu->translate_gpa(vcpu, ngpa, access, &exception);
if (real_gfn == UNMAPPED_GVA)
return -EFAULT;
@@ -726,7 +729,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
kvm_mmu_sync_roots(vcpu);
- kvm_mmu_flush_tlb(vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
return 0;
}
@@ -1518,7 +1521,7 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
pvclock_update_vm_gtod_copy(kvm);
kvm_for_each_vcpu(i, vcpu, kvm)
- set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
/* guest entries allowed */
kvm_for_each_vcpu(i, vcpu, kvm)
@@ -1661,7 +1664,7 @@ static void kvmclock_update_fn(struct work_struct *work)
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm) {
- set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
kvm_vcpu_kick(vcpu);
}
}
@@ -1670,7 +1673,7 @@ static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
{
struct kvm *kvm = v->kvm;
- set_bit(KVM_REQ_CLOCK_UPDATE, &v->requests);
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
schedule_delayed_work(&kvm->arch.kvmclock_update_work,
KVMCLOCK_UPDATE_DELAY);
}
@@ -1723,9 +1726,10 @@ static bool valid_mtrr_type(unsigned t)
return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
}
-static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
int i;
+ u64 mask;
if (!msr_mtrr_valid(msr))
return false;
@@ -1747,14 +1751,31 @@ static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
}
/* variable MTRRs */
- return valid_mtrr_type(data & 0xff);
+ WARN_ON(!(msr >= 0x200 && msr < 0x200 + 2 * KVM_NR_VAR_MTRR));
+
+ mask = (~0ULL) << cpuid_maxphyaddr(vcpu);
+ if ((msr & 1) == 0) {
+ /* MTRR base */
+ if (!valid_mtrr_type(data & 0xff))
+ return false;
+ mask |= 0xf00;
+ } else
+ /* MTRR mask */
+ mask |= 0x7ff;
+ if (data & mask) {
+ kvm_inject_gp(vcpu, 0);
+ return false;
+ }
+
+ return true;
}
+EXPORT_SYMBOL_GPL(kvm_mtrr_valid);
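A worked example of the new reserved-bit check, assuming cpuid_maxphyaddr() reports 40 bits (the values below are just the arithmetic, not code added by the patch):

/* kvm_mtrr_valid(), variable-range MTRRs, maxphyaddr == 40 */
u64 mask = (~0ULL) << 40;		/* 0xFFFFFF0000000000: bits 63..40 reserved */

/* Even MSR (MTRRphysBaseN): type in bits 7:0, base starts at bit 12,
 * so bits 11:8 are reserved too. */
u64 base_rsvd = mask | 0xf00;		/* 0xFFFFFF0000000F00 */

/* Odd MSR (MTRRphysMaskN): valid bit is bit 11, mask starts at bit 12,
 * so bits 10:0 are reserved. */
u64 mask_rsvd = mask | 0x7ff;		/* 0xFFFFFF00000007FF */

/* Any write with (data & *_rsvd) != 0 now injects #GP instead of being
 * silently accepted. */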
static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
- if (!mtrr_valid(vcpu, msr, data))
+ if (!kvm_mtrr_valid(vcpu, msr, data))
return 1;
if (msr == MSR_MTRRdefType) {
@@ -1805,7 +1826,7 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
break;
default:
if (msr >= MSR_IA32_MC0_CTL &&
- msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
+ msr < MSR_IA32_MCx_CTL(bank_num)) {
u32 offset = msr - MSR_IA32_MC0_CTL;
/* only 0 or all 1s can be written to IA32_MCi_CTL
* some Linux kernels though clear bit 10 in bank 4 to
@@ -2164,7 +2185,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_MCG_CTL:
case MSR_IA32_MCG_STATUS:
- case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
+ case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
return set_msr_mce(vcpu, msr, data);
/* Performance counters are not protected by a CPUID bit,
@@ -2330,7 +2351,7 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
break;
default:
if (msr >= MSR_IA32_MC0_CTL &&
- msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
+ msr < MSR_IA32_MCx_CTL(bank_num)) {
u32 offset = msr - MSR_IA32_MC0_CTL;
data = vcpu->arch.mce_banks[offset];
break;
@@ -2419,7 +2440,13 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case MSR_K7_HWCR:
case MSR_VM_HSAVE_PA:
case MSR_K7_EVNTSEL0:
+ case MSR_K7_EVNTSEL1:
+ case MSR_K7_EVNTSEL2:
+ case MSR_K7_EVNTSEL3:
case MSR_K7_PERFCTR0:
+ case MSR_K7_PERFCTR1:
+ case MSR_K7_PERFCTR2:
+ case MSR_K7_PERFCTR3:
case MSR_K8_INT_PENDING_MSG:
case MSR_AMD64_NB_CFG:
case MSR_FAM10H_MMIO_CONF_BASE:
@@ -2505,7 +2532,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case MSR_IA32_MCG_CAP:
case MSR_IA32_MCG_CTL:
case MSR_IA32_MCG_STATUS:
- case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
+ case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
return get_msr_mce(vcpu, msr, pdata);
case MSR_K7_CLK_CTL:
/*
@@ -2823,7 +2850,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
vcpu->arch.tsc_offset_adjustment = 0;
- set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
}
if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
@@ -4040,16 +4067,16 @@ void kvm_get_segment(struct kvm_vcpu *vcpu,
kvm_x86_ops->get_segment(vcpu, var, seg);
}
-gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
+gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
+ struct x86_exception *exception)
{
gpa_t t_gpa;
- struct x86_exception exception;
BUG_ON(!mmu_is_nested(vcpu));
/* NPT walks are always user-walks */
access |= PFERR_USER_MASK;
- t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &exception);
+ t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, exception);
return t_gpa;
}
@@ -4906,16 +4933,18 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
}
}
-static void inject_emulated_exception(struct kvm_vcpu *vcpu)
+static bool inject_emulated_exception(struct kvm_vcpu *vcpu)
{
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
if (ctxt->exception.vector == PF_VECTOR)
- kvm_propagate_fault(vcpu, &ctxt->exception);
- else if (ctxt->exception.error_code_valid)
+ return kvm_propagate_fault(vcpu, &ctxt->exception);
+
+ if (ctxt->exception.error_code_valid)
kvm_queue_exception_e(vcpu, ctxt->exception.vector,
ctxt->exception.error_code);
else
kvm_queue_exception(vcpu, ctxt->exception.vector);
+ return false;
}
static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
@@ -4972,7 +5001,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
++vcpu->stat.insn_emulation_fail;
trace_kvm_emulate_insn_failed(vcpu);
- if (!is_guest_mode(vcpu)) {
+ if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
vcpu->run->internal.ndata = 0;
@@ -5224,6 +5253,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
ctxt->interruptibility = 0;
ctxt->have_exception = false;
+ ctxt->exception.vector = -1;
ctxt->perm_ok = false;
ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
@@ -5276,8 +5306,9 @@ restart:
}
if (ctxt->have_exception) {
- inject_emulated_exception(vcpu);
r = EMULATE_DONE;
+ if (inject_emulated_exception(vcpu))
+ return r;
} else if (vcpu->arch.pio.count) {
if (!vcpu->arch.pio.in) {
/* FIXME: return into emulator if single-stepping. */
@@ -5545,7 +5576,7 @@ static void kvm_set_mmio_spte_mask(void)
* entry to generate page fault with PFER.RSV = 1.
*/
/* Mask the reserved physical address bits. */
- mask = ((1ull << (51 - maxphyaddr + 1)) - 1) << maxphyaddr;
+ mask = rsvd_bits(maxphyaddr, 51);
/* Bit 62 is always reserved for 32bit host. */
mask |= 0x3ull << 62;
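rsvd_bits() already exists in arch/x86/kvm/mmu.h, so this hunk is purely cosmetic; assuming the usual definition (paraphrased below), the two expressions are identical:

/* From arch/x86/kvm/mmu.h (paraphrased): mask of bits s..e inclusive. */
static inline u64 rsvd_bits(int s, int e)
{
	return ((1ULL << (e - s + 1)) - 1) << s;
}

/* Hence rsvd_bits(maxphyaddr, 51)
 *   == ((1ull << (51 - maxphyaddr + 1)) - 1) << maxphyaddr,
 * exactly the expression removed above. */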
@@ -5576,7 +5607,7 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
kvm_for_each_vcpu(i, vcpu, kvm)
- set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests);
+ kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
atomic_set(&kvm_guest_has_master_clock, 0);
spin_unlock(&kvm_lock);
}
@@ -5989,6 +6020,44 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
kvm_apic_update_tmr(vcpu, tmr);
}
+static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
+{
+ ++vcpu->stat.tlb_flush;
+ kvm_x86_ops->tlb_flush(vcpu);
+}
+
+void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
+{
+ struct page *page = NULL;
+
+ if (!irqchip_in_kernel(vcpu->kvm))
+ return;
+
+ if (!kvm_x86_ops->set_apic_access_page_addr)
+ return;
+
+ page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
+ kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page));
+
+ /*
+ * Do not pin the APIC access page in memory; the MMU notifier
+ * will call us again if it is migrated or swapped out.
+ */
+ put_page(page);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);
+
+void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+ unsigned long address)
+{
+ /*
+ * The physical address of the APIC access page is stored in the VMCS.
+ * Update it when it becomes invalid.
+ */
+ if (address == gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT))
+ kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
+}
+
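Taken together with the VMX hunks earlier in this patch, the APIC access page is now tracked by request rather than by pinning. Roughly, the control flow is (summary only, not code added by the patch):

/*
 *   mmu_notifier invalidates the backing page
 *     -> kvm_arch_mmu_notifier_invalidate_page()
 *        -> kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD)
 *   next vcpu_enter_guest()
 *     -> kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)
 *        -> kvm_vcpu_reload_apic_access_page()
 *           -> gfn_to_page() picks up the (possibly migrated) page
 *           -> kvm_x86_ops->set_apic_access_page_addr(vcpu, hpa)
 *           -> put_page(), so the page is never left pinned
 */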
/*
* Returns 1 to let __vcpu_run() continue the guest execution loop without
* exiting to the userspace. Otherwise, the value will be returned to the
@@ -6018,7 +6087,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
kvm_mmu_sync_roots(vcpu);
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
- kvm_x86_ops->tlb_flush(vcpu);
+ kvm_vcpu_flush_tlb(vcpu);
if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
r = 0;
@@ -6049,6 +6118,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_deliver_pmi(vcpu);
if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
vcpu_scan_ioapic(vcpu);
+ if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
+ kvm_vcpu_reload_apic_access_page(vcpu);
}
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -6934,7 +7005,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector)
kvm_rip_write(vcpu, 0);
}
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
{
struct kvm *kvm;
struct kvm_vcpu *vcpu;
@@ -6945,7 +7016,7 @@ int kvm_arch_hardware_enable(void *garbage)
bool stable, backwards_tsc = false;
kvm_shared_msr_cpu_online();
- ret = kvm_x86_ops->hardware_enable(garbage);
+ ret = kvm_x86_ops->hardware_enable();
if (ret != 0)
return ret;
@@ -6954,7 +7025,7 @@ int kvm_arch_hardware_enable(void *garbage)
list_for_each_entry(kvm, &vm_list, vm_list) {
kvm_for_each_vcpu(i, vcpu, kvm) {
if (!stable && vcpu->cpu == smp_processor_id())
- set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
if (stable && vcpu->arch.last_host_tsc > local_tsc) {
backwards_tsc = true;
if (vcpu->arch.last_host_tsc > max_tsc)
@@ -7008,8 +7079,7 @@ int kvm_arch_hardware_enable(void *garbage)
kvm_for_each_vcpu(i, vcpu, kvm) {
vcpu->arch.tsc_offset_adjustment += delta_cyc;
vcpu->arch.last_host_tsc = local_tsc;
- set_bit(KVM_REQ_MASTERCLOCK_UPDATE,
- &vcpu->requests);
+ kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
}
/*
@@ -7026,10 +7096,10 @@ int kvm_arch_hardware_enable(void *garbage)
return 0;
}
-void kvm_arch_hardware_disable(void *garbage)
+void kvm_arch_hardware_disable(void)
{
- kvm_x86_ops->hardware_disable(garbage);
- drop_user_return_notifiers(garbage);
+ kvm_x86_ops->hardware_disable();
+ drop_user_return_notifiers();
}
int kvm_arch_hardware_setup(void)
@@ -7146,6 +7216,11 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
static_key_slow_dec(&kvm_no_apic_vcpu);
}
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+ kvm_x86_ops->sched_in(vcpu, cpu);
+}
+
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
if (type)
@@ -7237,10 +7312,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
kvm_free_vcpus(kvm);
- if (kvm->arch.apic_access_page)
- put_page(kvm->arch.apic_access_page);
- if (kvm->arch.ept_identity_pagetable)
- put_page(kvm->arch.ept_identity_pagetable);
kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
}
@@ -7643,3 +7714,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 306a1b77581f..7cb9c45a5fe0 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -88,15 +88,23 @@ static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
vcpu->arch.mmio_gva = gva & PAGE_MASK;
vcpu->arch.access = access;
vcpu->arch.mmio_gfn = gfn;
+ vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation;
+}
+
+static inline bool vcpu_match_mmio_gen(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.mmio_gen == kvm_memslots(vcpu->kvm)->generation;
}
/*
- * Clear the mmio cache info for the given gva,
- * specially, if gva is ~0ul, we clear all mmio cache info.
+ * Clear the mmio cache info for the given gva. If gva is MMIO_GVA_ANY, we
+ * clear all mmio cache info.
*/
+#define MMIO_GVA_ANY (~(gva_t)0)
+
static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva)
{
- if (gva != (~0ul) && vcpu->arch.mmio_gva != (gva & PAGE_MASK))
+ if (gva != MMIO_GVA_ANY && vcpu->arch.mmio_gva != (gva & PAGE_MASK))
return;
vcpu->arch.mmio_gva = 0;
@@ -104,7 +112,8 @@ static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva)
static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva)
{
- if (vcpu->arch.mmio_gva && vcpu->arch.mmio_gva == (gva & PAGE_MASK))
+ if (vcpu_match_mmio_gen(vcpu) && vcpu->arch.mmio_gva &&
+ vcpu->arch.mmio_gva == (gva & PAGE_MASK))
return true;
return false;
@@ -112,7 +121,8 @@ static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva)
static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
{
- if (vcpu->arch.mmio_gfn && vcpu->arch.mmio_gfn == gpa >> PAGE_SHIFT)
+ if (vcpu_match_mmio_gen(vcpu) && vcpu->arch.mmio_gfn &&
+ vcpu->arch.mmio_gfn == gpa >> PAGE_SHIFT)
return true;
return false;
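The generation tag makes the cached MMIO translation self-invalidating: a hedged sketch of the effect (kvm_memslots()->generation is bumped by the existing memslot-update path, not by this patch):

/* Illustrative only: a stale cache entry is ignored after a memslot change. */
vcpu_cache_mmio_info(vcpu, gva, gfn, access);	/* snapshots the current generation */

/* ... userspace updates a memslot; kvm_memslots(vcpu->kvm)->generation changes ... */

if (!vcpu_match_mmio_gva(vcpu, gva))
	/* generation mismatch: fall back to a full page-table/memslot walk */;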
@@ -149,6 +159,8 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
gva_t addr, void *val, unsigned int bytes,
struct x86_exception *exception);
+bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data);
+
#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
| XSTATE_BNDREGS | XSTATE_BNDCSR)
extern u64 host_xcr0;