108 files changed, 2609 insertions, 3234 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 136f263ed47b..18a9f5ef643a 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -19,7 +19,6 @@ config ARM
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select HARDIRQS_SW_RESEND
-	select HAVE_AOUT
 	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_SECCOMP_FILTER
@@ -213,7 +212,8 @@ config VECTORS_BASE
 	default DRAM_BASE if REMAP_VECTORS_TO_RAM
 	default 0x00000000
 	help
-	  The base address of exception vectors.
+	  The base address of exception vectors.  This must be two pages
+	  in size.
 
 config ARM_PATCH_PHYS_VIRT
 	bool "Patch physical to virtual translations at runtime" if EMBEDDED
diff --git a/arch/arm/boot/compressed/atags_to_fdt.c b/arch/arm/boot/compressed/atags_to_fdt.c
index aabc02a68482..d1153c8a765a 100644
--- a/arch/arm/boot/compressed/atags_to_fdt.c
+++ b/arch/arm/boot/compressed/atags_to_fdt.c
@@ -53,6 +53,17 @@ static const void *getprop(const void *fdt, const char *node_path,
 	return fdt_getprop(fdt, offset, property, len);
 }
 
+static uint32_t get_cell_size(const void *fdt)
+{
+	int len;
+	uint32_t cell_size = 1;
+	const uint32_t *size_len =  getprop(fdt, "/", "#size-cells", &len);
+
+	if (size_len)
+		cell_size = fdt32_to_cpu(*size_len);
+	return cell_size;
+}
+
 static void merge_fdt_bootargs(void *fdt, const char *fdt_cmdline)
 {
 	char cmdline[COMMAND_LINE_SIZE];
@@ -95,9 +106,11 @@ static void merge_fdt_bootargs(void *fdt, const char *fdt_cmdline)
 int atags_to_fdt(void *atag_list, void *fdt, int total_space)
 {
 	struct tag *atag = atag_list;
-	uint32_t mem_reg_property[2 * NR_BANKS];
+	/* In the case of 64 bits memory size, need to reserve 2 cells for
+	 * address and size for each bank */
+	uint32_t mem_reg_property[2 * 2 * NR_BANKS];
 	int memcount = 0;
-	int ret;
+	int ret, memsize;
 
 	/* make sure we've got an aligned pointer */
 	if ((u32)atag_list & 0x3)
@@ -137,8 +150,25 @@ int atags_to_fdt(void *atag_list, void *fdt, int total_space)
 				continue;
 			if (!atag->u.mem.size)
 				continue;
-			mem_reg_property[memcount++] = cpu_to_fdt32(atag->u.mem.start);
-			mem_reg_property[memcount++] = cpu_to_fdt32(atag->u.mem.size);
+			memsize = get_cell_size(fdt);
+
+			if (memsize == 2) {
+				/* if memsize is 2, that means that
+				 * each data needs 2 cells of 32 bits,
+				 * so the data are 64 bits */
+				uint64_t *mem_reg_prop64 =
+					(uint64_t *)mem_reg_property;
+				mem_reg_prop64[memcount++] =
+					cpu_to_fdt64(atag->u.mem.start);
+				mem_reg_prop64[memcount++] =
+					cpu_to_fdt64(atag->u.mem.size);
+			} else {
+				mem_reg_property[memcount++] =
+					cpu_to_fdt32(atag->u.mem.start);
+				mem_reg_property[memcount++] =
+					cpu_to_fdt32(atag->u.mem.size);
+			}
+
 		} else if (atag->hdr.tag == ATAG_INITRD2) {
 			uint32_t initrd_start, initrd_size;
 			initrd_start = atag->u.initrd.start;
@@ -150,8 +180,10 @@ int atags_to_fdt(void *atag_list, void *fdt, int total_space)
 		}
 	}
 
-	if (memcount)
-		setprop(fdt, "/memory", "reg", mem_reg_property, 4*memcount);
+	if (memcount) {
+		setprop(fdt, "/memory", "reg", mem_reg_property,
+			4 * memcount * memsize);
+	}
 
 	return fdt_pack(fdt);
 }
diff --git a/arch/arm/boot/dts/at91rm9200.dtsi b/arch/arm/boot/dts/at91rm9200.dtsi
index 5d3ed5aafc69..0af879a4eafa 100644
--- a/arch/arm/boot/dts/at91rm9200.dtsi
+++ b/arch/arm/boot/dts/at91rm9200.dtsi
@@ -35,8 +35,12 @@
 		ssc2 = &ssc2;
 	};
 	cpus {
-		cpu@0 {
+		#address-cells = <0>;
+		#size-cells = <0>;
+
+		cpu {
 			compatible = "arm,arm920t";
+			device_type = "cpu";
 		};
 	};
 
diff --git a/arch/arm/boot/dts/at91sam9260.dtsi b/arch/arm/boot/dts/at91sam9260.dtsi
index 84c4bef2d726..0dbdb846f90a 100644
--- a/arch/arm/boot/dts/at91sam9260.dtsi
+++ b/arch/arm/boot/dts/at91sam9260.dtsi
@@ -32,8 +32,12 @@
 		ssc0 = &ssc0;
 	};
 	cpus {
-		cpu@0 {
-			compatible = "arm,arm926ejs";
+		#address-cells = <0>;
+		#size-cells = <0>;
+
+		cpu {
+			compatible = "arm,arm926ej-s";
+			device_type = "cpu";
 		};
 	};
 
@@ -340,6 +344,14 @@
 					};
 				};
 
+				i2c_gpio0 {
+					pinctrl_i2c_gpio0: i2c_gpio0-0 {
+						atmel,pins =
+							<0 23 0x0 0x3   /* PA23 gpio I2C_SDA pin */
+							 0 24 0x0 0x3>; /* PA24 gpio I2C_SCL pin */
+					};
+				};
+
 				pioA: gpio@fffff400 {
 					compatible = "atmel,at91rm9200-gpio";
 					reg = <0xfffff400 0x200>;
@@ -592,6 +604,8 @@
 		i2c-gpio,delay-us = <2>;	/* ~100 kHz */
 		#address-cells = <1>;
 		#size-cells = <0>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_i2c_gpio0>;
 		status = "disabled";
 	};
 };
diff --git a/arch/arm/boot/dts/at91sam9263.dtsi b/arch/arm/boot/dts/at91sam9263.dtsi
index 94b58ab2cc08..fcd38f89904e 100644
--- a/arch/arm/boot/dts/at91sam9263.dtsi
+++ b/arch/arm/boot/dts/at91sam9263.dtsi
@@ -29,8 +29,12 @@
 		ssc1 = &ssc1;
 	};
 	cpus {
-		cpu@0 {
-			compatible = "arm,arm926ejs";
+		#address-cells = <0>;
+		#size-cells = <0>;
+
+		cpu {
+			compatible = "arm,arm926ej-s";
+			device_type = "cpu";
 		};
 	};
 
diff --git a/arch/arm/boot/dts/at91sam9g45.dtsi b/arch/arm/boot/dts/at91sam9g45.dtsi
index bf18a735c37d..479a0622cdb8 100644
--- a/arch/arm/boot/dts/at91sam9g45.dtsi
+++ b/arch/arm/boot/dts/at91sam9g45.dtsi
@@ -35,8 +35,12 @@
 		ssc1 = &ssc1;
 	};
 	cpus {
-		cpu@0 {
-			compatible = "arm,arm926ejs";
+		#address-cells = <0>;
+		#size-cells = <0>;
+
+		cpu {
+			compatible = "arm,arm926ej-s";
+			device_type = "cpu";
 		};
 	};
 
diff --git a/arch/arm/boot/dts/at91sam9n12.dtsi b/arch/arm/boot/dts/at91sam9n12.dtsi
index 8d25f889928e..a92ec78349a2 100644
--- a/arch/arm/boot/dts/at91sam9n12.dtsi
+++ b/arch/arm/boot/dts/at91sam9n12.dtsi
@@ -31,8 +31,12 @@
 		ssc0 = &ssc0;
 	};
 	cpus {
-		cpu@0 {
-			compatible = "arm,arm926ejs";
+		#address-cells = <0>;
+		#size-cells = <0>;
+
+		cpu {
+			compatible = "arm,arm926ej-s";
+			device_type = "cpu";
 		};
 	};
 
diff --git a/arch/arm/boot/dts/at91sam9n12ek.dts b/arch/arm/boot/dts/at91sam9n12ek.dts
index d30e48bd1e9d..28ba79893877 100644
--- a/arch/arm/boot/dts/at91sam9n12ek.dts
+++ b/arch/arm/boot/dts/at91sam9n12ek.dts
@@ -14,11 +14,11 @@
 	compatible = "atmel,at91sam9n12ek", "atmel,at91sam9n12", "atmel,at91sam9";
 
 	chosen {
-		bootargs = "mem=128M console=ttyS0,115200 root=/dev/mtdblock1 rw rootfstype=jffs2";
+		bootargs = "console=ttyS0,115200 root=/dev/mtdblock1 rw rootfstype=jffs2";
 	};
 
 	memory {
-		reg = <0x20000000 0x10000000>;
+		reg = <0x20000000 0x8000000>;
 	};
 
 	clocks {
diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi
index 1145ac330fb7..2b2b6923d16b 100644
--- a/arch/arm/boot/dts/at91sam9x5.dtsi
+++ b/arch/arm/boot/dts/at91sam9x5.dtsi
@@ -33,8 +33,12 @@
 		ssc0 = &ssc0;
 	};
 	cpus {
-		cpu@0 {
-			compatible = "arm,arm926ejs";
+		#address-cells = <0>;
+		#size-cells = <0>;
+
+		cpu {
+			compatible = "arm,arm926ej-s";
+			device_type = "cpu";
 		};
 	};
 
@@ -643,7 +647,7 @@
 			};
 
 			rtc@fffffeb0 {
-				compatible = "atmel,at91rm9200-rtc";
+				compatible = "atmel,at91sam9x5-rtc";
 				reg = <0xfffffeb0 0x40>;
 				interrupts = <1 4 7>;
 				status = "disabled";
diff --git a/arch/arm/boot/dts/imx23.dtsi b/arch/arm/boot/dts/imx23.dtsi
index 73fd7d0887b5..587ceef81e45 100644
--- a/arch/arm/boot/dts/imx23.dtsi
+++ b/arch/arm/boot/dts/imx23.dtsi
@@ -23,8 +23,12 @@
 	};
 
 	cpus {
-		cpu@0 {
-			compatible = "arm,arm926ejs";
+		#address-cells = <0>;
+		#size-cells = <0>;
+
+		cpu {
+			compatible = "arm,arm926ej-s";
+			device_type = "cpu";
 		};
 	};
 
diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi
index 600f7cb51f3e..4c10a1968c0e 100644
--- a/arch/arm/boot/dts/imx28.dtsi
+++ b/arch/arm/boot/dts/imx28.dtsi
@@ -32,8 +32,12 @@
 	};
 
 	cpus {
-		cpu@0 {
-			compatible = "arm,arm926ejs";
+		#address-cells = <0>;
+		#size-cells = <0>;
+
+		cpu {
+			compatible = "arm,arm926ej-s";
+			device_type = "cpu";
 		};
 	};
 
diff --git a/arch/arm/boot/dts/imx6dl.dtsi b/arch/arm/boot/dts/imx6dl.dtsi
index 5bcdf3a90bb3..62dc78126795 100644
--- a/arch/arm/boot/dts/imx6dl.dtsi
+++ b/arch/arm/boot/dts/imx6dl.dtsi
@@ -18,12 +18,14 @@
 
 		cpu@0 {
 			compatible = "arm,cortex-a9";
+			device_type = "cpu";
 			reg = <0>;
 			next-level-cache = <&L2>;
 		};
 
 		cpu@1 {
 			compatible = "arm,cortex-a9";
+			device_type = "cpu";
 			reg = <1>;
 			next-level-cache = <&L2>;
 		};
diff --git a/arch/arm/boot/dts/imx6q.dtsi b/arch/arm/boot/dts/imx6q.dtsi
index 21e675848bd1..dc54a72a3bcd 100644
--- a/arch/arm/boot/dts/imx6q.dtsi
+++ b/arch/arm/boot/dts/imx6q.dtsi
@@ -18,6 +18,7 @@
 
 		cpu@0 {
 			compatible = "arm,cortex-a9";
+			device_type = "cpu";
 			reg = <0>;
 			next-level-cache = <&L2>;
 			operating-points = <
@@ -39,18 +40,21 @@
 
 		cpu@1 {
 			compatible = "arm,cortex-a9";
+			device_type = "cpu";
 			reg = <1>;
 			next-level-cache = <&L2>;
 		};
 
 		cpu@2 {
 			compatible = "arm,cortex-a9";
+			device_type = "cpu";
 			reg = <2>;
 			next-level-cache = <&L2>;
 		};
 
 		cpu@3 {
 			compatible = "arm,cortex-a9";
+			device_type = "cpu";
 			reg = <3>;
 			next-level-cache = <&L2>;
 		};
diff --git a/arch/arm/boot/dts/sama5d3.dtsi b/arch/arm/boot/dts/sama5d3.dtsi
index 5000e0d42849..642775d7ca67 100644
--- a/arch/arm/boot/dts/sama5d3.dtsi
+++ b/arch/arm/boot/dts/sama5d3.dtsi
@@ -35,8 +35,12 @@
 		ssc1 = &ssc1;
 	};
 	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
 		cpu@0 {
+			device_type = "cpu";
 			compatible = "arm,cortex-a5";
+			reg = <0x0>;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi
index e7ef619a70a2..06ef8b625dba 100644
--- a/arch/arm/boot/dts/sun4i-a10.dtsi
+++ b/arch/arm/boot/dts/sun4i-a10.dtsi
@@ -16,8 +16,12 @@
 	interrupt-parent = <&intc>;
 
 	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
 		cpu@0 {
+			device_type = "cpu";
 			compatible = "arm,cortex-a8";
+			reg = <0x0>;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi
index 31fa38f8cc98..d2852547b572 100644
--- a/arch/arm/boot/dts/sun5i-a13.dtsi
+++ b/arch/arm/boot/dts/sun5i-a13.dtsi
@@ -17,8 +17,12 @@
 	interrupt-parent = <&intc>;
 
 	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
 		cpu@0 {
+			device_type = "cpu";
 			compatible = "arm,cortex-a8";
+			reg = <0x0>;
 		};
 	};
 
diff --git a/arch/arm/include/asm/a.out-core.h b/arch/arm/include/asm/a.out-core.h
deleted file mode 100644
index 92f10cb5c70c..000000000000
--- a/arch/arm/include/asm/a.out-core.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/* a.out coredump register dumper
- *
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-
-#ifndef _ASM_A_OUT_CORE_H
-#define _ASM_A_OUT_CORE_H
-
-#ifdef __KERNEL__
-
-#include <linux/user.h>
-#include <linux/elfcore.h>
-
-/*
- * fill in the user structure for an a.out core dump
- */
-static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
-{
-	struct task_struct *tsk = current;
-
-	dump->magic = CMAGIC;
-	dump->start_code = tsk->mm->start_code;
-	dump->start_stack = regs->ARM_sp & ~(PAGE_SIZE - 1);
-
-	dump->u_tsize = (tsk->mm->end_code - tsk->mm->start_code) >> PAGE_SHIFT;
-	dump->u_dsize = (tsk->mm->brk - tsk->mm->start_data + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	dump->u_ssize = 0;
-
-	memset(dump->u_debugreg, 0, sizeof(dump->u_debugreg));
-
-	if (dump->start_stack < 0x04000000)
-		dump->u_ssize = (0x04000000 - dump->start_stack) >> PAGE_SHIFT;
-
-	dump->regs = *regs;
-	dump->u_fpvalid = dump_fpu (regs, &dump->u_fp);
-}
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_A_OUT_CORE_H */
diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h
index 7c1bfc0aea0c..accefe099182 100644
--- a/arch/arm/include/asm/arch_timer.h
+++ b/arch/arm/include/asm/arch_timer.h
@@ -80,15 +80,6 @@ static inline u32 arch_timer_get_cntfrq(void)
 	return val;
 }
 
-static inline u64 arch_counter_get_cntpct(void)
-{
-	u64 cval;
-
-	isb();
-	asm volatile("mrrc p15, 0, %Q0, %R0, c14" : "=r" (cval));
-	return cval;
-}
-
 static inline u64 arch_counter_get_cntvct(void)
 {
 	u64 cval;
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 05ee9eebad6b..cab788045029 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -30,8 +30,8 @@
  * Endian independent macros for shifting bytes within registers.
  */
 #ifndef __ARMEB__
-#define pull            lsr
-#define push            lsl
+#define lspull          lsr
+#define lspush          lsl
 #define get_byte_0      lsl #0
 #define get_byte_1	lsr #8
 #define get_byte_2	lsr #16
@@ -41,8 +41,8 @@
 #define put_byte_2	lsl #16
 #define put_byte_3	lsl #24
 #else
-#define pull            lsl
-#define push            lsr
+#define lspull          lsl
+#define lspush          lsr
 #define get_byte_0	lsr #24
 #define get_byte_1	lsr #16
 #define get_byte_2	lsr #8
@@ -212,9 +212,9 @@
 #ifdef CONFIG_SMP
 #if __LINUX_ARM_ARCH__ >= 7
 	.ifeqs "\mode","arm"
-	ALT_SMP(dmb)
+	ALT_SMP(dmb	ish)
 	.else
-	ALT_SMP(W(dmb))
+	ALT_SMP(W(dmb)	ish)
 	.endif
 #elif __LINUX_ARM_ARCH__ == 6
 	ALT_SMP(mcr	p15, 0, r0, c7, c10, 5)	@ dmb
diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index 8dcd9c702d90..60f15e274e6d 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -14,27 +14,27 @@
 #endif
 
 #if __LINUX_ARM_ARCH__ >= 7
-#define isb() __asm__ __volatile__ ("isb" : : : "memory")
-#define dsb() __asm__ __volatile__ ("dsb" : : : "memory")
-#define dmb() __asm__ __volatile__ ("dmb" : : : "memory")
+#define isb(option) __asm__ __volatile__ ("isb " #option : : : "memory")
+#define dsb(option) __asm__ __volatile__ ("dsb " #option : : : "memory")
+#define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory")
 #elif defined(CONFIG_CPU_XSC3) || __LINUX_ARM_ARCH__ == 6
-#define isb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \
+#define isb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \
 				    : : "r" (0) : "memory")
-#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
+#define dsb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
 				    : : "r" (0) : "memory")
-#define dmb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \
+#define dmb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \
 				    : : "r" (0) : "memory")
 #elif defined(CONFIG_CPU_FA526)
-#define isb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \
+#define isb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \
 				    : : "r" (0) : "memory")
-#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
+#define dsb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
 				    : : "r" (0) : "memory")
-#define dmb() __asm__ __volatile__ ("" : : : "memory")
+#define dmb(x) __asm__ __volatile__ ("" : : : "memory")
 #else
-#define isb() __asm__ __volatile__ ("" : : : "memory")
-#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
+#define isb(x) __asm__ __volatile__ ("" : : : "memory")
+#define dsb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
 				    : : "r" (0) : "memory")
-#define dmb() __asm__ __volatile__ ("" : : : "memory")
+#define dmb(x) __asm__ __volatile__ ("" : : : "memory")
 #endif
 
 #ifdef CONFIG_ARCH_HAS_BARRIERS
@@ -42,7 +42,7 @@
 #elif defined(CONFIG_ARM_DMA_MEM_BUFFERABLE) || defined(CONFIG_SMP)
 #define mb()		do { dsb(); outer_sync(); } while (0)
 #define rmb()		dsb()
-#define wmb()		mb()
+#define wmb()		do { dsb(st); outer_sync(); } while (0)
 #else
 #define mb()		barrier()
 #define rmb()		barrier()
@@ -54,9 +54,9 @@
 #define smp_rmb()	barrier()
 #define smp_wmb()	barrier()
 #else
-#define smp_mb()	dmb()
-#define smp_rmb()	dmb()
-#define smp_wmb()	dmb()
+#define smp_mb()	dmb(ish)
+#define smp_rmb()	smp_mb()
+#define smp_wmb()	dmb(ishst)
 #endif
 
 #define read_barrier_depends()		do { } while(0)
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index dba62cb1ad08..3392fe2d3174 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -43,15 +43,18 @@
 #define ARM_CPU_IMP_ARM			0x41
 #define ARM_CPU_IMP_INTEL		0x69
 
-#define ARM_CPU_PART_ARM1136		0xB360
-#define ARM_CPU_PART_ARM1156		0xB560
-#define ARM_CPU_PART_ARM1176		0xB760
-#define ARM_CPU_PART_ARM11MPCORE	0xB020
-#define ARM_CPU_PART_CORTEX_A8		0xC080
-#define ARM_CPU_PART_CORTEX_A9		0xC090
-#define ARM_CPU_PART_CORTEX_A5		0xC050
-#define ARM_CPU_PART_CORTEX_A15		0xC0F0
-#define ARM_CPU_PART_CORTEX_A7		0xC070
+/* ARM implemented processors */
+#define ARM_CPU_PART_ARM1136		0x4100b360
+#define ARM_CPU_PART_ARM1156		0x4100b560
+#define ARM_CPU_PART_ARM1176		0x4100b760
+#define ARM_CPU_PART_ARM11MPCORE	0x4100b020
+#define ARM_CPU_PART_CORTEX_A8		0x4100c080
+#define ARM_CPU_PART_CORTEX_A9		0x4100c090
+#define ARM_CPU_PART_CORTEX_A5		0x4100c050
+#define ARM_CPU_PART_CORTEX_A7		0x4100c070
+#define ARM_CPU_PART_CORTEX_A12		0x4100c0d0
+#define ARM_CPU_PART_CORTEX_A17		0x4100c0e0
+#define ARM_CPU_PART_CORTEX_A15		0x4100c0f0
 
 #define ARM_CPU_XSCALE_ARCH_MASK	0xe000
 #define ARM_CPU_XSCALE_ARCH_V1		0x2000
@@ -122,14 +125,24 @@ static inline unsigned int __attribute_const__ read_cpuid_implementor(void)
 	return (read_cpuid_id() & 0xFF000000) >> 24;
 }
 
-static inline unsigned int __attribute_const__ read_cpuid_part_number(void)
+/*
+ * The CPU part number is meaningless without referring to the CPU
+ * implementer: implementers are free to define their own part numbers
+ * which are permitted to clash with other implementer part numbers.
+ */
+static inline unsigned int __attribute_const__ read_cpuid_part(void)
+{
+	return read_cpuid_id() & 0xff00fff0;
+}
+
+static inline unsigned int __attribute_const__ __deprecated read_cpuid_part_number(void)
 {
 	return read_cpuid_id() & 0xFFF0;
 }
 
 static inline unsigned int __attribute_const__ xscale_cpu_arch_version(void)
 {
-	return read_cpuid_part_number() & ARM_CPU_XSCALE_ARCH_MASK;
+	return read_cpuid_id() & ARM_CPU_XSCALE_ARCH_MASK;
 }
 
 static inline unsigned int __attribute_const__ read_cpuid_cachetype(void)
diff --git a/arch/arm/include/asm/dma-contiguous.h b/arch/arm/include/asm/dma-contiguous.h
index 3ed37b4d93da..4f8e9e5514b1 100644
--- a/arch/arm/include/asm/dma-contiguous.h
+++ b/arch/arm/include/asm/dma-contiguous.h
@@ -2,10 +2,9 @@
 #define ASMARM_DMA_CONTIGUOUS_H
 
 #ifdef __KERNEL__
-#ifdef CONFIG_CMA
+#ifdef CONFIG_DMA_CMA
 
 #include <linux/types.h>
-#include <asm-generic/dma-contiguous.h>
 
 void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size);
 
diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h
index 38050b1c4800..f4b46d39b9cf 100644
--- a/arch/arm/include/asm/elf.h
+++ b/arch/arm/include/asm/elf.h
@@ -19,8 +19,6 @@ typedef elf_greg_t elf_gregset_t[ELF_NGREG];
 
 typedef struct user_fp elf_fpregset_t;
 
-#define EM_ARM	40
-
 #define EF_ARM_EABI_MASK	0xff000000
 #define EF_ARM_EABI_UNKNOWN	0x00000000
 #define EF_ARM_EABI_VER1	0x01000000
@@ -130,4 +128,10 @@ struct mm_struct;
 extern unsigned long arch_randomize_brk(struct mm_struct *mm);
 #define arch_randomize_brk arch_randomize_brk
 
+#ifdef CONFIG_MMU
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
+struct linux_binprm;
+int arch_setup_additional_pages(struct linux_binprm *, int);
+#endif
+
 #endif
diff --git a/arch/arm/include/asm/kvm_arch_timer.h b/arch/arm/include/asm/kvm_arch_timer.h
deleted file mode 100644
index 68cb9e1dfb81..000000000000
--- a/arch/arm/include/asm/kvm_arch_timer.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (C) 2012 ARM Ltd.
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef __ASM_ARM_KVM_ARCH_TIMER_H
-#define __ASM_ARM_KVM_ARCH_TIMER_H
-
-#include <linux/clocksource.h>
-#include <linux/hrtimer.h>
-#include <linux/workqueue.h>
-
-struct arch_timer_kvm {
-#ifdef CONFIG_KVM_ARM_TIMER
-	/* Is the timer enabled */
-	bool			enabled;
-
-	/* Virtual offset */
-	cycle_t			cntvoff;
-#endif
-};
-
-struct arch_timer_cpu {
-#ifdef CONFIG_KVM_ARM_TIMER
-	/* Registers: control register, timer value */
-	u32				cntv_ctl;	/* Saved/restored */
-	cycle_t				cntv_cval;	/* Saved/restored */
-
-	/*
-	 * Anything that is not used directly from assembly code goes
-	 * here.
-	 */
-
-	/* Background timer used when the guest is not running */
-	struct hrtimer			timer;
-
-	/* Work queued with the above timer expires */
-	struct work_struct		expired;
-
-	/* Background timer active */
-	bool				armed;
-
-	/* Timer IRQ */
-	const struct kvm_irq_level	*irq;
-#endif
-};
-
-#ifdef CONFIG_KVM_ARM_TIMER
-int kvm_timer_hyp_init(void);
-int kvm_timer_init(struct kvm *kvm);
-void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
-void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu);
-void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu);
-void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu);
-#else
-static inline int kvm_timer_hyp_init(void)
-{
-	return 0;
-};
-
-static inline int kvm_timer_init(struct kvm *kvm)
-{
-	return 0;
-}
-
-static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {}
-static inline void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) {}
-static inline void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) {}
-static inline void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) {}
-#endif
-
-#endif
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 124623e5ef14..816db0bf2dd8 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -55,8 +55,10 @@
  * The bits we set in HCR:
  * TAC:		Trap ACTLR
  * TSC:		Trap SMC
+ * TVM:		Trap VM ops (until MMU and caches are on)
  * TSW:		Trap cache operations by set/way
  * TWI:		Trap WFI
+ * TWE:		Trap WFE
  * TIDCP:	Trap L2CTLR/L2ECTLR
  * BSU_IS:	Upgrade barriers to the inner shareable domain
  * FB:		Force broadcast of all maintainance operations
@@ -67,8 +69,7 @@
  */
 #define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \
 			HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \
-			HCR_SWIO | HCR_TIDCP)
-#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
+			HCR_TVM | HCR_TWE | HCR_SWIO | HCR_TIDCP)
 
 /* System Control Register (SCTLR) bits */
 #define SCTLR_TE	(1 << 30)
@@ -95,12 +96,12 @@
 #define TTBCR_IRGN1	(3 << 24)
 #define TTBCR_EPD1	(1 << 23)
 #define TTBCR_A1	(1 << 22)
-#define TTBCR_T1SZ	(3 << 16)
+#define TTBCR_T1SZ	(7 << 16)
 #define TTBCR_SH0	(3 << 12)
 #define TTBCR_ORGN0	(3 << 10)
 #define TTBCR_IRGN0	(3 << 8)
 #define TTBCR_EPD0	(1 << 7)
-#define TTBCR_T0SZ	3
+#define TTBCR_T0SZ	(7 << 0)
 #define HTCR_MASK	(TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0)
 
 /* Hyp System Trap Register */
@@ -135,7 +136,6 @@
 #define KVM_PHYS_MASK	(KVM_PHYS_SIZE - 1ULL)
 #define PTRS_PER_S2_PGD	(1ULL << (KVM_PHYS_SHIFT - 30))
 #define S2_PGD_ORDER	get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
-#define S2_PGD_SIZE	(1 << S2_PGD_ORDER)
 
 /* Virtualization Translation Control Register (VTCR) bits */
 #define VTCR_SH0	(3 << 12)
@@ -209,6 +209,8 @@
 #define HSR_EC_DABT	(0x24)
 #define HSR_EC_DABT_HYP	(0x25)
 
+#define HSR_WFI_IS_WFE		(1U << 0)
+
 #define HSR_HVC_IMM_MASK	((1UL << 16) - 1)
 
 #define HSR_DABT_S1PTW		(1U << 7)
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 18d50322a9e2..3a67bec72d0c 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -37,16 +37,20 @@
 #define c5_AIFSR	15	/* Auxilary Instrunction Fault Status R */
 #define c6_DFAR		16	/* Data Fault Address Register */
 #define c6_IFAR		17	/* Instruction Fault Address Register */
-#define c9_L2CTLR	18	/* Cortex A15 L2 Control Register */
-#define c10_PRRR	19	/* Primary Region Remap Register */
-#define c10_NMRR	20	/* Normal Memory Remap Register */
-#define c12_VBAR	21	/* Vector Base Address Register */
-#define c13_CID		22	/* Context ID Register */
-#define c13_TID_URW	23	/* Thread ID, User R/W */
-#define c13_TID_URO	24	/* Thread ID, User R/O */
-#define c13_TID_PRIV	25	/* Thread ID, Privileged */
-#define c14_CNTKCTL	26	/* Timer Control Register (PL1) */
-#define NR_CP15_REGS	27	/* Number of regs (incl. invalid) */
+#define c7_PAR		18	/* Physical Address Register */
+#define c7_PAR_high	19	/* PAR top 32 bits */
+#define c9_L2CTLR	20	/* Cortex A15/A7 L2 Control Register */
+#define c10_PRRR	21	/* Primary Region Remap Register */
+#define c10_NMRR	22	/* Normal Memory Remap Register */
+#define c12_VBAR	23	/* Vector Base Address Register */
+#define c13_CID		24	/* Context ID Register */
+#define c13_TID_URW	25	/* Thread ID, User R/W */
+#define c13_TID_URO	26	/* Thread ID, User R/O */
+#define c13_TID_PRIV	27	/* Thread ID, Privileged */
+#define c14_CNTKCTL	28	/* Timer Control Register (PL1) */
+#define c10_AMAIR0	29	/* Auxilary Memory Attribute Indirection Reg0 */
+#define c10_AMAIR1	30	/* Auxilary Memory Attribute Indirection Reg1 */
+#define NR_CP15_REGS	31	/* Number of regs (incl. invalid) */
 
 #define ARM_EXCEPTION_RESET	  0
 #define ARM_EXCEPTION_UNDEFINED   1
@@ -57,6 +61,24 @@
 #define ARM_EXCEPTION_FIQ	  6
 #define ARM_EXCEPTION_HVC	  7
 
+/*
+ * The rr_lo_hi macro swaps a pair of registers depending on
+ * current endianness. It is used in conjunction with ldrd and strd
+ * instructions that load/store a 64-bit value from/to memory to/from
+ * a pair of registers which are used with the mrrc and mcrr instructions.
+ * If used with the ldrd/strd instructions, the a1 parameter is the first
+ * source/destination register and the a2 parameter is the second
+ * source/destination register. Note that the ldrd/strd instructions
+ * already swap the bytes within the words correctly according to the
+ * endianness setting, but the order of the registers need to be effectively
+ * swapped when used with the mrrc/mcrr instructions.
+ */
+#ifdef CONFIG_CPU_ENDIAN_BE8
+#define rr_lo_hi(a1, a2) a2, a1
+#else
+#define rr_lo_hi(a1, a2) a1, a2
+#endif
+
 #ifndef __ASSEMBLY__
 struct kvm;
 struct kvm_vcpu;
@@ -72,8 +94,6 @@ extern char __kvm_hyp_vector[];
 extern char __kvm_hyp_code_start[];
 extern char __kvm_hyp_code_end[];
 
-extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
-
 extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 82b4babead2c..b9db269c6e61 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -65,11 +65,6 @@ static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu)
 	return cpsr_mode > USR_MODE;;
 }
 
-static inline bool kvm_vcpu_reg_is_pc(struct kvm_vcpu *vcpu, int reg)
-{
-	return reg == 15;
-}
-
 static inline u32 kvm_vcpu_get_hsr(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.fault.hsr;
@@ -154,6 +149,11 @@ static inline bool kvm_vcpu_trap_is_iabt(struct kvm_vcpu *vcpu)
 
 static inline u8 kvm_vcpu_trap_get_fault(struct kvm_vcpu *vcpu)
 {
+	return kvm_vcpu_get_hsr(vcpu) & HSR_FSC;
+}
+
+static inline u8 kvm_vcpu_trap_get_fault_type(struct kvm_vcpu *vcpu)
+{
 	return kvm_vcpu_get_hsr(vcpu) & HSR_FSC_TYPE;
 }
 
@@ -162,4 +162,69 @@ static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu)
 	return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK;
 }
 
+static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.cp15[c0_MPIDR];
+}
+
+static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
+{
+	*vcpu_cpsr(vcpu) |= PSR_E_BIT;
+}
+
+static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
+{
+	return !!(*vcpu_cpsr(vcpu) & PSR_E_BIT);
+}
+
+static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
+						    unsigned long data,
+						    unsigned int len)
+{
+	if (kvm_vcpu_is_be(vcpu)) {
+		switch (len) {
+		case 1:
+			return data & 0xff;
+		case 2:
+			return be16_to_cpu(data & 0xffff);
+		default:
+			return be32_to_cpu(data);
+		}
+	} else {
+		switch (len) {
+		case 1:
+			return data & 0xff;
+		case 2:
+			return le16_to_cpu(data & 0xffff);
+		default:
+			return le32_to_cpu(data);
+		}
+	}
+}
+
+static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
+						    unsigned long data,
+						    unsigned int len)
+{
+	if (kvm_vcpu_is_be(vcpu)) {
+		switch (len) {
+		case 1:
+			return data & 0xff;
+		case 2:
+			return cpu_to_be16(data & 0xffff);
+		default:
+			return cpu_to_be32(data);
+		}
+	} else {
+		switch (len) {
+		case 1:
+			return data & 0xff;
+		case 2:
+			return cpu_to_le16(data & 0xffff);
+		default:
+			return cpu_to_le32(data);
+		}
+	}
+}
+
 #endif /* __ARM_KVM_EMULATE_H__ */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 57cb786a6203..46e5d4da1989 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -19,30 +19,31 @@
 #ifndef __ARM_KVM_HOST_H__
 #define __ARM_KVM_HOST_H__
 
+#include <linux/types.h>
+#include <linux/kvm_types.h>
 #include <asm/kvm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
 #include <asm/fpstate.h>
-#include <asm/kvm_arch_timer.h>
+#include <kvm/arm_arch_timer.h>
 
+#if defined(CONFIG_KVM_ARM_MAX_VCPUS)
 #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
+#else
+#define KVM_MAX_VCPUS 0
+#endif
+
 #define KVM_USER_MEM_SLOTS 32
 #define KVM_PRIVATE_MEM_SLOTS 4
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #define KVM_HAVE_ONE_REG
 
-#define KVM_VCPU_MAX_FEATURES 1
-
-/* We don't currently support large pages. */
-#define KVM_HPAGE_GFN_SHIFT(x)	0
-#define KVM_NR_PAGE_SIZES	1
-#define KVM_PAGES_PER_HPAGE(x)	(1UL<<31)
+#define KVM_VCPU_MAX_FEATURES 2
 
-#include <asm/kvm_vgic.h>
+#include <kvm/arm_vgic.h>
 
-struct kvm_vcpu;
 u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
-int kvm_target_cpu(void);
+int __attribute_const__ kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
 void kvm_reset_coprocs(struct kvm_vcpu *vcpu);
 
@@ -101,6 +102,12 @@ struct kvm_vcpu_arch {
 	/* The CPU type we expose to the VM */
 	u32 midr;
 
+	/* HYP trapping configuration */
+	u32 hcr;
+
+	/* Interrupt related fields */
+	u32 irq_lines;		/* IRQ and FIQ levels */
+
 	/* Exception Information */
 	struct kvm_vcpu_fault_info fault;
 
@@ -128,9 +135,6 @@ struct kvm_vcpu_arch {
 	/* IO related fields */
 	struct kvm_decode mmio_decode;
 
-	/* Interrupt related fields */
-	u32 irq_lines;		/* IRQ and FIQ levels */
-
 	/* Cache some mmu pages needed inside spinlock regions */
 	struct kvm_mmu_memory_cache mmu_page_cache;
 
@@ -146,19 +150,17 @@ struct kvm_vcpu_stat {
 	u32 halt_wakeup;
 };
 
-struct kvm_vcpu_init;
 int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 			const struct kvm_vcpu_init *init);
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
-struct kvm_one_reg;
 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 u64 kvm_call_hyp(void *hypfn, ...);
 void force_vm_exit(const cpumask_t *mask);
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
-struct kvm;
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva_range(struct kvm *kvm,
 			unsigned long start, unsigned long end);
@@ -183,15 +185,14 @@ struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
 
 int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
 unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu);
-struct kvm_one_reg;
 int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
 int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
 
 int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		int exception_index);
 
-static inline void __cpu_init_hyp_mode(unsigned long long boot_pgd_ptr,
-				       unsigned long long pgd_ptr,
+static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
+				       phys_addr_t pgd_ptr,
 				       unsigned long hyp_stack_ptr,
 				       unsigned long vector_ptr)
 {
@@ -221,7 +222,18 @@ static inline int kvm_arch_dev_ioctl_check_extension(long ext)
 	return 0;
 }
 
+static inline void vgic_arch_setup(const struct vgic_params *vgic)
+{
+	BUG_ON(vgic->type != VGIC_V2);
+}
+
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 
+static inline void kvm_arch_hardware_disable(void) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 472ac7091003..3f688b458143 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -62,9 +62,15 @@ phys_addr_t kvm_get_idmap_vector(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
 
+static inline void kvm_set_pmd(pmd_t *pmd, pmd_t new_pmd)
+{
+	*pmd = new_pmd;
+	flush_pmd_entry(pmd);
+}
+
 static inline void kvm_set_pte(pte_t *pte, pte_t new_pte)
 {
-	pte_val(*pte) = new_pte;
+	*pte = new_pte;
 	/*
 	 * flush_pmd_entry just takes a void pointer and cleans the necessary
 	 * cache entries, so we can reuse the function for ptes.
@@ -72,17 +78,6 @@ static inline void kvm_set_pte(pte_t *pte, pte_t new_pte)
 	flush_pmd_entry(pte);
 }
 
-static inline bool kvm_is_write_fault(unsigned long hsr)
-{
-	unsigned long hsr_ec = hsr >> HSR_EC_SHIFT;
-	if (hsr_ec == HSR_EC_IABT)
-		return false;
-	else if ((hsr & HSR_ISV) && !(hsr & HSR_WNR))
-		return false;
-	else
-		return true;
-}
-
 static inline void kvm_clean_pgd(pgd_t *pgd)
 {
 	clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t));
@@ -103,10 +98,51 @@ static inline void kvm_set_s2pte_writable(pte_t *pte)
 	pte_val(*pte) |= L_PTE_S2_RDWR;
 }
 
+static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
+{
+	pmd_val(*pmd) |= L_PMD_S2_RDWR;
+}
+
+/* Open coded p*d_addr_end that can deal with 64bit addresses */
+#define kvm_pgd_addr_end(addr, end)					\
+({	u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;		\
+	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
+})
+
+#define kvm_pud_addr_end(addr,end)		(end)
+
+#define kvm_pmd_addr_end(addr, end)					\
+({	u64 __boundary = ((addr) + PMD_SIZE) & PMD_MASK;		\
+	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
+})
+
+static inline bool kvm_page_empty(void *ptr)
+{
+	struct page *ptr_page = virt_to_page(ptr);
+	return page_count(ptr_page) == 1;
+}
+
+
+#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep)
+#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
+#define kvm_pud_table_empty(pudp) (0)
+
+
 struct kvm;
 
-static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
+#define kvm_flush_dcache_to_poc(a,l)	__cpuc_flush_dcache_area((a), (l))
+
+static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
+{
+	return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101;
+}
+
+static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
+					     unsigned long size)
 {
+	if (!vcpu_has_cache_enabled(vcpu))
+		kvm_flush_dcache_to_poc((void *)hva, size);
+	
 	/*
 	 * If we are going to insert an instruction page and the icache is
 	 * either VIPT or PIPT, there is a potential problem where the host
@@ -120,15 +156,16 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
 	 * need any kind of flushing (DDI 0406C.b - Page B3-1392).
 	 */
 	if (icache_is_pipt()) {
-		unsigned long hva = gfn_to_hva(kvm, gfn);
-		__cpuc_coherent_user_range(hva, hva + PAGE_SIZE);
+		__cpuc_coherent_user_range(hva, hva + size);
 	} else if (!icache_is_vivt_asid_tagged()) {
 		/* any kind of VIPT cache */
 		__flush_icache_all();
 	}
 }
 
-#define kvm_flush_dcache_to_poc(a,l)	__cpuc_flush_dcache_area((a), (l))
+#define kvm_virt_to_phys(x)		virt_to_idmap((unsigned long)(x))
+
+void stage2_flush_vm(struct kvm *kvm);
 
 #endif	/* !__ASSEMBLY__ */
 
diff --git a/arch/arm/include/asm/kvm_psci.h b/arch/arm/include/asm/kvm_psci.h
index 9a83d98bf170..6bda945d31fa 100644
--- a/arch/arm/include/asm/kvm_psci.h
+++ b/arch/arm/include/asm/kvm_psci.h
@@ -18,6 +18,10 @@
 #ifndef __ARM_KVM_PSCI_H__
 #define __ARM_KVM_PSCI_H__
 
-bool kvm_psci_call(struct kvm_vcpu *vcpu);
+#define KVM_ARM_PSCI_0_1	1
+#define KVM_ARM_PSCI_0_2	2
+
+int kvm_psci_version(struct kvm_vcpu *vcpu);
+int kvm_psci_call(struct kvm_vcpu *vcpu);
 
 #endif /* __ARM_KVM_PSCI_H__ */
diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
deleted file mode 100644
index 343744e4809c..000000000000
--- a/arch/arm/include/asm/kvm_vgic.h
+++ /dev/null
@@ -1,220 +0,0 @@
-/*
- * Copyright (C) 2012 ARM Ltd.
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef __ASM_ARM_KVM_VGIC_H
-#define __ASM_ARM_KVM_VGIC_H
-
-#include <linux/kernel.h>
-#include <linux/kvm.h>
-#include <linux/irqreturn.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-#include <linux/irqchip/arm-gic.h>
-
-#define VGIC_NR_IRQS		128
-#define VGIC_NR_SGIS		16
-#define VGIC_NR_PPIS		16
-#define VGIC_NR_PRIVATE_IRQS	(VGIC_NR_SGIS + VGIC_NR_PPIS)
-#define VGIC_NR_SHARED_IRQS	(VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS)
-#define VGIC_MAX_CPUS		KVM_MAX_VCPUS
-#define VGIC_MAX_LRS		(1 << 6)
-
-/* Sanity checks... */
-#if (VGIC_MAX_CPUS > 8)
-#error	Invalid number of CPU interfaces
-#endif
-
-#if (VGIC_NR_IRQS & 31)
-#error "VGIC_NR_IRQS must be a multiple of 32"
-#endif
-
-#if (VGIC_NR_IRQS > 1024)
-#error "VGIC_NR_IRQS must be <= 1024"
-#endif
-
-/*
- * The GIC distributor registers describing interrupts have two parts:
- * - 32 per-CPU interrupts (SGI + PPI)
- * - a bunch of shared interrupts (SPI)
- */
-struct vgic_bitmap {
-	union {
-		u32 reg[VGIC_NR_PRIVATE_IRQS / 32];
-		DECLARE_BITMAP(reg_ul, VGIC_NR_PRIVATE_IRQS);
-	} percpu[VGIC_MAX_CPUS];
-	union {
-		u32 reg[VGIC_NR_SHARED_IRQS / 32];
-		DECLARE_BITMAP(reg_ul, VGIC_NR_SHARED_IRQS);
-	} shared;
-};
-
-struct vgic_bytemap {
-	u32 percpu[VGIC_MAX_CPUS][VGIC_NR_PRIVATE_IRQS / 4];
-	u32 shared[VGIC_NR_SHARED_IRQS  / 4];
-};
-
-struct vgic_dist {
-#ifdef CONFIG_KVM_ARM_VGIC
-	spinlock_t		lock;
-	bool			ready;
-
-	/* Virtual control interface mapping */
-	void __iomem		*vctrl_base;
-
-	/* Distributor and vcpu interface mapping in the guest */
-	phys_addr_t		vgic_dist_base;
-	phys_addr_t		vgic_cpu_base;
-
-	/* Distributor enabled */
-	u32			enabled;
-
-	/* Interrupt enabled (one bit per IRQ) */
-	struct vgic_bitmap	irq_enabled;
-
-	/* Interrupt 'pin' level */
-	struct vgic_bitmap	irq_state;
-
-	/* Level-triggered interrupt in progress */
-	struct vgic_bitmap	irq_active;
-
-	/* Interrupt priority. Not used yet. */
-	struct vgic_bytemap	irq_priority;
-
-	/* Level/edge triggered */
-	struct vgic_bitmap	irq_cfg;
-
-	/* Source CPU per SGI and target CPU */
-	u8			irq_sgi_sources[VGIC_MAX_CPUS][VGIC_NR_SGIS];
-
-	/* Target CPU for each IRQ */
-	u8			irq_spi_cpu[VGIC_NR_SHARED_IRQS];
-	struct vgic_bitmap	irq_spi_target[VGIC_MAX_CPUS];
-
-	/* Bitmap indicating which CPU has something pending */
-	unsigned long		irq_pending_on_cpu;
-#endif
-};
-
-struct vgic_cpu {
-#ifdef CONFIG_KVM_ARM_VGIC
-	/* per IRQ to LR mapping */
-	u8		vgic_irq_lr_map[VGIC_NR_IRQS];
-
-	/* Pending interrupts on this VCPU */
-	DECLARE_BITMAP(	pending_percpu, VGIC_NR_PRIVATE_IRQS);
-	DECLARE_BITMAP(	pending_shared, VGIC_NR_SHARED_IRQS);
-
-	/* Bitmap of used/free list registers */
-	DECLARE_BITMAP(	lr_used, VGIC_MAX_LRS);
-
-	/* Number of list registers on this CPU */
-	int		nr_lr;
-
-	/* CPU vif control registers for world switch */
-	u32		vgic_hcr;
-	u32		vgic_vmcr;
-	u32		vgic_misr;	/* Saved only */
-	u32		vgic_eisr[2];	/* Saved only */
-	u32		vgic_elrsr[2];	/* Saved only */
-	u32		vgic_apr;
-	u32		vgic_lr[VGIC_MAX_LRS];
-#endif
-};
-
-#define LR_EMPTY	0xff
-
-struct kvm;
-struct kvm_vcpu;
-struct kvm_run;
-struct kvm_exit_mmio;
-
-#ifdef CONFIG_KVM_ARM_VGIC
-int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr);
-int kvm_vgic_hyp_init(void);
-int kvm_vgic_init(struct kvm *kvm);
-int kvm_vgic_create(struct kvm *kvm);
-int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
-void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
-void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
-int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
-			bool level);
-int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
-bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
-		      struct kvm_exit_mmio *mmio);
-
-#define irqchip_in_kernel(k)	(!!((k)->arch.vgic.vctrl_base))
-#define vgic_initialized(k)	((k)->arch.vgic.ready)
-
-#else
-static inline int kvm_vgic_hyp_init(void)
-{
-	return 0;
-}
-
-static inline int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr)
-{
-	return 0;
-}
-
-static inline int kvm_vgic_init(struct kvm *kvm)
-{
-	return 0;
-}
-
-static inline int kvm_vgic_create(struct kvm *kvm)
-{
-	return 0;
-}
-
-static inline int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
-{
-	return 0;
-}
-
-static inline void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) {}
-static inline void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) {}
-
-static inline int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid,
-				      unsigned int irq_num, bool level)
-{
-	return 0;
-}
-
-static inline int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
-{
-	return 0;
-}
-
-static inline bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
-				    struct kvm_exit_mmio *mmio)
-{
-	return false;
-}
-
-static inline int irqchip_in_kernel(struct kvm *kvm)
-{
-	return 0;
-}
-
-static inline bool vgic_initialized(struct kvm *kvm)
-{
-	return true;
-}
-#endif
-
-#endif
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 57870ab313c5..21b458e6b0b8 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -157,6 +157,7 @@
  */
 #define __PV_BITS_31_24	0x81000000
 
+extern phys_addr_t (*arch_virt_to_idmap) (unsigned long x);
 extern unsigned long __pv_phys_offset;
 #define PHYS_OFFSET __pv_phys_offset
 
@@ -233,6 +234,21 @@ static inline void *phys_to_virt(phys_addr_t x)
 #define pfn_to_kaddr(pfn)	__va((pfn) << PAGE_SHIFT)
 
 /*
+ * These are for systems that have a hardware interconnect supported alias of
+ * physical memory for idmap purposes.  Most cases should leave these
+ * untouched.
+ */
+static inline phys_addr_t __virt_to_idmap(unsigned long x)
+{
+	if (arch_virt_to_idmap)
+		return arch_virt_to_idmap(x);
+	else
+		return __virt_to_phys(x);
+}
+
+#define virt_to_idmap(x)	__virt_to_idmap((unsigned long)(x))
+
+/*
  * Virtual <-> DMA view memory address translations
  * Again, these are *only* valid on the kernel direct mapped RAM
  * memory.  Use of these is *deprecated* (and that doesn't mean
diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h
index e3d55547e755..6f18da09668b 100644
--- a/arch/arm/include/asm/mmu.h
+++ b/arch/arm/include/asm/mmu.h
@@ -6,8 +6,11 @@
 typedef struct {
 #ifdef CONFIG_CPU_HAS_ASID
 	atomic64_t	id;
+#else
+	int		switch_pending;
 #endif
 	unsigned int	vmalloc_seq;
+	unsigned long	sigpage;
 } mm_context_t;
 
 #ifdef CONFIG_CPU_HAS_ASID
diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h
index a7b85e0d0cc1..e0b10f19d679 100644
--- a/arch/arm/include/asm/mmu_context.h
+++ b/arch/arm/include/asm/mmu_context.h
@@ -27,7 +27,15 @@ void __check_vmalloc_seq(struct mm_struct *mm);
 void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk);
 #define init_new_context(tsk,mm)	({ atomic64_set(&mm->context.id, 0); 0; })
 
-DECLARE_PER_CPU(atomic64_t, active_asids);
+#ifdef CONFIG_ARM_ERRATA_798181
+void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm,
+			     cpumask_t *mask);
+#else  /* !CONFIG_ARM_ERRATA_798181 */
+static inline void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm,
+					   cpumask_t *mask)
+{
+}
+#endif /* CONFIG_ARM_ERRATA_798181 */
 
 #else	/* !CONFIG_CPU_HAS_ASID */
 
@@ -47,7 +55,7 @@ static inline void check_and_switch_context(struct mm_struct *mm,
 		 * on non-ASID CPUs, the old mm will remain valid until the
 		 * finish_arch_post_lock_switch() call.
 		 */
-		set_ti_thread_flag(task_thread_info(tsk), TIF_SWITCH_MM);
+		mm->context.switch_pending = 1;
 	else
 		cpu_switch_mm(mm->pgd, mm);
 }
@@ -56,9 +64,21 @@ static inline void check_and_switch_context(struct mm_struct *mm,
 	finish_arch_post_lock_switch
 static inline void finish_arch_post_lock_switch(void)
 {
-	if (test_and_clear_thread_flag(TIF_SWITCH_MM)) {
-		struct mm_struct *mm = current->mm;
-		cpu_switch_mm(mm->pgd, mm);
+	struct mm_struct *mm = current->mm;
+
+	if (mm && mm->context.switch_pending) {
+		/*
+		 * Preemption must be disabled during cpu_switch_mm() as we
+		 * have some stateful cache flush implementations. Check
+		 * switch_pending again in case we were preempted and the
+		 * switch to this mm was already done.
+		 */
+		preempt_disable();
+		if (mm->context.switch_pending) {
+			mm->context.switch_pending = 0;
+			cpu_switch_mm(mm->pgd, mm);
+		}
+		preempt_enable_no_resched();
 	}
 }
 
diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
index 812a4944e783..cbdc7a21f869 100644
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -142,7 +142,9 @@ extern void __cpu_copy_user_highpage(struct page *to, struct page *from,
 #define clear_page(page)	memset((void *)(page), 0, PAGE_SIZE)
 extern void copy_page(void *to, const void *from);
 
+#ifdef CONFIG_KUSER_HELPERS
 #define __HAVE_ARCH_GATE_AREA 1
+#endif
 
 #ifdef CONFIG_ARM_LPAE
 #include <asm/pgtable-3level-types.h>
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 86b8fe398b95..ad52938bb264 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -113,6 +113,8 @@
 #define L_PTE_S2_RDONLY		 (_AT(pteval_t, 1) << 6)   /* HAP[1]   */
 #define L_PTE_S2_RDWR		 (_AT(pteval_t, 3) << 6)   /* HAP[2:1] */
 
+#define L_PMD_S2_RDWR		 (_AT(pmdval_t, 3) << 6)   /* HAP[2:1] */
+
 /*
  * Hyp-mode PL2 PTE definitions for LPAE.
  */
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 9bcd262a9008..8afc60c55e82 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -97,7 +97,7 @@ extern pgprot_t		pgprot_s2_device;
 #define PAGE_HYP		_MOD_PROT(pgprot_kernel, L_PTE_HYP)
 #define PAGE_HYP_DEVICE		_MOD_PROT(pgprot_hyp_device, L_PTE_HYP)
 #define PAGE_S2			_MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY)
-#define PAGE_S2_DEVICE		_MOD_PROT(pgprot_s2_device, L_PTE_USER | L_PTE_S2_RDONLY)
+#define PAGE_S2_DEVICE		_MOD_PROT(pgprot_s2_device, L_PTE_S2_RDWR)
 
 #define __PAGE_NONE		__pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN | L_PTE_NONE)
 #define __PAGE_SHARED		__pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h
index 06e7d509eaac..413f3876341c 100644
--- a/arch/arm/include/asm/processor.h
+++ b/arch/arm/include/asm/processor.h
@@ -54,7 +54,6 @@ struct thread_struct {
 
 #define start_thread(regs,pc,sp)					\
 ({									\
-	unsigned long *stack = (unsigned long *)sp;			\
 	memset(regs->uregs, 0, sizeof(regs->uregs));			\
 	if (current->personality & ADDR_LIMIT_32BIT)			\
 		regs->ARM_cpsr = USR_MODE;				\
@@ -65,9 +64,6 @@ struct thread_struct {
 	regs->ARM_cpsr |= PSR_ENDSTATE;					\
 	regs->ARM_pc = pc & ~1;		/* pc */			\
 	regs->ARM_sp = sp;		/* sp */			\
-	regs->ARM_r2 = stack[2];	/* r2 (envp) */			\
-	regs->ARM_r1 = stack[1];	/* r1 (argv) */			\
-	regs->ARM_r0 = stack[0];	/* r0 (argc) */			\
 	nommu_start_thread(regs);					\
 })
 
diff --git a/arch/arm/include/asm/smp_scu.h b/arch/arm/include/asm/smp_scu.h
index 18d169373612..1a292d8be988 100644
--- a/arch/arm/include/asm/smp_scu.h
+++ b/arch/arm/include/asm/smp_scu.h
@@ -11,7 +11,7 @@
 
 static inline bool scu_a9_has_base(void)
 {
-	return read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9;
+	return read_cpuid_part() == ARM_CPU_PART_CORTEX_A9;
 }
 
 static inline unsigned long scu_a9_get_base(void)
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 1995d1a84060..f00b5692cd9d 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -156,7 +156,6 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
 #define TIF_USING_IWMMXT	17
 #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
 #define TIF_RESTORE_SIGMASK	20
-#define TIF_SWITCH_MM		22	/* deferred switch_mm */
 
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index bdf2b8458ec1..aa9b4ac3fdf6 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -43,6 +43,7 @@ struct mmu_gather {
 	struct mm_struct	*mm;
 	unsigned int		fullmm;
 	struct vm_area_struct	*vma;
+	unsigned long		start, end;
 	unsigned long		range_start;
 	unsigned long		range_end;
 	unsigned int		nr;
@@ -107,10 +108,12 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 }
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int fullmm)
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
-	tlb->fullmm = fullmm;
+	tlb->fullmm = !(start | (end+1));
+	tlb->start = start;
+	tlb->end = end;
 	tlb->vma = NULL;
 	tlb->max = ARRAY_SIZE(tlb->local);
 	tlb->pages = tlb->local;
diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild
index 47bcb2d254af..18d76fd5a2af 100644
--- a/arch/arm/include/uapi/asm/Kbuild
+++ b/arch/arm/include/uapi/asm/Kbuild
@@ -1,7 +1,6 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
-header-y += a.out.h
 header-y += byteorder.h
 header-y += fcntl.h
 header-y += hwcap.h
diff --git a/arch/arm/include/uapi/asm/a.out.h b/arch/arm/include/uapi/asm/a.out.h
deleted file mode 100644
index 083894b2e3bc..000000000000
--- a/arch/arm/include/uapi/asm/a.out.h
+++ /dev/null
@@ -1,34 +0,0 @@
-#ifndef __ARM_A_OUT_H__
-#define __ARM_A_OUT_H__
-
-#include <linux/personality.h>
-#include <linux/types.h>
-
-struct exec
-{
-  __u32 a_info;		/* Use macros N_MAGIC, etc for access */
-  __u32 a_text;		/* length of text, in bytes */
-  __u32 a_data;		/* length of data, in bytes */
-  __u32 a_bss;		/* length of uninitialized data area for file, in bytes */
-  __u32 a_syms;		/* length of symbol table data in file, in bytes */
-  __u32 a_entry;	/* start address */
-  __u32 a_trsize;	/* length of relocation info for text, in bytes */
-  __u32 a_drsize;	/* length of relocation info for data, in bytes */
-};
-
-/*
- * This is always the same
- */
-#define N_TXTADDR(a)	(0x00008000)
-
-#define N_TRSIZE(a)	((a).a_trsize)
-#define N_DRSIZE(a)	((a).a_drsize)
-#define N_SYMSIZE(a)	((a).a_syms)
-
-#define M_ARM 103
-
-#ifndef LIBRARY_START_TEXT
-#define LIBRARY_START_TEXT	(0x00c00000)
-#endif
-
-#endif /* __A_OUT_GNU_H__ */
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index c1ee007523d7..09ee408c1a67 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -20,10 +20,12 @@
 #define __ARM_KVM_H__
 
 #include <linux/types.h>
+#include <linux/psci.h>
 #include <asm/ptrace.h>
 
 #define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_IRQ_LINE
+#define __KVM_HAVE_READONLY_MEM
 
 #define KVM_REG_SIZE(id)						\
 	(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
@@ -63,7 +65,8 @@ struct kvm_regs {
 
 /* Supported Processor Types */
 #define KVM_ARM_TARGET_CORTEX_A15	0
-#define KVM_ARM_NUM_TARGETS		1
+#define KVM_ARM_TARGET_CORTEX_A7	1
+#define KVM_ARM_NUM_TARGETS		2
 
 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
 #define KVM_ARM_DEVICE_TYPE_SHIFT	0
@@ -82,6 +85,7 @@ struct kvm_regs {
 #define KVM_VGIC_V2_CPU_SIZE		0x2000
 
 #define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
+#define KVM_ARM_VCPU_PSCI_0_2		1 /* CPU uses PSCI v0.2 */
 
 struct kvm_vcpu_init {
 	__u32 target;
@@ -118,6 +122,26 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM_32_CRN_MASK		0x0000000000007800
 #define KVM_REG_ARM_32_CRN_SHIFT	11
 
+#define ARM_CP15_REG_SHIFT_MASK(x,n) \
+	(((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK)
+
+#define __ARM_CP15_REG(op1,crn,crm,op2) \
+	(KVM_REG_ARM | (15 << KVM_REG_ARM_COPROC_SHIFT) | \
+	ARM_CP15_REG_SHIFT_MASK(op1, OPC1) | \
+	ARM_CP15_REG_SHIFT_MASK(crn, 32_CRN) | \
+	ARM_CP15_REG_SHIFT_MASK(crm, CRM) | \
+	ARM_CP15_REG_SHIFT_MASK(op2, 32_OPC2))
+
+#define ARM_CP15_REG32(...) (__ARM_CP15_REG(__VA_ARGS__) | KVM_REG_SIZE_U32)
+
+#define __ARM_CP15_REG64(op1,crm) \
+	(__ARM_CP15_REG(op1, 0, crm, 0) | KVM_REG_SIZE_U64)
+#define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__)
+
+#define KVM_REG_ARM_TIMER_CTL		ARM_CP15_REG32(0, 14, 3, 1)
+#define KVM_REG_ARM_TIMER_CNT		ARM_CP15_REG64(1, 14) 
+#define KVM_REG_ARM_TIMER_CVAL		ARM_CP15_REG64(3, 14) 
+
 /* Normal registers are mapped as coprocessor 16. */
 #define KVM_REG_ARM_CORE		(0x0010 << KVM_REG_ARM_COPROC_SHIFT)
 #define KVM_REG_ARM_CORE_REG(name)	(offsetof(struct kvm_regs, name) / 4)
@@ -142,6 +166,15 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM_VFP_FPINST		0x1009
 #define KVM_REG_ARM_VFP_FPINST2		0x100A
 
+/* Device Control API: ARM VGIC */
+#define KVM_DEV_ARM_VGIC_GRP_ADDR	0
+#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS	1
+#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS	2
+#define   KVM_DEV_ARM_VGIC_CPUID_SHIFT	32
+#define   KVM_DEV_ARM_VGIC_CPUID_MASK	(0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT	0
+#define   KVM_DEV_ARM_VGIC_OFFSET_MASK	(0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS	3
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
@@ -172,9 +205,9 @@ struct kvm_arch_memory_slot {
 #define KVM_PSCI_FN_CPU_ON		KVM_PSCI_FN(2)
 #define KVM_PSCI_FN_MIGRATE		KVM_PSCI_FN(3)
 
-#define KVM_PSCI_RET_SUCCESS		0
-#define KVM_PSCI_RET_NI			((unsigned long)-1)
-#define KVM_PSCI_RET_INVAL		((unsigned long)-2)
-#define KVM_PSCI_RET_DENIED		((unsigned long)-3)
+#define KVM_PSCI_RET_SUCCESS		PSCI_RET_SUCCESS
+#define KVM_PSCI_RET_NI			PSCI_RET_NOT_SUPPORTED
+#define KVM_PSCI_RET_INVAL		PSCI_RET_INVALID_PARAMS
+#define KVM_PSCI_RET_DENIED		PSCI_RET_DENIED
 
 #endif /* __ARM_KVM_H__ */
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index ee68cce6b48e..776d9186e9c1 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -168,6 +168,7 @@ int main(void)
   DEFINE(VCPU_FIQ_REGS,		offsetof(struct kvm_vcpu, arch.regs.fiq_regs));
   DEFINE(VCPU_PC,		offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_pc));
   DEFINE(VCPU_CPSR,		offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_cpsr));
+  DEFINE(VCPU_HCR,		offsetof(struct kvm_vcpu, arch.hcr));
   DEFINE(VCPU_IRQ_LINES,	offsetof(struct kvm_vcpu, arch.irq_lines));
   DEFINE(VCPU_HSR,		offsetof(struct kvm_vcpu, arch.fault.hsr));
   DEFINE(VCPU_HxFAR,		offsetof(struct kvm_vcpu, arch.fault.hxfar));
@@ -175,13 +176,13 @@ int main(void)
   DEFINE(VCPU_HYP_PC,		offsetof(struct kvm_vcpu, arch.fault.hyp_pc));
 #ifdef CONFIG_KVM_ARM_VGIC
   DEFINE(VCPU_VGIC_CPU,		offsetof(struct kvm_vcpu, arch.vgic_cpu));
-  DEFINE(VGIC_CPU_HCR,		offsetof(struct vgic_cpu, vgic_hcr));
-  DEFINE(VGIC_CPU_VMCR,		offsetof(struct vgic_cpu, vgic_vmcr));
-  DEFINE(VGIC_CPU_MISR,		offsetof(struct vgic_cpu, vgic_misr));
-  DEFINE(VGIC_CPU_EISR,		offsetof(struct vgic_cpu, vgic_eisr));
-  DEFINE(VGIC_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_elrsr));
-  DEFINE(VGIC_CPU_APR,		offsetof(struct vgic_cpu, vgic_apr));
-  DEFINE(VGIC_CPU_LR,		offsetof(struct vgic_cpu, vgic_lr));
+  DEFINE(VGIC_V2_CPU_HCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
+  DEFINE(VGIC_V2_CPU_VMCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
+  DEFINE(VGIC_V2_CPU_MISR,	offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
+  DEFINE(VGIC_V2_CPU_EISR,	offsetof(struct vgic_cpu, vgic_v2.vgic_eisr));
+  DEFINE(VGIC_V2_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
+  DEFINE(VGIC_V2_CPU_APR,	offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
+  DEFINE(VGIC_V2_CPU_LR,	offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
   DEFINE(VGIC_CPU_NR_LR,	offsetof(struct vgic_cpu, nr_lr));
 #ifdef CONFIG_KVM_ARM_TIMER
   DEFINE(VCPU_TIMER_CNTV_CTL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c
index 5859c8bc727c..a44e7d11ab02 100644
--- a/arch/arm/kernel/devtree.c
+++ b/arch/arm/kernel/devtree.c
@@ -212,7 +212,7 @@ struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys)
 	}
 	if (!mdesc_best) {
 		const char *prop;
-		long size;
+		int size;
 
 		early_print("\nError: unrecognized/unsupported "
 			    "device tree compatible list:\n[ ");
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 582b405befc5..d43c7e54ec6c 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -741,6 +741,18 @@ ENDPROC(__switch_to)
 #endif
 	.endm
 
+	.macro	kuser_pad, sym, size
+	.if	(. - \sym) & 3
+	.rept	4 - (. - \sym) & 3
+	.byte	0
+	.endr
+	.endif
+	.rept	(\size - (. - \sym)) / 4
+	.word	0xe7fddef1
+	.endr
+	.endm
+
+#ifdef CONFIG_KUSER_HELPERS
 	.align	5
 	.globl	__kuser_helper_start
 __kuser_helper_start:
@@ -831,18 +843,13 @@ kuser_cmpxchg64_fixup:
 #error "incoherent kernel configuration"
 #endif
 
-	/* pad to next slot */
-	.rept	(16 - (. - __kuser_cmpxchg64)/4)
-	.word	0
-	.endr
-
-	.align	5
+	kuser_pad __kuser_cmpxchg64, 64
 
 __kuser_memory_barrier:				@ 0xffff0fa0
 	smp_dmb	arm
 	usr_ret	lr
 
-	.align	5
+	kuser_pad __kuser_memory_barrier, 32
 
 __kuser_cmpxchg:				@ 0xffff0fc0
 
@@ -915,13 +922,14 @@ kuser_cmpxchg32_fixup:
 
 #endif
 
-	.align	5
+	kuser_pad __kuser_cmpxchg, 32
 
 __kuser_get_tls:				@ 0xffff0fe0
 	ldr	r0, [pc, #(16 - 8)]	@ read TLS, set in kuser_get_tls_init
 	usr_ret	lr
 	mrc	p15, 0, r0, c13, c0, 3	@ 0xffff0fe8 hardware TLS code
-	.rep	4
+	kuser_pad __kuser_get_tls, 16
+	.rep	3
 	.word	0			@ 0xffff0ff0 software TLS value, then
 	.endr				@ pad up to __kuser_helper_version
 
@@ -931,14 +939,16 @@ __kuser_helper_version:				@ 0xffff0ffc
 	.globl	__kuser_helper_end
 __kuser_helper_end:
 
+#endif
+
  THUMB(	.thumb	)
 
 /*
  * Vector stubs.
  *
- * This code is copied to 0xffff0200 so we can use branches in the
- * vectors, rather than ldr's.  Note that this code must not
- * exceed 0x300 bytes.
+ * This code is copied to 0xffff1000 so we can use branches in the
+ * vectors, rather than ldr's.  Note that this code must not exceed
+ * a page size.
  *
  * Common stub entry macro:
  *   Enter in IRQ mode, spsr = SVC/USR CPSR, lr = SVC/USR PC
@@ -985,8 +995,17 @@ ENDPROC(vector_\name)
 1:
 	.endm
 
-	.globl	__stubs_start
+	.section .stubs, "ax", %progbits
 __stubs_start:
+	@ This must be the first word
+	.word	vector_swi
+
+vector_rst:
+ ARM(	swi	SYS_ERROR0	)
+ THUMB(	svc	#0		)
+ THUMB(	nop			)
+	b	vector_und
+
 /*
  * Interrupt dispatcher
  */
@@ -1081,6 +1100,16 @@ __stubs_start:
 	.align	5
 
 /*=============================================================================
+ * Address exception handler
+ *-----------------------------------------------------------------------------
+ * These aren't too critical.
+ * (they're not supposed to happen, and won't happen in 32-bit data mode).
+ */
+
+vector_addrexcptn:
+	b	vector_addrexcptn
+
+/*=============================================================================
  * Undefined FIQs
  *-----------------------------------------------------------------------------
  * Enter in FIQ mode, spsr = ANY CPSR, lr = ANY PC
@@ -1093,45 +1122,19 @@ __stubs_start:
 vector_fiq:
 	subs	pc, lr, #4
 
-/*=============================================================================
- * Address exception handler
- *-----------------------------------------------------------------------------
- * These aren't too critical.
- * (they're not supposed to happen, and won't happen in 32-bit data mode).
- */
-
-vector_addrexcptn:
-	b	vector_addrexcptn
-
-/*
- * We group all the following data together to optimise
- * for CPUs with separate I & D caches.
- */
-	.align	5
-
-.LCvswi:
-	.word	vector_swi
-
-	.globl	__stubs_end
-__stubs_end:
-
-	.equ	stubs_offset, __vectors_start + 0x200 - __stubs_start
+	.globl	vector_fiq_offset
+	.equ	vector_fiq_offset, vector_fiq
 
-	.globl	__vectors_start
+	.section .vectors, "ax", %progbits
 __vectors_start:
- ARM(	swi	SYS_ERROR0	)
- THUMB(	svc	#0		)
- THUMB(	nop			)
-	W(b)	vector_und + stubs_offset
-	W(ldr)	pc, .LCvswi + stubs_offset
-	W(b)	vector_pabt + stubs_offset
-	W(b)	vector_dabt + stubs_offset
-	W(b)	vector_addrexcptn + stubs_offset
-	W(b)	vector_irq + stubs_offset
-	W(b)	vector_fiq + stubs_offset
-
-	.globl	__vectors_end
-__vectors_end:
+	W(b)	vector_rst
+	W(b)	vector_und
+	W(ldr)	pc, __vectors_start + 0x1000
+	W(b)	vector_pabt
+	W(b)	vector_dabt
+	W(b)	vector_addrexcptn
+	W(b)	vector_irq
+	W(b)	vector_fiq
 
 	.data
 
diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c
index 2adda11f712f..25442f451148 100644
--- a/arch/arm/kernel/fiq.c
+++ b/arch/arm/kernel/fiq.c
@@ -47,6 +47,11 @@
 #include <asm/irq.h>
 #include <asm/traps.h>
 
+#define FIQ_OFFSET ({					\
+		extern void *vector_fiq_offset;		\
+		(unsigned)&vector_fiq_offset;		\
+	})
+
 static unsigned long no_fiq_insn;
 
 /* Default reacquire function
@@ -80,13 +85,16 @@ int show_fiq_list(struct seq_file *p, int prec)
 void set_fiq_handler(void *start, unsigned int length)
 {
 #if defined(CONFIG_CPU_USE_DOMAINS)
-	memcpy((void *)0xffff001c, start, length);
+	void *base = (void *)0xffff0000;
 #else
-	memcpy(vectors_page + 0x1c, start, length);
+	void *base = vectors_page;
 #endif
-	flush_icache_range(0xffff001c, 0xffff001c + length);
+	unsigned offset = FIQ_OFFSET;
+
+	memcpy(base + offset, start, length);
+	flush_icache_range(0xffff0000 + offset, 0xffff0000 + offset + length);
 	if (!vectors_high())
-		flush_icache_range(0x1c, 0x1c + length);
+		flush_icache_range(offset, offset + length);
 }
 
 int claim_fiq(struct fiq_handler *f)
@@ -144,6 +152,7 @@ EXPORT_SYMBOL(disable_fiq);
 
 void __init init_FIQ(int start)
 {
-	no_fiq_insn = *(unsigned long *)0xffff001c;
+	unsigned offset = FIQ_OFFSET;
+	no_fiq_insn = *(unsigned long *)(0xffff0000 + offset);
 	fiq_start = start;
 }
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 8c3094d0f7b7..e19edc6f2d15 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -53,7 +53,12 @@ armpmu_map_cache_event(const unsigned (*cache_map)
 static int
 armpmu_map_hw_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
 {
-	int mapping = (*event_map)[config];
+	int mapping;
+
+	if (config >= PERF_COUNT_HW_MAX)
+		return -ENOENT;
+
+	mapping = (*event_map)[config];
 	return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
 }
 
@@ -253,6 +258,9 @@ validate_event(struct pmu_hw_events *hw_events,
 	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct pmu *leader_pmu = event->group_leader->pmu;
 
+	if (is_software_event(event))
+		return 1;
+
 	if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF)
 		return 1;
 
@@ -569,6 +577,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 		return;
 	}
 
+	perf_callchain_store(entry, regs->ARM_pc);
 	tail = (struct frame_tail __user *)regs->ARM_fp - 1;
 
 	while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 1f2740e3dbc0..0e9609657c79 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -201,49 +201,39 @@ static struct platform_device_id cpu_pmu_plat_device_ids[] = {
 static int probe_current_pmu(struct arm_pmu *pmu)
 {
 	int cpu = get_cpu();
-	unsigned long implementor = read_cpuid_implementor();
-	unsigned long part_number = read_cpuid_part_number();
 	int ret = -ENODEV;
 
 	pr_info("probing PMU on CPU %d\n", cpu);
 
+	switch (read_cpuid_part()) {
 	/* ARM Ltd CPUs. */
-	if (implementor == ARM_CPU_IMP_ARM) {
-		switch (part_number) {
-		case ARM_CPU_PART_ARM1136:
-		case ARM_CPU_PART_ARM1156:
-		case ARM_CPU_PART_ARM1176:
-			ret = armv6pmu_init(pmu);
-			break;
-		case ARM_CPU_PART_ARM11MPCORE:
-			ret = armv6mpcore_pmu_init(pmu);
-			break;
-		case ARM_CPU_PART_CORTEX_A8:
-			ret = armv7_a8_pmu_init(pmu);
-			break;
-		case ARM_CPU_PART_CORTEX_A9:
-			ret = armv7_a9_pmu_init(pmu);
-			break;
-		case ARM_CPU_PART_CORTEX_A5:
-			ret = armv7_a5_pmu_init(pmu);
-			break;
-		case ARM_CPU_PART_CORTEX_A15:
-			ret = armv7_a15_pmu_init(pmu);
-			break;
-		case ARM_CPU_PART_CORTEX_A7:
-			ret = armv7_a7_pmu_init(pmu);
-			break;
-		}
-	/* Intel CPUs [xscale]. */
-	} else if (implementor == ARM_CPU_IMP_INTEL) {
-		switch (xscale_cpu_arch_version()) {
-		case ARM_CPU_XSCALE_ARCH_V1:
-			ret = xscale1pmu_init(pmu);
-			break;
-		case ARM_CPU_XSCALE_ARCH_V2:
-			ret = xscale2pmu_init(pmu);
-			break;
+	case ARM_CPU_PART_ARM1136:
+	case ARM_CPU_PART_ARM1156:
+	case ARM_CPU_PART_ARM1176:
+		ret = armv6pmu_init(pmu);
+		break;
+	case ARM_CPU_PART_ARM11MPCORE:
+		ret = armv6mpcore_pmu_init(pmu);
+		break;
+	case ARM_CPU_PART_CORTEX_A8:
+		ret = armv7_a8_pmu_init(pmu);
+		break;
+	case ARM_CPU_PART_CORTEX_A9:
+		ret = armv7_a9_pmu_init(pmu);
+		break;
+
+	default:
+		if (read_cpuid_implementor() == ARM_CPU_IMP_INTEL) {
+			switch (xscale_cpu_arch_version()) {
+			case ARM_CPU_XSCALE_ARCH_V1:
+				ret = xscale1pmu_init(pmu);
+				break;
+			case ARM_CPU_XSCALE_ARCH_V2:
+				ret = xscale2pmu_init(pmu);
+				break;
+			}
 		}
+		break;
 	}
 
 	put_cpu();
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 6e8931ccf13e..5bc2615268d4 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -433,10 +433,11 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
 }
 
 #ifdef CONFIG_MMU
+#ifdef CONFIG_KUSER_HELPERS
 /*
  * The vectors page is always readable from user space for the
- * atomic helpers and the signal restart code. Insert it into the
- * gate_vma so that it is visible through ptrace and /proc/<pid>/mem.
+ * atomic helpers. Insert it into the gate_vma so that it is visible
+ * through ptrace and /proc/<pid>/mem.
  */
 static struct vm_area_struct gate_vma = {
 	.vm_start	= 0xffff0000,
@@ -465,9 +466,48 @@ int in_gate_area_no_mm(unsigned long addr)
 {
 	return in_gate_area(NULL, addr);
 }
+#define is_gate_vma(vma)	((vma) = &gate_vma)
+#else
+#define is_gate_vma(vma)	0
+#endif
 
 const char *arch_vma_name(struct vm_area_struct *vma)
 {
-	return (vma == &gate_vma) ? "[vectors]" : NULL;
+	return is_gate_vma(vma) ? "[vectors]" :
+		(vma->vm_mm && vma->vm_start == vma->vm_mm->context.sigpage) ?
+		 "[sigpage]" : NULL;
+}
+
+static struct page *signal_page;
+extern struct page *get_signal_page(void);
+
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long addr;
+	int ret;
+
+	if (!signal_page)
+		signal_page = get_signal_page();
+	if (!signal_page)
+		return -ENOMEM;
+
+	down_write(&mm->mmap_sem);
+	addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
+	if (IS_ERR_VALUE(addr)) {
+		ret = addr;
+		goto up_fail;
+	}
+
+	ret = install_special_mapping(mm, addr, PAGE_SIZE,
+		VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
+		&signal_page);
+
+	if (ret == 0)
+		mm->context.sigpage = addr;
+
+ up_fail:
+	up_write(&mm->mmap_sem);
+	return ret;
 }
 #endif
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 296786bdbb73..5a42c12767af 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -8,6 +8,7 @@
  * published by the Free Software Foundation.
  */
 #include <linux/errno.h>
+#include <linux/random.h>
 #include <linux/signal.h>
 #include <linux/personality.h>
 #include <linux/uaccess.h>
@@ -15,12 +16,11 @@
 
 #include <asm/elf.h>
 #include <asm/cacheflush.h>
+#include <asm/traps.h>
 #include <asm/ucontext.h>
 #include <asm/unistd.h>
 #include <asm/vfp.h>
 
-#include "signal.h"
-
 /*
  * For ARM syscalls, we encode the syscall number into the instruction.
  */
@@ -40,11 +40,13 @@
 #define SWI_THUMB_SIGRETURN	(0xdf00 << 16 | 0x2700 | (__NR_sigreturn - __NR_SYSCALL_BASE))
 #define SWI_THUMB_RT_SIGRETURN	(0xdf00 << 16 | 0x2700 | (__NR_rt_sigreturn - __NR_SYSCALL_BASE))
 
-const unsigned long sigreturn_codes[7] = {
+static const unsigned long sigreturn_codes[7] = {
 	MOV_R7_NR_SIGRETURN,    SWI_SYS_SIGRETURN,    SWI_THUMB_SIGRETURN,
 	MOV_R7_NR_RT_SIGRETURN, SWI_SYS_RT_SIGRETURN, SWI_THUMB_RT_SIGRETURN,
 };
 
+static unsigned long signal_return_offset;
+
 #ifdef CONFIG_CRUNCH
 static int preserve_crunch_context(struct crunch_sigframe __user *frame)
 {
@@ -396,13 +398,19 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig,
 		    __put_user(sigreturn_codes[idx+1], rc+1))
 			return 1;
 
+#ifdef CONFIG_MMU
 		if (cpsr & MODE32_BIT) {
+			struct mm_struct *mm = current->mm;
 			/*
-			 * 32-bit code can use the new high-page
-			 * signal return code support.
+			 * 32-bit code can use the signal return page
+			 * except when the MPU has protected the vectors
+			 * page from PL0
 			 */
-			retcode = KERN_SIGRETURN_CODE + (idx << 2) + thumb;
-		} else {
+			retcode = mm->context.sigpage + signal_return_offset +
+				  (idx << 2) + thumb;
+		} else
+#endif
+		{
 			/*
 			 * Ensure that the instruction cache sees
 			 * the return code written onto the stack.
@@ -603,3 +611,33 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
 	} while (thread_flags & _TIF_WORK_MASK);
 	return 0;
 }
+
+struct page *get_signal_page(void)
+{
+	unsigned long ptr;
+	unsigned offset;
+	struct page *page;
+	void *addr;
+
+	page = alloc_pages(GFP_KERNEL, 0);
+
+	if (!page)
+		return NULL;
+
+	addr = page_address(page);
+
+	/* Give the signal return code some randomness */
+	offset = 0x200 + (get_random_int() & 0x7fc);
+	signal_return_offset = offset;
+
+	/*
+	 * Copy signal return handlers into the vector page, and
+	 * set sigreturn to be a pointer to these.
+	 */
+	memcpy(addr + offset, sigreturn_codes, sizeof(sigreturn_codes));
+
+	ptr = (unsigned long)addr + offset;
+	flush_icache_range(ptr, ptr + sizeof(sigreturn_codes));
+
+	return page;
+}
diff --git a/arch/arm/kernel/signal.h b/arch/arm/kernel/signal.h
deleted file mode 100644
index 5ff067b7c752..000000000000
--- a/arch/arm/kernel/signal.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- *  linux/arch/arm/kernel/signal.h
- *
- *  Copyright (C) 2005-2009 Russell King.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#define KERN_SIGRETURN_CODE	(CONFIG_VECTORS_BASE + 0x00000500)
-
-extern const unsigned long sigreturn_codes[7];
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 5919eb451bb9..5a8ad2c8eda0 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -87,8 +87,8 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
 	 * its stack and the page tables.
 	 */
 	secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
-	secondary_data.pgdir = virt_to_phys(idmap_pgd);
-	secondary_data.swapper_pg_dir = virt_to_phys(swapper_pg_dir);
+	secondary_data.pgdir = virt_to_idmap(idmap_pgd);
+	secondary_data.swapper_pg_dir = virt_to_idmap(swapper_pg_dir);
 	__cpuc_flush_dcache_area(&secondary_data, sizeof(secondary_data));
 	outer_clean_range(__pa(&secondary_data), __pa(&secondary_data + 1));
 
diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c
index 9a52a07aa40e..a98b62dca2fa 100644
--- a/arch/arm/kernel/smp_tlb.c
+++ b/arch/arm/kernel/smp_tlb.c
@@ -103,7 +103,7 @@ static void broadcast_tlb_a15_erratum(void)
 
 static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm)
 {
-	int cpu, this_cpu;
+	int this_cpu;
 	cpumask_t mask = { CPU_BITS_NONE };
 
 	if (!erratum_a15_798181())
@@ -111,21 +111,7 @@ static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm)
 
 	dummy_flush_tlb_a15_erratum();
 	this_cpu = get_cpu();
-	for_each_online_cpu(cpu) {
-		if (cpu == this_cpu)
-			continue;
-		/*
-		 * We only need to send an IPI if the other CPUs are running
-		 * the same ASID as the one being invalidated. There is no
-		 * need for locking around the active_asids check since the
-		 * switch_mm() function has at least one dmb() (as required by
-		 * this workaround) in case a context switch happens on
-		 * another CPU after the condition below.
-		 */
-		if (atomic64_read(&mm->context.id) ==
-		    atomic64_read(&per_cpu(active_asids, cpu)))
-			cpumask_set_cpu(cpu, &mask);
-	}
+	a15_erratum_get_cpumask(this_cpu, mm, &mask);
 	smp_call_function_many(&mask, ipi_flush_tlb_a15_erratum, NULL, 1);
 	put_cpu();
 }
diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c
index 90525d9d290b..f6fd1d4398c6 100644
--- a/arch/arm/kernel/smp_twd.c
+++ b/arch/arm/kernel/smp_twd.c
@@ -120,7 +120,7 @@ static int twd_rate_change(struct notifier_block *nb,
 	 * changing cpu.
 	 */
 	if (flags == POST_RATE_CHANGE)
-		smp_call_function(twd_update_frequency,
+		on_each_cpu(twd_update_frequency,
 				  (void *)&cnd->new_rate, 1);
 
 	return NOTIFY_OK;
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 18b32e8e4497..6b9567e19bdc 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -35,8 +35,6 @@
 #include <asm/tls.h>
 #include <asm/system_misc.h>
 
-#include "signal.h"
-
 static const char *handler[]= { "prefetch abort", "data abort", "address exception", "interrupt" };
 
 void *vectors_page;
@@ -800,47 +798,55 @@ void __init trap_init(void)
 	return;
 }
 
-static void __init kuser_get_tls_init(unsigned long vectors)
+#ifdef CONFIG_KUSER_HELPERS
+static void __init kuser_init(void *vectors)
 {
+	extern char __kuser_helper_start[], __kuser_helper_end[];
+	int kuser_sz = __kuser_helper_end - __kuser_helper_start;
+
+	memcpy(vectors + 0x1000 - kuser_sz, __kuser_helper_start, kuser_sz);
+
 	/*
 	 * vectors + 0xfe0 = __kuser_get_tls
 	 * vectors + 0xfe8 = hardware TLS instruction at 0xffff0fe8
 	 */
 	if (tls_emu || has_tls_reg)
-		memcpy((void *)vectors + 0xfe0, (void *)vectors + 0xfe8, 4);
+		memcpy(vectors + 0xfe0, vectors + 0xfe8, 4);
 }
+#else
+static void __init kuser_init(void *vectors)
+{
+}
+#endif
 
 void __init early_trap_init(void *vectors_base)
 {
 	unsigned long vectors = (unsigned long)vectors_base;
 	extern char __stubs_start[], __stubs_end[];
 	extern char __vectors_start[], __vectors_end[];
-	extern char __kuser_helper_start[], __kuser_helper_end[];
-	int kuser_sz = __kuser_helper_end - __kuser_helper_start;
+	unsigned i;
 
 	vectors_page = vectors_base;
 
 	/*
+	 * Poison the vectors page with an undefined instruction.  This
+	 * instruction is chosen to be undefined for both ARM and Thumb
+	 * ISAs.  The Thumb version is an undefined instruction with a
+	 * branch back to the undefined instruction.
+	 */
+	for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
+		((u32 *)vectors_base)[i] = 0xe7fddef1;
+
+	/*
 	 * Copy the vectors, stubs and kuser helpers (in entry-armv.S)
 	 * into the vector page, mapped at 0xffff0000, and ensure these
 	 * are visible to the instruction stream.
 	 */
 	memcpy((void *)vectors, __vectors_start, __vectors_end - __vectors_start);
-	memcpy((void *)vectors + 0x200, __stubs_start, __stubs_end - __stubs_start);
-	memcpy((void *)vectors + 0x1000 - kuser_sz, __kuser_helper_start, kuser_sz);
+	memcpy((void *)vectors + 0x1000, __stubs_start, __stubs_end - __stubs_start);
 
-	/*
-	 * Do processor specific fixups for the kuser helpers
-	 */
-	kuser_get_tls_init(vectors);
-
-	/*
-	 * Copy signal return handlers into the vector page, and
-	 * set sigreturn to be a pointer to these.
-	 */
-	memcpy((void *)(vectors + KERN_SIGRETURN_CODE - CONFIG_VECTORS_BASE),
-	       sigreturn_codes, sizeof(sigreturn_codes));
+	kuser_init(vectors_base);
 
-	flush_icache_range(vectors, vectors + PAGE_SIZE);
+	flush_icache_range(vectors, vectors + PAGE_SIZE * 2);
 	modify_domain(DOMAIN_USER, DOMAIN_CLIENT);
 }
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index a871b8e00fca..33f2ea32f5a0 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -152,6 +152,23 @@ SECTIONS
 	. = ALIGN(PAGE_SIZE);
 	__init_begin = .;
 #endif
+	/*
+	 * The vectors and stubs are relocatable code, and the
+	 * only thing that matters is their relative offsets
+	 */
+	__vectors_start = .;
+	.vectors 0 : AT(__vectors_start) {
+		*(.vectors)
+	}
+	. = __vectors_start + SIZEOF(.vectors);
+	__vectors_end = .;
+
+	__stubs_start = .;
+	.stubs 0x1000 : AT(__stubs_start) {
+		*(.stubs)
+	}
+	. = __stubs_start + SIZEOF(.stubs);
+	__stubs_end = .;
 
 	INIT_TEXT_SECTION(8)
 	.exit.text : {
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 370e1a8af6ac..466bd299b1a8 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -20,6 +20,7 @@ config KVM
 	bool "Kernel-based Virtual Machine (KVM) support"
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
+	select HAVE_KVM_CPU_RELAX_INTERCEPT
 	select KVM_MMIO
 	select KVM_ARM_HOST
 	depends on ARM_VIRT_EXT && ARM_LPAE
@@ -41,9 +42,9 @@ config KVM_ARM_HOST
 	  Provides host support for ARM processors.
 
 config KVM_ARM_MAX_VCPUS
-	int "Number maximum supported virtual CPUs per VM" if KVM_ARM_HOST
-	default 4 if KVM_ARM_HOST
-	default 0
+	int "Number maximum supported virtual CPUs per VM"
+	depends on KVM_ARM_HOST
+	default 4
 	help
 	  Static number of max supported virtual CPUs per VM.
 
@@ -67,6 +68,4 @@ config KVM_ARM_TIMER
 	---help---
 	  Adds support for the Architected Timers in virtual machines
 
-source drivers/virtio/Kconfig
-
 endif # VIRTUALIZATION
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 53c5ed83d16f..f7057ed045b6 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -14,10 +14,12 @@ CFLAGS_mmu.o := -I.
 AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt)
 AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
 
-kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
+KVM := ../../../virt/kvm
+kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
 
 obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
-obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o
-obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o
-obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o
+obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
+obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
+obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
+obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm/kvm/arch_timer.c b/arch/arm/kvm/arch_timer.c
deleted file mode 100644
index c55b6089e923..000000000000
--- a/arch/arm/kvm/arch_timer.c
+++ /dev/null
@@ -1,272 +0,0 @@
-/*
- * Copyright (C) 2012 ARM Ltd.
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <linux/cpu.h>
-#include <linux/of_irq.h>
-#include <linux/kvm.h>
-#include <linux/kvm_host.h>
-#include <linux/interrupt.h>
-
-#include <clocksource/arm_arch_timer.h>
-#include <asm/arch_timer.h>
-
-#include <asm/kvm_vgic.h>
-#include <asm/kvm_arch_timer.h>
-
-static struct timecounter *timecounter;
-static struct workqueue_struct *wqueue;
-static struct kvm_irq_level timer_irq = {
-	.level	= 1,
-};
-
-static cycle_t kvm_phys_timer_read(void)
-{
-	return timecounter->cc->read(timecounter->cc);
-}
-
-static bool timer_is_armed(struct arch_timer_cpu *timer)
-{
-	return timer->armed;
-}
-
-/* timer_arm: as in "arm the timer", not as in ARM the company */
-static void timer_arm(struct arch_timer_cpu *timer, u64 ns)
-{
-	timer->armed = true;
-	hrtimer_start(&timer->timer, ktime_add_ns(ktime_get(), ns),
-		      HRTIMER_MODE_ABS);
-}
-
-static void timer_disarm(struct arch_timer_cpu *timer)
-{
-	if (timer_is_armed(timer)) {
-		hrtimer_cancel(&timer->timer);
-		cancel_work_sync(&timer->expired);
-		timer->armed = false;
-	}
-}
-
-static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
-{
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-
-	timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK;
-	kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
-			    vcpu->arch.timer_cpu.irq->irq,
-			    vcpu->arch.timer_cpu.irq->level);
-}
-
-static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
-{
-	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
-
-	/*
-	 * We disable the timer in the world switch and let it be
-	 * handled by kvm_timer_sync_hwstate(). Getting a timer
-	 * interrupt at this point is a sure sign of some major
-	 * breakage.
-	 */
-	pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu);
-	return IRQ_HANDLED;
-}
-
-static void kvm_timer_inject_irq_work(struct work_struct *work)
-{
-	struct kvm_vcpu *vcpu;
-
-	vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired);
-	vcpu->arch.timer_cpu.armed = false;
-	kvm_timer_inject_irq(vcpu);
-}
-
-static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
-{
-	struct arch_timer_cpu *timer;
-	timer = container_of(hrt, struct arch_timer_cpu, timer);
-	queue_work(wqueue, &timer->expired);
-	return HRTIMER_NORESTART;
-}
-
-/**
- * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu
- * @vcpu: The vcpu pointer
- *
- * Disarm any pending soft timers, since the world-switch code will write the
- * virtual timer state back to the physical CPU.
- */
-void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
-{
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-
-	/*
-	 * We're about to run this vcpu again, so there is no need to
-	 * keep the background timer running, as we're about to
-	 * populate the CPU timer again.
-	 */
-	timer_disarm(timer);
-}
-
-/**
- * kvm_timer_sync_hwstate - sync timer state from cpu
- * @vcpu: The vcpu pointer
- *
- * Check if the virtual timer was armed and either schedule a corresponding
- * soft timer or inject directly if already expired.
- */
-void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
-{
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-	cycle_t cval, now;
-	u64 ns;
-
-	if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
-		!(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE))
-		return;
-
-	cval = timer->cntv_cval;
-	now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
-
-	BUG_ON(timer_is_armed(timer));
-
-	if (cval <= now) {
-		/*
-		 * Timer has already expired while we were not
-		 * looking. Inject the interrupt and carry on.
-		 */
-		kvm_timer_inject_irq(vcpu);
-		return;
-	}
-
-	ns = cyclecounter_cyc2ns(timecounter->cc, cval - now);
-	timer_arm(timer, ns);
-}
-
-void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
-{
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-
-	INIT_WORK(&timer->expired, kvm_timer_inject_irq_work);
-	hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
-	timer->timer.function = kvm_timer_expire;
-	timer->irq = &timer_irq;
-}
-
-static void kvm_timer_init_interrupt(void *info)
-{
-	enable_percpu_irq(timer_irq.irq, 0);
-}
-
-
-static int kvm_timer_cpu_notify(struct notifier_block *self,
-				unsigned long action, void *cpu)
-{
-	switch (action) {
-	case CPU_STARTING:
-	case CPU_STARTING_FROZEN:
-		kvm_timer_init_interrupt(NULL);
-		break;
-	case CPU_DYING:
-	case CPU_DYING_FROZEN:
-		disable_percpu_irq(timer_irq.irq);
-		break;
-	}
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block kvm_timer_cpu_nb = {
-	.notifier_call = kvm_timer_cpu_notify,
-};
-
-static const struct of_device_id arch_timer_of_match[] = {
-	{ .compatible	= "arm,armv7-timer",	},
-	{},
-};
-
-int kvm_timer_hyp_init(void)
-{
-	struct device_node *np;
-	unsigned int ppi;
-	int err;
-
-	timecounter = arch_timer_get_timecounter();
-	if (!timecounter)
-		return -ENODEV;
-
-	np = of_find_matching_node(NULL, arch_timer_of_match);
-	if (!np) {
-		kvm_err("kvm_arch_timer: can't find DT node\n");
-		return -ENODEV;
-	}
-
-	ppi = irq_of_parse_and_map(np, 2);
-	if (!ppi) {
-		kvm_err("kvm_arch_timer: no virtual timer interrupt\n");
-		err = -EINVAL;
-		goto out;
-	}
-
-	err = request_percpu_irq(ppi, kvm_arch_timer_handler,
-				 "kvm guest timer", kvm_get_running_vcpus());
-	if (err) {
-		kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n",
-			ppi, err);
-		goto out;
-	}
-
-	timer_irq.irq = ppi;
-
-	err = register_cpu_notifier(&kvm_timer_cpu_nb);
-	if (err) {
-		kvm_err("Cannot register timer CPU notifier\n");
-		goto out_free;
-	}
-
-	wqueue = create_singlethread_workqueue("kvm_arch_timer");
-	if (!wqueue) {
-		err = -ENOMEM;
-		goto out_free;
-	}
-
-	kvm_info("%s IRQ%d\n", np->name, ppi);
-	on_each_cpu(kvm_timer_init_interrupt, NULL, 1);
-
-	goto out;
-out_free:
-	free_percpu_irq(ppi, kvm_get_running_vcpus());
-out:
-	of_node_put(np);
-	return err;
-}
-
-void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
-{
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-
-	timer_disarm(timer);
-}
-
-int kvm_timer_init(struct kvm *kvm)
-{
-	if (timecounter && wqueue) {
-		kvm->arch.timer.cntvoff = kvm_phys_timer_read();
-		kvm->arch.timer.enabled = 1;
-	}
-
-	return 0;
-}
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index ef1703b9587b..d0c8ee654bbf 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -17,6 +17,7 @@
  */
 
 #include <linux/cpu.h>
+#include <linux/cpu_pm.h>
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/kvm_host.h>
@@ -81,12 +82,12 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void)
 /**
  * kvm_arm_get_running_vcpus - get the per-CPU array of currently running vcpus.
  */
-struct kvm_vcpu __percpu **kvm_get_running_vcpus(void)
+struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void)
 {
 	return &kvm_arm_running_vcpu;
 }
 
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
 {
 	return 0;
 }
@@ -96,27 +97,16 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
 }
 
-void kvm_arch_hardware_disable(void *garbage)
-{
-}
-
 int kvm_arch_hardware_setup(void)
 {
 	return 0;
 }
 
-void kvm_arch_hardware_unsetup(void)
-{
-}
-
 void kvm_arch_check_processor_compat(void *rtn)
 {
 	*(int *)rtn = 0;
 }
 
-void kvm_arch_sync_events(struct kvm *kvm)
-{
-}
 
 /**
  * kvm_arch_init_vm - initializes a VM data structure
@@ -137,6 +127,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	if (ret)
 		goto out_free_stage2_pgd;
 
+	kvm_timer_init(kvm);
+
 	/* Mark the initial VMID generation invalid */
 	kvm->arch.vmid_gen = 0;
 
@@ -152,15 +144,6 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
-			   struct kvm_memory_slot *dont)
-{
-}
-
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
-{
-	return 0;
-}
 
 /**
  * kvm_arch_destroy_vm - destroy the VM data structure
@@ -178,20 +161,25 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 			kvm->vcpus[i] = NULL;
 		}
 	}
+
+	kvm_vgic_destroy(kvm);
 }
 
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r;
 	switch (ext) {
 	case KVM_CAP_IRQCHIP:
 		r = vgic_present;
 		break;
+	case KVM_CAP_DEVICE_CTRL:
 	case KVM_CAP_USER_MEMORY:
 	case KVM_CAP_SYNC_MMU:
 	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
 	case KVM_CAP_ONE_REG:
 	case KVM_CAP_ARM_PSCI:
+	case KVM_CAP_ARM_PSCI_0_2:
+	case KVM_CAP_READONLY_MEM:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -219,29 +207,6 @@ long kvm_arch_dev_ioctl(struct file *filp,
 	return -EINVAL;
 }
 
-int kvm_arch_prepare_memory_region(struct kvm *kvm,
-				   struct kvm_memory_slot *memslot,
-				   struct kvm_userspace_memory_region *mem,
-				   enum kvm_mr_change change)
-{
-	return 0;
-}
-
-void kvm_arch_commit_memory_region(struct kvm *kvm,
-				   struct kvm_userspace_memory_region *mem,
-				   const struct kvm_memory_slot *old,
-				   enum kvm_mr_change change)
-{
-}
-
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
-}
-
-void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
-				   struct kvm_memory_slot *slot)
-{
-}
 
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 {
@@ -280,6 +245,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
 	kvm_mmu_free_memory_caches(vcpu);
 	kvm_timer_vcpu_terminate(vcpu);
+	kvm_vgic_vcpu_destroy(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
 
@@ -295,26 +261,15 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
-	int ret;
-
 	/* Force users to call KVM_ARM_VCPU_INIT */
 	vcpu->arch.target = -1;
 
-	/* Set up VGIC */
-	ret = kvm_vgic_vcpu_init(vcpu);
-	if (ret)
-		return ret;
-
 	/* Set up the timer */
 	kvm_timer_vcpu_init(vcpu);
 
 	return 0;
 }
 
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
-{
-}
-
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	vcpu->cpu = cpu;
@@ -334,6 +289,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
+	/*
+	 * The arch-generic KVM code expects the cpu field of a vcpu to be -1
+	 * if the vcpu is no longer assigned to a cpu.  This is used for the
+	 * optimized make_all_cpus_request path.
+	 */
+	vcpu->cpu = -1;
+
 	kvm_arm_set_running_vcpu(NULL);
 }
 
@@ -448,15 +410,17 @@ static void update_vttbr(struct kvm *kvm)
 
 	/* update vttbr to be used with the new vmid */
 	pgd_phys = virt_to_phys(kvm->arch.pgd);
+	BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
 	vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
-	kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK;
-	kvm->arch.vttbr |= vmid;
+	kvm->arch.vttbr = pgd_phys | vmid;
 
 	spin_unlock(&kvm_vmid_lock);
 }
 
 static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 {
+	int ret;
+
 	if (likely(vcpu->arch.has_run_once))
 		return 0;
 
@@ -466,22 +430,12 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 	 * Initialize the VGIC before running a vcpu the first time on
 	 * this VM.
 	 */
-	if (irqchip_in_kernel(vcpu->kvm) &&
-	    unlikely(!vgic_initialized(vcpu->kvm))) {
-		int ret = kvm_vgic_init(vcpu->kvm);
+	if (unlikely(!vgic_initialized(vcpu->kvm))) {
+		ret = kvm_vgic_init(vcpu->kvm);
 		if (ret)
 			return ret;
 	}
 
-	/*
-	 * Handle the "start in power-off" case by calling into the
-	 * PSCI code.
-	 */
-	if (test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) {
-		*vcpu_reg(vcpu, 0) = KVM_PSCI_FN_CPU_OFF;
-		kvm_psci_call(vcpu);
-	}
-
 	return 0;
 }
 
@@ -695,6 +649,24 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
 	return -EINVAL;
 }
 
+static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
+					 struct kvm_vcpu_init *init)
+{
+	int ret;
+
+	ret = kvm_vcpu_set_target(vcpu, init);
+	if (ret)
+		return ret;
+
+	/*
+	 * Handle the "start in power-off" case by marking the VCPU as paused.
+	 */
+	if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
+		vcpu->arch.pause = true;
+
+	return 0;
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp,
 			 unsigned int ioctl, unsigned long arg)
 {
@@ -708,8 +680,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		if (copy_from_user(&init, argp, sizeof(init)))
 			return -EFAULT;
 
-		return kvm_vcpu_set_target(vcpu, &init);
-
+		return kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init);
 	}
 	case KVM_SET_ONE_REG:
 	case KVM_GET_ONE_REG: {
@@ -767,7 +738,7 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
 	case KVM_ARM_DEVICE_VGIC_V2:
 		if (!vgic_present)
 			return -ENXIO;
-		return kvm_vgic_set_addr(kvm, type, dev_addr->addr);
+		return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
 	default:
 		return -ENODEV;
 	}
@@ -793,6 +764,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			return -EFAULT;
 		return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
 	}
+	case KVM_ARM_PREFERRED_TARGET: {
+		int err;
+		struct kvm_vcpu_init init;
+
+		err = kvm_vcpu_preferred_target(&init);
+		if (err)
+			return err;
+
+		if (copy_to_user(argp, &init, sizeof(init)))
+			return -EFAULT;
+
+		return 0;
+	}
 	default:
 		return -EINVAL;
 	}
@@ -800,8 +784,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
 
 static void cpu_init_hyp_mode(void *dummy)
 {
-	unsigned long long boot_pgd_ptr;
-	unsigned long long pgd_ptr;
+	phys_addr_t boot_pgd_ptr;
+	phys_addr_t pgd_ptr;
 	unsigned long hyp_stack_ptr;
 	unsigned long stack_page;
 	unsigned long vector_ptr;
@@ -809,8 +793,8 @@ static void cpu_init_hyp_mode(void *dummy)
 	/* Switch from the HYP stub to our own HYP init vector */
 	__hyp_set_vectors(kvm_get_idmap_vector());
 
-	boot_pgd_ptr = (unsigned long long)kvm_mmu_get_boot_httbr();
-	pgd_ptr = (unsigned long long)kvm_mmu_get_httbr();
+	boot_pgd_ptr = kvm_mmu_get_boot_httbr();
+	pgd_ptr = kvm_mmu_get_httbr();
 	stack_page = __get_cpu_var(kvm_arm_hyp_stack_page);
 	hyp_stack_ptr = stack_page + PAGE_SIZE;
 	vector_ptr = (unsigned long)__kvm_hyp_vector;
@@ -824,7 +808,8 @@ static int hyp_init_cpu_notify(struct notifier_block *self,
 	switch (action) {
 	case CPU_STARTING:
 	case CPU_STARTING_FROZEN:
-		cpu_init_hyp_mode(NULL);
+		if (__hyp_get_vectors() == hyp_default_vectors)
+			cpu_init_hyp_mode(NULL);
 		break;
 	}
 
@@ -835,6 +820,34 @@ static struct notifier_block hyp_init_cpu_nb = {
 	.notifier_call = hyp_init_cpu_notify,
 };
 
+#ifdef CONFIG_CPU_PM
+static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
+				    unsigned long cmd,
+				    void *v)
+{
+	if (cmd == CPU_PM_EXIT &&
+	    __hyp_get_vectors() == hyp_default_vectors) {
+		cpu_init_hyp_mode(NULL);
+		return NOTIFY_OK;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block hyp_init_cpu_pm_nb = {
+	.notifier_call = hyp_init_cpu_pm_notifier,
+};
+
+static void __init hyp_cpu_pm_init(void)
+{
+	cpu_pm_register_notifier(&hyp_init_cpu_pm_nb);
+}
+#else
+static inline void hyp_cpu_pm_init(void)
+{
+}
+#endif
+
 /**
  * Inits Hyp-mode on all online CPUs
  */
@@ -995,6 +1008,8 @@ int kvm_arch_init(void *opaque)
 		goto out_err;
 	}
 
+	hyp_cpu_pm_init();
+
 	kvm_coproc_table_init();
 	return 0;
 out_err:
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 8eea97be1ed5..7928dbdf2102 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -23,6 +23,7 @@
 #include <asm/kvm_host.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_coproc.h>
+#include <asm/kvm_mmu.h>
 #include <asm/cacheflush.h>
 #include <asm/cputype.h>
 #include <trace/events/kvm.h>
@@ -43,6 +44,31 @@ static u32 cache_levels;
 /* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
 #define CSSELR_MAX 12
 
+/*
+ * kvm_vcpu_arch.cp15 holds cp15 registers as an array of u32, but some
+ * of cp15 registers can be viewed either as couple of two u32 registers
+ * or one u64 register. Current u64 register encoding is that least
+ * significant u32 word is followed by most significant u32 word.
+ */
+static inline void vcpu_cp15_reg64_set(struct kvm_vcpu *vcpu,
+				       const struct coproc_reg *r,
+				       u64 val)
+{
+	vcpu->arch.cp15[r->reg] = val & 0xffffffff;
+	vcpu->arch.cp15[r->reg + 1] = val >> 32;
+}
+
+static inline u64 vcpu_cp15_reg64_get(struct kvm_vcpu *vcpu,
+				      const struct coproc_reg *r)
+{
+	u64 val;
+
+	val = vcpu->arch.cp15[r->reg + 1];
+	val = val << 32;
+	val = val | vcpu->arch.cp15[r->reg];
+	return val;
+}
+
 int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	kvm_inject_undefined(vcpu);
@@ -71,6 +97,98 @@ int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	return 1;
 }
 
+static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+	/*
+	 * Compute guest MPIDR. We build a virtual cluster out of the
+	 * vcpu_id, but we read the 'U' bit from the underlying
+	 * hardware directly.
+	 */
+	vcpu->arch.cp15[c0_MPIDR] = ((read_cpuid_mpidr() & MPIDR_SMP_BITMASK) |
+				     ((vcpu->vcpu_id >> 2) << MPIDR_LEVEL_BITS) |
+				     (vcpu->vcpu_id & 3));
+}
+
+/* TRM entries A7:4.3.31 A15:4.3.28 - RO WI */
+static bool access_actlr(struct kvm_vcpu *vcpu,
+			 const struct coproc_params *p,
+			 const struct coproc_reg *r)
+{
+	if (p->is_write)
+		return ignore_write(vcpu, p);
+
+	*vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR];
+	return true;
+}
+
+/* TRM entries A7:4.3.56, A15:4.3.60 - R/O. */
+static bool access_cbar(struct kvm_vcpu *vcpu,
+			const struct coproc_params *p,
+			const struct coproc_reg *r)
+{
+	if (p->is_write)
+		return write_to_read_only(vcpu, p);
+	return read_zero(vcpu, p);
+}
+
+/* TRM entries A7:4.3.49, A15:4.3.48 - R/O WI */
+static bool access_l2ctlr(struct kvm_vcpu *vcpu,
+			  const struct coproc_params *p,
+			  const struct coproc_reg *r)
+{
+	if (p->is_write)
+		return ignore_write(vcpu, p);
+
+	*vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR];
+	return true;
+}
+
+static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+	u32 l2ctlr, ncores;
+
+	asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
+	l2ctlr &= ~(3 << 24);
+	ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1;
+	/* How many cores in the current cluster and the next ones */
+	ncores -= (vcpu->vcpu_id & ~3);
+	/* Cap it to the maximum number of cores in a single cluster */
+	ncores = min(ncores, 3U);
+	l2ctlr |= (ncores & 3) << 24;
+
+	vcpu->arch.cp15[c9_L2CTLR] = l2ctlr;
+}
+
+static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+	u32 actlr;
+
+	/* ACTLR contains SMP bit: make sure you create all cpus first! */
+	asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr));
+	/* Make the SMP bit consistent with the guest configuration */
+	if (atomic_read(&vcpu->kvm->online_vcpus) > 1)
+		actlr |= 1U << 6;
+	else
+		actlr &= ~(1U << 6);
+
+	vcpu->arch.cp15[c1_ACTLR] = actlr;
+}
+
+/*
+ * TRM entries: A7:4.3.50, A15:4.3.49
+ * R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored).
+ */
+static bool access_l2ectlr(struct kvm_vcpu *vcpu,
+			   const struct coproc_params *p,
+			   const struct coproc_reg *r)
+{
+	if (p->is_write)
+		return ignore_write(vcpu, p);
+
+	*vcpu_reg(vcpu, p->Rt1) = 0;
+	return true;
+}
+
 /* See note at ARM ARM B1.14.4 */
 static bool access_dcsw(struct kvm_vcpu *vcpu,
 			const struct coproc_params *p,
@@ -113,6 +231,44 @@ done:
 }
 
 /*
+ * Generic accessor for VM registers. Only called as long as HCR_TVM
+ * is set.
+ */
+static bool access_vm_reg(struct kvm_vcpu *vcpu,
+			  const struct coproc_params *p,
+			  const struct coproc_reg *r)
+{
+	BUG_ON(!p->is_write);
+
+	vcpu->arch.cp15[r->reg] = *vcpu_reg(vcpu, p->Rt1);
+	if (p->is_64bit)
+		vcpu->arch.cp15[r->reg + 1] = *vcpu_reg(vcpu, p->Rt2);
+
+	return true;
+}
+
+/*
+ * SCTLR accessor. Only called as long as HCR_TVM is set.  If the
+ * guest enables the MMU, we stop trapping the VM sys_regs and leave
+ * it in complete control of the caches.
+ *
+ * Used by the cpu-specific code.
+ */
+bool access_sctlr(struct kvm_vcpu *vcpu,
+		  const struct coproc_params *p,
+		  const struct coproc_reg *r)
+{
+	access_vm_reg(vcpu, p, r);
+
+	if (vcpu_has_cache_enabled(vcpu)) {	/* MMU+Caches enabled? */
+		vcpu->arch.hcr &= ~HCR_TVM;
+		stage2_flush_vm(vcpu->kvm);
+	}
+
+	return true;
+}
+
+/*
  * We could trap ID_DFR0 and tell the guest we don't support performance
  * monitoring.  Unfortunately the patch to make the kernel check ID_DFR0 was
  * NAKed, so it will read the PMCR anyway.
@@ -146,40 +302,63 @@ static bool pm_fake(struct kvm_vcpu *vcpu,
 #define access_pmintenclr pm_fake
 
 /* Architected CP15 registers.
- * Important: Must be sorted ascending by CRn, CRM, Op1, Op2
+ * CRn denotes the primary register number, but is copied to the CRm in the
+ * user space API for 64-bit register access in line with the terminology used
+ * in the ARM ARM.
+ * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit
+ *            registers preceding 32-bit ones.
  */
 static const struct coproc_reg cp15_regs[] = {
+	/* MPIDR: we use VMPIDR for guest access. */
+	{ CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32,
+			NULL, reset_mpidr, c0_MPIDR },
+
 	/* CSSELR: swapped by interrupt.S. */
 	{ CRn( 0), CRm( 0), Op1( 2), Op2( 0), is32,
 			NULL, reset_unknown, c0_CSSELR },
 
-	/* TTBR0/TTBR1: swapped by interrupt.S. */
-	{ CRm( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 },
-	{ CRm( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 },
+	/* ACTLR: trapped by HCR.TAC bit. */
+	{ CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32,
+			access_actlr, reset_actlr, c1_ACTLR },
 
-	/* TTBCR: swapped by interrupt.S. */
+	/* CPACR: swapped by interrupt.S. */
+	{ CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32,
+			NULL, reset_val, c1_CPACR, 0x00000000 },
+
+	/* TTBR0/TTBR1/TTBCR: swapped by interrupt.S. */
+	{ CRm64( 2), Op1( 0), is64, access_vm_reg, reset_unknown64, c2_TTBR0 },
+	{ CRn(2), CRm( 0), Op1( 0), Op2( 0), is32,
+			access_vm_reg, reset_unknown, c2_TTBR0 },
+	{ CRn(2), CRm( 0), Op1( 0), Op2( 1), is32,
+			access_vm_reg, reset_unknown, c2_TTBR1 },
 	{ CRn( 2), CRm( 0), Op1( 0), Op2( 2), is32,
-			NULL, reset_val, c2_TTBCR, 0x00000000 },
+			access_vm_reg, reset_val, c2_TTBCR, 0x00000000 },
+	{ CRm64( 2), Op1( 1), is64, access_vm_reg, reset_unknown64, c2_TTBR1 },
+
 
 	/* DACR: swapped by interrupt.S. */
 	{ CRn( 3), CRm( 0), Op1( 0), Op2( 0), is32,
-			NULL, reset_unknown, c3_DACR },
+			access_vm_reg, reset_unknown, c3_DACR },
 
 	/* DFSR/IFSR/ADFSR/AIFSR: swapped by interrupt.S. */
 	{ CRn( 5), CRm( 0), Op1( 0), Op2( 0), is32,
-			NULL, reset_unknown, c5_DFSR },
+			access_vm_reg, reset_unknown, c5_DFSR },
 	{ CRn( 5), CRm( 0), Op1( 0), Op2( 1), is32,
-			NULL, reset_unknown, c5_IFSR },
+			access_vm_reg, reset_unknown, c5_IFSR },
 	{ CRn( 5), CRm( 1), Op1( 0), Op2( 0), is32,
-			NULL, reset_unknown, c5_ADFSR },
+			access_vm_reg, reset_unknown, c5_ADFSR },
 	{ CRn( 5), CRm( 1), Op1( 0), Op2( 1), is32,
-			NULL, reset_unknown, c5_AIFSR },
+			access_vm_reg, reset_unknown, c5_AIFSR },
 
 	/* DFAR/IFAR: swapped by interrupt.S. */
 	{ CRn( 6), CRm( 0), Op1( 0), Op2( 0), is32,
-			NULL, reset_unknown, c6_DFAR },
+			access_vm_reg, reset_unknown, c6_DFAR },
 	{ CRn( 6), CRm( 0), Op1( 0), Op2( 2), is32,
-			NULL, reset_unknown, c6_IFAR },
+			access_vm_reg, reset_unknown, c6_IFAR },
+
+	/* PAR swapped by interrupt.S */
+	{ CRm64( 7), Op1( 0), is64, NULL, reset_unknown64, c7_PAR },
+
 	/*
 	 * DC{C,I,CI}SW operations:
 	 */
@@ -187,6 +366,13 @@ static const struct coproc_reg cp15_regs[] = {
 	{ CRn( 7), CRm(10), Op1( 0), Op2( 2), is32, access_dcsw},
 	{ CRn( 7), CRm(14), Op1( 0), Op2( 2), is32, access_dcsw},
 	/*
+	 * L2CTLR access (guest wants to know #CPUs).
+	 */
+	{ CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32,
+			access_l2ctlr, reset_l2ctlr, c9_L2CTLR },
+	{ CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr},
+
+	/*
 	 * Dummy performance monitor implementation.
 	 */
 	{ CRn( 9), CRm(12), Op1( 0), Op2( 0), is32, access_pmcr},
@@ -205,9 +391,15 @@ static const struct coproc_reg cp15_regs[] = {
 
 	/* PRRR/NMRR (aka MAIR0/MAIR1): swapped by interrupt.S. */
 	{ CRn(10), CRm( 2), Op1( 0), Op2( 0), is32,
-			NULL, reset_unknown, c10_PRRR},
+			access_vm_reg, reset_unknown, c10_PRRR},
 	{ CRn(10), CRm( 2), Op1( 0), Op2( 1), is32,
-			NULL, reset_unknown, c10_NMRR},
+			access_vm_reg, reset_unknown, c10_NMRR},
+
+	/* AMAIR0/AMAIR1: swapped by interrupt.S. */
+	{ CRn(10), CRm( 3), Op1( 0), Op2( 0), is32,
+			access_vm_reg, reset_unknown, c10_AMAIR0},
+	{ CRn(10), CRm( 3), Op1( 0), Op2( 1), is32,
+			access_vm_reg, reset_unknown, c10_AMAIR1},
 
 	/* VBAR: swapped by interrupt.S. */
 	{ CRn(12), CRm( 0), Op1( 0), Op2( 0), is32,
@@ -215,7 +407,7 @@ static const struct coproc_reg cp15_regs[] = {
 
 	/* CONTEXTIDR/TPIDRURW/TPIDRURO/TPIDRPRW: swapped by interrupt.S. */
 	{ CRn(13), CRm( 0), Op1( 0), Op2( 1), is32,
-			NULL, reset_val, c13_CID, 0x00000000 },
+			access_vm_reg, reset_val, c13_CID, 0x00000000 },
 	{ CRn(13), CRm( 0), Op1( 0), Op2( 2), is32,
 			NULL, reset_unknown, c13_TID_URW },
 	{ CRn(13), CRm( 0), Op1( 0), Op2( 3), is32,
@@ -226,6 +418,9 @@ static const struct coproc_reg cp15_regs[] = {
 	/* CNTKCTL: swapped by interrupt.S. */
 	{ CRn(14), CRm( 1), Op1( 0), Op2( 0), is32,
 			NULL, reset_val, c14_CNTKCTL, 0x00000000 },
+
+	/* The Configuration Base Address Register. */
+	{ CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},
 };
 
 /* Target specific emulation tables */
@@ -233,6 +428,12 @@ static struct kvm_coproc_target_table *target_tables[KVM_ARM_NUM_TARGETS];
 
 void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table)
 {
+	unsigned int i;
+
+	for (i = 1; i < table->num; i++)
+		BUG_ON(cmp_reg(&table->table[i-1],
+			       &table->table[i]) >= 0);
+
 	target_tables[table->target] = table;
 }
 
@@ -315,7 +516,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	struct coproc_params params;
 
-	params.CRm = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf;
+	params.CRn = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf;
 	params.Rt1 = (kvm_vcpu_get_hsr(vcpu) >> 5) & 0xf;
 	params.is_write = ((kvm_vcpu_get_hsr(vcpu) & 1) == 0);
 	params.is_64bit = true;
@@ -323,7 +524,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	params.Op1 = (kvm_vcpu_get_hsr(vcpu) >> 16) & 0xf;
 	params.Op2 = 0;
 	params.Rt2 = (kvm_vcpu_get_hsr(vcpu) >> 10) & 0xf;
-	params.CRn = 0;
+	params.CRm = 0;
 
 	return emulate_cp15(vcpu, &params);
 }
@@ -395,12 +596,13 @@ static bool index_to_params(u64 id, struct coproc_params *params)
 			      | KVM_REG_ARM_OPC1_MASK))
 			return false;
 		params->is_64bit = true;
-		params->CRm = ((id & KVM_REG_ARM_CRM_MASK)
+		/* CRm to CRn: see cp15_to_index for details */
+		params->CRn = ((id & KVM_REG_ARM_CRM_MASK)
 			       >> KVM_REG_ARM_CRM_SHIFT);
 		params->Op1 = ((id & KVM_REG_ARM_OPC1_MASK)
 			       >> KVM_REG_ARM_OPC1_SHIFT);
 		params->Op2 = 0;
-		params->CRn = 0;
+		params->CRm = 0;
 		return true;
 	default:
 		return false;
@@ -505,17 +707,23 @@ static struct coproc_reg invariant_cp15[] = {
 	{ CRn( 0), CRm( 0), Op1( 1), Op2( 7), is32, NULL, get_AIDR },
 };
 
+/*
+ * Reads a register value from a userspace address to a kernel
+ * variable. Make sure that register size matches sizeof(*__val).
+ */
 static int reg_from_user(void *val, const void __user *uaddr, u64 id)
 {
-	/* This Just Works because we are little endian. */
 	if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0)
 		return -EFAULT;
 	return 0;
 }
 
+/*
+ * Writes a register value to a userspace address from a kernel variable.
+ * Make sure that register size matches sizeof(*__val).
+ */
 static int reg_to_user(void __user *uaddr, const void *val, u64 id)
 {
-	/* This Just Works because we are little endian. */
 	if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0)
 		return -EFAULT;
 	return 0;
@@ -525,6 +733,7 @@ static int get_invariant_cp15(u64 id, void __user *uaddr)
 {
 	struct coproc_params params;
 	const struct coproc_reg *r;
+	int ret;
 
 	if (!index_to_params(id, &params))
 		return -ENOENT;
@@ -533,7 +742,15 @@ static int get_invariant_cp15(u64 id, void __user *uaddr)
 	if (!r)
 		return -ENOENT;
 
-	return reg_to_user(uaddr, &r->val, id);
+	ret = -ENOENT;
+	if (KVM_REG_SIZE(id) == 4) {
+		u32 val = r->val;
+
+		ret = reg_to_user(uaddr, &val, id);
+	} else if (KVM_REG_SIZE(id) == 8) {
+		ret = reg_to_user(uaddr, &r->val, id);
+	}
+	return ret;
 }
 
 static int set_invariant_cp15(u64 id, void __user *uaddr)
@@ -541,7 +758,7 @@ static int set_invariant_cp15(u64 id, void __user *uaddr)
 	struct coproc_params params;
 	const struct coproc_reg *r;
 	int err;
-	u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */
+	u64 val;
 
 	if (!index_to_params(id, &params))
 		return -ENOENT;
@@ -549,7 +766,16 @@ static int set_invariant_cp15(u64 id, void __user *uaddr)
 	if (!r)
 		return -ENOENT;
 
-	err = reg_from_user(&val, uaddr, id);
+	err = -ENOENT;
+	if (KVM_REG_SIZE(id) == 4) {
+		u32 val32;
+
+		err = reg_from_user(&val32, uaddr, id);
+		if (!err)
+			val = val32;
+	} else if (KVM_REG_SIZE(id) == 8) {
+		err = reg_from_user(&val, uaddr, id);
+	}
 	if (err)
 		return err;
 
@@ -565,7 +791,7 @@ static bool is_valid_cache(u32 val)
 	u32 level, ctype;
 
 	if (val >= CSSELR_MAX)
-		return -ENOENT;
+		return false;
 
 	/* Bottom bit is Instruction or Data bit.  Next 3 bits are level. */
         level = (val >> 1);
@@ -827,6 +1053,7 @@ int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 {
 	const struct coproc_reg *r;
 	void __user *uaddr = (void __user *)(long)reg->addr;
+	int ret;
 
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
 		return demux_c15_get(reg->id, uaddr);
@@ -838,14 +1065,24 @@ int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if (!r)
 		return get_invariant_cp15(reg->id, uaddr);
 
-	/* Note: copies two regs if size is 64 bit. */
-	return reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id);
+	ret = -ENOENT;
+	if (KVM_REG_SIZE(reg->id) == 8) {
+		u64 val;
+
+		val = vcpu_cp15_reg64_get(vcpu, r);
+		ret = reg_to_user(uaddr, &val, reg->id);
+	} else if (KVM_REG_SIZE(reg->id) == 4) {
+		ret = reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id);
+	}
+
+	return ret;
 }
 
 int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 {
 	const struct coproc_reg *r;
 	void __user *uaddr = (void __user *)(long)reg->addr;
+	int ret;
 
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
 		return demux_c15_set(reg->id, uaddr);
@@ -857,8 +1094,18 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if (!r)
 		return set_invariant_cp15(reg->id, uaddr);
 
-	/* Note: copies two regs if size is 64 bit */
-	return reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id);
+	ret = -ENOENT;
+	if (KVM_REG_SIZE(reg->id) == 8) {
+		u64 val;
+
+		ret = reg_from_user(&val, uaddr, reg->id);
+		if (!ret)
+			vcpu_cp15_reg64_set(vcpu, r, val);
+	} else if (KVM_REG_SIZE(reg->id) == 4) {
+		ret = reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id);
+	}
+
+	return ret;
 }
 
 static unsigned int num_demux_regs(void)
@@ -894,7 +1141,14 @@ static u64 cp15_to_index(const struct coproc_reg *reg)
 	if (reg->is_64) {
 		val |= KVM_REG_SIZE_U64;
 		val |= (reg->Op1 << KVM_REG_ARM_OPC1_SHIFT);
-		val |= (reg->CRm << KVM_REG_ARM_CRM_SHIFT);
+		/*
+		 * CRn always denotes the primary coproc. reg. nr. for the
+		 * in-kernel representation, but the user space API uses the
+		 * CRm for the encoding, because it is modelled after the
+		 * MRRC/MCRR instructions: see the ARM ARM rev. c page
+		 * B3-1445
+		 */
+		val |= (reg->CRn << KVM_REG_ARM_CRM_SHIFT);
 	} else {
 		val |= KVM_REG_SIZE_U32;
 		val |= (reg->Op1 << KVM_REG_ARM_OPC1_SHIFT);
diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h
index b7301d3e4799..1a44bbe39643 100644
--- a/arch/arm/kvm/coproc.h
+++ b/arch/arm/kvm/coproc.h
@@ -58,8 +58,8 @@ static inline void print_cp_instr(const struct coproc_params *p)
 {
 	/* Look, we even formatted it for you to paste into the table! */
 	if (p->is_64bit) {
-		kvm_pr_unimpl(" { CRm(%2lu), Op1(%2lu), is64, func_%s },\n",
-			      p->CRm, p->Op1, p->is_write ? "write" : "read");
+		kvm_pr_unimpl(" { CRm64(%2lu), Op1(%2lu), is64, func_%s },\n",
+			      p->CRn, p->Op1, p->is_write ? "write" : "read");
 	} else {
 		kvm_pr_unimpl(" { CRn(%2lu), CRm(%2lu), Op1(%2lu), Op2(%2lu), is32,"
 			      " func_%s },\n",
@@ -139,15 +139,22 @@ static inline int cmp_reg(const struct coproc_reg *i1,
 		return i1->CRm - i2->CRm;
 	if (i1->Op1 != i2->Op1)
 		return i1->Op1 - i2->Op1;
-	return i1->Op2 - i2->Op2;
+	if (i1->Op2 != i2->Op2)
+		return i1->Op2 - i2->Op2;
+	return i2->is_64 - i1->is_64;
 }
 
 
 #define CRn(_x)		.CRn = _x
 #define CRm(_x) 	.CRm = _x
+#define CRm64(_x)       .CRn = _x, .CRm = 0
 #define Op1(_x) 	.Op1 = _x
 #define Op2(_x) 	.Op2 = _x
 #define is64		.is_64 = true
 #define is32		.is_64 = false
 
+bool access_sctlr(struct kvm_vcpu *vcpu,
+		  const struct coproc_params *p,
+		  const struct coproc_reg *r);
+
 #endif /* __ARM_KVM_COPROC_LOCAL_H__ */
diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c
index 685063a6d0cf..e6f4ae48bda9 100644
--- a/arch/arm/kvm/coproc_a15.c
+++ b/arch/arm/kvm/coproc_a15.c
@@ -17,129 +17,24 @@
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  */
 #include <linux/kvm_host.h>
-#include <asm/cputype.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_host.h>
-#include <asm/kvm_emulate.h>
 #include <asm/kvm_coproc.h>
+#include <asm/kvm_emulate.h>
 #include <linux/init.h>
 
-static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
-	/*
-	 * Compute guest MPIDR:
-	 * (Even if we present only one VCPU to the guest on an SMP
-	 * host we don't set the U bit in the MPIDR, or vice versa, as
-	 * revealing the underlying hardware properties is likely to
-	 * be the best choice).
-	 */
-	vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & ~MPIDR_LEVEL_MASK)
-		| (vcpu->vcpu_id & MPIDR_LEVEL_MASK);
-}
-
 #include "coproc.h"
 
-/* A15 TRM 4.3.28: RO WI */
-static bool access_actlr(struct kvm_vcpu *vcpu,
-			 const struct coproc_params *p,
-			 const struct coproc_reg *r)
-{
-	if (p->is_write)
-		return ignore_write(vcpu, p);
-
-	*vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR];
-	return true;
-}
-
-/* A15 TRM 4.3.60: R/O. */
-static bool access_cbar(struct kvm_vcpu *vcpu,
-			const struct coproc_params *p,
-			const struct coproc_reg *r)
-{
-	if (p->is_write)
-		return write_to_read_only(vcpu, p);
-	return read_zero(vcpu, p);
-}
-
-/* A15 TRM 4.3.48: R/O WI. */
-static bool access_l2ctlr(struct kvm_vcpu *vcpu,
-			  const struct coproc_params *p,
-			  const struct coproc_reg *r)
-{
-	if (p->is_write)
-		return ignore_write(vcpu, p);
-
-	*vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR];
-	return true;
-}
-
-static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
-	u32 l2ctlr, ncores;
-
-	asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
-	l2ctlr &= ~(3 << 24);
-	ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1;
-	l2ctlr |= (ncores & 3) << 24;
-
-	vcpu->arch.cp15[c9_L2CTLR] = l2ctlr;
-}
-
-static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
-	u32 actlr;
-
-	/* ACTLR contains SMP bit: make sure you create all cpus first! */
-	asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr));
-	/* Make the SMP bit consistent with the guest configuration */
-	if (atomic_read(&vcpu->kvm->online_vcpus) > 1)
-		actlr |= 1U << 6;
-	else
-		actlr &= ~(1U << 6);
-
-	vcpu->arch.cp15[c1_ACTLR] = actlr;
-}
-
-/* A15 TRM 4.3.49: R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). */
-static bool access_l2ectlr(struct kvm_vcpu *vcpu,
-			   const struct coproc_params *p,
-			   const struct coproc_reg *r)
-{
-	if (p->is_write)
-		return ignore_write(vcpu, p);
-
-	*vcpu_reg(vcpu, p->Rt1) = 0;
-	return true;
-}
-
 /*
  * A15-specific CP15 registers.
- * Important: Must be sorted ascending by CRn, CRM, Op1, Op2
+ * CRn denotes the primary register number, but is copied to the CRm in the
+ * user space API for 64-bit register access in line with the terminology used
+ * in the ARM ARM.
+ * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit
+ *            registers preceding 32-bit ones.
  */
 static const struct coproc_reg a15_regs[] = {
-	/* MPIDR: we use VMPIDR for guest access. */
-	{ CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32,
-			NULL, reset_mpidr, c0_MPIDR },
-
 	/* SCTLR: swapped by interrupt.S. */
 	{ CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
-			NULL, reset_val, c1_SCTLR, 0x00C50078 },
-	/* ACTLR: trapped by HCR.TAC bit. */
-	{ CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32,
-			access_actlr, reset_actlr, c1_ACTLR },
-	/* CPACR: swapped by interrupt.S. */
-	{ CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32,
-			NULL, reset_val, c1_CPACR, 0x00000000 },
-
-	/*
-	 * L2CTLR access (guest wants to know #CPUs).
-	 */
-	{ CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32,
-			access_l2ctlr, reset_l2ctlr, c9_L2CTLR },
-	{ CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr},
-
-	/* The Configuration Base Address Register. */
-	{ CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},
+			access_sctlr, reset_val, c1_SCTLR, 0x00C50078 },
 };
 
 static struct kvm_coproc_target_table a15_target_table = {
@@ -150,12 +45,6 @@ static struct kvm_coproc_target_table a15_target_table = {
 
 static int __init coproc_a15_init(void)
 {
-	unsigned int i;
-
-	for (i = 1; i < ARRAY_SIZE(a15_regs); i++)
-		BUG_ON(cmp_reg(&a15_regs[i-1],
-			       &a15_regs[i]) >= 0);
-
 	kvm_register_target_coproc_table(&a15_target_table);
 	return 0;
 }
diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c
new file mode 100644
index 000000000000..17fc7cd479d3
--- /dev/null
+++ b/arch/arm/kvm/coproc_a7.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Copyright (C) 2013 - ARM Ltd
+ *
+ * Authors: Rusty Russell <rusty@rustcorp.au>
+ *          Christoffer Dall <c.dall@virtualopensystems.com>
+ *          Jonathan Austin <jonathan.austin@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/kvm_host.h>
+#include <asm/kvm_coproc.h>
+#include <asm/kvm_emulate.h>
+#include <linux/init.h>
+
+#include "coproc.h"
+
+/*
+ * Cortex-A7 specific CP15 registers.
+ * CRn denotes the primary register number, but is copied to the CRm in the
+ * user space API for 64-bit register access in line with the terminology used
+ * in the ARM ARM.
+ * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit
+ *            registers preceding 32-bit ones.
+ */
+static const struct coproc_reg a7_regs[] = {
+	/* SCTLR: swapped by interrupt.S. */
+	{ CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
+			access_sctlr, reset_val, c1_SCTLR, 0x00C50878 },
+};
+
+static struct kvm_coproc_target_table a7_target_table = {
+	.target = KVM_ARM_TARGET_CORTEX_A7,
+	.table = a7_regs,
+	.num = ARRAY_SIZE(a7_regs),
+};
+
+static int __init coproc_a7_init(void)
+{
+	kvm_register_target_coproc_table(&a7_target_table);
+	return 0;
+}
+late_initcall(coproc_a7_init);
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index bdede9e7da51..d6c005283678 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -354,7 +354,7 @@ static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr)
 	*vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset;
 
 	if (is_pabt) {
-		/* Set DFAR and DFSR */
+		/* Set IFAR and IFSR */
 		vcpu->arch.cp15[c6_IFAR] = addr;
 		is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31);
 		/* Always give debug fault for now - should give guest a clue */
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 152d03612181..cc0b78769bd8 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -38,6 +38,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
+	vcpu->arch.hcr = HCR_GUEST_MASK;
 	return 0;
 }
 
@@ -109,6 +110,73 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	return -EINVAL;
 }
 
+#ifndef CONFIG_KVM_ARM_TIMER
+
+#define NUM_TIMER_REGS 0
+
+static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+	return 0;
+}
+
+static bool is_timer_reg(u64 index)
+{
+	return false;
+}
+
+#else
+
+#define NUM_TIMER_REGS 3
+
+static bool is_timer_reg(u64 index)
+{
+	switch (index) {
+	case KVM_REG_ARM_TIMER_CTL:
+	case KVM_REG_ARM_TIMER_CNT:
+	case KVM_REG_ARM_TIMER_CVAL:
+		return true;
+	}
+	return false;
+}
+
+static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+	if (put_user(KVM_REG_ARM_TIMER_CTL, uindices))
+		return -EFAULT;
+	uindices++;
+	if (put_user(KVM_REG_ARM_TIMER_CNT, uindices))
+		return -EFAULT;
+	uindices++;
+	if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices))
+		return -EFAULT;
+
+	return 0;
+}
+
+#endif
+
+static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	void __user *uaddr = (void __user *)(long)reg->addr;
+	u64 val;
+	int ret;
+
+	ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
+	if (ret != 0)
+		return -EFAULT;
+
+	return kvm_arm_timer_set_reg(vcpu, reg->id, val);
+}
+
+static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	void __user *uaddr = (void __user *)(long)reg->addr;
+	u64 val;
+
+	val = kvm_arm_timer_get_reg(vcpu, reg->id);
+	return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id));
+}
+
 static unsigned long num_core_regs(void)
 {
 	return sizeof(struct kvm_regs) / sizeof(u32);
@@ -121,7 +189,8 @@ static unsigned long num_core_regs(void)
  */
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
 {
-	return num_core_regs() + kvm_arm_num_coproc_regs(vcpu);
+	return num_core_regs() + kvm_arm_num_coproc_regs(vcpu)
+		+ NUM_TIMER_REGS;
 }
 
 /**
@@ -133,6 +202,7 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
 {
 	unsigned int i;
 	const u64 core_reg = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE;
+	int ret;
 
 	for (i = 0; i < sizeof(struct kvm_regs)/sizeof(u32); i++) {
 		if (put_user(core_reg | i, uindices))
@@ -140,6 +210,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
 		uindices++;
 	}
 
+	ret = copy_timer_indices(vcpu, uindices);
+	if (ret)
+		return ret;
+	uindices += NUM_TIMER_REGS;
+
 	return kvm_arm_copy_coproc_indices(vcpu, uindices);
 }
 
@@ -153,6 +228,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
 		return get_core_reg(vcpu, reg);
 
+	if (is_timer_reg(reg->id))
+		return get_timer_reg(vcpu, reg);
+
 	return kvm_arm_coproc_get_reg(vcpu, reg);
 }
 
@@ -166,6 +244,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
 		return set_core_reg(vcpu, reg);
 
+	if (is_timer_reg(reg->id))
+		return set_timer_reg(vcpu, reg);
+
 	return kvm_arm_coproc_set_reg(vcpu, reg);
 }
 
@@ -183,13 +264,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 
 int __attribute_const__ kvm_target_cpu(void)
 {
-	unsigned long implementor = read_cpuid_implementor();
-	unsigned long part_number = read_cpuid_part_number();
-
-	if (implementor != ARM_CPU_IMP_ARM)
-		return -EINVAL;
-
-	switch (part_number) {
+	switch (read_cpuid_part()) {
+	case ARM_CPU_PART_CORTEX_A7:
+		return KVM_ARM_TARGET_CORTEX_A7;
 	case ARM_CPU_PART_CORTEX_A15:
 		return KVM_ARM_TARGET_CORTEX_A15;
 	default:
@@ -202,7 +279,7 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 {
 	unsigned int i;
 
-	/* We can only do a cortex A15 for now. */
+	/* We can only cope with guest==host and only on A15/A7 (for now). */
 	if (init->target != kvm_target_cpu())
 		return -EINVAL;
 
@@ -222,6 +299,26 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 	return kvm_reset_vcpu(vcpu);
 }
 
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
+{
+	int target = kvm_target_cpu();
+
+	if (target < 0)
+		return -ENODEV;
+
+	memset(init, 0, sizeof(*init));
+
+	/*
+	 * For now, we don't return any features.
+	 * In future, we might use features to return target
+	 * specific features available for the preferred
+	 * target type.
+	 */
+	init->target = (__u32)target;
+
+	return 0;
+}
+
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
 	return -EINVAL;
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index 3d74a0be47db..4c979d466cc1 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -26,8 +26,6 @@
 
 #include "trace.h"
 
-#include "trace.h"
-
 typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
 
 static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
@@ -40,21 +38,22 @@ static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
+	int ret;
+
 	trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
 		      kvm_vcpu_hvc_get_imm(vcpu));
 
-	if (kvm_psci_call(vcpu))
+	ret = kvm_psci_call(vcpu);
+	if (ret < 0) {
+		kvm_inject_undefined(vcpu);
 		return 1;
+	}
 
-	kvm_inject_undefined(vcpu);
-	return 1;
+	return ret;
 }
 
 static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	if (kvm_psci_call(vcpu))
-		return 1;
-
 	kvm_inject_undefined(vcpu);
 	return 1;
 }
@@ -76,23 +75,29 @@ static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
 }
 
 /**
- * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest
+ * kvm_handle_wfx - handle a WFI or WFE instructions trapped in guests
  * @vcpu:	the vcpu pointer
  * @run:	the kvm_run structure pointer
  *
- * Simply sets the wait_for_interrupts flag on the vcpu structure, which will
- * halt execution of world-switches and schedule other host processes until
- * there is an incoming IRQ or FIQ to the VM.
+ * WFE: Yield the CPU and come back to this vcpu when the scheduler
+ * decides to.
+ * WFI: Simply call kvm_vcpu_block(), which will halt execution of
+ * world-switches and schedule other host processes until there is an
+ * incoming IRQ or FIQ to the VM.
  */
-static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	trace_kvm_wfi(*vcpu_pc(vcpu));
-	kvm_vcpu_block(vcpu);
+	if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE)
+		kvm_vcpu_on_spin(vcpu);
+	else
+		kvm_vcpu_block(vcpu);
+
 	return 1;
 }
 
 static exit_handle_fn arm_exit_handlers[] = {
-	[HSR_EC_WFI]		= kvm_handle_wfi,
+	[HSR_EC_WFI]		= kvm_handle_wfx,
 	[HSR_EC_CP15_32]	= kvm_handle_cp15_32,
 	[HSR_EC_CP15_64]	= kvm_handle_cp15_64,
 	[HSR_EC_CP14_MR]	= kvm_handle_cp14_access,
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
index f048338135f7..2cc14dfad049 100644
--- a/arch/arm/kvm/init.S
+++ b/arch/arm/kvm/init.S
@@ -71,7 +71,7 @@ __do_hyp_init:
 	bne	phase2			@ Yes, second stage init
 
 	@ Set the HTTBR to point to the hypervisor PGD pointer passed
-	mcrr	p15, 4, r2, r3, c2
+	mcrr	p15, 4, rr_lo_hi(r2, r3), c2
 
 	@ Set the HTCR and VTCR to the same shareability and cacheability
 	@ settings as the non-secure TTBCR and with T0SZ == 0.
@@ -137,12 +137,12 @@ phase2:
 	mov	pc, r0
 
 target:	@ We're now in the trampoline code, switch page tables
-	mcrr	p15, 4, r2, r3, c2
+	mcrr	p15, 4, rr_lo_hi(r2, r3), c2
 	isb
 
 	@ Invalidate the old TLBs
 	mcr	p15, 4, r0, c8, c7, 0	@ TLBIALLH
-	dsb
+	dsb	ish
 
 	eret
 
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index f7793df62f58..01dcb0e752d9 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -49,12 +49,13 @@ __kvm_hyp_code_start:
 ENTRY(__kvm_tlb_flush_vmid_ipa)
 	push	{r2, r3}
 
+	dsb	ishst
 	add	r0, r0, #KVM_VTTBR
 	ldrd	r2, r3, [r0]
-	mcrr	p15, 6, r2, r3, c2	@ Write VTTBR
+	mcrr	p15, 6, rr_lo_hi(r2, r3), c2	@ Write VTTBR
 	isb
 	mcr     p15, 0, r0, c8, c3, 0	@ TLBIALLIS (rt ignored)
-	dsb
+	dsb	ish
 	isb
 	mov	r2, #0
 	mov	r3, #0
@@ -78,7 +79,7 @@ ENTRY(__kvm_flush_vm_context)
 	mcr     p15, 4, r0, c8, c3, 4
 	/* Invalidate instruction caches Inner Shareable (ICIALLUIS) */
 	mcr     p15, 0, r0, c7, c1, 0
-	dsb
+	dsb	ish
 	isb				@ Not necessary if followed by eret
 
 	bx	lr
@@ -134,7 +135,7 @@ ENTRY(__kvm_vcpu_run)
 	ldr	r1, [vcpu, #VCPU_KVM]
 	add	r1, r1, #KVM_VTTBR
 	ldrd	r2, r3, [r1]
-	mcrr	p15, 6, r2, r3, c2	@ Write VTTBR
+	mcrr	p15, 6, rr_lo_hi(r2, r3), c2	@ Write VTTBR
 
 	@ We're all done, just restore the GPRs and go to the guest
 	restore_guest_regs
@@ -198,8 +199,13 @@ after_vfp_restore:
 
 	restore_host_regs
 	clrex				@ Clear exclusive monitor
+#ifndef CONFIG_CPU_ENDIAN_BE8
 	mov	r0, r1			@ Return the return code
 	mov	r1, #0			@ Clear upper bits in return value
+#else
+	@ r1 already has return code
+	mov	r0, #0			@ Clear upper bits in return value
+#endif /* CONFIG_CPU_ENDIAN_BE8 */
 	bx	lr			@ return to IOCTL
 
 /********************************************************************
@@ -219,6 +225,10 @@ after_vfp_restore:
  * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c).  Return values are
  * passed in r0 and r1.
  *
+ * A function pointer with a value of 0xffffffff has a special meaning,
+ * and is used to implement __hyp_get_vectors in the same way as in
+ * arch/arm/kernel/hyp_stub.S.
+ *
  * The calling convention follows the standard AAPCS:
  *   r0 - r3: caller save
  *   r12:     caller save
@@ -291,6 +301,7 @@ THUMB(	orr	r2, r2, #PSR_T_BIT	)
 	ldr	r2, =BSYM(panic)
 	msr	ELR_hyp, r2
 	ldr	r0, =\panic_str
+	clrex				@ Clear exclusive monitor
 	eret
 .endm
 
@@ -361,6 +372,11 @@ hyp_hvc:
 host_switch_to_hyp:
 	pop	{r0, r1, r2}
 
+	/* Check for __hyp_get_vectors */
+	cmp	r0, #-1
+	mrceq	p15, 4, r0, c12, c0, 0	@ get HVBAR
+	beq	1f
+
 	push	{lr}
 	mrs	lr, SPSR
 	push	{lr}
@@ -376,7 +392,7 @@ THUMB(	orr	lr, #1)
 	pop	{lr}
 	msr	SPSR_csxf, lr
 	pop	{lr}
-	eret
+1:	eret
 
 guest_trap:
 	load_vcpu			@ Load VCPU pointer to r0
@@ -414,6 +430,10 @@ guest_trap:
 	mrcne	p15, 4, r2, c6, c0, 4	@ HPFAR
 	bne	3f
 
+	/* Preserve PAR */
+	mrrc	p15, 0, r0, r1, c7	@ PAR
+	push	{r0, r1}
+
 	/* Resolve IPA using the xFAR */
 	mcr	p15, 0, r2, c7, c8, 0	@ ATS1CPR
 	isb
@@ -424,13 +444,20 @@ guest_trap:
 	lsl	r2, r2, #4
 	orr	r2, r2, r1, lsl #24
 
+	/* Restore PAR */
+	pop	{r0, r1}
+	mcrr	p15, 0, r0, r1, c7	@ PAR
+
 3:	load_vcpu			@ Load VCPU pointer to r0
 	str	r2, [r0, #VCPU_HPFAR]
 
 1:	mov	r1, #ARM_EXCEPTION_HVC
 	b	__kvm_vcpu_return
 
-4:	pop	{r0, r1, r2}		@ Failed translation, return to guest
+4:	pop	{r0, r1}		@ Failed translation, return to guest
+	mcrr	p15, 0, r0, r1, c7	@ PAR
+	clrex
+	pop	{r0, r1, r2}
 	eret
 
 /*
@@ -456,6 +483,7 @@ switch_to_guest_vfp:
 
 	pop	{r3-r7}
 	pop	{r0-r2}
+	clrex
 	eret
 #endif
 
@@ -478,10 +506,10 @@ __kvm_hyp_code_end:
 	.section ".rodata"
 
 und_die_str:
-	.ascii	"unexpected undefined exception in Hyp mode at: %#08x"
+	.ascii	"unexpected undefined exception in Hyp mode at: %#08x\n"
 pabt_die_str:
-	.ascii	"unexpected prefetch abort in Hyp mode at: %#08x"
+	.ascii	"unexpected prefetch abort in Hyp mode at: %#08x\n"
 dabt_die_str:
-	.ascii	"unexpected data abort in Hyp mode at: %#08x"
+	.ascii	"unexpected data abort in Hyp mode at: %#08x\n"
 svc_die_str:
-	.ascii	"unexpected HVC/SVC trap in Hyp mode at: %#08x"
+	.ascii	"unexpected HVC/SVC trap in Hyp mode at: %#08x\n"
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 3c8f2f0b4c5e..98c8c5b9a87f 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -1,4 +1,5 @@
 #include <linux/irqchip/arm-gic.h>
+#include <asm/assembler.h>
 
 #define VCPU_USR_REG(_reg_nr)	(VCPU_USR_REGS + (_reg_nr * 4))
 #define VCPU_USR_SP		(VCPU_USR_REG(13))
@@ -302,11 +303,18 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 	.endif
 
 	mrc	p15, 0, r2, c14, c1, 0	@ CNTKCTL
+	mrrc	p15, 0, r4, r5, c7	@ PAR
+	mrc	p15, 0, r6, c10, c3, 0	@ AMAIR0
+	mrc	p15, 0, r7, c10, c3, 1	@ AMAIR1
 
 	.if \store_to_vcpu == 0
-	push	{r2}
+	push	{r2,r4-r7}
 	.else
 	str	r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
+	add	r12, vcpu, #CP15_OFFSET(c7_PAR)
+	strd	r4, r5, [r12]
+	str	r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)]
+	str	r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)]
 	.endif
 .endm
 
@@ -319,12 +327,19 @@ vcpu	.req	r0		@ vcpu pointer always in r0
  */
 .macro write_cp15_state read_from_vcpu
 	.if \read_from_vcpu == 0
-	pop	{r2}
+	pop	{r2,r4-r7}
 	.else
 	ldr	r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
+	add	r12, vcpu, #CP15_OFFSET(c7_PAR)
+	ldrd	r4, r5, [r12]
+	ldr	r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)]
+	ldr	r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)]
 	.endif
 
 	mcr	p15, 0, r2, c14, c1, 0	@ CNTKCTL
+	mcrr	p15, 0, r4, r5, c7	@ PAR
+	mcr	p15, 0, r6, c10, c3, 0	@ AMAIR0
+	mcr	p15, 0, r7, c10, c3, 1	@ AMAIR1
 
 	.if \read_from_vcpu == 0
 	pop	{r2-r12}
@@ -406,15 +421,23 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 	ldr	r8, [r2, #GICH_ELRSR0]
 	ldr	r9, [r2, #GICH_ELRSR1]
 	ldr	r10, [r2, #GICH_APR]
-
-	str	r3, [r11, #VGIC_CPU_HCR]
-	str	r4, [r11, #VGIC_CPU_VMCR]
-	str	r5, [r11, #VGIC_CPU_MISR]
-	str	r6, [r11, #VGIC_CPU_EISR]
-	str	r7, [r11, #(VGIC_CPU_EISR + 4)]
-	str	r8, [r11, #VGIC_CPU_ELRSR]
-	str	r9, [r11, #(VGIC_CPU_ELRSR + 4)]
-	str	r10, [r11, #VGIC_CPU_APR]
+ARM_BE8(rev	r3, r3	)
+ARM_BE8(rev	r4, r4	)
+ARM_BE8(rev	r5, r5	)
+ARM_BE8(rev	r6, r6	)
+ARM_BE8(rev	r7, r7	)
+ARM_BE8(rev	r8, r8	)
+ARM_BE8(rev	r9, r9	)
+ARM_BE8(rev	r10, r10	)
+
+	str	r3, [r11, #VGIC_V2_CPU_HCR]
+	str	r4, [r11, #VGIC_V2_CPU_VMCR]
+	str	r5, [r11, #VGIC_V2_CPU_MISR]
+	str	r6, [r11, #VGIC_V2_CPU_EISR]
+	str	r7, [r11, #(VGIC_V2_CPU_EISR + 4)]
+	str	r8, [r11, #VGIC_V2_CPU_ELRSR]
+	str	r9, [r11, #(VGIC_V2_CPU_ELRSR + 4)]
+	str	r10, [r11, #VGIC_V2_CPU_APR]
 
 	/* Clear GICH_HCR */
 	mov	r5, #0
@@ -422,9 +445,10 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 
 	/* Save list registers */
 	add	r2, r2, #GICH_LR0
-	add	r3, r11, #VGIC_CPU_LR
+	add	r3, r11, #VGIC_V2_CPU_LR
 	ldr	r4, [r11, #VGIC_CPU_NR_LR]
 1:	ldr	r6, [r2], #4
+ARM_BE8(rev	r6, r6	)
 	str	r6, [r3], #4
 	subs	r4, r4, #1
 	bne	1b
@@ -449,9 +473,12 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 	add	r11, vcpu, #VCPU_VGIC_CPU
 
 	/* We only restore a minimal set of registers */
-	ldr	r3, [r11, #VGIC_CPU_HCR]
-	ldr	r4, [r11, #VGIC_CPU_VMCR]
-	ldr	r8, [r11, #VGIC_CPU_APR]
+	ldr	r3, [r11, #VGIC_V2_CPU_HCR]
+	ldr	r4, [r11, #VGIC_V2_CPU_VMCR]
+	ldr	r8, [r11, #VGIC_V2_CPU_APR]
+ARM_BE8(rev	r3, r3	)
+ARM_BE8(rev	r4, r4	)
+ARM_BE8(rev	r8, r8	)
 
 	str	r3, [r2, #GICH_HCR]
 	str	r4, [r2, #GICH_VMCR]
@@ -459,9 +486,10 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 
 	/* Restore list registers */
 	add	r2, r2, #GICH_LR0
-	add	r3, r11, #VGIC_CPU_LR
+	add	r3, r11, #VGIC_V2_CPU_LR
 	ldr	r4, [r11, #VGIC_CPU_NR_LR]
 1:	ldr	r6, [r3], #4
+ARM_BE8(rev	r6, r6  )
 	str	r6, [r2], #4
 	subs	r4, r4, #1
 	bne	1b
@@ -492,11 +520,15 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 	mcr	p15, 0, r2, c14, c3, 1	@ CNTV_CTL
 	isb
 
-	mrrc	p15, 3, r2, r3, c14	@ CNTV_CVAL
+	mrrc	p15, 3, rr_lo_hi(r2, r3), c14	@ CNTV_CVAL
 	ldr	r4, =VCPU_TIMER_CNTV_CVAL
 	add	r5, vcpu, r4
 	strd	r2, r3, [r5]
 
+	@ Ensure host CNTVCT == CNTPCT
+	mov	r2, #0
+	mcrr	p15, 4, r2, r2, c14	@ CNTVOFF
+
 1:
 #endif
 	@ Allow physical timer/counter access for the host
@@ -528,12 +560,12 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 
 	ldr	r2, [r4, #KVM_TIMER_CNTVOFF]
 	ldr	r3, [r4, #(KVM_TIMER_CNTVOFF + 4)]
-	mcrr	p15, 4, r2, r3, c14	@ CNTVOFF
+	mcrr	p15, 4, rr_lo_hi(r2, r3), c14	@ CNTVOFF
 
 	ldr	r4, =VCPU_TIMER_CNTV_CVAL
 	add	r5, vcpu, r4
 	ldrd	r2, r3, [r5]
-	mcrr	p15, 3, r2, r3, c14	@ CNTV_CVAL
+	mcrr	p15, 3, rr_lo_hi(r2, r3), c14	@ CNTV_CVAL
 	isb
 
 	ldr	r2, [vcpu, #VCPU_TIMER_CNTV_CTL]
@@ -587,17 +619,14 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 
 /* Enable/Disable: stage-2 trans., trap interrupts, trap wfi, trap smc */
 .macro configure_hyp_role operation
-	mrc	p15, 4, r2, c1, c1, 0	@ HCR
-	bic	r2, r2, #HCR_VIRT_EXCP_MASK
-	ldr	r3, =HCR_GUEST_MASK
 	.if \operation == vmentry
-	orr	r2, r2, r3
+	ldr	r2, [vcpu, #VCPU_HCR]
 	ldr	r3, [vcpu, #VCPU_IRQ_LINES]
 	orr	r2, r2, r3
 	.else
-	bic	r2, r2, r3
+	mov	r2, #0
 	.endif
-	mcr	p15, 4, r2, c1, c1, 0
+	mcr	p15, 4, r2, c1, c1, 0	@ HCR
 .endm
 
 .macro load_vcpu
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 72a12f2171b2..4cb5a93182e9 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -23,6 +23,68 @@
 
 #include "trace.h"
 
+static void mmio_write_buf(char *buf, unsigned int len, unsigned long data)
+{
+	void *datap = NULL;
+	union {
+		u8	byte;
+		u16	hword;
+		u32	word;
+		u64	dword;
+	} tmp;
+
+	switch (len) {
+	case 1:
+		tmp.byte	= data;
+		datap		= &tmp.byte;
+		break;
+	case 2:
+		tmp.hword	= data;
+		datap		= &tmp.hword;
+		break;
+	case 4:
+		tmp.word	= data;
+		datap		= &tmp.word;
+		break;
+	case 8:
+		tmp.dword	= data;
+		datap		= &tmp.dword;
+		break;
+	}
+
+	memcpy(buf, datap, len);
+}
+
+static unsigned long mmio_read_buf(char *buf, unsigned int len)
+{
+	unsigned long data = 0;
+	union {
+		u16	hword;
+		u32	word;
+		u64	dword;
+	} tmp;
+
+	switch (len) {
+	case 1:
+		data = buf[0];
+		break;
+	case 2:
+		memcpy(&tmp.hword, buf, len);
+		data = tmp.hword;
+		break;
+	case 4:
+		memcpy(&tmp.word, buf, len);
+		data = tmp.word;
+		break;
+	case 8:
+		memcpy(&tmp.dword, buf, len);
+		data = tmp.dword;
+		break;
+	}
+
+	return data;
+}
+
 /**
  * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation
  * @vcpu: The VCPU pointer
@@ -33,28 +95,27 @@
  */
 int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	unsigned long *dest;
+	unsigned long data;
 	unsigned int len;
 	int mask;
 
 	if (!run->mmio.is_write) {
-		dest = vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt);
-		*dest = 0;
-
 		len = run->mmio.len;
 		if (len > sizeof(unsigned long))
 			return -EINVAL;
 
-		memcpy(dest, run->mmio.data, len);
-
-		trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
-				*((u64 *)run->mmio.data));
+		data = mmio_read_buf(run->mmio.data, len);
 
 		if (vcpu->arch.mmio_decode.sign_extend &&
 		    len < sizeof(unsigned long)) {
 			mask = 1U << ((len * 8) - 1);
-			*dest = (*dest ^ mask) - mask;
+			data = (data ^ mask) - mask;
 		}
+
+		trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
+			       data);
+		data = vcpu_data_host_to_guest(vcpu, data, len);
+		*vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt) = data;
 	}
 
 	return 0;
@@ -63,7 +124,8 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
 static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		      struct kvm_exit_mmio *mmio)
 {
-	unsigned long rt, len;
+	unsigned long rt;
+	int len;
 	bool is_write, sign_extend;
 
 	if (kvm_vcpu_dabt_isextabt(vcpu)) {
@@ -86,12 +148,6 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	sign_extend = kvm_vcpu_dabt_issext(vcpu);
 	rt = kvm_vcpu_dabt_get_rd(vcpu);
 
-	if (kvm_vcpu_reg_is_pc(vcpu, rt)) {
-		/* IO memory trying to read/write pc */
-		kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
-		return 1;
-	}
-
 	mmio->is_write = is_write;
 	mmio->phys_addr = fault_ipa;
 	mmio->len = len;
@@ -110,6 +166,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		 phys_addr_t fault_ipa)
 {
 	struct kvm_exit_mmio mmio;
+	unsigned long data;
 	unsigned long rt;
 	int ret;
 
@@ -130,13 +187,15 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 	}
 
 	rt = vcpu->arch.mmio_decode.rt;
+	data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), mmio.len);
+
 	trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE :
 					 KVM_TRACE_MMIO_READ_UNSATISFIED,
 			mmio.len, fault_ipa,
-			(mmio.is_write) ? *vcpu_reg(vcpu, rt) : 0);
+			(mmio.is_write) ? data : 0);
 
 	if (mmio.is_write)
-		memcpy(mmio.data, vcpu_reg(vcpu, rt), mmio.len);
+		mmio_write_buf(mmio.data, mmio.len, data);
 
 	if (vgic_handle_mmio(vcpu, run, &mmio))
 		return 1;
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 84ba67b982c0..eea03069161b 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -19,6 +19,7 @@
 #include <linux/mman.h>
 #include <linux/kvm_host.h>
 #include <linux/io.h>
+#include <linux/hugetlb.h>
 #include <trace/events/kvm.h>
 #include <asm/pgalloc.h>
 #include <asm/cacheflush.h>
@@ -41,6 +42,10 @@ static unsigned long hyp_idmap_start;
 static unsigned long hyp_idmap_end;
 static phys_addr_t hyp_idmap_vector;
 
+#define pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
+
+#define kvm_pmd_huge(_x)	(pmd_huge(_x) || pmd_trans_huge(_x))
+
 static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 {
 	/*
@@ -85,9 +90,19 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
 	return p;
 }
 
+static void clear_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
+{
+	pud_t *pud_table __maybe_unused = pud_offset(pgd, 0);
+	pgd_clear(pgd);
+	kvm_tlb_flush_vmid_ipa(kvm, addr);
+	pud_free(NULL, pud_table);
+	put_page(virt_to_page(pgd));
+}
+
 static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
 {
 	pmd_t *pmd_table = pmd_offset(pud, 0);
+	VM_BUG_ON(pud_huge(*pud));
 	pud_clear(pud);
 	kvm_tlb_flush_vmid_ipa(kvm, addr);
 	pmd_free(NULL, pmd_table);
@@ -97,73 +112,186 @@ static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
 static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
 {
 	pte_t *pte_table = pte_offset_kernel(pmd, 0);
+	VM_BUG_ON(kvm_pmd_huge(*pmd));
 	pmd_clear(pmd);
 	kvm_tlb_flush_vmid_ipa(kvm, addr);
 	pte_free_kernel(NULL, pte_table);
 	put_page(virt_to_page(pmd));
 }
 
-static bool pmd_empty(pmd_t *pmd)
+static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
+		       phys_addr_t addr, phys_addr_t end)
 {
-	struct page *pmd_page = virt_to_page(pmd);
-	return page_count(pmd_page) == 1;
+	phys_addr_t start_addr = addr;
+	pte_t *pte, *start_pte;
+
+	start_pte = pte = pte_offset_kernel(pmd, addr);
+	do {
+		if (!pte_none(*pte)) {
+			kvm_set_pte(pte, __pte(0));
+			put_page(virt_to_page(pte));
+			kvm_tlb_flush_vmid_ipa(kvm, addr);
+		}
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+
+	if (kvm_pte_table_empty(start_pte))
+		clear_pmd_entry(kvm, pmd, start_addr);
 }
 
-static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr)
+static void unmap_pmds(struct kvm *kvm, pud_t *pud,
+		       phys_addr_t addr, phys_addr_t end)
 {
-	if (pte_present(*pte)) {
-		kvm_set_pte(pte, __pte(0));
-		put_page(virt_to_page(pte));
-		kvm_tlb_flush_vmid_ipa(kvm, addr);
-	}
+	phys_addr_t next, start_addr = addr;
+	pmd_t *pmd, *start_pmd;
+
+	start_pmd = pmd = pmd_offset(pud, addr);
+	do {
+		next = kvm_pmd_addr_end(addr, end);
+		if (!pmd_none(*pmd)) {
+			if (kvm_pmd_huge(*pmd)) {
+				pmd_clear(pmd);
+				kvm_tlb_flush_vmid_ipa(kvm, addr);
+				put_page(virt_to_page(pmd));
+			} else {
+				unmap_ptes(kvm, pmd, addr, next);
+			}
+		}
+	} while (pmd++, addr = next, addr != end);
+
+	if (kvm_pmd_table_empty(start_pmd))
+		clear_pud_entry(kvm, pud, start_addr);
 }
 
-static bool pte_empty(pte_t *pte)
+static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
+		       phys_addr_t addr, phys_addr_t end)
 {
-	struct page *pte_page = virt_to_page(pte);
-	return page_count(pte_page) == 1;
+	phys_addr_t next, start_addr = addr;
+	pud_t *pud, *start_pud;
+
+	start_pud = pud = pud_offset(pgd, addr);
+	do {
+		next = kvm_pud_addr_end(addr, end);
+		if (!pud_none(*pud)) {
+			if (pud_huge(*pud)) {
+				pud_clear(pud);
+				kvm_tlb_flush_vmid_ipa(kvm, addr);
+				put_page(virt_to_page(pud));
+			} else {
+				unmap_pmds(kvm, pud, addr, next);
+			}
+		}
+	} while (pud++, addr = next, addr != end);
+
+	if (kvm_pud_table_empty(start_pud))
+		clear_pgd_entry(kvm, pgd, start_addr);
 }
 
+
 static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
-			unsigned long long start, u64 size)
+			phys_addr_t start, u64 size)
 {
 	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
+	phys_addr_t addr = start, end = start + size;
+	phys_addr_t next;
+
+	pgd = pgdp + pgd_index(addr);
+	do {
+		next = kvm_pgd_addr_end(addr, end);
+		unmap_puds(kvm, pgd, addr, next);
+	} while (pgd++, addr = next, addr != end);
+}
+
+static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
+			      phys_addr_t addr, phys_addr_t end)
+{
 	pte_t *pte;
-	unsigned long long addr = start, end = start + size;
-	u64 range;
 
-	while (addr < end) {
-		pgd = pgdp + pgd_index(addr);
-		pud = pud_offset(pgd, addr);
-		if (pud_none(*pud)) {
-			addr += PUD_SIZE;
-			continue;
+	pte = pte_offset_kernel(pmd, addr);
+	do {
+		if (!pte_none(*pte)) {
+			hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
+			kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE);
 		}
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+}
 
-		pmd = pmd_offset(pud, addr);
-		if (pmd_none(*pmd)) {
-			addr += PMD_SIZE;
-			continue;
+static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
+			      phys_addr_t addr, phys_addr_t end)
+{
+	pmd_t *pmd;
+	phys_addr_t next;
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = kvm_pmd_addr_end(addr, end);
+		if (!pmd_none(*pmd)) {
+			if (kvm_pmd_huge(*pmd)) {
+				hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
+				kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE);
+			} else {
+				stage2_flush_ptes(kvm, pmd, addr, next);
+			}
 		}
+	} while (pmd++, addr = next, addr != end);
+}
 
-		pte = pte_offset_kernel(pmd, addr);
-		clear_pte_entry(kvm, pte, addr);
-		range = PAGE_SIZE;
-
-		/* If we emptied the pte, walk back up the ladder */
-		if (pte_empty(pte)) {
-			clear_pmd_entry(kvm, pmd, addr);
-			range = PMD_SIZE;
-			if (pmd_empty(pmd)) {
-				clear_pud_entry(kvm, pud, addr);
-				range = PUD_SIZE;
+static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
+			      phys_addr_t addr, phys_addr_t end)
+{
+	pud_t *pud;
+	phys_addr_t next;
+
+	pud = pud_offset(pgd, addr);
+	do {
+		next = kvm_pud_addr_end(addr, end);
+		if (!pud_none(*pud)) {
+			if (pud_huge(*pud)) {
+				hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
+				kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE);
+			} else {
+				stage2_flush_pmds(kvm, pud, addr, next);
 			}
 		}
+	} while (pud++, addr = next, addr != end);
+}
 
-		addr += range;
-	}
+static void stage2_flush_memslot(struct kvm *kvm,
+				 struct kvm_memory_slot *memslot)
+{
+	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
+	phys_addr_t end = addr + PAGE_SIZE * memslot->npages;
+	phys_addr_t next;
+	pgd_t *pgd;
+
+	pgd = kvm->arch.pgd + pgd_index(addr);
+	do {
+		next = kvm_pgd_addr_end(addr, end);
+		stage2_flush_puds(kvm, pgd, addr, next);
+	} while (pgd++, addr = next, addr != end);
+}
+
+/**
+ * stage2_flush_vm - Invalidate cache for pages mapped in stage 2
+ * @kvm: The struct kvm pointer
+ *
+ * Go through the stage 2 page tables and invalidate any cache lines
+ * backing memory already mapped to the VM.
+ */
+void stage2_flush_vm(struct kvm *kvm)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+	int idx;
+
+	idx = srcu_read_lock(&kvm->srcu);
+	spin_lock(&kvm->mmu_lock);
+
+	slots = kvm_memslots(kvm);
+	kvm_for_each_memslot(memslot, slots)
+		stage2_flush_memslot(kvm, memslot);
+
+	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 }
 
 /**
@@ -178,14 +306,14 @@ void free_boot_hyp_pgd(void)
 	if (boot_hyp_pgd) {
 		unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
 		unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
-		kfree(boot_hyp_pgd);
+		free_pages((unsigned long)boot_hyp_pgd, pgd_order);
 		boot_hyp_pgd = NULL;
 	}
 
 	if (hyp_pgd)
 		unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
 
-	kfree(init_bounce_page);
+	free_page((unsigned long)init_bounce_page);
 	init_bounce_page = NULL;
 
 	mutex_unlock(&kvm_hyp_pgd_mutex);
@@ -215,7 +343,7 @@ void free_hyp_pgds(void)
 		for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
 			unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
 
-		kfree(hyp_pgd);
+		free_pages((unsigned long)hyp_pgd, pgd_order);
 		hyp_pgd = NULL;
 	}
 
@@ -313,6 +441,17 @@ out:
 	return err;
 }
 
+static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
+{
+	if (!is_vmalloc_addr(kaddr)) {
+		BUG_ON(!virt_addr_valid(kaddr));
+		return __pa(kaddr);
+	} else {
+		return page_to_phys(vmalloc_to_page(kaddr)) +
+		       offset_in_page(kaddr);
+	}
+}
+
 /**
  * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
  * @from:	The virtual kernel start address of the range
@@ -324,16 +463,27 @@ out:
  */
 int create_hyp_mappings(void *from, void *to)
 {
-	unsigned long phys_addr = virt_to_phys(from);
+	phys_addr_t phys_addr;
+	unsigned long virt_addr;
 	unsigned long start = KERN_TO_HYP((unsigned long)from);
 	unsigned long end = KERN_TO_HYP((unsigned long)to);
 
-	/* Check for a valid kernel memory mapping */
-	if (!virt_addr_valid(from) || !virt_addr_valid(to - 1))
-		return -EINVAL;
+	start = start & PAGE_MASK;
+	end = PAGE_ALIGN(end);
 
-	return __create_hyp_mappings(hyp_pgd, start, end,
-				     __phys_to_pfn(phys_addr), PAGE_HYP);
+	for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) {
+		int err;
+
+		phys_addr = kvm_kaddr_to_phys(from + virt_addr - start);
+		err = __create_hyp_mappings(hyp_pgd, virt_addr,
+					    virt_addr + PAGE_SIZE,
+					    __phys_to_pfn(phys_addr),
+					    PAGE_HYP);
+		if (err)
+			return err;
+	}
+
+	return 0;
 }
 
 /**
@@ -382,9 +532,6 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
 	if (!pgd)
 		return -ENOMEM;
 
-	/* stage-2 pgd must be aligned to its size */
-	VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1));
-
 	memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
 	kvm_clean_pgd(pgd);
 	kvm->arch.pgd = pgd;
@@ -429,29 +576,71 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
 	kvm->arch.pgd = NULL;
 }
 
-
-static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
-			  phys_addr_t addr, const pte_t *new_pte, bool iomap)
+static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+			     phys_addr_t addr)
 {
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
-	pte_t *pte, old_pte;
 
-	/* Create 2nd stage page table mapping - Level 1 */
 	pgd = kvm->arch.pgd + pgd_index(addr);
 	pud = pud_offset(pgd, addr);
 	if (pud_none(*pud)) {
 		if (!cache)
-			return 0; /* ignore calls from kvm_set_spte_hva */
+			return NULL;
 		pmd = mmu_memory_cache_alloc(cache);
 		pud_populate(NULL, pud, pmd);
 		get_page(virt_to_page(pud));
 	}
 
-	pmd = pmd_offset(pud, addr);
+	return pmd_offset(pud, addr);
+}
+
+static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
+			       *cache, phys_addr_t addr, const pmd_t *new_pmd)
+{
+	pmd_t *pmd, old_pmd;
+
+	pmd = stage2_get_pmd(kvm, cache, addr);
+	VM_BUG_ON(!pmd);
+
+	/*
+	 * Mapping in huge pages should only happen through a fault.  If a
+	 * page is merged into a transparent huge page, the individual
+	 * subpages of that huge page should be unmapped through MMU
+	 * notifiers before we get here.
+	 *
+	 * Merging of CompoundPages is not supported; they should become
+	 * splitting first, unmapped, merged, and mapped back in on-demand.
+	 */
+	VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));
 
-	/* Create 2nd stage page table mapping - Level 2 */
+	old_pmd = *pmd;
+	kvm_set_pmd(pmd, *new_pmd);
+	if (pmd_present(old_pmd))
+		kvm_tlb_flush_vmid_ipa(kvm, addr);
+	else
+		get_page(virt_to_page(pmd));
+	return 0;
+}
+
+static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+			  phys_addr_t addr, const pte_t *new_pte, bool iomap)
+{
+	pmd_t *pmd;
+	pte_t *pte, old_pte;
+
+	/* Create stage-2 page table mapping - Level 1 */
+	pmd = stage2_get_pmd(kvm, cache, addr);
+	if (!pmd) {
+		/*
+		 * Ignore calls from kvm_set_spte_hva for unallocated
+		 * address ranges.
+		 */
+		return 0;
+	}
+
+	/* Create stage-2 page mappings - Level 2 */
 	if (pmd_none(*pmd)) {
 		if (!cache)
 			return 0; /* ignore calls from kvm_set_spte_hva */
@@ -498,7 +687,6 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 
 	for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
 		pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
-		kvm_set_s2pte_writable(&pte);
 
 		ret = mmu_topup_memory_cache(&cache, 2, 2);
 		if (ret)
@@ -517,23 +705,97 @@ out:
 	return ret;
 }
 
+static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap)
+{
+	pfn_t pfn = *pfnp;
+	gfn_t gfn = *ipap >> PAGE_SHIFT;
+
+	if (PageTransCompound(pfn_to_page(pfn))) {
+		unsigned long mask;
+		/*
+		 * The address we faulted on is backed by a transparent huge
+		 * page.  However, because we map the compound huge page and
+		 * not the individual tail page, we need to transfer the
+		 * refcount to the head page.  We have to be careful that the
+		 * THP doesn't start to split while we are adjusting the
+		 * refcounts.
+		 *
+		 * We are sure this doesn't happen, because mmu_notifier_retry
+		 * was successful and we are holding the mmu_lock, so if this
+		 * THP is trying to split, it will be blocked in the mmu
+		 * notifier before touching any of the pages, specifically
+		 * before being able to call __split_huge_page_refcount().
+		 *
+		 * We can therefore safely transfer the refcount from PG_tail
+		 * to PG_head and switch the pfn from a tail page to the head
+		 * page accordingly.
+		 */
+		mask = PTRS_PER_PMD - 1;
+		VM_BUG_ON((gfn & mask) != (pfn & mask));
+		if (pfn & mask) {
+			*ipap &= PMD_MASK;
+			kvm_release_pfn_clean(pfn);
+			pfn &= ~mask;
+			kvm_get_pfn(pfn);
+			*pfnp = pfn;
+		}
+
+		return true;
+	}
+
+	return false;
+}
+
+static bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
+{
+	if (kvm_vcpu_trap_is_iabt(vcpu))
+		return false;
+
+	return kvm_vcpu_dabt_iswrite(vcpu);
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
-			  gfn_t gfn, struct kvm_memory_slot *memslot,
+			  struct kvm_memory_slot *memslot, unsigned long hva,
 			  unsigned long fault_status)
 {
-	pte_t new_pte;
-	pfn_t pfn;
 	int ret;
-	bool write_fault, writable;
+	bool write_fault, writable, hugetlb = false, force_pte = false;
 	unsigned long mmu_seq;
+	gfn_t gfn = fault_ipa >> PAGE_SHIFT;
+	struct kvm *kvm = vcpu->kvm;
 	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
+	struct vm_area_struct *vma;
+	pfn_t pfn;
+	pgprot_t mem_type = PAGE_S2;
 
-	write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
+	write_fault = kvm_is_write_fault(vcpu);
 	if (fault_status == FSC_PERM && !write_fault) {
 		kvm_err("Unexpected L2 read permission error\n");
 		return -EFAULT;
 	}
 
+	/* Let's check if we will get back a huge page backed by hugetlbfs */
+	down_read(&current->mm->mmap_sem);
+	vma = find_vma_intersection(current->mm, hva, hva + 1);
+	if (is_vm_hugetlb_page(vma)) {
+		hugetlb = true;
+		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
+	} else {
+		/*
+		 * Pages belonging to memslots that don't have the same
+		 * alignment for userspace and IPA cannot be mapped using
+		 * block descriptors even if the pages belong to a THP for
+		 * the process, because the stage-2 block descriptor will
+		 * cover more than a single THP and we loose atomicity for
+		 * unmapping, updates, and splits of the THP or other pages
+		 * in the stage-2 block range.
+		 */
+		if ((memslot->userspace_addr & ~PMD_MASK) !=
+		    ((memslot->base_gfn << PAGE_SHIFT) & ~PMD_MASK))
+			force_pte = true;
+	}
+	up_read(&current->mm->mmap_sem);
+
 	/* We need minimum second+third level pages */
 	ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
 	if (ret)
@@ -551,26 +813,44 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	 */
 	smp_rmb();
 
-	pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable);
+	pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
 	if (is_error_pfn(pfn))
 		return -EFAULT;
 
-	new_pte = pfn_pte(pfn, PAGE_S2);
-	coherent_icache_guest_page(vcpu->kvm, gfn);
+	if (kvm_is_mmio_pfn(pfn))
+		mem_type = PAGE_S2_DEVICE;
 
-	spin_lock(&vcpu->kvm->mmu_lock);
-	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
+	spin_lock(&kvm->mmu_lock);
+	if (mmu_notifier_retry(kvm, mmu_seq))
 		goto out_unlock;
-	if (writable) {
-		kvm_set_s2pte_writable(&new_pte);
-		kvm_set_pfn_dirty(pfn);
+	if (!hugetlb && !force_pte)
+		hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
+
+	if (hugetlb) {
+		pmd_t new_pmd = pfn_pmd(pfn, mem_type);
+		new_pmd = pmd_mkhuge(new_pmd);
+		if (writable) {
+			kvm_set_s2pmd_writable(&new_pmd);
+			kvm_set_pfn_dirty(pfn);
+		}
+		coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE);
+		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
+	} else {
+		pte_t new_pte = pfn_pte(pfn, mem_type);
+		if (writable) {
+			kvm_set_s2pte_writable(&new_pte);
+			kvm_set_pfn_dirty(pfn);
+		}
+		coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
+		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
+				     mem_type == PAGE_S2_DEVICE);
 	}
-	stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false);
+
 
 out_unlock:
-	spin_unlock(&vcpu->kvm->mmu_lock);
+	spin_unlock(&kvm->mmu_lock);
 	kvm_release_pfn_clean(pfn);
-	return 0;
+	return ret;
 }
 
 /**
@@ -590,7 +870,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	unsigned long fault_status;
 	phys_addr_t fault_ipa;
 	struct kvm_memory_slot *memslot;
-	bool is_iabt;
+	unsigned long hva;
+	bool is_iabt, write_fault, writable;
 	gfn_t gfn;
 	int ret, idx;
 
@@ -601,17 +882,22 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 			      kvm_vcpu_get_hfar(vcpu), fault_ipa);
 
 	/* Check the stage-2 fault is trans. fault or write fault */
-	fault_status = kvm_vcpu_trap_get_fault(vcpu);
+	fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
 	if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
-		kvm_err("Unsupported fault status: EC=%#x DFCS=%#lx\n",
-			kvm_vcpu_trap_get_class(vcpu), fault_status);
+		kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
+			kvm_vcpu_trap_get_class(vcpu),
+			(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
+			(unsigned long)kvm_vcpu_get_hsr(vcpu));
 		return -EFAULT;
 	}
 
 	idx = srcu_read_lock(&vcpu->kvm->srcu);
 
 	gfn = fault_ipa >> PAGE_SHIFT;
-	if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+	memslot = gfn_to_memslot(vcpu->kvm, gfn);
+	hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
+	write_fault = kvm_is_write_fault(vcpu);
+	if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
 		if (is_iabt) {
 			/* Prefetch Abort on I/O address */
 			kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
@@ -619,13 +905,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 			goto out_unlock;
 		}
 
-		if (fault_status != FSC_FAULT) {
-			kvm_err("Unsupported fault status on io memory: %#lx\n",
-				fault_status);
-			ret = -EFAULT;
-			goto out_unlock;
-		}
-
 		/*
 		 * The IPA is reported as [MAX:12], so we need to
 		 * complement it with the bottom 12 bits from the
@@ -637,9 +916,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		goto out_unlock;
 	}
 
-	memslot = gfn_to_memslot(vcpu->kvm, gfn);
-
-	ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status);
+	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
 	if (ret == 0)
 		ret = 1;
 out_unlock:
@@ -757,9 +1034,9 @@ int kvm_mmu_init(void)
 {
 	int err;
 
-	hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start);
-	hyp_idmap_end = virt_to_phys(__hyp_idmap_text_end);
-	hyp_idmap_vector = virt_to_phys(__kvm_hyp_init);
+	hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start);
+	hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end);
+	hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init);
 
 	if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) {
 		/*
@@ -769,7 +1046,7 @@ int kvm_mmu_init(void)
 		size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start;
 		phys_addr_t phys_base;
 
-		init_bounce_page = kmalloc(PAGE_SIZE, GFP_KERNEL);
+		init_bounce_page = (void *)__get_free_page(GFP_KERNEL);
 		if (!init_bounce_page) {
 			kvm_err("Couldn't allocate HYP init bounce page\n");
 			err = -ENOMEM;
@@ -786,7 +1063,7 @@ int kvm_mmu_init(void)
 		 */
 		kvm_flush_dcache_to_poc(init_bounce_page, len);
 
-		phys_base = virt_to_phys(init_bounce_page);
+		phys_base = kvm_virt_to_phys(init_bounce_page);
 		hyp_idmap_vector += phys_base - hyp_idmap_start;
 		hyp_idmap_start = phys_base;
 		hyp_idmap_end = phys_base + len;
@@ -795,8 +1072,9 @@ int kvm_mmu_init(void)
 			 (unsigned long)phys_base);
 	}
 
-	hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
-	boot_hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
+	hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
+	boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
+
 	if (!hyp_pgd || !boot_hyp_pgd) {
 		kvm_err("Hyp mode PGD not allocated\n");
 		err = -ENOMEM;
@@ -842,3 +1120,49 @@ out:
 	free_hyp_pgds();
 	return err;
 }
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+				   struct kvm_userspace_memory_region *mem,
+				   const struct kvm_memory_slot *old,
+				   enum kvm_mr_change change)
+{
+	gpa_t gpa = old->base_gfn << PAGE_SHIFT;
+	phys_addr_t size = old->npages << PAGE_SHIFT;
+	if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
+		spin_lock(&kvm->mmu_lock);
+		unmap_stage2_range(kvm, gpa, size);
+		spin_unlock(&kvm->mmu_lock);
+	}
+}
+
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+				   struct kvm_memory_slot *memslot,
+				   struct kvm_userspace_memory_region *mem,
+				   enum kvm_mr_change change)
+{
+	return 0;
+}
+
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
+			   struct kvm_memory_slot *dont)
+{
+}
+
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
+{
+	return 0;
+}
+
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
+{
+}
+
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+				   struct kvm_memory_slot *slot)
+{
+}
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 7ee5bb7a3667..09cf37737ee2 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -18,6 +18,7 @@
 #include <linux/kvm_host.h>
 #include <linux/wait.h>
 
+#include <asm/cputype.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_psci.h>
 
@@ -26,6 +27,36 @@
  * as described in ARM document number ARM DEN 0022A.
  */
 
+#define AFFINITY_MASK(level)	~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1)
+
+static unsigned long psci_affinity_mask(unsigned long affinity_level)
+{
+	if (affinity_level <= 3)
+		return MPIDR_HWID_BITMASK & AFFINITY_MASK(affinity_level);
+
+	return 0;
+}
+
+static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * NOTE: For simplicity, we make VCPU suspend emulation to be
+	 * same-as WFI (Wait-for-interrupt) emulation.
+	 *
+	 * This means for KVM the wakeup events are interrupts and
+	 * this is consistent with intended use of StateID as described
+	 * in section 5.4.1 of PSCI v0.2 specification (ARM DEN 0022A).
+	 *
+	 * Further, we also treat power-down request to be same as
+	 * stand-by request as-per section 5.4.2 clause 3 of PSCI v0.2
+	 * specification (ARM DEN 0022A). This means all suspend states
+	 * for KVM will preserve the register state.
+	 */
+	kvm_vcpu_block(vcpu);
+
+	return PSCI_RET_SUCCESS;
+}
+
 static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.pause = true;
@@ -34,25 +65,41 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
 static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 {
 	struct kvm *kvm = source_vcpu->kvm;
-	struct kvm_vcpu *vcpu;
+	struct kvm_vcpu *vcpu = NULL, *tmp;
 	wait_queue_head_t *wq;
 	unsigned long cpu_id;
+	unsigned long context_id;
+	unsigned long mpidr;
 	phys_addr_t target_pc;
+	int i;
 
 	cpu_id = *vcpu_reg(source_vcpu, 1);
 	if (vcpu_mode_is_32bit(source_vcpu))
 		cpu_id &= ~((u32) 0);
 
-	if (cpu_id >= atomic_read(&kvm->online_vcpus))
-		return KVM_PSCI_RET_INVAL;
-
-	target_pc = *vcpu_reg(source_vcpu, 2);
+	kvm_for_each_vcpu(i, tmp, kvm) {
+		mpidr = kvm_vcpu_get_mpidr(tmp);
+		if ((mpidr & MPIDR_HWID_BITMASK) == (cpu_id & MPIDR_HWID_BITMASK)) {
+			vcpu = tmp;
+			break;
+		}
+	}
 
-	vcpu = kvm_get_vcpu(kvm, cpu_id);
+	/*
+	 * Make sure the caller requested a valid CPU and that the CPU is
+	 * turned off.
+	 */
+	if (!vcpu)
+		return PSCI_RET_INVALID_PARAMS;
+	if (!vcpu->arch.pause) {
+		if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
+			return PSCI_RET_ALREADY_ON;
+		else
+			return PSCI_RET_INVALID_PARAMS;
+	}
 
-	wq = kvm_arch_vcpu_wq(vcpu);
-	if (!waitqueue_active(wq))
-		return KVM_PSCI_RET_INVAL;
+	target_pc = *vcpu_reg(source_vcpu, 2);
+	context_id = *vcpu_reg(source_vcpu, 3);
 
 	kvm_reset_vcpu(vcpu);
 
@@ -62,26 +109,165 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 		vcpu_set_thumb(vcpu);
 	}
 
+	/* Propagate caller endianness */
+	if (kvm_vcpu_is_be(source_vcpu))
+		kvm_vcpu_set_be(vcpu);
+
 	*vcpu_pc(vcpu) = target_pc;
+	/*
+	 * NOTE: We always update r0 (or x0) because for PSCI v0.1
+	 * the general puspose registers are undefined upon CPU_ON.
+	 */
+	*vcpu_reg(vcpu, 0) = context_id;
 	vcpu->arch.pause = false;
 	smp_mb();		/* Make sure the above is visible */
 
+	wq = kvm_arch_vcpu_wq(vcpu);
 	wake_up_interruptible(wq);
 
-	return KVM_PSCI_RET_SUCCESS;
+	return PSCI_RET_SUCCESS;
 }
 
-/**
- * kvm_psci_call - handle PSCI call if r0 value is in range
- * @vcpu: Pointer to the VCPU struct
- *
- * Handle PSCI calls from guests through traps from HVC or SMC instructions.
- * The calling convention is similar to SMC calls to the secure world where
- * the function number is placed in r0 and this function returns true if the
- * function number specified in r0 is withing the PSCI range, and false
- * otherwise.
- */
-bool kvm_psci_call(struct kvm_vcpu *vcpu)
+static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
+{
+	int i;
+	unsigned long mpidr;
+	unsigned long target_affinity;
+	unsigned long target_affinity_mask;
+	unsigned long lowest_affinity_level;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_vcpu *tmp;
+
+	target_affinity = *vcpu_reg(vcpu, 1);
+	lowest_affinity_level = *vcpu_reg(vcpu, 2);
+
+	/* Determine target affinity mask */
+	target_affinity_mask = psci_affinity_mask(lowest_affinity_level);
+	if (!target_affinity_mask)
+		return PSCI_RET_INVALID_PARAMS;
+
+	/* Ignore other bits of target affinity */
+	target_affinity &= target_affinity_mask;
+
+	/*
+	 * If one or more VCPU matching target affinity are running
+	 * then ON else OFF
+	 */
+	kvm_for_each_vcpu(i, tmp, kvm) {
+		mpidr = kvm_vcpu_get_mpidr(tmp);
+		if (((mpidr & target_affinity_mask) == target_affinity) &&
+		    !tmp->arch.pause) {
+			return PSCI_0_2_AFFINITY_LEVEL_ON;
+		}
+	}
+
+	return PSCI_0_2_AFFINITY_LEVEL_OFF;
+}
+
+static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
+{
+	memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
+	vcpu->run->system_event.type = type;
+	vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+}
+
+static void kvm_psci_system_off(struct kvm_vcpu *vcpu)
+{
+	kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN);
+}
+
+static void kvm_psci_system_reset(struct kvm_vcpu *vcpu)
+{
+	kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET);
+}
+
+int kvm_psci_version(struct kvm_vcpu *vcpu)
+{
+	if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features))
+		return KVM_ARM_PSCI_0_2;
+
+	return KVM_ARM_PSCI_0_1;
+}
+
+static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
+{
+	int ret = 1;
+	unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
+	unsigned long val;
+
+	switch (psci_fn) {
+	case PSCI_0_2_FN_PSCI_VERSION:
+		/*
+		 * Bits[31:16] = Major Version = 0
+		 * Bits[15:0] = Minor Version = 2
+		 */
+		val = 2;
+		break;
+	case PSCI_0_2_FN_CPU_SUSPEND:
+	case PSCI_0_2_FN64_CPU_SUSPEND:
+		val = kvm_psci_vcpu_suspend(vcpu);
+		break;
+	case PSCI_0_2_FN_CPU_OFF:
+		kvm_psci_vcpu_off(vcpu);
+		val = PSCI_RET_SUCCESS;
+		break;
+	case PSCI_0_2_FN_CPU_ON:
+	case PSCI_0_2_FN64_CPU_ON:
+		val = kvm_psci_vcpu_on(vcpu);
+		break;
+	case PSCI_0_2_FN_AFFINITY_INFO:
+	case PSCI_0_2_FN64_AFFINITY_INFO:
+		val = kvm_psci_vcpu_affinity_info(vcpu);
+		break;
+	case PSCI_0_2_FN_MIGRATE:
+	case PSCI_0_2_FN64_MIGRATE:
+		val = PSCI_RET_NOT_SUPPORTED;
+		break;
+	case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
+		/*
+		 * Trusted OS is MP hence does not require migration
+	         * or
+		 * Trusted OS is not present
+		 */
+		val = PSCI_0_2_TOS_MP;
+		break;
+	case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU:
+	case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU:
+		val = PSCI_RET_NOT_SUPPORTED;
+		break;
+	case PSCI_0_2_FN_SYSTEM_OFF:
+		kvm_psci_system_off(vcpu);
+		/*
+		 * We should'nt be going back to guest VCPU after
+		 * receiving SYSTEM_OFF request.
+		 *
+		 * If user space accidently/deliberately resumes
+		 * guest VCPU after SYSTEM_OFF request then guest
+		 * VCPU should see internal failure from PSCI return
+		 * value. To achieve this, we preload r0 (or x0) with
+		 * PSCI return value INTERNAL_FAILURE.
+		 */
+		val = PSCI_RET_INTERNAL_FAILURE;
+		ret = 0;
+		break;
+	case PSCI_0_2_FN_SYSTEM_RESET:
+		kvm_psci_system_reset(vcpu);
+		/*
+		 * Same reason as SYSTEM_OFF for preloading r0 (or x0)
+		 * with PSCI return value INTERNAL_FAILURE.
+		 */
+		val = PSCI_RET_INTERNAL_FAILURE;
+		ret = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	*vcpu_reg(vcpu, 0) = val;
+	return ret;
+}
+
+static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
 {
 	unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
 	unsigned long val;
@@ -89,20 +275,45 @@ bool kvm_psci_call(struct kvm_vcpu *vcpu)
 	switch (psci_fn) {
 	case KVM_PSCI_FN_CPU_OFF:
 		kvm_psci_vcpu_off(vcpu);
-		val = KVM_PSCI_RET_SUCCESS;
+		val = PSCI_RET_SUCCESS;
 		break;
 	case KVM_PSCI_FN_CPU_ON:
 		val = kvm_psci_vcpu_on(vcpu);
 		break;
 	case KVM_PSCI_FN_CPU_SUSPEND:
 	case KVM_PSCI_FN_MIGRATE:
-		val = KVM_PSCI_RET_NI;
+		val = PSCI_RET_NOT_SUPPORTED;
 		break;
-
 	default:
-		return false;
+		return -EINVAL;
 	}
 
 	*vcpu_reg(vcpu, 0) = val;
-	return true;
+	return 1;
+}
+
+/**
+ * kvm_psci_call - handle PSCI call if r0 value is in range
+ * @vcpu: Pointer to the VCPU struct
+ *
+ * Handle PSCI calls from guests through traps from HVC instructions.
+ * The calling convention is similar to SMC calls to the secure world
+ * where the function number is placed in r0.
+ *
+ * This function returns: > 0 (success), 0 (success but exit to user
+ * space), and < 0 (errors)
+ *
+ * Errors:
+ * -EINVAL: Unrecognized PSCI function
+ */
+int kvm_psci_call(struct kvm_vcpu *vcpu)
+{
+	switch (kvm_psci_version(vcpu)) {
+	case KVM_ARM_PSCI_0_2:
+		return kvm_psci_0_2_call(vcpu);
+	case KVM_ARM_PSCI_0_1:
+		return kvm_psci_0_1_call(vcpu);
+	default:
+		return -EINVAL;
+	};
 }
diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index b80256b554cd..f558c073c023 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -27,16 +27,21 @@
 #include <asm/kvm_arm.h>
 #include <asm/kvm_coproc.h>
 
+#include <kvm/arm_arch_timer.h>
+
 /******************************************************************************
- * Cortex-A15 Reset Values
+ * Cortex-A15 and Cortex-A7 Reset Values
  */
 
-static const int a15_max_cpu_idx = 3;
-
-static struct kvm_regs a15_regs_reset = {
+static struct kvm_regs cortexa_regs_reset = {
 	.usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
 };
 
+static const struct kvm_irq_level cortexa_vtimer_irq = {
+	{ .irq = 27 },
+	.level = 1,
+};
+
 
 /*******************************************************************************
  * Exported reset function
@@ -51,24 +56,28 @@ static struct kvm_regs a15_regs_reset = {
  */
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 {
-	struct kvm_regs *cpu_reset;
+	struct kvm_regs *reset_regs;
+	const struct kvm_irq_level *cpu_vtimer_irq;
 
 	switch (vcpu->arch.target) {
+	case KVM_ARM_TARGET_CORTEX_A7:
 	case KVM_ARM_TARGET_CORTEX_A15:
-		if (vcpu->vcpu_id > a15_max_cpu_idx)
-			return -EINVAL;
-		cpu_reset = &a15_regs_reset;
+		reset_regs = &cortexa_regs_reset;
 		vcpu->arch.midr = read_cpuid_id();
+		cpu_vtimer_irq = &cortexa_vtimer_irq;
 		break;
 	default:
 		return -ENODEV;
 	}
 
 	/* Reset core registers */
-	memcpy(&vcpu->arch.regs, cpu_reset, sizeof(vcpu->arch.regs));
+	memcpy(&vcpu->arch.regs, reset_regs, sizeof(vcpu->arch.regs));
 
 	/* Reset CP15 registers */
 	kvm_reset_coprocs(vcpu);
 
+	/* Reset arch_timer context */
+	kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
+
 	return 0;
 }
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index a8e73ed5ad5b..b1d640f78623 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -59,10 +59,9 @@ TRACE_EVENT(kvm_guest_fault,
 		__entry->ipa			= ipa;
 	),
 
-	TP_printk("guest fault at PC %#08lx (hxfar %#08lx, "
-		  "ipa %#16llx, hsr %#08lx",
-		  __entry->vcpu_pc, __entry->hxfar,
-		  __entry->ipa, __entry->hsr)
+	TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#08lx",
+		  __entry->ipa, __entry->hsr,
+		  __entry->hxfar, __entry->vcpu_pc)
 );
 
 TRACE_EVENT(kvm_irq_line,
diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
deleted file mode 100644
index 17c5ac7d10ed..000000000000
--- a/arch/arm/kvm/vgic.c
+++ /dev/null
@@ -1,1499 +0,0 @@
-/*
- * Copyright (C) 2012 ARM Ltd.
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <linux/cpu.h>
-#include <linux/kvm.h>
-#include <linux/kvm_host.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-
-#include <linux/irqchip/arm-gic.h>
-
-#include <asm/kvm_emulate.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_mmu.h>
-
-/*
- * How the whole thing works (courtesy of Christoffer Dall):
- *
- * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if
- *   something is pending
- * - VGIC pending interrupts are stored on the vgic.irq_state vgic
- *   bitmap (this bitmap is updated by both user land ioctls and guest
- *   mmio ops, and other in-kernel peripherals such as the
- *   arch. timers) and indicate the 'wire' state.
- * - Every time the bitmap changes, the irq_pending_on_cpu oracle is
- *   recalculated
- * - To calculate the oracle, we need info for each cpu from
- *   compute_pending_for_cpu, which considers:
- *   - PPI: dist->irq_state & dist->irq_enable
- *   - SPI: dist->irq_state & dist->irq_enable & dist->irq_spi_target
- *   - irq_spi_target is a 'formatted' version of the GICD_ICFGR
- *     registers, stored on each vcpu. We only keep one bit of
- *     information per interrupt, making sure that only one vcpu can
- *     accept the interrupt.
- * - The same is true when injecting an interrupt, except that we only
- *   consider a single interrupt at a time. The irq_spi_cpu array
- *   contains the target CPU for each SPI.
- *
- * The handling of level interrupts adds some extra complexity. We
- * need to track when the interrupt has been EOIed, so we can sample
- * the 'line' again. This is achieved as such:
- *
- * - When a level interrupt is moved onto a vcpu, the corresponding
- *   bit in irq_active is set. As long as this bit is set, the line
- *   will be ignored for further interrupts. The interrupt is injected
- *   into the vcpu with the GICH_LR_EOI bit set (generate a
- *   maintenance interrupt on EOI).
- * - When the interrupt is EOIed, the maintenance interrupt fires,
- *   and clears the corresponding bit in irq_active. This allow the
- *   interrupt line to be sampled again.
- */
-
-#define VGIC_ADDR_UNDEF		(-1)
-#define IS_VGIC_ADDR_UNDEF(_x)  ((_x) == VGIC_ADDR_UNDEF)
-
-/* Physical address of vgic virtual cpu interface */
-static phys_addr_t vgic_vcpu_base;
-
-/* Virtual control interface base address */
-static void __iomem *vgic_vctrl_base;
-
-static struct device_node *vgic_node;
-
-#define ACCESS_READ_VALUE	(1 << 0)
-#define ACCESS_READ_RAZ		(0 << 0)
-#define ACCESS_READ_MASK(x)	((x) & (1 << 0))
-#define ACCESS_WRITE_IGNORED	(0 << 1)
-#define ACCESS_WRITE_SETBIT	(1 << 1)
-#define ACCESS_WRITE_CLEARBIT	(2 << 1)
-#define ACCESS_WRITE_VALUE	(3 << 1)
-#define ACCESS_WRITE_MASK(x)	((x) & (3 << 1))
-
-static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
-static void vgic_update_state(struct kvm *kvm);
-static void vgic_kick_vcpus(struct kvm *kvm);
-static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
-static u32 vgic_nr_lr;
-
-static unsigned int vgic_maint_irq;
-
-static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
-				int cpuid, u32 offset)
-{
-	offset >>= 2;
-	if (!offset)
-		return x->percpu[cpuid].reg;
-	else
-		return x->shared.reg + offset - 1;
-}
-
-static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x,
-				   int cpuid, int irq)
-{
-	if (irq < VGIC_NR_PRIVATE_IRQS)
-		return test_bit(irq, x->percpu[cpuid].reg_ul);
-
-	return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared.reg_ul);
-}
-
-static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
-				    int irq, int val)
-{
-	unsigned long *reg;
-
-	if (irq < VGIC_NR_PRIVATE_IRQS) {
-		reg = x->percpu[cpuid].reg_ul;
-	} else {
-		reg =  x->shared.reg_ul;
-		irq -= VGIC_NR_PRIVATE_IRQS;
-	}
-
-	if (val)
-		set_bit(irq, reg);
-	else
-		clear_bit(irq, reg);
-}
-
-static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid)
-{
-	if (unlikely(cpuid >= VGIC_MAX_CPUS))
-		return NULL;
-	return x->percpu[cpuid].reg_ul;
-}
-
-static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x)
-{
-	return x->shared.reg_ul;
-}
-
-static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset)
-{
-	offset >>= 2;
-	BUG_ON(offset > (VGIC_NR_IRQS / 4));
-	if (offset < 4)
-		return x->percpu[cpuid] + offset;
-	else
-		return x->shared + offset - 8;
-}
-
-#define VGIC_CFG_LEVEL	0
-#define VGIC_CFG_EDGE	1
-
-static bool vgic_irq_is_edge(struct kvm_vcpu *vcpu, int irq)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-	int irq_val;
-
-	irq_val = vgic_bitmap_get_irq_val(&dist->irq_cfg, vcpu->vcpu_id, irq);
-	return irq_val == VGIC_CFG_EDGE;
-}
-
-static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
-	return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq);
-}
-
-static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
-	return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq);
-}
-
-static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
-	vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1);
-}
-
-static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
-	vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0);
-}
-
-static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
-	return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq);
-}
-
-static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
-	vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 1);
-}
-
-static void vgic_dist_irq_clear(struct kvm_vcpu *vcpu, int irq)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
-	vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 0);
-}
-
-static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq)
-{
-	if (irq < VGIC_NR_PRIVATE_IRQS)
-		set_bit(irq, vcpu->arch.vgic_cpu.pending_percpu);
-	else
-		set_bit(irq - VGIC_NR_PRIVATE_IRQS,
-			vcpu->arch.vgic_cpu.pending_shared);
-}
-
-static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
-{
-	if (irq < VGIC_NR_PRIVATE_IRQS)
-		clear_bit(irq, vcpu->arch.vgic_cpu.pending_percpu);
-	else
-		clear_bit(irq - VGIC_NR_PRIVATE_IRQS,
-			  vcpu->arch.vgic_cpu.pending_shared);
-}
-
-static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask)
-{
-	return *((u32 *)mmio->data) & mask;
-}
-
-static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value)
-{
-	*((u32 *)mmio->data) = value & mask;
-}
-
-/**
- * vgic_reg_access - access vgic register
- * @mmio:   pointer to the data describing the mmio access
- * @reg:    pointer to the virtual backing of vgic distributor data
- * @offset: least significant 2 bits used for word offset
- * @mode:   ACCESS_ mode (see defines above)
- *
- * Helper to make vgic register access easier using one of the access
- * modes defined for vgic register access
- * (read,raz,write-ignored,setbit,clearbit,write)
- */
-static void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg,
-			    phys_addr_t offset, int mode)
-{
-	int word_offset = (offset & 3) * 8;
-	u32 mask = (1UL << (mmio->len * 8)) - 1;
-	u32 regval;
-
-	/*
-	 * Any alignment fault should have been delivered to the guest
-	 * directly (ARM ARM B3.12.7 "Prioritization of aborts").
-	 */
-
-	if (reg) {
-		regval = *reg;
-	} else {
-		BUG_ON(mode != (ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED));
-		regval = 0;
-	}
-
-	if (mmio->is_write) {
-		u32 data = mmio_data_read(mmio, mask) << word_offset;
-		switch (ACCESS_WRITE_MASK(mode)) {
-		case ACCESS_WRITE_IGNORED:
-			return;
-
-		case ACCESS_WRITE_SETBIT:
-			regval |= data;
-			break;
-
-		case ACCESS_WRITE_CLEARBIT:
-			regval &= ~data;
-			break;
-
-		case ACCESS_WRITE_VALUE:
-			regval = (regval & ~(mask << word_offset)) | data;
-			break;
-		}
-		*reg = regval;
-	} else {
-		switch (ACCESS_READ_MASK(mode)) {
-		case ACCESS_READ_RAZ:
-			regval = 0;
-			/* fall through */
-
-		case ACCESS_READ_VALUE:
-			mmio_data_write(mmio, mask, regval >> word_offset);
-		}
-	}
-}
-
-static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
-			     struct kvm_exit_mmio *mmio, phys_addr_t offset)
-{
-	u32 reg;
-	u32 word_offset = offset & 3;
-
-	switch (offset & ~3) {
-	case 0:			/* CTLR */
-		reg = vcpu->kvm->arch.vgic.enabled;
-		vgic_reg_access(mmio, &reg, word_offset,
-				ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
-		if (mmio->is_write) {
-			vcpu->kvm->arch.vgic.enabled = reg & 1;
-			vgic_update_state(vcpu->kvm);
-			return true;
-		}
-		break;
-
-	case 4:			/* TYPER */
-		reg  = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
-		reg |= (VGIC_NR_IRQS >> 5) - 1;
-		vgic_reg_access(mmio, &reg, word_offset,
-				ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
-		break;
-
-	case 8:			/* IIDR */
-		reg = 0x4B00043B;
-		vgic_reg_access(mmio, &reg, word_offset,
-				ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
-		break;
-	}
-
-	return false;
-}
-
-static bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu,
-			       struct kvm_exit_mmio *mmio, phys_addr_t offset)
-{
-	vgic_reg_access(mmio, NULL, offset,
-			ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
-	return false;
-}
-
-static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu,
-				       struct kvm_exit_mmio *mmio,
-				       phys_addr_t offset)
-{
-	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
-				       vcpu->vcpu_id, offset);
-	vgic_reg_access(mmio, reg, offset,
-			ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
-	if (mmio->is_write) {
-		vgic_update_state(vcpu->kvm);
-		return true;
-	}
-
-	return false;
-}
-
-static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu,
-					 struct kvm_exit_mmio *mmio,
-					 phys_addr_t offset)
-{
-	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
-				       vcpu->vcpu_id, offset);
-	vgic_reg_access(mmio, reg, offset,
-			ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
-	if (mmio->is_write) {
-		if (offset < 4) /* Force SGI enabled */
-			*reg |= 0xffff;
-		vgic_retire_disabled_irqs(vcpu);
-		vgic_update_state(vcpu->kvm);
-		return true;
-	}
-
-	return false;
-}
-
-static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
-					struct kvm_exit_mmio *mmio,
-					phys_addr_t offset)
-{
-	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state,
-				       vcpu->vcpu_id, offset);
-	vgic_reg_access(mmio, reg, offset,
-			ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
-	if (mmio->is_write) {
-		vgic_update_state(vcpu->kvm);
-		return true;
-	}
-
-	return false;
-}
-
-static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
-					  struct kvm_exit_mmio *mmio,
-					  phys_addr_t offset)
-{
-	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state,
-				       vcpu->vcpu_id, offset);
-	vgic_reg_access(mmio, reg, offset,
-			ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
-	if (mmio->is_write) {
-		vgic_update_state(vcpu->kvm);
-		return true;
-	}
-
-	return false;
-}
-
-static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu,
-				     struct kvm_exit_mmio *mmio,
-				     phys_addr_t offset)
-{
-	u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority,
-					vcpu->vcpu_id, offset);
-	vgic_reg_access(mmio, reg, offset,
-			ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
-	return false;
-}
-
-#define GICD_ITARGETSR_SIZE	32
-#define GICD_CPUTARGETS_BITS	8
-#define GICD_IRQS_PER_ITARGETSR	(GICD_ITARGETSR_SIZE / GICD_CPUTARGETS_BITS)
-static u32 vgic_get_target_reg(struct kvm *kvm, int irq)
-{
-	struct vgic_dist *dist = &kvm->arch.vgic;
-	struct kvm_vcpu *vcpu;
-	int i, c;
-	unsigned long *bmap;
-	u32 val = 0;
-
-	irq -= VGIC_NR_PRIVATE_IRQS;
-
-	kvm_for_each_vcpu(c, vcpu, kvm) {
-		bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]);
-		for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++)
-			if (test_bit(irq + i, bmap))
-				val |= 1 << (c + i * 8);
-	}
-
-	return val;
-}
-
-static void vgic_set_target_reg(struct kvm *kvm, u32 val, int irq)
-{
-	struct vgic_dist *dist = &kvm->arch.vgic;
-	struct kvm_vcpu *vcpu;
-	int i, c;
-	unsigned long *bmap;
-	u32 target;
-
-	irq -= VGIC_NR_PRIVATE_IRQS;
-
-	/*
-	 * Pick the LSB in each byte. This ensures we target exactly
-	 * one vcpu per IRQ. If the byte is null, assume we target
-	 * CPU0.
-	 */
-	for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) {
-		int shift = i * GICD_CPUTARGETS_BITS;
-		target = ffs((val >> shift) & 0xffU);
-		target = target ? (target - 1) : 0;
-		dist->irq_spi_cpu[irq + i] = target;
-		kvm_for_each_vcpu(c, vcpu, kvm) {
-			bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]);
-			if (c == target)
-				set_bit(irq + i, bmap);
-			else
-				clear_bit(irq + i, bmap);
-		}
-	}
-}
-
-static bool handle_mmio_target_reg(struct kvm_vcpu *vcpu,
-				   struct kvm_exit_mmio *mmio,
-				   phys_addr_t offset)
-{
-	u32 reg;
-
-	/* We treat the banked interrupts targets as read-only */
-	if (offset < 32) {
-		u32 roreg = 1 << vcpu->vcpu_id;
-		roreg |= roreg << 8;
-		roreg |= roreg << 16;
-
-		vgic_reg_access(mmio, &roreg, offset,
-				ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
-		return false;
-	}
-
-	reg = vgic_get_target_reg(vcpu->kvm, offset & ~3U);
-	vgic_reg_access(mmio, &reg, offset,
-			ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
-	if (mmio->is_write) {
-		vgic_set_target_reg(vcpu->kvm, reg, offset & ~3U);
-		vgic_update_state(vcpu->kvm);
-		return true;
-	}
-
-	return false;
-}
-
-static u32 vgic_cfg_expand(u16 val)
-{
-	u32 res = 0;
-	int i;
-
-	/*
-	 * Turn a 16bit value like abcd...mnop into a 32bit word
-	 * a0b0c0d0...m0n0o0p0, which is what the HW cfg register is.
-	 */
-	for (i = 0; i < 16; i++)
-		res |= ((val >> i) & VGIC_CFG_EDGE) << (2 * i + 1);
-
-	return res;
-}
-
-static u16 vgic_cfg_compress(u32 val)
-{
-	u16 res = 0;
-	int i;
-
-	/*
-	 * Turn a 32bit word a0b0c0d0...m0n0o0p0 into 16bit value like
-	 * abcd...mnop which is what we really care about.
-	 */
-	for (i = 0; i < 16; i++)
-		res |= ((val >> (i * 2 + 1)) & VGIC_CFG_EDGE) << i;
-
-	return res;
-}
-
-/*
- * The distributor uses 2 bits per IRQ for the CFG register, but the
- * LSB is always 0. As such, we only keep the upper bit, and use the
- * two above functions to compress/expand the bits
- */
-static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
-				struct kvm_exit_mmio *mmio, phys_addr_t offset)
-{
-	u32 val;
-	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
-				       vcpu->vcpu_id, offset >> 1);
-	if (offset & 2)
-		val = *reg >> 16;
-	else
-		val = *reg & 0xffff;
-
-	val = vgic_cfg_expand(val);
-	vgic_reg_access(mmio, &val, offset,
-			ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
-	if (mmio->is_write) {
-		if (offset < 4) {
-			*reg = ~0U; /* Force PPIs/SGIs to 1 */
-			return false;
-		}
-
-		val = vgic_cfg_compress(val);
-		if (offset & 2) {
-			*reg &= 0xffff;
-			*reg |= val << 16;
-		} else {
-			*reg &= 0xffff << 16;
-			*reg |= val;
-		}
-	}
-
-	return false;
-}
-
-static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
-				struct kvm_exit_mmio *mmio, phys_addr_t offset)
-{
-	u32 reg;
-	vgic_reg_access(mmio, &reg, offset,
-			ACCESS_READ_RAZ | ACCESS_WRITE_VALUE);
-	if (mmio->is_write) {
-		vgic_dispatch_sgi(vcpu, reg);
-		vgic_update_state(vcpu->kvm);
-		return true;
-	}
-
-	return false;
-}
-
-/*
- * I would have liked to use the kvm_bus_io_*() API instead, but it
- * cannot cope with banked registers (only the VM pointer is passed
- * around, and we need the vcpu). One of these days, someone please
- * fix it!
- */
-struct mmio_range {
-	phys_addr_t base;
-	unsigned long len;
-	bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
-			    phys_addr_t offset);
-};
-
-static const struct mmio_range vgic_ranges[] = {
-	{
-		.base		= GIC_DIST_CTRL,
-		.len		= 12,
-		.handle_mmio	= handle_mmio_misc,
-	},
-	{
-		.base		= GIC_DIST_IGROUP,
-		.len		= VGIC_NR_IRQS / 8,
-		.handle_mmio	= handle_mmio_raz_wi,
-	},
-	{
-		.base		= GIC_DIST_ENABLE_SET,
-		.len		= VGIC_NR_IRQS / 8,
-		.handle_mmio	= handle_mmio_set_enable_reg,
-	},
-	{
-		.base		= GIC_DIST_ENABLE_CLEAR,
-		.len		= VGIC_NR_IRQS / 8,
-		.handle_mmio	= handle_mmio_clear_enable_reg,
-	},
-	{
-		.base		= GIC_DIST_PENDING_SET,
-		.len		= VGIC_NR_IRQS / 8,
-		.handle_mmio	= handle_mmio_set_pending_reg,
-	},
-	{
-		.base		= GIC_DIST_PENDING_CLEAR,
-		.len		= VGIC_NR_IRQS / 8,
-		.handle_mmio	= handle_mmio_clear_pending_reg,
-	},
-	{
-		.base		= GIC_DIST_ACTIVE_SET,
-		.len		= VGIC_NR_IRQS / 8,
-		.handle_mmio	= handle_mmio_raz_wi,
-	},
-	{
-		.base		= GIC_DIST_ACTIVE_CLEAR,
-		.len		= VGIC_NR_IRQS / 8,
-		.handle_mmio	= handle_mmio_raz_wi,
-	},
-	{
-		.base		= GIC_DIST_PRI,
-		.len		= VGIC_NR_IRQS,
-		.handle_mmio	= handle_mmio_priority_reg,
-	},
-	{
-		.base		= GIC_DIST_TARGET,
-		.len		= VGIC_NR_IRQS,
-		.handle_mmio	= handle_mmio_target_reg,
-	},
-	{
-		.base		= GIC_DIST_CONFIG,
-		.len		= VGIC_NR_IRQS / 4,
-		.handle_mmio	= handle_mmio_cfg_reg,
-	},
-	{
-		.base		= GIC_DIST_SOFTINT,
-		.len		= 4,
-		.handle_mmio	= handle_mmio_sgi_reg,
-	},
-	{}
-};
-
-static const
-struct mmio_range *find_matching_range(const struct mmio_range *ranges,
-				       struct kvm_exit_mmio *mmio,
-				       phys_addr_t base)
-{
-	const struct mmio_range *r = ranges;
-	phys_addr_t addr = mmio->phys_addr - base;
-
-	while (r->len) {
-		if (addr >= r->base &&
-		    (addr + mmio->len) <= (r->base + r->len))
-			return r;
-		r++;
-	}
-
-	return NULL;
-}
-
-/**
- * vgic_handle_mmio - handle an in-kernel MMIO access
- * @vcpu:	pointer to the vcpu performing the access
- * @run:	pointer to the kvm_run structure
- * @mmio:	pointer to the data describing the access
- *
- * returns true if the MMIO access has been performed in kernel space,
- * and false if it needs to be emulated in user space.
- */
-bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
-		      struct kvm_exit_mmio *mmio)
-{
-	const struct mmio_range *range;
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-	unsigned long base = dist->vgic_dist_base;
-	bool updated_state;
-	unsigned long offset;
-
-	if (!irqchip_in_kernel(vcpu->kvm) ||
-	    mmio->phys_addr < base ||
-	    (mmio->phys_addr + mmio->len) > (base + KVM_VGIC_V2_DIST_SIZE))
-		return false;
-
-	/* We don't support ldrd / strd or ldm / stm to the emulated vgic */
-	if (mmio->len > 4) {
-		kvm_inject_dabt(vcpu, mmio->phys_addr);
-		return true;
-	}
-
-	range = find_matching_range(vgic_ranges, mmio, base);
-	if (unlikely(!range || !range->handle_mmio)) {
-		pr_warn("Unhandled access %d %08llx %d\n",
-			mmio->is_write, mmio->phys_addr, mmio->len);
-		return false;
-	}
-
-	spin_lock(&vcpu->kvm->arch.vgic.lock);
-	offset = mmio->phys_addr - range->base - base;
-	updated_state = range->handle_mmio(vcpu, mmio, offset);
-	spin_unlock(&vcpu->kvm->arch.vgic.lock);
-	kvm_prepare_mmio(run, mmio);
-	kvm_handle_mmio_return(vcpu, run);
-
-	if (updated_state)
-		vgic_kick_vcpus(vcpu->kvm);
-
-	return true;
-}
-
-static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
-{
-	struct kvm *kvm = vcpu->kvm;
-	struct vgic_dist *dist = &kvm->arch.vgic;
-	int nrcpus = atomic_read(&kvm->online_vcpus);
-	u8 target_cpus;
-	int sgi, mode, c, vcpu_id;
-
-	vcpu_id = vcpu->vcpu_id;
-
-	sgi = reg & 0xf;
-	target_cpus = (reg >> 16) & 0xff;
-	mode = (reg >> 24) & 3;
-
-	switch (mode) {
-	case 0:
-		if (!target_cpus)
-			return;
-
-	case 1:
-		target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff;
-		break;
-
-	case 2:
-		target_cpus = 1 << vcpu_id;
-		break;
-	}
-
-	kvm_for_each_vcpu(c, vcpu, kvm) {
-		if (target_cpus & 1) {
-			/* Flag the SGI as pending */
-			vgic_dist_irq_set(vcpu, sgi);
-			dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id;
-			kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);
-		}
-
-		target_cpus >>= 1;
-	}
-}
-
-static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-	unsigned long *pending, *enabled, *pend_percpu, *pend_shared;
-	unsigned long pending_private, pending_shared;
-	int vcpu_id;
-
-	vcpu_id = vcpu->vcpu_id;
-	pend_percpu = vcpu->arch.vgic_cpu.pending_percpu;
-	pend_shared = vcpu->arch.vgic_cpu.pending_shared;
-
-	pending = vgic_bitmap_get_cpu_map(&dist->irq_state, vcpu_id);
-	enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
-	bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS);
-
-	pending = vgic_bitmap_get_shared_map(&dist->irq_state);
-	enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
-	bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS);
-	bitmap_and(pend_shared, pend_shared,
-		   vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
-		   VGIC_NR_SHARED_IRQS);
-
-	pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS);
-	pending_shared = find_first_bit(pend_shared, VGIC_NR_SHARED_IRQS);
-	return (pending_private < VGIC_NR_PRIVATE_IRQS ||
-		pending_shared < VGIC_NR_SHARED_IRQS);
-}
-
-/*
- * Update the interrupt state and determine which CPUs have pending
- * interrupts. Must be called with distributor lock held.
- */
-static void vgic_update_state(struct kvm *kvm)
-{
-	struct vgic_dist *dist = &kvm->arch.vgic;
-	struct kvm_vcpu *vcpu;
-	int c;
-
-	if (!dist->enabled) {
-		set_bit(0, &dist->irq_pending_on_cpu);
-		return;
-	}
-
-	kvm_for_each_vcpu(c, vcpu, kvm) {
-		if (compute_pending_for_cpu(vcpu)) {
-			pr_debug("CPU%d has pending interrupts\n", c);
-			set_bit(c, &dist->irq_pending_on_cpu);
-		}
-	}
-}
-
-#define LR_CPUID(lr)	\
-	(((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT)
-#define MK_LR_PEND(src, irq)	\
-	(GICH_LR_PENDING_BIT | ((src) << GICH_LR_PHYSID_CPUID_SHIFT) | (irq))
-
-/*
- * An interrupt may have been disabled after being made pending on the
- * CPU interface (the classic case is a timer running while we're
- * rebooting the guest - the interrupt would kick as soon as the CPU
- * interface gets enabled, with deadly consequences).
- *
- * The solution is to examine already active LRs, and check the
- * interrupt is still enabled. If not, just retire it.
- */
-static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
-{
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-	int lr;
-
-	for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
-		int irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
-
-		if (!vgic_irq_is_enabled(vcpu, irq)) {
-			vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
-			clear_bit(lr, vgic_cpu->lr_used);
-			vgic_cpu->vgic_lr[lr] &= ~GICH_LR_STATE;
-			if (vgic_irq_is_active(vcpu, irq))
-				vgic_irq_clear_active(vcpu, irq);
-		}
-	}
-}
-
-/*
- * Queue an interrupt to a CPU virtual interface. Return true on success,
- * or false if it wasn't possible to queue it.
- */
-static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
-{
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-	int lr;
-
-	/* Sanitize the input... */
-	BUG_ON(sgi_source_id & ~7);
-	BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS);
-	BUG_ON(irq >= VGIC_NR_IRQS);
-
-	kvm_debug("Queue IRQ%d\n", irq);
-
-	lr = vgic_cpu->vgic_irq_lr_map[irq];
-
-	/* Do we have an active interrupt for the same CPUID? */
-	if (lr != LR_EMPTY &&
-	    (LR_CPUID(vgic_cpu->vgic_lr[lr]) == sgi_source_id)) {
-		kvm_debug("LR%d piggyback for IRQ%d %x\n",
-			  lr, irq, vgic_cpu->vgic_lr[lr]);
-		BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
-		vgic_cpu->vgic_lr[lr] |= GICH_LR_PENDING_BIT;
-		return true;
-	}
-
-	/* Try to use another LR for this interrupt */
-	lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used,
-			       vgic_cpu->nr_lr);
-	if (lr >= vgic_cpu->nr_lr)
-		return false;
-
-	kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
-	vgic_cpu->vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq);
-	vgic_cpu->vgic_irq_lr_map[irq] = lr;
-	set_bit(lr, vgic_cpu->lr_used);
-
-	if (!vgic_irq_is_edge(vcpu, irq))
-		vgic_cpu->vgic_lr[lr] |= GICH_LR_EOI;
-
-	return true;
-}
-
-static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-	unsigned long sources;
-	int vcpu_id = vcpu->vcpu_id;
-	int c;
-
-	sources = dist->irq_sgi_sources[vcpu_id][irq];
-
-	for_each_set_bit(c, &sources, VGIC_MAX_CPUS) {
-		if (vgic_queue_irq(vcpu, c, irq))
-			clear_bit(c, &sources);
-	}
-
-	dist->irq_sgi_sources[vcpu_id][irq] = sources;
-
-	/*
-	 * If the sources bitmap has been cleared it means that we
-	 * could queue all the SGIs onto link registers (see the
-	 * clear_bit above), and therefore we are done with them in
-	 * our emulated gic and can get rid of them.
-	 */
-	if (!sources) {
-		vgic_dist_irq_clear(vcpu, irq);
-		vgic_cpu_irq_clear(vcpu, irq);
-		return true;
-	}
-
-	return false;
-}
-
-static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
-{
-	if (vgic_irq_is_active(vcpu, irq))
-		return true; /* level interrupt, already queued */
-
-	if (vgic_queue_irq(vcpu, 0, irq)) {
-		if (vgic_irq_is_edge(vcpu, irq)) {
-			vgic_dist_irq_clear(vcpu, irq);
-			vgic_cpu_irq_clear(vcpu, irq);
-		} else {
-			vgic_irq_set_active(vcpu, irq);
-		}
-
-		return true;
-	}
-
-	return false;
-}
-
-/*
- * Fill the list registers with pending interrupts before running the
- * guest.
- */
-static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
-{
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-	int i, vcpu_id;
-	int overflow = 0;
-
-	vcpu_id = vcpu->vcpu_id;
-
-	/*
-	 * We may not have any pending interrupt, or the interrupts
-	 * may have been serviced from another vcpu. In all cases,
-	 * move along.
-	 */
-	if (!kvm_vgic_vcpu_pending_irq(vcpu)) {
-		pr_debug("CPU%d has no pending interrupt\n", vcpu_id);
-		goto epilog;
-	}
-
-	/* SGIs */
-	for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) {
-		if (!vgic_queue_sgi(vcpu, i))
-			overflow = 1;
-	}
-
-	/* PPIs */
-	for_each_set_bit_from(i, vgic_cpu->pending_percpu, VGIC_NR_PRIVATE_IRQS) {
-		if (!vgic_queue_hwirq(vcpu, i))
-			overflow = 1;
-	}
-
-	/* SPIs */
-	for_each_set_bit(i, vgic_cpu->pending_shared, VGIC_NR_SHARED_IRQS) {
-		if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS))
-			overflow = 1;
-	}
-
-epilog:
-	if (overflow) {
-		vgic_cpu->vgic_hcr |= GICH_HCR_UIE;
-	} else {
-		vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE;
-		/*
-		 * We're about to run this VCPU, and we've consumed
-		 * everything the distributor had in store for
-		 * us. Claim we don't have anything pending. We'll
-		 * adjust that if needed while exiting.
-		 */
-		clear_bit(vcpu_id, &dist->irq_pending_on_cpu);
-	}
-}
-
-static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
-{
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-	bool level_pending = false;
-
-	kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr);
-
-	if (vgic_cpu->vgic_misr & GICH_MISR_EOI) {
-		/*
-		 * Some level interrupts have been EOIed. Clear their
-		 * active bit.
-		 */
-		int lr, irq;
-
-		for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr,
-				 vgic_cpu->nr_lr) {
-			irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
-
-			vgic_irq_clear_active(vcpu, irq);
-			vgic_cpu->vgic_lr[lr] &= ~GICH_LR_EOI;
-
-			/* Any additional pending interrupt? */
-			if (vgic_dist_irq_is_pending(vcpu, irq)) {
-				vgic_cpu_irq_set(vcpu, irq);
-				level_pending = true;
-			} else {
-				vgic_cpu_irq_clear(vcpu, irq);
-			}
-
-			/*
-			 * Despite being EOIed, the LR may not have
-			 * been marked as empty.
-			 */
-			set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr);
-			vgic_cpu->vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT;
-		}
-	}
-
-	if (vgic_cpu->vgic_misr & GICH_MISR_U)
-		vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE;
-
-	return level_pending;
-}
-
-/*
- * Sync back the VGIC state after a guest run. The distributor lock is
- * needed so we don't get preempted in the middle of the state processing.
- */
-static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
-{
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-	int lr, pending;
-	bool level_pending;
-
-	level_pending = vgic_process_maintenance(vcpu);
-
-	/* Clear mappings for empty LRs */
-	for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr,
-			 vgic_cpu->nr_lr) {
-		int irq;
-
-		if (!test_and_clear_bit(lr, vgic_cpu->lr_used))
-			continue;
-
-		irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
-
-		BUG_ON(irq >= VGIC_NR_IRQS);
-		vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
-	}
-
-	/* Check if we still have something up our sleeve... */
-	pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_elrsr,
-				      vgic_cpu->nr_lr);
-	if (level_pending || pending < vgic_cpu->nr_lr)
-		set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
-}
-
-void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
-	if (!irqchip_in_kernel(vcpu->kvm))
-		return;
-
-	spin_lock(&dist->lock);
-	__kvm_vgic_flush_hwstate(vcpu);
-	spin_unlock(&dist->lock);
-}
-
-void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
-	if (!irqchip_in_kernel(vcpu->kvm))
-		return;
-
-	spin_lock(&dist->lock);
-	__kvm_vgic_sync_hwstate(vcpu);
-	spin_unlock(&dist->lock);
-}
-
-int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
-	if (!irqchip_in_kernel(vcpu->kvm))
-		return 0;
-
-	return test_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
-}
-
-static void vgic_kick_vcpus(struct kvm *kvm)
-{
-	struct kvm_vcpu *vcpu;
-	int c;
-
-	/*
-	 * We've injected an interrupt, time to find out who deserves
-	 * a good kick...
-	 */
-	kvm_for_each_vcpu(c, vcpu, kvm) {
-		if (kvm_vgic_vcpu_pending_irq(vcpu))
-			kvm_vcpu_kick(vcpu);
-	}
-}
-
-static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
-{
-	int is_edge = vgic_irq_is_edge(vcpu, irq);
-	int state = vgic_dist_irq_is_pending(vcpu, irq);
-
-	/*
-	 * Only inject an interrupt if:
-	 * - edge triggered and we have a rising edge
-	 * - level triggered and we change level
-	 */
-	if (is_edge)
-		return level > state;
-	else
-		return level != state;
-}
-
-static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
-				  unsigned int irq_num, bool level)
-{
-	struct vgic_dist *dist = &kvm->arch.vgic;
-	struct kvm_vcpu *vcpu;
-	int is_edge, is_level;
-	int enabled;
-	bool ret = true;
-
-	spin_lock(&dist->lock);
-
-	vcpu = kvm_get_vcpu(kvm, cpuid);
-	is_edge = vgic_irq_is_edge(vcpu, irq_num);
-	is_level = !is_edge;
-
-	if (!vgic_validate_injection(vcpu, irq_num, level)) {
-		ret = false;
-		goto out;
-	}
-
-	if (irq_num >= VGIC_NR_PRIVATE_IRQS) {
-		cpuid = dist->irq_spi_cpu[irq_num - VGIC_NR_PRIVATE_IRQS];
-		vcpu = kvm_get_vcpu(kvm, cpuid);
-	}
-
-	kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid);
-
-	if (level)
-		vgic_dist_irq_set(vcpu, irq_num);
-	else
-		vgic_dist_irq_clear(vcpu, irq_num);
-
-	enabled = vgic_irq_is_enabled(vcpu, irq_num);
-
-	if (!enabled) {
-		ret = false;
-		goto out;
-	}
-
-	if (is_level && vgic_irq_is_active(vcpu, irq_num)) {
-		/*
-		 * Level interrupt in progress, will be picked up
-		 * when EOId.
-		 */
-		ret = false;
-		goto out;
-	}
-
-	if (level) {
-		vgic_cpu_irq_set(vcpu, irq_num);
-		set_bit(cpuid, &dist->irq_pending_on_cpu);
-	}
-
-out:
-	spin_unlock(&dist->lock);
-
-	return ret;
-}
-
-/**
- * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
- * @kvm:     The VM structure pointer
- * @cpuid:   The CPU for PPIs
- * @irq_num: The IRQ number that is assigned to the device
- * @level:   Edge-triggered:  true:  to trigger the interrupt
- *			      false: to ignore the call
- *	     Level-sensitive  true:  activates an interrupt
- *			      false: deactivates an interrupt
- *
- * The GIC is not concerned with devices being active-LOW or active-HIGH for
- * level-sensitive interrupts.  You can think of the level parameter as 1
- * being HIGH and 0 being LOW and all devices being active-HIGH.
- */
-int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
-			bool level)
-{
-	if (vgic_update_irq_state(kvm, cpuid, irq_num, level))
-		vgic_kick_vcpus(kvm);
-
-	return 0;
-}
-
-static irqreturn_t vgic_maintenance_handler(int irq, void *data)
-{
-	/*
-	 * We cannot rely on the vgic maintenance interrupt to be
-	 * delivered synchronously. This means we can only use it to
-	 * exit the VM, and we perform the handling of EOIed
-	 * interrupts on the exit path (see vgic_process_maintenance).
-	 */
-	return IRQ_HANDLED;
-}
-
-int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
-{
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-	int i;
-
-	if (!irqchip_in_kernel(vcpu->kvm))
-		return 0;
-
-	if (vcpu->vcpu_id >= VGIC_MAX_CPUS)
-		return -EBUSY;
-
-	for (i = 0; i < VGIC_NR_IRQS; i++) {
-		if (i < VGIC_NR_PPIS)
-			vgic_bitmap_set_irq_val(&dist->irq_enabled,
-						vcpu->vcpu_id, i, 1);
-		if (i < VGIC_NR_PRIVATE_IRQS)
-			vgic_bitmap_set_irq_val(&dist->irq_cfg,
-						vcpu->vcpu_id, i, VGIC_CFG_EDGE);
-
-		vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY;
-	}
-
-	/*
-	 * By forcing VMCR to zero, the GIC will restore the binary
-	 * points to their reset values. Anything else resets to zero
-	 * anyway.
-	 */
-	vgic_cpu->vgic_vmcr = 0;
-
-	vgic_cpu->nr_lr = vgic_nr_lr;
-	vgic_cpu->vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */
-
-	return 0;
-}
-
-static void vgic_init_maintenance_interrupt(void *info)
-{
-	enable_percpu_irq(vgic_maint_irq, 0);
-}
-
-static int vgic_cpu_notify(struct notifier_block *self,
-			   unsigned long action, void *cpu)
-{
-	switch (action) {
-	case CPU_STARTING:
-	case CPU_STARTING_FROZEN:
-		vgic_init_maintenance_interrupt(NULL);
-		break;
-	case CPU_DYING:
-	case CPU_DYING_FROZEN:
-		disable_percpu_irq(vgic_maint_irq);
-		break;
-	}
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block vgic_cpu_nb = {
-	.notifier_call = vgic_cpu_notify,
-};
-
-int kvm_vgic_hyp_init(void)
-{
-	int ret;
-	struct resource vctrl_res;
-	struct resource vcpu_res;
-
-	vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic");
-	if (!vgic_node) {
-		kvm_err("error: no compatible vgic node in DT\n");
-		return -ENODEV;
-	}
-
-	vgic_maint_irq = irq_of_parse_and_map(vgic_node, 0);
-	if (!vgic_maint_irq) {
-		kvm_err("error getting vgic maintenance irq from DT\n");
-		ret = -ENXIO;
-		goto out;
-	}
-
-	ret = request_percpu_irq(vgic_maint_irq, vgic_maintenance_handler,
-				 "vgic", kvm_get_running_vcpus());
-	if (ret) {
-		kvm_err("Cannot register interrupt %d\n", vgic_maint_irq);
-		goto out;
-	}
-
-	ret = register_cpu_notifier(&vgic_cpu_nb);
-	if (ret) {
-		kvm_err("Cannot register vgic CPU notifier\n");
-		goto out_free_irq;
-	}
-
-	ret = of_address_to_resource(vgic_node, 2, &vctrl_res);
-	if (ret) {
-		kvm_err("Cannot obtain VCTRL resource\n");
-		goto out_free_irq;
-	}
-
-	vgic_vctrl_base = of_iomap(vgic_node, 2);
-	if (!vgic_vctrl_base) {
-		kvm_err("Cannot ioremap VCTRL\n");
-		ret = -ENOMEM;
-		goto out_free_irq;
-	}
-
-	vgic_nr_lr = readl_relaxed(vgic_vctrl_base + GICH_VTR);
-	vgic_nr_lr = (vgic_nr_lr & 0x3f) + 1;
-
-	ret = create_hyp_io_mappings(vgic_vctrl_base,
-				     vgic_vctrl_base + resource_size(&vctrl_res),
-				     vctrl_res.start);
-	if (ret) {
-		kvm_err("Cannot map VCTRL into hyp\n");
-		goto out_unmap;
-	}
-
-	kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
-		 vctrl_res.start, vgic_maint_irq);
-	on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
-
-	if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
-		kvm_err("Cannot obtain VCPU resource\n");
-		ret = -ENXIO;
-		goto out_unmap;
-	}
-	vgic_vcpu_base = vcpu_res.start;
-
-	goto out;
-
-out_unmap:
-	iounmap(vgic_vctrl_base);
-out_free_irq:
-	free_percpu_irq(vgic_maint_irq, kvm_get_running_vcpus());
-out:
-	of_node_put(vgic_node);
-	return ret;
-}
-
-int kvm_vgic_init(struct kvm *kvm)
-{
-	int ret = 0, i;
-
-	mutex_lock(&kvm->lock);
-
-	if (vgic_initialized(kvm))
-		goto out;
-
-	if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
-	    IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) {
-		kvm_err("Need to set vgic cpu and dist addresses first\n");
-		ret = -ENXIO;
-		goto out;
-	}
-
-	ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
-				    vgic_vcpu_base, KVM_VGIC_V2_CPU_SIZE);
-	if (ret) {
-		kvm_err("Unable to remap VGIC CPU to VCPU\n");
-		goto out;
-	}
-
-	for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4)
-		vgic_set_target_reg(kvm, 0, i);
-
-	kvm_timer_init(kvm);
-	kvm->arch.vgic.ready = true;
-out:
-	mutex_unlock(&kvm->lock);
-	return ret;
-}
-
-int kvm_vgic_create(struct kvm *kvm)
-{
-	int ret = 0;
-
-	mutex_lock(&kvm->lock);
-
-	if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) {
-		ret = -EEXIST;
-		goto out;
-	}
-
-	spin_lock_init(&kvm->arch.vgic.lock);
-	kvm->arch.vgic.vctrl_base = vgic_vctrl_base;
-	kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
-	kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
-
-out:
-	mutex_unlock(&kvm->lock);
-	return ret;
-}
-
-static bool vgic_ioaddr_overlap(struct kvm *kvm)
-{
-	phys_addr_t dist = kvm->arch.vgic.vgic_dist_base;
-	phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base;
-
-	if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu))
-		return 0;
-	if ((dist <= cpu && dist + KVM_VGIC_V2_DIST_SIZE > cpu) ||
-	    (cpu <= dist && cpu + KVM_VGIC_V2_CPU_SIZE > dist))
-		return -EBUSY;
-	return 0;
-}
-
-static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
-			      phys_addr_t addr, phys_addr_t size)
-{
-	int ret;
-
-	if (!IS_VGIC_ADDR_UNDEF(*ioaddr))
-		return -EEXIST;
-	if (addr + size < addr)
-		return -EINVAL;
-
-	ret = vgic_ioaddr_overlap(kvm);
-	if (ret)
-		return ret;
-	*ioaddr = addr;
-	return ret;
-}
-
-int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr)
-{
-	int r = 0;
-	struct vgic_dist *vgic = &kvm->arch.vgic;
-
-	if (addr & ~KVM_PHYS_MASK)
-		return -E2BIG;
-
-	if (addr & (SZ_4K - 1))
-		return -EINVAL;
-
-	mutex_lock(&kvm->lock);
-	switch (type) {
-	case KVM_VGIC_V2_ADDR_TYPE_DIST:
-		r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base,
-				       addr, KVM_VGIC_V2_DIST_SIZE);
-		break;
-	case KVM_VGIC_V2_ADDR_TYPE_CPU:
-		r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base,
-				       addr, KVM_VGIC_V2_CPU_SIZE);
-		break;
-	default:
-		r = -ENODEV;
-	}
-
-	mutex_unlock(&kvm->lock);
-	return r;
-}
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
index 805e3f8fb007..3bc8eb811a73 100644
--- a/arch/arm/lib/copy_template.S
+++ b/arch/arm/lib/copy_template.S
@@ -197,24 +197,24 @@
 
 12:	PLD(	pld	[r1, #124]		)
 13:		ldr4w	r1, r4, r5, r6, r7, abort=19f
-		mov	r3, lr, pull #\pull
+		mov	r3, lr, lspull #\pull
 		subs	r2, r2, #32
 		ldr4w	r1, r8, r9, ip, lr, abort=19f
-		orr	r3, r3, r4, push #\push
-		mov	r4, r4, pull #\pull
-		orr	r4, r4, r5, push #\push
-		mov	r5, r5, pull #\pull
-		orr	r5, r5, r6, push #\push
-		mov	r6, r6, pull #\pull
-		orr	r6, r6, r7, push #\push
-		mov	r7, r7, pull #\pull
-		orr	r7, r7, r8, push #\push
-		mov	r8, r8, pull #\pull
-		orr	r8, r8, r9, push #\push
-		mov	r9, r9, pull #\pull
-		orr	r9, r9, ip, push #\push
-		mov	ip, ip, pull #\pull
-		orr	ip, ip, lr, push #\push
+		orr	r3, r3, r4, lspush #\push
+		mov	r4, r4, lspull #\pull
+		orr	r4, r4, r5, lspush #\push
+		mov	r5, r5, lspull #\pull
+		orr	r5, r5, r6, lspush #\push
+		mov	r6, r6, lspull #\pull
+		orr	r6, r6, r7, lspush #\push
+		mov	r7, r7, lspull #\pull
+		orr	r7, r7, r8, lspush #\push
+		mov	r8, r8, lspull #\pull
+		orr	r8, r8, r9, lspush #\push
+		mov	r9, r9, lspull #\pull
+		orr	r9, r9, ip, lspush #\push
+		mov	ip, ip, lspull #\pull
+		orr	ip, ip, lr, lspush #\push
 		str8w	r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f
 		bge	12b
 	PLD(	cmn	r2, #96			)
@@ -225,10 +225,10 @@
 14:		ands	ip, r2, #28
 		beq	16f
 
-15:		mov	r3, lr, pull #\pull
+15:		mov	r3, lr, lspull #\pull
 		ldr1w	r1, lr, abort=21f
 		subs	ip, ip, #4
-		orr	r3, r3, lr, push #\push
+		orr	r3, r3, lr, lspush #\push
 		str1w	r0, r3, abort=21f
 		bgt	15b
 	CALGN(	cmp	r2, #0			)
diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S
index d620a5f22a09..d6e742d24007 100644
--- a/arch/arm/lib/csumpartialcopygeneric.S
+++ b/arch/arm/lib/csumpartialcopygeneric.S
@@ -141,7 +141,7 @@ FN_ENTRY
 		tst	len, #2
 		mov	r5, r4, get_byte_0
 		beq	.Lexit
-		adcs	sum, sum, r4, push #16
+		adcs	sum, sum, r4, lspush #16
 		strb	r5, [dst], #1
 		mov	r5, r4, get_byte_1
 		strb	r5, [dst], #1
@@ -171,23 +171,23 @@ FN_ENTRY
 		cmp	ip, #2
 		beq	.Lsrc2_aligned
 		bhi	.Lsrc3_aligned
-		mov	r4, r5, pull #8		@ C = 0
+		mov	r4, r5, lspull #8		@ C = 0
 		bics	ip, len, #15
 		beq	2f
 1:		load4l	r5, r6, r7, r8
-		orr	r4, r4, r5, push #24
-		mov	r5, r5, pull #8
-		orr	r5, r5, r6, push #24
-		mov	r6, r6, pull #8
-		orr	r6, r6, r7, push #24
-		mov	r7, r7, pull #8
-		orr	r7, r7, r8, push #24
+		orr	r4, r4, r5, lspush #24
+		mov	r5, r5, lspull #8
+		orr	r5, r5, r6, lspush #24
+		mov	r6, r6, lspull #8
+		orr	r6, r6, r7, lspush #24
+		mov	r7, r7, lspull #8
+		orr	r7, r7, r8, lspush #24
 		stmia	dst!, {r4, r5, r6, r7}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
 		adcs	sum, sum, r6
 		adcs	sum, sum, r7
-		mov	r4, r8, pull #8
+		mov	r4, r8, lspull #8
 		sub	ip, ip, #16
 		teq	ip, #0
 		bne	1b
@@ -196,50 +196,50 @@ FN_ENTRY
 		tst	ip, #8
 		beq	3f
 		load2l	r5, r6
-		orr	r4, r4, r5, push #24
-		mov	r5, r5, pull #8
-		orr	r5, r5, r6, push #24
+		orr	r4, r4, r5, lspush #24
+		mov	r5, r5, lspull #8
+		orr	r5, r5, r6, lspush #24
 		stmia	dst!, {r4, r5}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
-		mov	r4, r6, pull #8
+		mov	r4, r6, lspull #8
 		tst	ip, #4
 		beq	4f
 3:		load1l	r5
-		orr	r4, r4, r5, push #24
+		orr	r4, r4, r5, lspush #24
 		str	r4, [dst], #4
 		adcs	sum, sum, r4
-		mov	r4, r5, pull #8
+		mov	r4, r5, lspull #8
 4:		ands	len, len, #3
 		beq	.Ldone
 		mov	r5, r4, get_byte_0
 		tst	len, #2
 		beq	.Lexit
-		adcs	sum, sum, r4, push #16
+		adcs	sum, sum, r4, lspush #16
 		strb	r5, [dst], #1
 		mov	r5, r4, get_byte_1
 		strb	r5, [dst], #1
 		mov	r5, r4, get_byte_2
 		b	.Lexit
 
-.Lsrc2_aligned:	mov	r4, r5, pull #16
+.Lsrc2_aligned:	mov	r4, r5, lspull #16
 		adds	sum, sum, #0
 		bics	ip, len, #15
 		beq	2f
 1:		load4l	r5, r6, r7, r8
-		orr	r4, r4, r5, push #16
-		mov	r5, r5, pull #16
-		orr	r5, r5, r6, push #16
-		mov	r6, r6, pull #16
-		orr	r6, r6, r7, push #16
-		mov	r7, r7, pull #16
-		orr	r7, r7, r8, push #16
+		orr	r4, r4, r5, lspush #16
+		mov	r5, r5, lspull #16
+		orr	r5, r5, r6, lspush #16
+		mov	r6, r6, lspull #16
+		orr	r6, r6, r7, lspush #16
+		mov	r7, r7, lspull #16
+		orr	r7, r7, r8, lspush #16
 		stmia	dst!, {r4, r5, r6, r7}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
 		adcs	sum, sum, r6
 		adcs	sum, sum, r7
-		mov	r4, r8, pull #16
+		mov	r4, r8, lspull #16
 		sub	ip, ip, #16
 		teq	ip, #0
 		bne	1b
@@ -248,20 +248,20 @@ FN_ENTRY
 		tst	ip, #8
 		beq	3f
 		load2l	r5, r6
-		orr	r4, r4, r5, push #16
-		mov	r5, r5, pull #16
-		orr	r5, r5, r6, push #16
+		orr	r4, r4, r5, lspush #16
+		mov	r5, r5, lspull #16
+		orr	r5, r5, r6, lspush #16
 		stmia	dst!, {r4, r5}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
-		mov	r4, r6, pull #16
+		mov	r4, r6, lspull #16
 		tst	ip, #4
 		beq	4f
 3:		load1l	r5
-		orr	r4, r4, r5, push #16
+		orr	r4, r4, r5, lspush #16
 		str	r4, [dst], #4
 		adcs	sum, sum, r4
-		mov	r4, r5, pull #16
+		mov	r4, r5, lspull #16
 4:		ands	len, len, #3
 		beq	.Ldone
 		mov	r5, r4, get_byte_0
@@ -276,24 +276,24 @@ FN_ENTRY
 		load1b	r5
 		b	.Lexit
 
-.Lsrc3_aligned:	mov	r4, r5, pull #24
+.Lsrc3_aligned:	mov	r4, r5, lspull #24
 		adds	sum, sum, #0
 		bics	ip, len, #15
 		beq	2f
 1:		load4l	r5, r6, r7, r8
-		orr	r4, r4, r5, push #8
-		mov	r5, r5, pull #24
-		orr	r5, r5, r6, push #8
-		mov	r6, r6, pull #24
-		orr	r6, r6, r7, push #8
-		mov	r7, r7, pull #24
-		orr	r7, r7, r8, push #8
+		orr	r4, r4, r5, lspush #8
+		mov	r5, r5, lspull #24
+		orr	r5, r5, r6, lspush #8
+		mov	r6, r6, lspull #24
+		orr	r6, r6, r7, lspush #8
+		mov	r7, r7, lspull #24
+		orr	r7, r7, r8, lspush #8
 		stmia	dst!, {r4, r5, r6, r7}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
 		adcs	sum, sum, r6
 		adcs	sum, sum, r7
-		mov	r4, r8, pull #24
+		mov	r4, r8, lspull #24
 		sub	ip, ip, #16
 		teq	ip, #0
 		bne	1b
@@ -302,20 +302,20 @@ FN_ENTRY
 		tst	ip, #8
 		beq	3f
 		load2l	r5, r6
-		orr	r4, r4, r5, push #8
-		mov	r5, r5, pull #24
-		orr	r5, r5, r6, push #8
+		orr	r4, r4, r5, lspush #8
+		mov	r5, r5, lspull #24
+		orr	r5, r5, r6, lspush #8
 		stmia	dst!, {r4, r5}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
-		mov	r4, r6, pull #24
+		mov	r4, r6, lspull #24
 		tst	ip, #4
 		beq	4f
 3:		load1l	r5
-		orr	r4, r4, r5, push #8
+		orr	r4, r4, r5, lspush #8
 		str	r4, [dst], #4
 		adcs	sum, sum, r4
-		mov	r4, r5, pull #24
+		mov	r4, r5, lspull #24
 4:		ands	len, len, #3
 		beq	.Ldone
 		mov	r5, r4, get_byte_0
@@ -326,7 +326,7 @@ FN_ENTRY
 		load1l	r4
 		mov	r5, r4, get_byte_0
 		strb	r5, [dst], #1
-		adcs	sum, sum, r4, push #24
+		adcs	sum, sum, r4, lspush #24
 		mov	r5, r4, get_byte_1
 		b	.Lexit
 FN_EXIT
diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S
index 5fb97e7f9f4b..7a7430950c79 100644
--- a/arch/arm/lib/io-readsl.S
+++ b/arch/arm/lib/io-readsl.S
@@ -47,25 +47,25 @@ ENTRY(__raw_readsl)
 		strb	ip, [r1], #1
 
 4:		subs	r2, r2, #1
-		mov	ip, r3, pull #24
+		mov	ip, r3, lspull #24
 		ldrne	r3, [r0]
-		orrne	ip, ip, r3, push #8
+		orrne	ip, ip, r3, lspush #8
 		strne	ip, [r1], #4
 		bne	4b
 		b	8f
 
 5:		subs	r2, r2, #1
-		mov	ip, r3, pull #16
+		mov	ip, r3, lspull #16
 		ldrne	r3, [r0]
-		orrne	ip, ip, r3, push #16
+		orrne	ip, ip, r3, lspush #16
 		strne	ip, [r1], #4
 		bne	5b
 		b	7f
 
 6:		subs	r2, r2, #1
-		mov	ip, r3, pull #8
+		mov	ip, r3, lspull #8
 		ldrne	r3, [r0]
-		orrne	ip, ip, r3, push #24
+		orrne	ip, ip, r3, lspush #24
 		strne	ip, [r1], #4
 		bne	6b
 
diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S
index 8d3b7813725c..d0d104a0dd11 100644
--- a/arch/arm/lib/io-writesl.S
+++ b/arch/arm/lib/io-writesl.S
@@ -41,26 +41,26 @@ ENTRY(__raw_writesl)
 		blt	5f
 		bgt	6f
 
-4:		mov	ip, r3, pull #16
+4:		mov	ip, r3, lspull #16
 		ldr	r3, [r1], #4
 		subs	r2, r2, #1
-		orr	ip, ip, r3, push #16
+		orr	ip, ip, r3, lspush #16
 		str	ip, [r0]
 		bne	4b
 		mov	pc, lr
 
-5:		mov	ip, r3, pull #8
+5:		mov	ip, r3, lspull #8
 		ldr	r3, [r1], #4
 		subs	r2, r2, #1
-		orr	ip, ip, r3, push #24
+		orr	ip, ip, r3, lspush #24
 		str	ip, [r0]
 		bne	5b
 		mov	pc, lr
 
-6:		mov	ip, r3, pull #24
+6:		mov	ip, r3, lspull #24
 		ldr	r3, [r1], #4
 		subs	r2, r2, #1
-		orr	ip, ip, r3, push #8
+		orr	ip, ip, r3, lspush #8
 		str	ip, [r0]
 		bne	6b
 		mov	pc, lr
diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
index 938fc14f962d..d1fc0c0c342c 100644
--- a/arch/arm/lib/memmove.S
+++ b/arch/arm/lib/memmove.S
@@ -147,24 +147,24 @@ ENTRY(memmove)
 
 12:	PLD(	pld	[r1, #-128]		)
 13:		ldmdb   r1!, {r7, r8, r9, ip}
-		mov     lr, r3, push #\push
+		mov     lr, r3, lspush #\push
 		subs    r2, r2, #32
 		ldmdb   r1!, {r3, r4, r5, r6}
-		orr     lr, lr, ip, pull #\pull
-		mov     ip, ip, push #\push
-		orr     ip, ip, r9, pull #\pull
-		mov     r9, r9, push #\push
-		orr     r9, r9, r8, pull #\pull
-		mov     r8, r8, push #\push
-		orr     r8, r8, r7, pull #\pull
-		mov     r7, r7, push #\push
-		orr     r7, r7, r6, pull #\pull
-		mov     r6, r6, push #\push
-		orr     r6, r6, r5, pull #\pull
-		mov     r5, r5, push #\push
-		orr     r5, r5, r4, pull #\pull
-		mov     r4, r4, push #\push
-		orr     r4, r4, r3, pull #\pull
+		orr     lr, lr, ip, lspull #\pull
+		mov     ip, ip, lspush #\push
+		orr     ip, ip, r9, lspull #\pull
+		mov     r9, r9, lspush #\push
+		orr     r9, r9, r8, lspull #\pull
+		mov     r8, r8, lspush #\push
+		orr     r8, r8, r7, lspull #\pull
+		mov     r7, r7, lspush #\push
+		orr     r7, r7, r6, lspull #\pull
+		mov     r6, r6, lspush #\push
+		orr     r6, r6, r5, lspull #\pull
+		mov     r5, r5, lspush #\push
+		orr     r5, r5, r4, lspull #\pull
+		mov     r4, r4, lspush #\push
+		orr     r4, r4, r3, lspull #\pull
 		stmdb   r0!, {r4 - r9, ip, lr}
 		bge	12b
 	PLD(	cmn	r2, #96			)
@@ -175,10 +175,10 @@ ENTRY(memmove)
 14:		ands	ip, r2, #28
 		beq	16f
 
-15:		mov     lr, r3, push #\push
+15:		mov     lr, r3, lspush #\push
 		ldr	r3, [r1, #-4]!
 		subs	ip, ip, #4
-		orr	lr, lr, r3, pull #\pull
+		orr	lr, lr, r3, lspull #\pull
 		str	lr, [r0, #-4]!
 		bgt	15b
 	CALGN(	cmp	r2, #0			)
diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S
index 5c908b1cb8ed..e50520904b76 100644
--- a/arch/arm/lib/uaccess.S
+++ b/arch/arm/lib/uaccess.S
@@ -117,9 +117,9 @@ USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault
 .Lc2u_1fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lc2u_1nowords
-		mov	r3, r7, pull #8
+		mov	r3, r7, lspull #8
 		ldr	r7, [r1], #4
-		orr	r3, r3, r7, push #24
+		orr	r3, r3, r7, lspush #24
 USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
 		mov	ip, r0, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -131,30 +131,30 @@ USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
 		subs	ip, ip, #16
 		blt	.Lc2u_1rem8lp
 
-.Lc2u_1cpy8lp:	mov	r3, r7, pull #8
+.Lc2u_1cpy8lp:	mov	r3, r7, lspull #8
 		ldmia	r1!, {r4 - r7}
 		subs	ip, ip, #16
-		orr	r3, r3, r4, push #24
-		mov	r4, r4, pull #8
-		orr	r4, r4, r5, push #24
-		mov	r5, r5, pull #8
-		orr	r5, r5, r6, push #24
-		mov	r6, r6, pull #8
-		orr	r6, r6, r7, push #24
+		orr	r3, r3, r4, lspush #24
+		mov	r4, r4, lspull #8
+		orr	r4, r4, r5, lspush #24
+		mov	r5, r5, lspull #8
+		orr	r5, r5, r6, lspush #24
+		mov	r6, r6, lspull #8
+		orr	r6, r6, r7, lspush #24
 		stmia	r0!, {r3 - r6}			@ Shouldnt fault
 		bpl	.Lc2u_1cpy8lp
 
 .Lc2u_1rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #8
+		movne	r3, r7, lspull #8
 		ldmneia	r1!, {r4, r7}
-		orrne	r3, r3, r4, push #24
-		movne	r4, r4, pull #8
-		orrne	r4, r4, r7, push #24
+		orrne	r3, r3, r4, lspush #24
+		movne	r4, r4, lspull #8
+		orrne	r4, r4, r7, lspush #24
 		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
 		tst	ip, #4
-		movne	r3, r7, pull #8
+		movne	r3, r7, lspull #8
 		ldrne	r7, [r1], #4
-		orrne	r3, r3, r7, push #24
+		orrne	r3, r3, r7, lspush #24
 	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault
 		ands	ip, ip, #3
 		beq	.Lc2u_1fupi
@@ -172,9 +172,9 @@ USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault
 .Lc2u_2fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lc2u_2nowords
-		mov	r3, r7, pull #16
+		mov	r3, r7, lspull #16
 		ldr	r7, [r1], #4
-		orr	r3, r3, r7, push #16
+		orr	r3, r3, r7, lspush #16
 USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
 		mov	ip, r0, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -186,30 +186,30 @@ USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
 		subs	ip, ip, #16
 		blt	.Lc2u_2rem8lp
 
-.Lc2u_2cpy8lp:	mov	r3, r7, pull #16
+.Lc2u_2cpy8lp:	mov	r3, r7, lspull #16
 		ldmia	r1!, {r4 - r7}
 		subs	ip, ip, #16
-		orr	r3, r3, r4, push #16
-		mov	r4, r4, pull #16
-		orr	r4, r4, r5, push #16
-		mov	r5, r5, pull #16
-		orr	r5, r5, r6, push #16
-		mov	r6, r6, pull #16
-		orr	r6, r6, r7, push #16
+		orr	r3, r3, r4, lspush #16
+		mov	r4, r4, lspull #16
+		orr	r4, r4, r5, lspush #16
+		mov	r5, r5, lspull #16
+		orr	r5, r5, r6, lspush #16
+		mov	r6, r6, lspull #16
+		orr	r6, r6, r7, lspush #16
 		stmia	r0!, {r3 - r6}			@ Shouldnt fault
 		bpl	.Lc2u_2cpy8lp
 
 .Lc2u_2rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #16
+		movne	r3, r7, lspull #16
 		ldmneia	r1!, {r4, r7}
-		orrne	r3, r3, r4, push #16
-		movne	r4, r4, pull #16
-		orrne	r4, r4, r7, push #16
+		orrne	r3, r3, r4, lspush #16
+		movne	r4, r4, lspull #16
+		orrne	r4, r4, r7, lspush #16
 		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
 		tst	ip, #4
-		movne	r3, r7, pull #16
+		movne	r3, r7, lspull #16
 		ldrne	r7, [r1], #4
-		orrne	r3, r3, r7, push #16
+		orrne	r3, r3, r7, lspush #16
 	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault
 		ands	ip, ip, #3
 		beq	.Lc2u_2fupi
@@ -227,9 +227,9 @@ USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault
 .Lc2u_3fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lc2u_3nowords
-		mov	r3, r7, pull #24
+		mov	r3, r7, lspull #24
 		ldr	r7, [r1], #4
-		orr	r3, r3, r7, push #8
+		orr	r3, r3, r7, lspush #8
 USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
 		mov	ip, r0, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -241,30 +241,30 @@ USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
 		subs	ip, ip, #16
 		blt	.Lc2u_3rem8lp
 
-.Lc2u_3cpy8lp:	mov	r3, r7, pull #24
+.Lc2u_3cpy8lp:	mov	r3, r7, lspull #24
 		ldmia	r1!, {r4 - r7}
 		subs	ip, ip, #16
-		orr	r3, r3, r4, push #8
-		mov	r4, r4, pull #24
-		orr	r4, r4, r5, push #8
-		mov	r5, r5, pull #24
-		orr	r5, r5, r6, push #8
-		mov	r6, r6, pull #24
-		orr	r6, r6, r7, push #8
+		orr	r3, r3, r4, lspush #8
+		mov	r4, r4, lspull #24
+		orr	r4, r4, r5, lspush #8
+		mov	r5, r5, lspull #24
+		orr	r5, r5, r6, lspush #8
+		mov	r6, r6, lspull #24
+		orr	r6, r6, r7, lspush #8
 		stmia	r0!, {r3 - r6}			@ Shouldnt fault
 		bpl	.Lc2u_3cpy8lp
 
 .Lc2u_3rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #24
+		movne	r3, r7, lspull #24
 		ldmneia	r1!, {r4, r7}
-		orrne	r3, r3, r4, push #8
-		movne	r4, r4, pull #24
-		orrne	r4, r4, r7, push #8
+		orrne	r3, r3, r4, lspush #8
+		movne	r4, r4, lspull #24
+		orrne	r4, r4, r7, lspush #8
 		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
 		tst	ip, #4
-		movne	r3, r7, pull #24
+		movne	r3, r7, lspull #24
 		ldrne	r7, [r1], #4
-		orrne	r3, r3, r7, push #8
+		orrne	r3, r3, r7, lspush #8
 	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault
 		ands	ip, ip, #3
 		beq	.Lc2u_3fupi
@@ -382,9 +382,9 @@ USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
 .Lcfu_1fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lcfu_1nowords
-		mov	r3, r7, pull #8
+		mov	r3, r7, lspull #8
 USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
-		orr	r3, r3, r7, push #24
+		orr	r3, r3, r7, lspush #24
 		str	r3, [r0], #4
 		mov	ip, r1, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -396,30 +396,30 @@ USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
 		subs	ip, ip, #16
 		blt	.Lcfu_1rem8lp
 
-.Lcfu_1cpy8lp:	mov	r3, r7, pull #8
+.Lcfu_1cpy8lp:	mov	r3, r7, lspull #8
 		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
 		subs	ip, ip, #16
-		orr	r3, r3, r4, push #24
-		mov	r4, r4, pull #8
-		orr	r4, r4, r5, push #24
-		mov	r5, r5, pull #8
-		orr	r5, r5, r6, push #24
-		mov	r6, r6, pull #8
-		orr	r6, r6, r7, push #24
+		orr	r3, r3, r4, lspush #24
+		mov	r4, r4, lspull #8
+		orr	r4, r4, r5, lspush #24
+		mov	r5, r5, lspull #8
+		orr	r5, r5, r6, lspush #24
+		mov	r6, r6, lspull #8
+		orr	r6, r6, r7, lspush #24
 		stmia	r0!, {r3 - r6}
 		bpl	.Lcfu_1cpy8lp
 
 .Lcfu_1rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #8
+		movne	r3, r7, lspull #8
 		ldmneia	r1!, {r4, r7}			@ Shouldnt fault
-		orrne	r3, r3, r4, push #24
-		movne	r4, r4, pull #8
-		orrne	r4, r4, r7, push #24
+		orrne	r3, r3, r4, lspush #24
+		movne	r4, r4, lspull #8
+		orrne	r4, r4, r7, lspush #24
 		stmneia	r0!, {r3 - r4}
 		tst	ip, #4
-		movne	r3, r7, pull #8
+		movne	r3, r7, lspull #8
 USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault
-		orrne	r3, r3, r7, push #24
+		orrne	r3, r3, r7, lspush #24
 		strne	r3, [r0], #4
 		ands	ip, ip, #3
 		beq	.Lcfu_1fupi
@@ -437,9 +437,9 @@ USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault
 .Lcfu_2fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lcfu_2nowords
-		mov	r3, r7, pull #16
+		mov	r3, r7, lspull #16
 USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
-		orr	r3, r3, r7, push #16
+		orr	r3, r3, r7, lspush #16
 		str	r3, [r0], #4
 		mov	ip, r1, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -452,30 +452,30 @@ USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
 		blt	.Lcfu_2rem8lp
 
 
-.Lcfu_2cpy8lp:	mov	r3, r7, pull #16
+.Lcfu_2cpy8lp:	mov	r3, r7, lspull #16
 		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
 		subs	ip, ip, #16
-		orr	r3, r3, r4, push #16
-		mov	r4, r4, pull #16
-		orr	r4, r4, r5, push #16
-		mov	r5, r5, pull #16
-		orr	r5, r5, r6, push #16
-		mov	r6, r6, pull #16
-		orr	r6, r6, r7, push #16
+		orr	r3, r3, r4, lspush #16
+		mov	r4, r4, lspull #16
+		orr	r4, r4, r5, lspush #16
+		mov	r5, r5, lspull #16
+		orr	r5, r5, r6, lspush #16
+		mov	r6, r6, lspull #16
+		orr	r6, r6, r7, lspush #16
 		stmia	r0!, {r3 - r6}
 		bpl	.Lcfu_2cpy8lp
 
 .Lcfu_2rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #16
+		movne	r3, r7, lspull #16
 		ldmneia	r1!, {r4, r7}			@ Shouldnt fault
-		orrne	r3, r3, r4, push #16
-		movne	r4, r4, pull #16
-		orrne	r4, r4, r7, push #16
+		orrne	r3, r3, r4, lspush #16
+		movne	r4, r4, lspull #16
+		orrne	r4, r4, r7, lspush #16
 		stmneia	r0!, {r3 - r4}
 		tst	ip, #4
-		movne	r3, r7, pull #16
+		movne	r3, r7, lspull #16
 USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault
-		orrne	r3, r3, r7, push #16
+		orrne	r3, r3, r7, lspush #16
 		strne	r3, [r0], #4
 		ands	ip, ip, #3
 		beq	.Lcfu_2fupi
@@ -493,9 +493,9 @@ USER(	TUSER(	ldrgtb) r3, [r1], #0)			@ May fault
 .Lcfu_3fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lcfu_3nowords
-		mov	r3, r7, pull #24
+		mov	r3, r7, lspull #24
 USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
-		orr	r3, r3, r7, push #8
+		orr	r3, r3, r7, lspush #8
 		str	r3, [r0], #4
 		mov	ip, r1, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -507,30 +507,30 @@ USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
 		subs	ip, ip, #16
 		blt	.Lcfu_3rem8lp
 
-.Lcfu_3cpy8lp:	mov	r3, r7, pull #24
+.Lcfu_3cpy8lp:	mov	r3, r7, lspull #24
 		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
-		orr	r3, r3, r4, push #8
-		mov	r4, r4, pull #24
-		orr	r4, r4, r5, push #8
-		mov	r5, r5, pull #24
-		orr	r5, r5, r6, push #8
-		mov	r6, r6, pull #24
-		orr	r6, r6, r7, push #8
+		orr	r3, r3, r4, lspush #8
+		mov	r4, r4, lspull #24
+		orr	r4, r4, r5, lspush #8
+		mov	r5, r5, lspull #24
+		orr	r5, r5, r6, lspush #8
+		mov	r6, r6, lspull #24
+		orr	r6, r6, r7, lspush #8
 		stmia	r0!, {r3 - r6}
 		subs	ip, ip, #16
 		bpl	.Lcfu_3cpy8lp
 
 .Lcfu_3rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #24
+		movne	r3, r7, lspull #24
 		ldmneia	r1!, {r4, r7}			@ Shouldnt fault
-		orrne	r3, r3, r4, push #8
-		movne	r4, r4, pull #24
-		orrne	r4, r4, r7, push #8
+		orrne	r3, r3, r4, lspush #8
+		movne	r4, r4, lspull #24
+		orrne	r4, r4, r7, lspush #8
 		stmneia	r0!, {r3 - r4}
 		tst	ip, #4
-		movne	r3, r7, pull #24
+		movne	r3, r7, lspull #24
 USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault
-		orrne	r3, r3, r7, push #8
+		orrne	r3, r3, r7, lspush #8
 		strne	r3, [r0], #4
 		ands	ip, ip, #3
 		beq	.Lcfu_3fupi
diff --git a/arch/arm/mach-davinci/board-dm355-leopard.c b/arch/arm/mach-davinci/board-dm355-leopard.c
index dff4ddc5ef81..139e42da25f0 100644
--- a/arch/arm/mach-davinci/board-dm355-leopard.c
+++ b/arch/arm/mach-davinci/board-dm355-leopard.c
@@ -75,6 +75,7 @@ static struct davinci_nand_pdata davinci_nand_data = {
 	.parts			= davinci_nand_partitions,
 	.nr_parts		= ARRAY_SIZE(davinci_nand_partitions),
 	.ecc_mode		= NAND_ECC_HW_SYNDROME,
+	.ecc_bits		= 4,
 	.bbt_options		= NAND_BBT_USE_FLASH,
 };
 
diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c
index a33686a6fbb2..fa4bfaf952d8 100644
--- a/arch/arm/mach-davinci/board-dm644x-evm.c
+++ b/arch/arm/mach-davinci/board-dm644x-evm.c
@@ -153,6 +153,7 @@ static struct davinci_nand_pdata davinci_evm_nandflash_data = {
 	.parts		= davinci_evm_nandflash_partition,
 	.nr_parts	= ARRAY_SIZE(davinci_evm_nandflash_partition),
 	.ecc_mode	= NAND_ECC_HW,
+	.ecc_bits	= 1,
 	.bbt_options	= NAND_BBT_USE_FLASH,
 	.timing		= &davinci_evm_nandflash_timing,
 };
diff --git a/arch/arm/mach-davinci/board-dm646x-evm.c b/arch/arm/mach-davinci/board-dm646x-evm.c
index fbb8e5ab1dc1..0c005e876cac 100644
--- a/arch/arm/mach-davinci/board-dm646x-evm.c
+++ b/arch/arm/mach-davinci/board-dm646x-evm.c
@@ -90,6 +90,7 @@ static struct davinci_nand_pdata davinci_nand_data = {
 	.parts			= davinci_nand_partitions,
 	.nr_parts		= ARRAY_SIZE(davinci_nand_partitions),
 	.ecc_mode		= NAND_ECC_HW,
+	.ecc_bits		= 1,
 	.options		= 0,
 };
 
diff --git a/arch/arm/mach-davinci/board-neuros-osd2.c b/arch/arm/mach-davinci/board-neuros-osd2.c
index 2bc112adf565..808233b60e3d 100644
--- a/arch/arm/mach-davinci/board-neuros-osd2.c
+++ b/arch/arm/mach-davinci/board-neuros-osd2.c
@@ -88,6 +88,7 @@ static struct davinci_nand_pdata davinci_ntosd2_nandflash_data = {
 	.parts		= davinci_ntosd2_nandflash_partition,
 	.nr_parts	= ARRAY_SIZE(davinci_ntosd2_nandflash_partition),
 	.ecc_mode	= NAND_ECC_HW,
+	.ecc_bits	= 1,
 	.bbt_options	= NAND_BBT_USE_FLASH,
 };
 
diff --git a/arch/arm/mach-footbridge/dc21285.c b/arch/arm/mach-footbridge/dc21285.c
index a7cd2cf5e08d..3490a24f969e 100644
--- a/arch/arm/mach-footbridge/dc21285.c
+++ b/arch/arm/mach-footbridge/dc21285.c
@@ -276,8 +276,6 @@ int __init dc21285_setup(int nr, struct pci_sys_data *sys)
 
 	sys->mem_offset  = DC21285_PCI_MEM;
 
-	pci_ioremap_io(0, DC21285_PCI_IO);
-
 	pci_add_resource_offset(&sys->resources, &res[0], sys->mem_offset);
 	pci_add_resource_offset(&sys->resources, &res[1], sys->mem_offset);
 
diff --git a/arch/arm/mach-s3c24xx/clock-s3c2410.c b/arch/arm/mach-s3c24xx/clock-s3c2410.c
index 34fffdf6fc1d..564553694b54 100644
--- a/arch/arm/mach-s3c24xx/clock-s3c2410.c
+++ b/arch/arm/mach-s3c24xx/clock-s3c2410.c
@@ -119,66 +119,101 @@ static struct clk init_clocks_off[] = {
 	}
 };
 
-static struct clk init_clocks[] = {
-	{
-		.name		= "lcd",
-		.parent		= &clk_h,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_LCDC,
-	}, {
-		.name		= "gpio",
-		.parent		= &clk_p,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_GPIO,
-	}, {
-		.name		= "usb-host",
-		.parent		= &clk_h,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_USBH,
-	}, {
-		.name		= "usb-device",
-		.parent		= &clk_h,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_USBD,
-	}, {
-		.name		= "timers",
-		.parent		= &clk_p,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_PWMT,
-	}, {
-		.name		= "uart",
-		.devname	= "s3c2410-uart.0",
-		.parent		= &clk_p,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_UART0,
-	}, {
-		.name		= "uart",
-		.devname	= "s3c2410-uart.1",
-		.parent		= &clk_p,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_UART1,
-	}, {
-		.name		= "uart",
-		.devname	= "s3c2410-uart.2",
-		.parent		= &clk_p,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_UART2,
-	}, {
-		.name		= "rtc",
-		.parent		= &clk_p,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_RTC,
-	}, {
-		.name		= "watchdog",
-		.parent		= &clk_p,
-		.ctrlbit	= 0,
-	}, {
-		.name		= "usb-bus-host",
-		.parent		= &clk_usb_bus,
-	}, {
-		.name		= "usb-bus-gadget",
-		.parent		= &clk_usb_bus,
-	},
+static struct clk clk_lcd = {
+	.name		= "lcd",
+	.parent		= &clk_h,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_LCDC,
+};
+
+static struct clk clk_gpio = {
+	.name		= "gpio",
+	.parent		= &clk_p,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_GPIO,
+};
+
+static struct clk clk_usb_host = {
+	.name		= "usb-host",
+	.parent		= &clk_h,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_USBH,
+};
+
+static struct clk clk_usb_device = {
+	.name		= "usb-device",
+	.parent		= &clk_h,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_USBD,
+};
+
+static struct clk clk_timers = {
+	.name		= "timers",
+	.parent		= &clk_p,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_PWMT,
+};
+
+struct clk s3c24xx_clk_uart0 = {
+	.name		= "uart",
+	.devname	= "s3c2410-uart.0",
+	.parent		= &clk_p,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_UART0,
+};
+
+struct clk s3c24xx_clk_uart1 = {
+	.name		= "uart",
+	.devname	= "s3c2410-uart.1",
+	.parent		= &clk_p,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_UART1,
+};
+
+struct clk s3c24xx_clk_uart2 = {
+	.name		= "uart",
+	.devname	= "s3c2410-uart.2",
+	.parent		= &clk_p,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_UART2,
+};
+
+static struct clk clk_rtc = {
+	.name		= "rtc",
+	.parent		= &clk_p,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_RTC,
+};
+
+static struct clk clk_watchdog = {
+	.name		= "watchdog",
+	.parent		= &clk_p,
+	.ctrlbit	= 0,
+};
+
+static struct clk clk_usb_bus_host = {
+	.name		= "usb-bus-host",
+	.parent		= &clk_usb_bus,
+};
+
+static struct clk clk_usb_bus_gadget = {
+	.name		= "usb-bus-gadget",
+	.parent		= &clk_usb_bus,
+};
+
+static struct clk *init_clocks[] = {
+	&clk_lcd,
+	&clk_gpio,
+	&clk_usb_host,
+	&clk_usb_device,
+	&clk_timers,
+	&s3c24xx_clk_uart0,
+	&s3c24xx_clk_uart1,
+	&s3c24xx_clk_uart2,
+	&clk_rtc,
+	&clk_watchdog,
+	&clk_usb_bus_host,
+	&clk_usb_bus_gadget,
 };
 
 /* s3c2410_baseclk_add()
@@ -195,7 +230,6 @@ int __init s3c2410_baseclk_add(void)
 {
 	unsigned long clkslow = __raw_readl(S3C2410_CLKSLOW);
 	unsigned long clkcon  = __raw_readl(S3C2410_CLKCON);
-	struct clk *clkp;
 	struct clk *xtal;
 	int ret;
 	int ptr;
@@ -207,8 +241,9 @@ int __init s3c2410_baseclk_add(void)
 
 	/* register clocks from clock array */
 
-	clkp = init_clocks;
-	for (ptr = 0; ptr < ARRAY_SIZE(init_clocks); ptr++, clkp++) {
+	for (ptr = 0; ptr < ARRAY_SIZE(init_clocks); ptr++) {
+		struct clk *clkp = init_clocks[ptr];
+
 		/* ensure that we note the clock state */
 
 		clkp->usage = clkcon & clkp->ctrlbit ? 1 : 0;
diff --git a/arch/arm/mach-s3c24xx/clock-s3c2440.c b/arch/arm/mach-s3c24xx/clock-s3c2440.c
index 1069b5680826..aaf006d1d6dc 100644
--- a/arch/arm/mach-s3c24xx/clock-s3c2440.c
+++ b/arch/arm/mach-s3c24xx/clock-s3c2440.c
@@ -166,6 +166,9 @@ static struct clk_lookup s3c2440_clk_lookup[] = {
 	CLKDEV_INIT(NULL, "clk_uart_baud1", &s3c24xx_uclk),
 	CLKDEV_INIT(NULL, "clk_uart_baud2", &clk_p),
 	CLKDEV_INIT(NULL, "clk_uart_baud3", &s3c2440_clk_fclk_n),
+	CLKDEV_INIT("s3c2440-uart.0", "uart", &s3c24xx_clk_uart0),
+	CLKDEV_INIT("s3c2440-uart.1", "uart", &s3c24xx_clk_uart1),
+	CLKDEV_INIT("s3c2440-uart.2", "uart", &s3c24xx_clk_uart2),
 	CLKDEV_INIT("s3c2440-camif", "camera", &s3c2440_clk_cam_upll),
 };
 
diff --git a/arch/arm/mach-shmobile/setup-emev2.c b/arch/arm/mach-shmobile/setup-emev2.c
index 899a86c31ec9..1ccddd228112 100644
--- a/arch/arm/mach-shmobile/setup-emev2.c
+++ b/arch/arm/mach-shmobile/setup-emev2.c
@@ -287,14 +287,14 @@ static struct gpio_em_config gio3_config = {
 static struct resource gio3_resources[] = {
 	[0] = {
 		.name	= "GIO_096",
-		.start	= 0xe0050100,
-		.end	= 0xe005012b,
+		.start	= 0xe0050180,
+		.end	= 0xe00501ab,
 		.flags	= IORESOURCE_MEM,
 	},
 	[1] = {
 		.name	= "GIO_096",
-		.start	= 0xe0050140,
-		.end	= 0xe005015f,
+		.start	= 0xe00501c0,
+		.end	= 0xe00501df,
 		.flags	= IORESOURCE_MEM,
 	},
 	[2] = {
diff --git a/arch/arm/mach-shmobile/setup-r8a73a4.c b/arch/arm/mach-shmobile/setup-r8a73a4.c
index c5a75a7a508f..7f45c2edbca9 100644
--- a/arch/arm/mach-shmobile/setup-r8a73a4.c
+++ b/arch/arm/mach-shmobile/setup-r8a73a4.c
@@ -62,7 +62,7 @@ enum { SCIFA0, SCIFA1, SCIFB0, SCIFB1, SCIFB2, SCIFB3 };
 static const struct plat_sci_port scif[] = {
 	SCIFA_DATA(SCIFA0, 0xe6c40000, gic_spi(144)), /* SCIFA0 */
 	SCIFA_DATA(SCIFA1, 0xe6c50000, gic_spi(145)), /* SCIFA1 */
-	SCIFB_DATA(SCIFB0, 0xe6c50000, gic_spi(145)), /* SCIFB0 */
+	SCIFB_DATA(SCIFB0, 0xe6c20000, gic_spi(148)), /* SCIFB0 */
 	SCIFB_DATA(SCIFB1, 0xe6c30000, gic_spi(149)), /* SCIFB1 */
 	SCIFB_DATA(SCIFB2, 0xe6ce0000, gic_spi(150)), /* SCIFB2 */
 	SCIFB_DATA(SCIFB3, 0xe6cf0000, gic_spi(151)), /* SCIFB3 */
diff --git a/arch/arm/mach-versatile/include/mach/platform.h b/arch/arm/mach-versatile/include/mach/platform.h
index ec087407b163..6f938ccb0c54 100644
--- a/arch/arm/mach-versatile/include/mach/platform.h
+++ b/arch/arm/mach-versatile/include/mach/platform.h
@@ -231,12 +231,14 @@
 /* PCI space */
 #define VERSATILE_PCI_BASE             0x41000000	/* PCI Interface */
 #define VERSATILE_PCI_CFG_BASE	       0x42000000
+#define VERSATILE_PCI_IO_BASE          0x43000000
 #define VERSATILE_PCI_MEM_BASE0        0x44000000
 #define VERSATILE_PCI_MEM_BASE1        0x50000000
 #define VERSATILE_PCI_MEM_BASE2        0x60000000
 /* Sizes of above maps */
 #define VERSATILE_PCI_BASE_SIZE	       0x01000000
 #define VERSATILE_PCI_CFG_BASE_SIZE    0x02000000
+#define VERSATILE_PCI_IO_BASE_SIZE     0x01000000
 #define VERSATILE_PCI_MEM_BASE0_SIZE   0x0c000000	/* 32Mb */
 #define VERSATILE_PCI_MEM_BASE1_SIZE   0x10000000	/* 256Mb */
 #define VERSATILE_PCI_MEM_BASE2_SIZE   0x10000000	/* 256Mb */
diff --git a/arch/arm/mach-versatile/pci.c b/arch/arm/mach-versatile/pci.c
index e92e5e0705bc..c97be4ea76d2 100644
--- a/arch/arm/mach-versatile/pci.c
+++ b/arch/arm/mach-versatile/pci.c
@@ -43,9 +43,9 @@
 #define PCI_IMAP0		__IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x0)
 #define PCI_IMAP1		__IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x4)
 #define PCI_IMAP2		__IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x8)
-#define PCI_SMAP0		__IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x10)
-#define PCI_SMAP1		__IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x14)
-#define PCI_SMAP2		__IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x18)
+#define PCI_SMAP0		__IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x14)
+#define PCI_SMAP1		__IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x18)
+#define PCI_SMAP2		__IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x1c)
 #define PCI_SELFID		__IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0xc)
 
 #define DEVICE_ID_OFFSET		0x00
@@ -170,8 +170,8 @@ static struct pci_ops pci_versatile_ops = {
 	.write	= versatile_write_config,
 };
 
-static struct resource io_mem = {
-	.name	= "PCI I/O space",
+static struct resource unused_mem = {
+	.name	= "PCI unused",
 	.start	= VERSATILE_PCI_MEM_BASE0,
 	.end	= VERSATILE_PCI_MEM_BASE0+VERSATILE_PCI_MEM_BASE0_SIZE-1,
 	.flags	= IORESOURCE_MEM,
@@ -195,9 +195,9 @@ static int __init pci_versatile_setup_resources(struct pci_sys_data *sys)
 {
 	int ret = 0;
 
-	ret = request_resource(&iomem_resource, &io_mem);
+	ret = request_resource(&iomem_resource, &unused_mem);
 	if (ret) {
-		printk(KERN_ERR "PCI: unable to allocate I/O "
+		printk(KERN_ERR "PCI: unable to allocate unused "
 		       "memory region (%d)\n", ret);
 		goto out;
 	}
@@ -205,7 +205,7 @@ static int __init pci_versatile_setup_resources(struct pci_sys_data *sys)
 	if (ret) {
 		printk(KERN_ERR "PCI: unable to allocate non-prefetchable "
 		       "memory region (%d)\n", ret);
-		goto release_io_mem;
+		goto release_unused_mem;
 	}
 	ret = request_resource(&iomem_resource, &pre_mem);
 	if (ret) {
@@ -225,8 +225,8 @@ static int __init pci_versatile_setup_resources(struct pci_sys_data *sys)
 
  release_non_mem:
 	release_resource(&non_mem);
- release_io_mem:
-	release_resource(&io_mem);
+ release_unused_mem:
+	release_resource(&unused_mem);
  out:
 	return ret;
 }
@@ -246,7 +246,7 @@ int __init pci_versatile_setup(int nr, struct pci_sys_data *sys)
 		goto out;
 	}
 
-	ret = pci_ioremap_io(0, VERSATILE_PCI_MEM_BASE0);
+	ret = pci_ioremap_io(0, VERSATILE_PCI_IO_BASE);
 	if (ret)
 		goto out;
 
@@ -295,6 +295,19 @@ int __init pci_versatile_setup(int nr, struct pci_sys_data *sys)
 	__raw_writel(PHYS_OFFSET, local_pci_cfg_base + PCI_BASE_ADDRESS_2);
 
 	/*
+	 * For many years the kernel and QEMU were symbiotically buggy
+	 * in that they both assumed the same broken IRQ mapping.
+	 * QEMU therefore attempts to auto-detect old broken kernels
+	 * so that they still work on newer QEMU as they did on old
+	 * QEMU. Since we now use the correct (ie matching-hardware)
+	 * IRQ mapping we write a definitely different value to a
+	 * PCI_INTERRUPT_LINE register to tell QEMU that we expect
+	 * real hardware behaviour and it need not be backwards
+	 * compatible for us. This write is harmless on real hardware.
+	 */
+	__raw_writel(0, VERSATILE_PCI_VIRT_BASE+PCI_INTERRUPT_LINE);
+
+	/*
 	 * Do not to map Versatile FPGA PCI device into memory space
 	 */
 	pci_slot_ignore |= (1 << myslot);
@@ -327,13 +340,13 @@ static int __init versatile_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	int irq;
 
-	/* slot,  pin,	irq
-	 *  24     1     IRQ_SIC_PCI0
-	 *  25     1     IRQ_SIC_PCI1
-	 *  26     1     IRQ_SIC_PCI2
-	 *  27     1     IRQ_SIC_PCI3
+	/*
+	 * Slot	INTA	INTB	INTC	INTD
+	 * 31	PCI1	PCI2	PCI3	PCI0
+	 * 30	PCI0	PCI1	PCI2	PCI3
+	 * 29	PCI3	PCI0	PCI1	PCI2
 	 */
-	irq = IRQ_SIC_PCI0 + ((slot - 24 + pin - 1) & 3);
+	irq = IRQ_SIC_PCI0 + ((slot + 2 + pin - 1) & 3);
 
 	return irq;
 }
diff --git a/arch/arm/mach-vexpress/platsmp.c b/arch/arm/mach-vexpress/platsmp.c
index dc1ace55d557..84bd6fde7a7c 100644
--- a/arch/arm/mach-vexpress/platsmp.c
+++ b/arch/arm/mach-vexpress/platsmp.c
@@ -51,7 +51,7 @@ static int __init vexpress_dt_find_scu(unsigned long node,
 {
 	if (of_flat_dt_match(node, vexpress_dt_cortex_a9_match)) {
 		phys_addr_t phys_addr;
-		__be32 *reg = of_get_flat_dt_prop(node, "reg", NULL);
+		const __be32 *reg = of_get_flat_dt_prop(node, "reg", NULL);
 
 		if (WARN_ON(!reg))
 			return -EINVAL;
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 35955b54944c..08c9fe917d1f 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -411,24 +411,28 @@ config CPU_32v3
 	select CPU_USE_DOMAINS if MMU
 	select NEEDS_SYSCALL_FOR_CMPXCHG if SMP
 	select TLS_REG_EMUL if SMP || !MMU
+	select NEED_KUSER_HELPERS
 
 config CPU_32v4
 	bool
 	select CPU_USE_DOMAINS if MMU
 	select NEEDS_SYSCALL_FOR_CMPXCHG if SMP
 	select TLS_REG_EMUL if SMP || !MMU
+	select NEED_KUSER_HELPERS
 
 config CPU_32v4T
 	bool
 	select CPU_USE_DOMAINS if MMU
 	select NEEDS_SYSCALL_FOR_CMPXCHG if SMP
 	select TLS_REG_EMUL if SMP || !MMU
+	select NEED_KUSER_HELPERS
 
 config CPU_32v5
 	bool
 	select CPU_USE_DOMAINS if MMU
 	select NEEDS_SYSCALL_FOR_CMPXCHG if SMP
 	select TLS_REG_EMUL if SMP || !MMU
+	select NEED_KUSER_HELPERS
 
 config CPU_32v6
 	bool
@@ -756,6 +760,7 @@ config CPU_BPREDICT_DISABLE
 
 config TLS_REG_EMUL
 	bool
+	select NEED_KUSER_HELPERS
 	help
 	  An SMP system using a pre-ARMv6 processor (there are apparently
 	  a few prototypes like that in existence) and therefore access to
@@ -763,11 +768,43 @@ config TLS_REG_EMUL
 
 config NEEDS_SYSCALL_FOR_CMPXCHG
 	bool
+	select NEED_KUSER_HELPERS
 	help
 	  SMP on a pre-ARMv6 processor?  Well OK then.
 	  Forget about fast user space cmpxchg support.
 	  It is just not possible.
 
+config NEED_KUSER_HELPERS
+	bool
+
+config KUSER_HELPERS
+	bool "Enable kuser helpers in vector page" if !NEED_KUSER_HELPERS
+	default y
+	help
+	  Warning: disabling this option may break user programs.
+
+	  Provide kuser helpers in the vector page.  The kernel provides
+	  helper code to userspace in read only form at a fixed location
+	  in the high vector page to allow userspace to be independent of
+	  the CPU type fitted to the system.  This permits binaries to be
+	  run on ARMv4 through to ARMv7 without modification.
+
+	  See Documentation/arm/kernel_user_helpers.txt for details.
+
+	  However, the fixed address nature of these helpers can be used
+	  by ROP (return orientated programming) authors when creating
+	  exploits.
+
+	  If all of the binaries and libraries which run on your platform
+	  are built specifically for your platform, and make no use of
+	  these helpers, then you can turn this option off to hinder
+	  such exploits. However, in that case, if a binary or library
+	  relying on those helpers is run, it will receive a SIGILL signal,
+	  which will terminate the program.
+
+	  Say N here only if you are absolutely certain that you do not
+	  need these helpers; otherwise, the safe option is to say Y.
+
 config DMA_CACHE_RWFO
 	bool "Enable read/write for ownership DMA cache maintenance"
 	depends on CPU_V6K && SMP
diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index 2ac37372ef52..eeab06ebd06e 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -39,19 +39,43 @@
  * non 64-bit operations.
  */
 #define ASID_FIRST_VERSION	(1ULL << ASID_BITS)
-#define NUM_USER_ASIDS		(ASID_FIRST_VERSION - 1)
-
-#define ASID_TO_IDX(asid)	((asid & ~ASID_MASK) - 1)
-#define IDX_TO_ASID(idx)	((idx + 1) & ~ASID_MASK)
+#define NUM_USER_ASIDS		ASID_FIRST_VERSION
 
 static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
 static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION);
 static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS);
 
-DEFINE_PER_CPU(atomic64_t, active_asids);
+static DEFINE_PER_CPU(atomic64_t, active_asids);
 static DEFINE_PER_CPU(u64, reserved_asids);
 static cpumask_t tlb_flush_pending;
 
+#ifdef CONFIG_ARM_ERRATA_798181
+void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm,
+			     cpumask_t *mask)
+{
+	int cpu;
+	unsigned long flags;
+	u64 context_id, asid;
+
+	raw_spin_lock_irqsave(&cpu_asid_lock, flags);
+	context_id = mm->context.id.counter;
+	for_each_online_cpu(cpu) {
+		if (cpu == this_cpu)
+			continue;
+		/*
+		 * We only need to send an IPI if the other CPUs are
+		 * running the same ASID as the one being invalidated.
+		 */
+		asid = per_cpu(active_asids, cpu).counter;
+		if (asid == 0)
+			asid = per_cpu(reserved_asids, cpu);
+		if (context_id == asid)
+			cpumask_set_cpu(cpu, mask);
+	}
+	raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
+}
+#endif
+
 #ifdef CONFIG_ARM_LPAE
 static void cpu_set_reserved_ttbr0(void)
 {
@@ -128,7 +152,16 @@ static void flush_context(unsigned int cpu)
 			asid = 0;
 		} else {
 			asid = atomic64_xchg(&per_cpu(active_asids, i), 0);
-			__set_bit(ASID_TO_IDX(asid), asid_map);
+			/*
+			 * If this CPU has already been through a
+			 * rollover, but hasn't run another task in
+			 * the meantime, we must preserve its reserved
+			 * ASID, as this is the only trace we have of
+			 * the process it is still running.
+			 */
+			if (asid == 0)
+				asid = per_cpu(reserved_asids, i);
+			__set_bit(asid & ~ASID_MASK, asid_map);
 		}
 		per_cpu(reserved_asids, i) = asid;
 	}
@@ -167,17 +200,19 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
 		/*
 		 * Allocate a free ASID. If we can't find one, take a
 		 * note of the currently active ASIDs and mark the TLBs
-		 * as requiring flushes.
+		 * as requiring flushes. We always count from ASID #1,
+		 * as we reserve ASID #0 to switch via TTBR0 and indicate
+		 * rollover events.
 		 */
-		asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS);
+		asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
 		if (asid == NUM_USER_ASIDS) {
 			generation = atomic64_add_return(ASID_FIRST_VERSION,
 							 &asid_generation);
 			flush_context(cpu);
-			asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS);
+			asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
 		}
 		__set_bit(asid, asid_map);
-		asid = generation | IDX_TO_ASID(asid);
+		asid |= generation;
 		cpumask_clear(mm_cpumask(mm));
 	}
 
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index ef3e0f3aac96..1fb40dc37ec2 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -358,7 +358,7 @@ static int __init atomic_pool_init(void)
 	if (!pages)
 		goto no_pages;
 
-	if (IS_ENABLED(CONFIG_CMA))
+	if (IS_ENABLED(CONFIG_DMA_CMA))
 		ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page,
 					      atomic_pool_init);
 	else
@@ -670,7 +670,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 		addr = __alloc_simple_buffer(dev, size, gfp, &page);
 	else if (!(gfp & __GFP_WAIT))
 		addr = __alloc_from_pool(size, &page);
-	else if (!IS_ENABLED(CONFIG_CMA))
+	else if (!IS_ENABLED(CONFIG_DMA_CMA))
 		addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller);
 	else
 		addr = __alloc_from_contiguous(dev, size, prot, &page, caller);
@@ -759,7 +759,7 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
 		__dma_free_buffer(page, size);
 	} else if (__free_from_pool(cpu_addr, size)) {
 		return;
-	} else if (!IS_ENABLED(CONFIG_CMA)) {
+	} else if (!IS_ENABLED(CONFIG_DMA_CMA)) {
 		__dma_free_remap(cpu_addr, size);
 		__dma_free_buffer(page, size);
 	} else {
diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c
index 83cb3ac27095..c0a1e48f6733 100644
--- a/arch/arm/mm/idmap.c
+++ b/arch/arm/mm/idmap.c
@@ -10,6 +10,7 @@
 #include <asm/system_info.h>
 
 pgd_t *idmap_pgd;
+phys_addr_t (*arch_virt_to_idmap) (unsigned long x);
 
 #ifdef CONFIG_ARM_LPAE
 static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end,
@@ -67,8 +68,8 @@ static void identity_mapping_add(pgd_t *pgd, const char *text_start,
 	unsigned long addr, end;
 	unsigned long next;
 
-	addr = virt_to_phys(text_start);
-	end = virt_to_phys(text_end);
+	addr = virt_to_idmap(text_start);
+	end = virt_to_idmap(text_end);
 
 	prot |= PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AF;
 
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 9a5cdc01fcdf..c12ae661d4ab 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -76,7 +76,7 @@ static int __init parse_tag_initrd2(const struct tag *tag)
 __tagtable(ATAG_INITRD2, parse_tag_initrd2);
 
 #ifdef CONFIG_OF_FLATTREE
-void __init early_init_dt_setup_initrd_arch(unsigned long start, unsigned long end)
+void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
 {
 	phys_initrd_start = start;
 	phys_initrd_size = end - start;
@@ -600,7 +600,7 @@ void __init mem_init(void)
 
 #ifdef CONFIG_SA1111
 	/* now that our DMA memory is actually so designated, we can free it */
-	free_reserved_area(__va(PHYS_PFN_OFFSET), swapper_pg_dir, 0, NULL);
+	free_reserved_area(__va(PHYS_OFFSET), swapper_pg_dir, 0, NULL);
 #endif
 
 	free_highpages();
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 4d409e6a552d..daf336fe0be8 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1175,7 +1175,7 @@ static void __init devicemaps_init(struct machine_desc *mdesc)
 	/*
 	 * Allocate the vector page early.
 	 */
-	vectors = early_alloc(PAGE_SIZE);
+	vectors = early_alloc(PAGE_SIZE * 2);
 
 	early_trap_init(vectors);
 
@@ -1220,15 +1220,27 @@ static void __init devicemaps_init(struct machine_desc *mdesc)
 	map.pfn = __phys_to_pfn(virt_to_phys(vectors));
 	map.virtual = 0xffff0000;
 	map.length = PAGE_SIZE;
+#ifdef CONFIG_KUSER_HELPERS
 	map.type = MT_HIGH_VECTORS;
+#else
+	map.type = MT_LOW_VECTORS;
+#endif
 	create_mapping(&map);
 
 	if (!vectors_high()) {
 		map.virtual = 0;
+		map.length = PAGE_SIZE * 2;
 		map.type = MT_LOW_VECTORS;
 		create_mapping(&map);
 	}
 
+	/* Now create a kernel read-only mapping */
+	map.pfn += 1;
+	map.virtual = 0xffff0000 + PAGE_SIZE;
+	map.length = PAGE_SIZE;
+	map.type = MT_LOW_VECTORS;
+	create_mapping(&map);
+
 	/*
 	 * Ask the machine support to map in the statically mapped devices.
 	 */
diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S
index 9704097c450e..b3997c70af32 100644
--- a/arch/arm/mm/proc-v7-2level.S
+++ b/arch/arm/mm/proc-v7-2level.S
@@ -110,7 +110,7 @@ ENTRY(cpu_v7_set_pte_ext)
  ARM(	str	r3, [r0, #2048]! )
  THUMB(	add	r0, r0, #2048 )
  THUMB(	str	r3, [r0] )
-	ALT_SMP(mov	pc,lr)
+	ALT_SMP(W(nop))
 	ALT_UP (mcr	p15, 0, r0, c7, c10, 1)		@ flush_pte
 #endif
 	mov	pc, lr
diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S
index 363027e811d6..6ba4bd9118f2 100644
--- a/arch/arm/mm/proc-v7-3level.S
+++ b/arch/arm/mm/proc-v7-3level.S
@@ -73,7 +73,7 @@ ENTRY(cpu_v7_set_pte_ext)
 	tst	r3, #1 << (55 - 32)		@ L_PTE_DIRTY
 	orreq	r2, #L_PTE_RDONLY
 1:	strd	r2, r3, [r0]
-	ALT_SMP(mov	pc, lr)
+	ALT_SMP(W(nop))
 	ALT_UP (mcr	p15, 0, r0, c7, c10, 1)		@ flush_pte
 #endif
 	mov	pc, lr
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index e35fec34453e..5fbccee5f644 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -75,13 +75,14 @@ ENTRY(cpu_v7_do_idle)
 ENDPROC(cpu_v7_do_idle)
 
 ENTRY(cpu_v7_dcache_clean_area)
-	ALT_SMP(mov	pc, lr)			@ MP extensions imply L1 PTW
-	ALT_UP(W(nop))
-	dcache_line_size r2, r3
-1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	ALT_SMP(W(nop))			@ MP extensions imply L1 PTW
+	ALT_UP_B(1f)
+	mov	pc, lr
+1:	dcache_line_size r2, r3
+2:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
 	add	r0, r0, r2
 	subs	r1, r1, r2
-	bhi	1b
+	bhi	2b
 	dsb
 	mov	pc, lr
 ENDPROC(cpu_v7_dcache_clean_area)
diff --git a/arch/arm/plat-samsung/include/plat/clock.h b/arch/arm/plat-samsung/include/plat/clock.h
index a62753dc15ba..df45d6edc98d 100644
--- a/arch/arm/plat-samsung/include/plat/clock.h
+++ b/arch/arm/plat-samsung/include/plat/clock.h
@@ -83,6 +83,11 @@ extern struct clk clk_ext;
 extern struct clksrc_clk clk_epllref;
 extern struct clksrc_clk clk_esysclk;
 
+/* S3C24XX UART clocks */
+extern struct clk s3c24xx_clk_uart0;
+extern struct clk s3c24xx_clk_uart1;
+extern struct clk s3c24xx_clk_uart2;
+
 /* S3C64XX specific clocks */
 extern struct clk clk_h2;
 extern struct clk clk_27m;
diff --git a/arch/arm/plat-samsung/s5p-dev-mfc.c b/arch/arm/plat-samsung/s5p-dev-mfc.c
index a93fb6fb6606..586ca73d1059 100644
--- a/arch/arm/plat-samsung/s5p-dev-mfc.c
+++ b/arch/arm/plat-samsung/s5p-dev-mfc.c
@@ -116,8 +116,8 @@ device_initcall(s5p_mfc_memory_init);
 int __init s5p_fdt_find_mfc_mem(unsigned long node, const char *uname,
 				int depth, void *data)
 {
-	__be32 *prop;
-	unsigned long len;
+	const __be32 *prop;
+	int len;
 	struct s5p_mfc_dt_meminfo *mfc_mem = data;
 
 	if (!data)
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 13609e01f4b7..81edd31bb4ac 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -170,6 +170,7 @@ static void __init xen_percpu_init(void *unused)
 	per_cpu(xen_vcpu, cpu) = vcpup;
 
 	enable_percpu_irq(xen_events_irq, 0);
+	put_cpu();
 }
 
 static void xen_restart(char str, const char *cmd)
@@ -272,12 +273,15 @@ core_initcall(xen_guest_init);
 
 static int __init xen_pm_init(void)
 {
+	if (!xen_domain())
+		return -ENODEV;
+
 	pm_power_off = xen_power_off;
 	arm_pm_restart = xen_restart;
 
 	return 0;
 }
-subsys_initcall(xen_pm_init);
+late_initcall(xen_pm_init);
 
 static irqreturn_t xen_arm_callback(int irq, void *arg)
 {