169 files changed, 5781 insertions, 630 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 136f263ed47..1116be551be 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -19,7 +19,6 @@ config ARM
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select HARDIRQS_SW_RESEND
-	select HAVE_AOUT
 	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_SECCOMP_FILTER
@@ -213,7 +212,8 @@ config VECTORS_BASE
 	default DRAM_BASE if REMAP_VECTORS_TO_RAM
 	default 0x00000000
 	help
-	  The base address of exception vectors.
+	  The base address of exception vectors.  This must be two pages
+	  in size.
 
 config ARM_PATCH_PHYS_VIRT
 	bool "Patch physical to virtual translations at runtime" if EMBEDDED
@@ -1494,6 +1494,109 @@ config SCHED_SMT
 	  MultiThreading at a cost of slightly increased overhead in some
 	  places. If unsure say N here.
 
+config DISABLE_CPU_SCHED_DOMAIN_BALANCE
+	bool "(EXPERIMENTAL) Disable CPU level scheduler load-balancing"
+	help
+	  Disables scheduler load-balancing at CPU sched domain level.
+
+config SCHED_HMP
+	bool "(EXPERIMENTAL) Heterogenous multiprocessor scheduling"
+	depends on DISABLE_CPU_SCHED_DOMAIN_BALANCE && SCHED_MC && FAIR_GROUP_SCHED && !SCHED_AUTOGROUP
+	help
+	  Experimental scheduler optimizations for heterogeneous platforms.
+	  Attempts to introspectively select task affinity to optimize power
+	  and performance. Basic support for multiple (>2) cpu types is in place,
+	  but it has only been tested with two types of cpus.
+	  There is currently no support for migration of task groups, hence
+	  !SCHED_AUTOGROUP. Furthermore, normal load-balancing must be disabled
+	  between cpus of different type (DISABLE_CPU_SCHED_DOMAIN_BALANCE).
+	  When turned on, this option adds sys/kernel/hmp directory which
+	  contains the following files:
+	  up_threshold - the load average threshold used for up migration
+	                 (0 - 1023)
+	  down_threshold - the load average threshold used for down migration
+	                 (0 - 1023)
+	  hmp_domains - a list of cpumasks for the present HMP domains,
+	                starting with the 'biggest' and ending with the
+	                'smallest'.
+	  Note that both the threshold files can be written at runtime to
+	  control scheduler behaviour.
+
+config SCHED_HMP_PRIO_FILTER
+	bool "(EXPERIMENTAL) Filter HMP migrations by task priority"
+	depends on SCHED_HMP
+	help
+	  Enables task priority based HMP migration filter. Any task with
+	  a NICE value above the threshold will always be on low-power cpus
+	  with less compute capacity.
+
+config SCHED_HMP_PRIO_FILTER_VAL
+	int "NICE priority threshold"
+	default 5
+	depends on SCHED_HMP_PRIO_FILTER
+
+config HMP_FAST_CPU_MASK
+	string "HMP scheduler fast CPU mask"
+	depends on SCHED_HMP
+	help
+          Leave empty to use device tree information.
+	  Specify the cpuids of the fast CPUs in the system as a list string,
+	  e.g. cpuid 0+1 should be specified as 0-1.
+
+config HMP_SLOW_CPU_MASK
+	string "HMP scheduler slow CPU mask"
+	depends on SCHED_HMP
+	help
+	  Leave empty to use device tree information.
+	  Specify the cpuids of the slow CPUs in the system as a list string,
+	  e.g. cpuid 0+1 should be specified as 0-1.
+
+config HMP_VARIABLE_SCALE
+	bool "Allows changing the load tracking scale through sysfs"
+	depends on SCHED_HMP
+	help
+	  When turned on, this option exports the load average period value
+	  for the load tracking patches through sysfs.
+	  The values can be modified to change the rate of load accumulation
+	  used for HMP migration. 'load_avg_period_ms' is the time in ms to
+	  reach a load average of 0.5 for an idle task of 0 load average
+	  ratio which becomes 100% busy.
+	  For example, with load_avg_period_ms = 128 and up_threshold = 512,
+	  a running task with a load of 0 will be migrated to a bigger CPU after
+	  128ms, because after 128ms its load_avg_ratio is 0.5 and the real
+	  up_threshold is 0.5.
+	  This patch has the same behavior as changing the Y of the load
+	  average computation to
+	        (1002/1024)^(LOAD_AVG_PERIOD/load_avg_period_ms)
+	  but removes intermediate overflows in computation.
+
+config HMP_FREQUENCY_INVARIANT_SCALE
+	bool "(EXPERIMENTAL) Frequency-Invariant Tracked Load for HMP"
+	depends on SCHED_HMP && CPU_FREQ
+	help
+	  Scales the current load contribution in line with the frequency
+	  of the CPU that the task was executed on.
+	  In this version, we use a simple linear scale derived from the
+	  maximum frequency reported by CPUFreq.
+	  Restricting tracked load to be scaled by the CPU's frequency
+	  represents the consumption of possible compute capacity
+	  (rather than consumption of actual instantaneous capacity as
+	  normal) and allows the HMP migration's simple threshold
+	  migration strategy to interact more predictably with CPUFreq's
+	  asynchronous compute capacity changes.
+
+config SCHED_HMP_LITTLE_PACKING
+	bool "Small task packing for HMP"
+	depends on SCHED_HMP
+	default n
+	help
+	  Allows the HMP Scheduler to pack small tasks into CPUs in the
+	  smallest HMP domain.
+	  Controlled by two sysfs files in sys/kernel/hmp.
+	  packing_enable: 1 to enable, 0 to disable packing. Default 1.
+	  packing_limit: runqueue load ratio where a RQ is considered
+	    to be full. Default is NICE_0_LOAD * 9/8.
+
 config HAVE_ARM_SCU
 	bool
 	help
@@ -1521,6 +1624,31 @@ config MCPM
 	  for (multi-)cluster based systems, such as big.LITTLE based
 	  systems.
 
+config BIG_LITTLE
+	bool "big.LITTLE support (Experimental)"
+	depends on CPU_V7 && SMP
+	select MCPM
+	help
+	  This option enables support for the big.LITTLE architecture.
+
+config BL_SWITCHER
+	bool "big.LITTLE switcher support"
+	depends on BIG_LITTLE && MCPM && HOTPLUG_CPU
+	select CPU_PM
+	select ARM_CPU_SUSPEND
+	help
+	  The big.LITTLE "switcher" provides the core functionality to
+	  transparently handle transition between a cluster of A15's
+	  and a cluster of A7's in a big.LITTLE system.
+
+config BL_SWITCHER_DUMMY_IF
+	tristate "Simple big.LITTLE switcher user interface"
+	depends on BL_SWITCHER && DEBUG_KERNEL
+	help
+	  This is a simple and dummy char dev interface to control
+	  the big.LITTLE switcher core code.  It is meant for
+	  debugging purposes only.
+
 choice
 	prompt "Memory split"
 	default VMSPLIT_3G
diff --git a/arch/arm/boot/compressed/atags_to_fdt.c b/arch/arm/boot/compressed/atags_to_fdt.c
index aabc02a6848..d1153c8a765 100644
--- a/arch/arm/boot/compressed/atags_to_fdt.c
+++ b/arch/arm/boot/compressed/atags_to_fdt.c
@@ -53,6 +53,17 @@ static const void *getprop(const void *fdt, const char *node_path,
 	return fdt_getprop(fdt, offset, property, len);
 }
 
+static uint32_t get_cell_size(const void *fdt)
+{
+	int len;
+	uint32_t cell_size = 1;
+	const uint32_t *size_len =  getprop(fdt, "/", "#size-cells", &len);
+
+	if (size_len)
+		cell_size = fdt32_to_cpu(*size_len);
+	return cell_size;
+}
+
 static void merge_fdt_bootargs(void *fdt, const char *fdt_cmdline)
 {
 	char cmdline[COMMAND_LINE_SIZE];
@@ -95,9 +106,11 @@ static void merge_fdt_bootargs(void *fdt, const char *fdt_cmdline)
 int atags_to_fdt(void *atag_list, void *fdt, int total_space)
 {
 	struct tag *atag = atag_list;
-	uint32_t mem_reg_property[2 * NR_BANKS];
+	/* In the case of 64 bits memory size, need to reserve 2 cells for
+	 * address and size for each bank */
+	uint32_t mem_reg_property[2 * 2 * NR_BANKS];
 	int memcount = 0;
-	int ret;
+	int ret, memsize;
 
 	/* make sure we've got an aligned pointer */
 	if ((u32)atag_list & 0x3)
@@ -137,8 +150,25 @@ int atags_to_fdt(void *atag_list, void *fdt, int total_space)
 				continue;
 			if (!atag->u.mem.size)
 				continue;
-			mem_reg_property[memcount++] = cpu_to_fdt32(atag->u.mem.start);
-			mem_reg_property[memcount++] = cpu_to_fdt32(atag->u.mem.size);
+			memsize = get_cell_size(fdt);
+
+			if (memsize == 2) {
+				/* if memsize is 2, that means that
+				 * each data needs 2 cells of 32 bits,
+				 * so the data are 64 bits */
+				uint64_t *mem_reg_prop64 =
+					(uint64_t *)mem_reg_property;
+				mem_reg_prop64[memcount++] =
+					cpu_to_fdt64(atag->u.mem.start);
+				mem_reg_prop64[memcount++] =
+					cpu_to_fdt64(atag->u.mem.size);
+			} else {
+				mem_reg_property[memcount++] =
+					cpu_to_fdt32(atag->u.mem.start);
+				mem_reg_property[memcount++] =
+					cpu_to_fdt32(atag->u.mem.size);
+			}
+
 		} else if (atag->hdr.tag == ATAG_INITRD2) {
 			uint32_t initrd_start, initrd_size;
 			initrd_start = atag->u.initrd.start;
@@ -150,8 +180,10 @@ int atags_to_fdt(void *atag_list, void *fdt, int total_space)
 		}
 	}
 
-	if (memcount)
-		setprop(fdt, "/memory", "reg", mem_reg_property, 4*memcount);
+	if (memcount) {
+		setprop(fdt, "/memory", "reg", mem_reg_property,
+			4 * memcount * memsize);
+	}
 
 	return fdt_pack(fdt);
 }
diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index f0895c581a8..00baf9f5766 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -202,7 +202,14 @@ dtb-$(CONFIG_ARCH_VERSATILE) += versatile-ab.dtb \
 dtb-$(CONFIG_ARCH_VEXPRESS) += vexpress-v2p-ca5s.dtb \
 	vexpress-v2p-ca9.dtb \
 	vexpress-v2p-ca15-tc1.dtb \
-	vexpress-v2p-ca15_a7.dtb
+	vexpress-v2p-ca15_a7.dtb \
+	rtsm_ve-cortex_a9x2.dtb \
+	rtsm_ve-cortex_a9x4.dtb \
+	rtsm_ve-cortex_a15x1.dtb \
+	rtsm_ve-cortex_a15x2.dtb \
+	rtsm_ve-cortex_a15x4.dtb \
+	rtsm_ve-v2p-ca15x1-ca7x1.dtb \
+	rtsm_ve-v2p-ca15x4-ca7x4.dtb
 dtb-$(CONFIG_ARCH_VIRT) += xenvm-4.2.dtb
 dtb-$(CONFIG_ARCH_VT8500) += vt8500-bv07.dtb \
 	wm8505-ref.dtb \
diff --git a/arch/arm/boot/dts/armada-370-xp.dtsi b/arch/arm/boot/dts/armada-370-xp.dtsi
index 550eb772c30..4d12d2347c1 100644
--- a/arch/arm/boot/dts/armada-370-xp.dtsi
+++ b/arch/arm/boot/dts/armada-370-xp.dtsi
@@ -51,7 +51,7 @@
 
 			coherency-fabric@20200 {
 				compatible = "marvell,coherency-fabric";
-				reg = <0x20200 0xb0>, <0x21810 0x1c>;
+				reg = <0x20200 0xb0>, <0x21010 0x1c>;
 			};
 
 			serial@12000 {
diff --git a/arch/arm/boot/dts/armada-xp-mv78230.dtsi b/arch/arm/boot/dts/armada-xp-mv78230.dtsi
index f8eaa383e07..f94cdbc579c 100644
--- a/arch/arm/boot/dts/armada-xp-mv78230.dtsi
+++ b/arch/arm/boot/dts/armada-xp-mv78230.dtsi
@@ -81,7 +81,7 @@
 			/*
 			 * MV78230 has 2 PCIe units Gen2.0: One unit can be
 			 * configured as x4 or quad x1 lanes. One unit is
-			 * x4/x1.
+			 * x1 only.
 			 */
 			pcie-controller {
 				compatible = "marvell,armada-xp-pcie";
@@ -94,10 +94,10 @@
 				bus-range = <0x00 0xff>;
 
 				ranges = <0x82000000 0 0x40000 0x40000 0 0x00002000   /* Port 0.0 registers */
-					0x82000000 0 0x42000 0x42000 0 0x00002000   /* Port 2.0 registers */
 					0x82000000 0 0x44000 0x44000 0 0x00002000   /* Port 0.1 registers */
 					0x82000000 0 0x48000 0x48000 0 0x00002000   /* Port 0.2 registers */
 					0x82000000 0 0x4c000 0x4c000 0 0x00002000   /* Port 0.3 registers */
+					0x82000000 0 0x80000 0x80000 0 0x00002000   /* Port 1.0 registers */
 					0x82000000 0 0xe0000000 0xe0000000 0 0x08000000   /* non-prefetchable memory */
 					0x81000000 0 0	  0xe8000000 0 0x00100000>; /* downstream I/O */
 
@@ -165,19 +165,19 @@
 					status = "disabled";
 				};
 
-				pcie@9,0 {
+				pcie@5,0 {
 					device_type = "pci";
-					assigned-addresses = <0x82000800 0 0x42000 0 0x2000>;
-					reg = <0x4800 0 0 0 0>;
+					assigned-addresses = <0x82000800 0 0x80000 0 0x2000>;
+					reg = <0x2800 0 0 0 0>;
 					#address-cells = <3>;
 					#size-cells = <2>;
 					#interrupt-cells = <1>;
 					ranges;
 					interrupt-map-mask = <0 0 0 0>;
-					interrupt-map = <0 0 0 0 &mpic 99>;
-					marvell,pcie-port = <2>;
+					interrupt-map = <0 0 0 0 &mpic 62>;
+					marvell,pcie-port = <1>;
 					marvell,pcie-lane = <0>;
-					clocks = <&gateclk 26>;
+					clocks = <&gateclk 9>;
 					status = "disabled";
 				};
 			};
diff --git a/arch/arm/boot/dts/armada-xp-mv78260.dtsi b/arch/arm/boot/dts/armada-xp-mv78260.dtsi
index f4029f015af..55cdd58c155 100644
--- a/arch/arm/boot/dts/armada-xp-mv78260.dtsi
+++ b/arch/arm/boot/dts/armada-xp-mv78260.dtsi
@@ -101,7 +101,7 @@
 			/*
 			 * MV78260 has 3 PCIe units Gen2.0: Two units can be
 			 * configured as x4 or quad x1 lanes. One unit is
-			 * x4/x1.
+			 * x4 only.
 			 */
 			pcie-controller {
 				compatible = "marvell,armada-xp-pcie";
@@ -119,7 +119,9 @@
 					0x82000000 0 0x48000 0x48000 0 0x00002000   /* Port 0.2 registers */
 					0x82000000 0 0x4c000 0x4c000 0 0x00002000   /* Port 0.3 registers */
 					0x82000000 0 0x80000 0x80000 0 0x00002000   /* Port 1.0 registers */
-					0x82000000 0 0x82000 0x82000 0 0x00002000   /* Port 3.0 registers */
+					0x82000000 0 0x84000 0x84000 0 0x00002000   /* Port 1.1 registers */
+					0x82000000 0 0x88000 0x88000 0 0x00002000   /* Port 1.2 registers */
+					0x82000000 0 0x8c000 0x8c000 0 0x00002000   /* Port 1.3 registers */
 					0x82000000 0 0xe0000000 0xe0000000 0 0x08000000   /* non-prefetchable memory */
 					0x81000000 0 0	  0xe8000000 0 0x00100000>; /* downstream I/O */
 
@@ -187,35 +189,83 @@
 					status = "disabled";
 				};
 
-				pcie@9,0 {
+				pcie@5,0 {
 					device_type = "pci";
-					assigned-addresses = <0x82000800 0 0x42000 0 0x2000>;
-					reg = <0x4800 0 0 0 0>;
+					assigned-addresses = <0x82000800 0 0x80000 0 0x2000>;
+					reg = <0x2800 0 0 0 0>;
 					#address-cells = <3>;
 					#size-cells = <2>;
 					#interrupt-cells = <1>;
 					ranges;
 					interrupt-map-mask = <0 0 0 0>;
-					interrupt-map = <0 0 0 0 &mpic 99>;
-					marvell,pcie-port = <2>;
+					interrupt-map = <0 0 0 0 &mpic 62>;
+					marvell,pcie-port = <1>;
 					marvell,pcie-lane = <0>;
-					clocks = <&gateclk 26>;
+					clocks = <&gateclk 9>;
+					status = "disabled";
+				};
+
+				pcie@6,0 {
+					device_type = "pci";
+					assigned-addresses = <0x82000800 0 0x84000 0 0x2000>;
+					reg = <0x3000 0 0 0 0>;
+					#address-cells = <3>;
+					#size-cells = <2>;
+					#interrupt-cells = <1>;
+					ranges;
+					interrupt-map-mask = <0 0 0 0>;
+					interrupt-map = <0 0 0 0 &mpic 63>;
+					marvell,pcie-port = <1>;
+					marvell,pcie-lane = <1>;
+					clocks = <&gateclk 10>;
+					status = "disabled";
+				};
+
+				pcie@7,0 {
+					device_type = "pci";
+					assigned-addresses = <0x82000800 0 0x88000 0 0x2000>;
+					reg = <0x3800 0 0 0 0>;
+					#address-cells = <3>;
+					#size-cells = <2>;
+					#interrupt-cells = <1>;
+					ranges;
+					interrupt-map-mask = <0 0 0 0>;
+					interrupt-map = <0 0 0 0 &mpic 64>;
+					marvell,pcie-port = <1>;
+					marvell,pcie-lane = <2>;
+					clocks = <&gateclk 11>;
 					status = "disabled";
 				};
 
-				pcie@10,0 {
+				pcie@8,0 {
 					device_type = "pci";
-					assigned-addresses = <0x82000800 0 0x82000 0 0x2000>;
-					reg = <0x5000 0 0 0 0>;
+					assigned-addresses = <0x82000800 0 0x8c000 0 0x2000>;
+					reg = <0x4000 0 0 0 0>;
 					#address-cells = <3>;
 					#size-cells = <2>;
 					#interrupt-cells = <1>;
 					ranges;
 					interrupt-map-mask = <0 0 0 0>;
-					interrupt-map = <0 0 0 0 &mpic 103>;
-					marvell,pcie-port = <3>;
+					interrupt-map = <0 0 0 0 &mpic 65>;
+					marvell,pcie-port = <1>;
+					marvell,pcie-lane = <3>;
+					clocks = <&gateclk 12>;
+					status = "disabled";
+				};
+
+				pcie@9,0 {
+					device_type = "pci";
+					assigned-addresses = <0x82000800 0 0x42000 0 0x2000>;
+					reg = <0x4800 0 0 0 0>;
+					#address-cells = <3>;
+					#size-cells = <2>;
+					#interrupt-cells = <1>;
+					ranges;
+					interrupt-map-mask = <0 0 0 0>;
+					interrupt-map = <0 0 0 0 &mpic 99>;
+					marvell,pcie-port = <2>;
 					marvell,pcie-lane = <0>;
-					clocks = <&gateclk 27>;
+					clocks = <&gateclk 26>;
 					status = "disabled";
 				};
 			};
diff --git a/arch/arm/boot/dts/at91rm9200.dtsi b/arch/arm/boot/dts/at91rm9200.dtsi
index 5d3ed5aafc6..0af879a4eaf 100644
--- a/arch/arm/boot/dts/at91rm9200.dtsi
+++ b/arch/arm/boot/dts/at91rm9200.dtsi
@@ -35,8 +35,12 @@
 		ssc2 = &ssc2;
 	};
 	cpus {
-		cpu@0 {
+		#address-cells = <0>;
+		#size-cells = <0>;
+
+		cpu {
 			compatible = "arm,arm920t";
+			device_type = "cpu";
 		};
 	};
 
diff --git a/arch/arm/boot/dts/at91sam9260.dtsi b/arch/arm/boot/dts/at91sam9260.dtsi
index 84c4bef2d72..0dbdb846f90 100644
--- a/arch/arm/boot/dts/at91sam9260.dtsi
+++ b/arch/arm/boot/dts/at91sam9260.dtsi
@@ -32,8 +32,12 @@
 		ssc0 = &ssc0;
 	};
 	cpus {
-		cpu@0 {
-			compatible = "arm,arm926ejs";
+		#address-cells = <0>;
+		#size-cells = <0>;
+
+		cpu {
+			compatible = "arm,arm926ej-s";
+			device_type = "cpu";
 		};
 	};
 
@@ -340,6 +344,14 @@
 					};
 				};
 
+				i2c_gpio0 {
+					pinctrl_i2c_gpio0: i2c_gpio0-0 {
+						atmel,pins =
+							<0 23 0x0 0x3   /* PA23 gpio I2C_SDA pin */
+							 0 24 0x0 0x3>; /* PA24 gpio I2C_SCL pin */
+					};
+				};
+
 				pioA: gpio@fffff400 {
 					compatible = "atmel,at91rm9200-gpio";
 					reg = <0xfffff400 0x200>;
@@ -592,6 +604,8 @@
 		i2c-gpio,delay-us = <2>;	/* ~100 kHz */
 		#address-cells = <1>;
 		#size-cells = <0>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_i2c_gpio0>;
 		status = "disabled";
 	};
 };
diff --git a/arch/arm/boot/dts/at91sam9263.dtsi b/arch/arm/boot/dts/at91sam9263.dtsi
index 94b58ab2cc0..fcd38f89904 100644
--- a/arch/arm/boot/dts/at91sam9263.dtsi
+++ b/arch/arm/boot/dts/at91sam9263.dtsi
@@ -29,8 +29,12 @@
 		ssc1 = &ssc1;
 	};
 	cpus {
-		cpu@0 {
-			compatible = "arm,arm926ejs";
+		#address-cells = <0>;
+		#size-cells = <0>;
+
+		cpu {
+			compatible = "arm,arm926ej-s";
+			device_type = "cpu";
 		};
 	};
 
diff --git a/arch/arm/boot/dts/at91sam9g45.dtsi b/arch/arm/boot/dts/at91sam9g45.dtsi
index bf18a735c37..479a0622cdb 100644
--- a/arch/arm/boot/dts/at91sam9g45.dtsi
+++ b/arch/arm/boot/dts/at91sam9g45.dtsi
@@ -35,8 +35,12 @@
 		ssc1 = &ssc1;
 	};
 	cpus {
-		cpu@0 {
-			compatible = "arm,arm926ejs";
+		#address-cells = <0>;
+		#size-cells = <0>;
+
+		cpu {
+			compatible = "arm,arm926ej-s";
+			device_type = "cpu";
 		};
 	};
 
diff --git a/arch/arm/boot/dts/at91sam9n12.dtsi b/arch/arm/boot/dts/at91sam9n12.dtsi
index 8d25f889928..a92ec78349a 100644
--- a/arch/arm/boot/dts/at91sam9n12.dtsi
+++ b/arch/arm/boot/dts/at91sam9n12.dtsi
@@ -31,8 +31,12 @@
 		ssc0 = &ssc0;
 	};
 	cpus {
-		cpu@0 {
-			compatible = "arm,arm926ejs";
+		#address-cells = <0>;
+		#size-cells = <0>;
+
+		cpu {
+			compatible = "arm,arm926ej-s";
+			device_type = "cpu";
 		};
 	};
 
diff --git a/arch/arm/boot/dts/at91sam9n12ek.dts b/arch/arm/boot/dts/at91sam9n12ek.dts
index d30e48bd1e9..28ba7989387 100644
--- a/arch/arm/boot/dts/at91sam9n12ek.dts
+++ b/arch/arm/boot/dts/at91sam9n12ek.dts
@@ -14,11 +14,11 @@
 	compatible = "atmel,at91sam9n12ek", "atmel,at91sam9n12", "atmel,at91sam9";
 
 	chosen {
-		bootargs = "mem=128M console=ttyS0,115200 root=/dev/mtdblock1 rw rootfstype=jffs2";
+		bootargs = "console=ttyS0,115200 root=/dev/mtdblock1 rw rootfstype=jffs2";
 	};
 
 	memory {
-		reg = <0x20000000 0x10000000>;
+		reg = <0x20000000 0x8000000>;
 	};
 
 	clocks {
diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi
index 1145ac330fb..2b2b6923d16 100644
--- a/arch/arm/boot/dts/at91sam9x5.dtsi
+++ b/arch/arm/boot/dts/at91sam9x5.dtsi
@@ -33,8 +33,12 @@
 		ssc0 = &ssc0;
 	};
 	cpus {
-		cpu@0 {
-			compatible = "arm,arm926ejs";
+		#address-cells = <0>;
+		#size-cells = <0>;
+
+		cpu {
+			compatible = "arm,arm926ej-s";
+			device_type = "cpu";
 		};
 	};
 
@@ -643,7 +647,7 @@
 			};
 
 			rtc@fffffeb0 {
-				compatible = "atmel,at91rm9200-rtc";
+				compatible = "atmel,at91sam9x5-rtc";
 				reg = <0xfffffeb0 0x40>;
 				interrupts = <1 4 7>;
 				status = "disabled";
diff --git a/arch/arm/boot/dts/bcm2835.dtsi b/arch/arm/boot/dts/bcm2835.dtsi
index 1e12aeff403..aa537ed13f0 100644
--- a/arch/arm/boot/dts/bcm2835.dtsi
+++ b/arch/arm/boot/dts/bcm2835.dtsi
@@ -85,6 +85,8 @@
 			reg = <0x7e205000 0x1000>;
 			interrupts = <2 21>;
 			clocks = <&clk_i2c>;
+			#address-cells = <1>;
+			#size-cells = <0>;
 			status = "disabled";
 		};
 
@@ -93,6 +95,8 @@
 			reg = <0x7e804000 0x1000>;
 			interrupts = <2 21>;
 			clocks = <&clk_i2c>;
+			#address-cells = <1>;
+			#size-cells = <0>;
 			status = "disabled";
 		};
 
diff --git a/arch/arm/boot/dts/clcd-panels.dtsi b/arch/arm/boot/dts/clcd-panels.dtsi
new file mode 100644
index 00000000000..0b0ff6ead4b
--- /dev/null
+++ b/arch/arm/boot/dts/clcd-panels.dtsi
@@ -0,0 +1,52 @@
+/*
+ * ARM Ltd. Versatile Express
+ *
+ */
+
+/ {
+	panels {
+		panel@0 {
+			compatible	= "panel";
+			mode		= "VGA";
+			refresh		= <60>;
+			xres		= <640>;
+			yres		= <480>;
+			pixclock	= <39721>;
+			left_margin	= <40>;
+			right_margin	= <24>;
+			upper_margin	= <32>;
+			lower_margin	= <11>;
+			hsync_len	= <96>;
+			vsync_len	= <2>;
+			sync		= <0>;
+			vmode		= "FB_VMODE_NONINTERLACED";
+
+			tim2		= "TIM2_BCD", "TIM2_IPC";
+			cntl		= "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)";
+			caps		= "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888";
+			bpp		= <16>;
+		};
+
+		panel@1 {
+			compatible	= "panel";
+			mode		= "XVGA";
+			refresh		= <60>;
+			xres		= <1024>;
+			yres		= <768>;
+			pixclock	= <15748>;
+			left_margin	= <152>;
+			right_margin	= <48>;
+			upper_margin	= <23>;
+			lower_margin	= <3>;
+			hsync_len	= <104>;
+			vsync_len	= <4>;
+			sync		= <0>;
+			vmode		= "FB_VMODE_NONINTERLACED";
+
+			tim2		= "TIM2_BCD", "TIM2_IPC";
+			cntl		= "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)";
+			caps		= "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888";
+			bpp		= <16>;
+		};
+	};
+};
diff --git a/arch/arm/boot/dts/cros5250-common.dtsi b/arch/arm/boot/dts/cros5250-common.dtsi
index 3f0239ec1bc..49d8da2779a 100644
--- a/arch/arm/boot/dts/cros5250-common.dtsi
+++ b/arch/arm/boot/dts/cros5250-common.dtsi
@@ -27,6 +27,13 @@
 		i2c2_bus: i2c2-bus {
 			samsung,pin-pud = <0>;
 		};
+
+		max77686_irq: max77686-irq {
+			samsung,pins = "gpx3-2";
+			samsung,pin-function = <0>;
+			samsung,pin-pud = <0>;
+			samsung,pin-drv = <0>;
+		};
 	};
 
 	i2c@12C60000 {
@@ -35,6 +42,11 @@
 
 		max77686@09 {
 			compatible = "maxim,max77686";
+			interrupt-parent = <&gpx3>;
+			interrupts = <2 0>;
+			pinctrl-names = "default";
+			pinctrl-0 = <&max77686_irq>;
+			wakeup-source;
 			reg = <0x09>;
 
 			voltage-regulators {
diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi
index fc9fb3d526e..cdbdc4dfef2 100644
--- a/arch/arm/boot/dts/exynos5250.dtsi
+++ b/arch/arm/boot/dts/exynos5250.dtsi
@@ -545,7 +545,7 @@
 			compatible = "arm,pl330", "arm,primecell";
 			reg = <0x10800000 0x1000>;
 			interrupts = <0 33 0>;
-			clocks = <&clock 271>;
+			clocks = <&clock 346>;
 			clock-names = "apb_pclk";
 			#dma-cells = <1>;
 			#dma-channels = <8>;
diff --git a/arch/arm/boot/dts/imx23.dtsi b/arch/arm/boot/dts/imx23.dtsi
index 73fd7d0887b..587ceef81e4 100644
--- a/arch/arm/boot/dts/imx23.dtsi
+++ b/arch/arm/boot/dts/imx23.dtsi
@@ -23,8 +23,12 @@
 	};
 
 	cpus {
-		cpu@0 {
-			compatible = "arm,arm926ejs";
+		#address-cells = <0>;
+		#size-cells = <0>;
+
+		cpu {
+			compatible = "arm,arm926ej-s";
+			device_type = "cpu";
 		};
 	};
 
diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi
index 600f7cb51f3..4c10a1968c0 100644
--- a/arch/arm/boot/dts/imx28.dtsi
+++ b/arch/arm/boot/dts/imx28.dtsi
@@ -32,8 +32,12 @@
 	};
 
 	cpus {
-		cpu@0 {
-			compatible = "arm,arm926ejs";
+		#address-cells = <0>;
+		#size-cells = <0>;
+
+		cpu {
+			compatible = "arm,arm926ej-s";
+			device_type = "cpu";
 		};
 	};
 
diff --git a/arch/arm/boot/dts/imx6dl.dtsi b/arch/arm/boot/dts/imx6dl.dtsi
index 5bcdf3a90bb..62dc7812679 100644
--- a/arch/arm/boot/dts/imx6dl.dtsi
+++ b/arch/arm/boot/dts/imx6dl.dtsi
@@ -18,12 +18,14 @@
 
 		cpu@0 {
 			compatible = "arm,cortex-a9";
+			device_type = "cpu";
 			reg = <0>;
 			next-level-cache = <&L2>;
 		};
 
 		cpu@1 {
 			compatible = "arm,cortex-a9";
+			device_type = "cpu";
 			reg = <1>;
 			next-level-cache = <&L2>;
 		};
diff --git a/arch/arm/boot/dts/imx6q.dtsi b/arch/arm/boot/dts/imx6q.dtsi
index 21e675848bd..dc54a72a3bc 100644
--- a/arch/arm/boot/dts/imx6q.dtsi
+++ b/arch/arm/boot/dts/imx6q.dtsi
@@ -18,6 +18,7 @@
 
 		cpu@0 {
 			compatible = "arm,cortex-a9";
+			device_type = "cpu";
 			reg = <0>;
 			next-level-cache = <&L2>;
 			operating-points = <
@@ -39,18 +40,21 @@
 
 		cpu@1 {
 			compatible = "arm,cortex-a9";
+			device_type = "cpu";
 			reg = <1>;
 			next-level-cache = <&L2>;
 		};
 
 		cpu@2 {
 			compatible = "arm,cortex-a9";
+			device_type = "cpu";
 			reg = <2>;
 			next-level-cache = <&L2>;
 		};
 
 		cpu@3 {
 			compatible = "arm,cortex-a9";
+			device_type = "cpu";
 			reg = <3>;
 			next-level-cache = <&L2>;
 		};
diff --git a/arch/arm/boot/dts/integratorcp.dts b/arch/arm/boot/dts/integratorcp.dts
index ff1aea0ee04..72693a69f83 100644
--- a/arch/arm/boot/dts/integratorcp.dts
+++ b/arch/arm/boot/dts/integratorcp.dts
@@ -9,11 +9,6 @@
 	model = "ARM Integrator/CP";
 	compatible = "arm,integrator-cp";
 
-	aliases {
-		arm,timer-primary = &timer2;
-		arm,timer-secondary = &timer1;
-	};
-
 	chosen {
 		bootargs = "root=/dev/ram0 console=ttyAMA0,38400n8 earlyprintk";
 	};
@@ -24,14 +19,18 @@
 	};
 
 	timer0: timer@13000000 {
+		/* TIMER0 runs @ 25MHz */
 		compatible = "arm,integrator-cp-timer";
+		status = "disabled";
 	};
 
 	timer1: timer@13000100 {
+		/* TIMER1 runs @ 1MHz */
 		compatible = "arm,integrator-cp-timer";
 	};
 
 	timer2: timer@13000200 {
+		/* TIMER2 runs @ 1MHz */
 		compatible = "arm,integrator-cp-timer";
 	};
 
diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a15x1.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a15x1.dts
new file mode 100644
index 00000000000..c9eee916aa7
--- /dev/null
+++ b/arch/arm/boot/dts/rtsm_ve-cortex_a15x1.dts
@@ -0,0 +1,159 @@
+/*
+ * ARM Ltd. Fast Models
+ *
+ * Versatile Express (VE) system model
+ * ARMCortexA15x1CT
+ *
+ * RTSM_VE_Cortex_A15x1.lisa
+ */
+
+/dts-v1/;
+
+/ {
+	model = "RTSM_VE_CortexA15x1";
+	arm,vexpress,site = <0xf>;
+	compatible = "arm,rtsm_ve,cortex_a15x1", "arm,vexpress";
+	interrupt-parent = <&gic>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	chosen { };
+
+	aliases {
+		serial0 = &v2m_serial0;
+		serial1 = &v2m_serial1;
+		serial2 = &v2m_serial2;
+		serial3 = &v2m_serial3;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a15";
+			reg = <0>;
+		};
+	};
+
+	memory@80000000 {
+		device_type = "memory";
+		reg = <0 0x80000000 0 0x80000000>;
+	};
+
+	gic: interrupt-controller@2c001000 {
+		compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
+		#interrupt-cells = <3>;
+		#address-cells = <0>;
+		interrupt-controller;
+		reg = <0 0x2c001000 0 0x1000>,
+		      <0 0x2c002000 0 0x1000>,
+		      <0 0x2c004000 0 0x2000>,
+		      <0 0x2c006000 0 0x2000>;
+		interrupts = <1 9 0xf04>;
+	};
+
+	timer {
+		compatible = "arm,armv7-timer";
+		interrupts = <1 13 0xf08>,
+			     <1 14 0xf08>,
+			     <1 11 0xf08>,
+			     <1 10 0xf08>;
+	};
+
+	dcc {
+		compatible = "arm,vexpress,config-bus";
+		arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+		osc@0 {
+			/* ACLK clock to the AXI master port on the test chip */
+			compatible = "arm,vexpress-osc";
+			arm,vexpress-sysreg,func = <1 0>;
+			freq-range = <30000000 50000000>;
+			#clock-cells = <0>;
+			clock-output-names = "extsaxiclk";
+		};
+
+		oscclk1: osc@1 {
+			/* Reference clock for the CLCD */
+			compatible = "arm,vexpress-osc";
+			arm,vexpress-sysreg,func = <1 1>;
+			freq-range = <10000000 80000000>;
+			#clock-cells = <0>;
+			clock-output-names = "clcdclk";
+		};
+
+		smbclk: oscclk2: osc@2 {
+			/* Reference clock for the test chip internal PLLs */
+			compatible = "arm,vexpress-osc";
+			arm,vexpress-sysreg,func = <1 2>;
+			freq-range = <33000000 100000000>;
+			#clock-cells = <0>;
+			clock-output-names = "tcrefclk";
+		};
+	};
+
+	smb {
+		compatible = "simple-bus";
+
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges = <0 0 0 0x08000000 0x04000000>,
+			 <1 0 0 0x14000000 0x04000000>,
+			 <2 0 0 0x18000000 0x04000000>,
+			 <3 0 0 0x1c000000 0x04000000>,
+			 <4 0 0 0x0c000000 0x04000000>,
+			 <5 0 0 0x10000000 0x04000000>;
+
+		#interrupt-cells = <1>;
+		interrupt-map-mask = <0 0 63>;
+		interrupt-map = <0 0  0 &gic 0  0 4>,
+				<0 0  1 &gic 0  1 4>,
+				<0 0  2 &gic 0  2 4>,
+				<0 0  3 &gic 0  3 4>,
+				<0 0  4 &gic 0  4 4>,
+				<0 0  5 &gic 0  5 4>,
+				<0 0  6 &gic 0  6 4>,
+				<0 0  7 &gic 0  7 4>,
+				<0 0  8 &gic 0  8 4>,
+				<0 0  9 &gic 0  9 4>,
+				<0 0 10 &gic 0 10 4>,
+				<0 0 11 &gic 0 11 4>,
+				<0 0 12 &gic 0 12 4>,
+				<0 0 13 &gic 0 13 4>,
+				<0 0 14 &gic 0 14 4>,
+				<0 0 15 &gic 0 15 4>,
+				<0 0 16 &gic 0 16 4>,
+				<0 0 17 &gic 0 17 4>,
+				<0 0 18 &gic 0 18 4>,
+				<0 0 19 &gic 0 19 4>,
+				<0 0 20 &gic 0 20 4>,
+				<0 0 21 &gic 0 21 4>,
+				<0 0 22 &gic 0 22 4>,
+				<0 0 23 &gic 0 23 4>,
+				<0 0 24 &gic 0 24 4>,
+				<0 0 25 &gic 0 25 4>,
+				<0 0 26 &gic 0 26 4>,
+				<0 0 27 &gic 0 27 4>,
+				<0 0 28 &gic 0 28 4>,
+				<0 0 29 &gic 0 29 4>,
+				<0 0 30 &gic 0 30 4>,
+				<0 0 31 &gic 0 31 4>,
+				<0 0 32 &gic 0 32 4>,
+				<0 0 33 &gic 0 33 4>,
+				<0 0 34 &gic 0 34 4>,
+				<0 0 35 &gic 0 35 4>,
+				<0 0 36 &gic 0 36 4>,
+				<0 0 37 &gic 0 37 4>,
+				<0 0 38 &gic 0 38 4>,
+				<0 0 39 &gic 0 39 4>,
+				<0 0 40 &gic 0 40 4>,
+				<0 0 41 &gic 0 41 4>,
+				<0 0 42 &gic 0 42 4>;
+
+		/include/ "rtsm_ve-motherboard.dtsi"
+	};
+};
+
+/include/ "clcd-panels.dtsi"
diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a15x2.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a15x2.dts
new file mode 100644
index 00000000000..853a166e3c3
--- /dev/null
+++ b/arch/arm/boot/dts/rtsm_ve-cortex_a15x2.dts
@@ -0,0 +1,165 @@
+/*
+ * ARM Ltd. Fast Models
+ *
+ * Versatile Express (VE) system model
+ * ARMCortexA15x2CT
+ *
+ * RTSM_VE_Cortex_A15x2.lisa
+ */
+
+/dts-v1/;
+
+/ {
+	model = "RTSM_VE_CortexA15x2";
+	arm,vexpress,site = <0xf>;
+	compatible = "arm,rtsm_ve,cortex_a15x2", "arm,vexpress";
+	interrupt-parent = <&gic>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	chosen { };
+
+	aliases {
+		serial0 = &v2m_serial0;
+		serial1 = &v2m_serial1;
+		serial2 = &v2m_serial2;
+		serial3 = &v2m_serial3;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a15";
+			reg = <0>;
+		};
+
+		cpu@1 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a15";
+			reg = <1>;
+		};
+	};
+
+	memory@80000000 {
+		device_type = "memory";
+		reg = <0 0x80000000 0 0x80000000>;
+	};
+
+	gic: interrupt-controller@2c001000 {
+		compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
+		#interrupt-cells = <3>;
+		#address-cells = <0>;
+		interrupt-controller;
+		reg = <0 0x2c001000 0 0x1000>,
+		      <0 0x2c002000 0 0x1000>,
+		      <0 0x2c004000 0 0x2000>,
+		      <0 0x2c006000 0 0x2000>;
+		interrupts = <1 9 0xf04>;
+	};
+
+	timer {
+		compatible = "arm,armv7-timer";
+		interrupts = <1 13 0xf08>,
+			     <1 14 0xf08>,
+			     <1 11 0xf08>,
+			     <1 10 0xf08>;
+	};
+
+	dcc {
+		compatible = "arm,vexpress,config-bus";
+		arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+		osc@0 {
+			/* ACLK clock to the AXI master port on the test chip */
+			compatible = "arm,vexpress-osc";
+			arm,vexpress-sysreg,func = <1 0>;
+			freq-range = <30000000 50000000>;
+			#clock-cells = <0>;
+			clock-output-names = "extsaxiclk";
+		};
+
+		oscclk1: osc@1 {
+			/* Reference clock for the CLCD */
+			compatible = "arm,vexpress-osc";
+			arm,vexpress-sysreg,func = <1 1>;
+			freq-range = <10000000 80000000>;
+			#clock-cells = <0>;
+			clock-output-names = "clcdclk";
+		};
+
+		smbclk: oscclk2: osc@2 {
+			/* Reference clock for the test chip internal PLLs */
+			compatible = "arm,vexpress-osc";
+			arm,vexpress-sysreg,func = <1 2>;
+			freq-range = <33000000 100000000>;
+			#clock-cells = <0>;
+			clock-output-names = "tcrefclk";
+		};
+	};
+
+	smb {
+		compatible = "simple-bus";
+
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges = <0 0 0 0x08000000 0x04000000>,
+			 <1 0 0 0x14000000 0x04000000>,
+			 <2 0 0 0x18000000 0x04000000>,
+			 <3 0 0 0x1c000000 0x04000000>,
+			 <4 0 0 0x0c000000 0x04000000>,
+			 <5 0 0 0x10000000 0x04000000>;
+
+		#interrupt-cells = <1>;
+		interrupt-map-mask = <0 0 63>;
+		interrupt-map = <0 0  0 &gic 0  0 4>,
+				<0 0  1 &gic 0  1 4>,
+				<0 0  2 &gic 0  2 4>,
+				<0 0  3 &gic 0  3 4>,
+				<0 0  4 &gic 0  4 4>,
+				<0 0  5 &gic 0  5 4>,
+				<0 0  6 &gic 0  6 4>,
+				<0 0  7 &gic 0  7 4>,
+				<0 0  8 &gic 0  8 4>,
+				<0 0  9 &gic 0  9 4>,
+				<0 0 10 &gic 0 10 4>,
+				<0 0 11 &gic 0 11 4>,
+				<0 0 12 &gic 0 12 4>,
+				<0 0 13 &gic 0 13 4>,
+				<0 0 14 &gic 0 14 4>,
+				<0 0 15 &gic 0 15 4>,
+				<0 0 16 &gic 0 16 4>,
+				<0 0 17 &gic 0 17 4>,
+				<0 0 18 &gic 0 18 4>,
+				<0 0 19 &gic 0 19 4>,
+				<0 0 20 &gic 0 20 4>,
+				<0 0 21 &gic 0 21 4>,
+				<0 0 22 &gic 0 22 4>,
+				<0 0 23 &gic 0 23 4>,
+				<0 0 24 &gic 0 24 4>,
+				<0 0 25 &gic 0 25 4>,
+				<0 0 26 &gic 0 26 4>,
+				<0 0 27 &gic 0 27 4>,
+				<0 0 28 &gic 0 28 4>,
+				<0 0 29 &gic 0 29 4>,
+				<0 0 30 &gic 0 30 4>,
+				<0 0 31 &gic 0 31 4>,
+				<0 0 32 &gic 0 32 4>,
+				<0 0 33 &gic 0 33 4>,
+				<0 0 34 &gic 0 34 4>,
+				<0 0 35 &gic 0 35 4>,
+				<0 0 36 &gic 0 36 4>,
+				<0 0 37 &gic 0 37 4>,
+				<0 0 38 &gic 0 38 4>,
+				<0 0 39 &gic 0 39 4>,
+				<0 0 40 &gic 0 40 4>,
+				<0 0 41 &gic 0 41 4>,
+				<0 0 42 &gic 0 42 4>;
+
+		/include/ "rtsm_ve-motherboard.dtsi"
+	};
+};
+
+/include/ "clcd-panels.dtsi"
diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a15x4.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a15x4.dts
new file mode 100644
index 00000000000..c1947a3a5c8
--- /dev/null
+++ b/arch/arm/boot/dts/rtsm_ve-cortex_a15x4.dts
@@ -0,0 +1,177 @@
+/*
+ * ARM Ltd. Fast Models
+ *
+ * Versatile Express (VE) system model
+ * ARMCortexA15x4CT
+ *
+ * RTSM_VE_Cortex_A15x4.lisa
+ */
+
+/dts-v1/;
+
+/ {
+	model = "RTSM_VE_CortexA15x4";
+	arm,vexpress,site = <0xf>;
+	compatible = "arm,rtsm_ve,cortex_a15x4", "arm,vexpress";
+	interrupt-parent = <&gic>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	chosen { };
+
+	aliases {
+		serial0 = &v2m_serial0;
+		serial1 = &v2m_serial1;
+		serial2 = &v2m_serial2;
+		serial3 = &v2m_serial3;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a15";
+			reg = <0>;
+		};
+
+		cpu@1 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a15";
+			reg = <1>;
+		};
+
+		cpu@2 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a15";
+			reg = <2>;
+		};
+
+		cpu@3 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a15";
+			reg = <3>;
+		};
+	};
+
+	memory@80000000 {
+		device_type = "memory";
+		reg = <0 0x80000000 0 0x80000000>;
+	};
+
+	gic: interrupt-controller@2c001000 {
+		compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
+		#interrupt-cells = <3>;
+		#address-cells = <0>;
+		interrupt-controller;
+		reg = <0 0x2c001000 0 0x1000>,
+		      <0 0x2c002000 0 0x1000>,
+		      <0 0x2c004000 0 0x2000>,
+		      <0 0x2c006000 0 0x2000>;
+		interrupts = <1 9 0xf04>;
+	};
+
+	timer {
+		compatible = "arm,armv7-timer";
+		interrupts = <1 13 0xf08>,
+			     <1 14 0xf08>,
+			     <1 11 0xf08>,
+			     <1 10 0xf08>;
+	};
+
+	dcc {
+		compatible = "arm,vexpress,config-bus";
+		arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+		osc@0 {
+			/* ACLK clock to the AXI master port on the test chip */
+			compatible = "arm,vexpress-osc";
+			arm,vexpress-sysreg,func = <1 0>;
+			freq-range = <30000000 50000000>;
+			#clock-cells = <0>;
+			clock-output-names = "extsaxiclk";
+		};
+
+		oscclk1: osc@1 {
+			/* Reference clock for the CLCD */
+			compatible = "arm,vexpress-osc";
+			arm,vexpress-sysreg,func = <1 1>;
+			freq-range = <10000000 80000000>;
+			#clock-cells = <0>;
+			clock-output-names = "clcdclk";
+		};
+
+		smbclk: oscclk2: osc@2 {
+			/* Reference clock for the test chip internal PLLs */
+			compatible = "arm,vexpress-osc";
+			arm,vexpress-sysreg,func = <1 2>;
+			freq-range = <33000000 100000000>;
+			#clock-cells = <0>;
+			clock-output-names = "tcrefclk";
+		};
+	};
+
+	smb {
+		compatible = "simple-bus";
+
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges = <0 0 0 0x08000000 0x04000000>,
+			 <1 0 0 0x14000000 0x04000000>,
+			 <2 0 0 0x18000000 0x04000000>,
+			 <3 0 0 0x1c000000 0x04000000>,
+			 <4 0 0 0x0c000000 0x04000000>,
+			 <5 0 0 0x10000000 0x04000000>;
+
+		#interrupt-cells = <1>;
+		interrupt-map-mask = <0 0 63>;
+		interrupt-map = <0 0  0 &gic 0  0 4>,
+				<0 0  1 &gic 0  1 4>,
+				<0 0  2 &gic 0  2 4>,
+				<0 0  3 &gic 0  3 4>,
+				<0 0  4 &gic 0  4 4>,
+				<0 0  5 &gic 0  5 4>,
+				<0 0  6 &gic 0  6 4>,
+				<0 0  7 &gic 0  7 4>,
+				<0 0  8 &gic 0  8 4>,
+				<0 0  9 &gic 0  9 4>,
+				<0 0 10 &gic 0 10 4>,
+				<0 0 11 &gic 0 11 4>,
+				<0 0 12 &gic 0 12 4>,
+				<0 0 13 &gic 0 13 4>,
+				<0 0 14 &gic 0 14 4>,
+				<0 0 15 &gic 0 15 4>,
+				<0 0 16 &gic 0 16 4>,
+				<0 0 17 &gic 0 17 4>,
+				<0 0 18 &gic 0 18 4>,
+				<0 0 19 &gic 0 19 4>,
+				<0 0 20 &gic 0 20 4>,
+				<0 0 21 &gic 0 21 4>,
+				<0 0 22 &gic 0 22 4>,
+				<0 0 23 &gic 0 23 4>,
+				<0 0 24 &gic 0 24 4>,
+				<0 0 25 &gic 0 25 4>,
+				<0 0 26 &gic 0 26 4>,
+				<0 0 27 &gic 0 27 4>,
+				<0 0 28 &gic 0 28 4>,
+				<0 0 29 &gic 0 29 4>,
+				<0 0 30 &gic 0 30 4>,
+				<0 0 31 &gic 0 31 4>,
+				<0 0 32 &gic 0 32 4>,
+				<0 0 33 &gic 0 33 4>,
+				<0 0 34 &gic 0 34 4>,
+				<0 0 35 &gic 0 35 4>,
+				<0 0 36 &gic 0 36 4>,
+				<0 0 37 &gic 0 37 4>,
+				<0 0 38 &gic 0 38 4>,
+				<0 0 39 &gic 0 39 4>,
+				<0 0 40 &gic 0 40 4>,
+				<0 0 41 &gic 0 41 4>,
+				<0 0 42 &gic 0 42 4>;
+
+		/include/ "rtsm_ve-motherboard.dtsi"
+	};
+};
+
+/include/ "clcd-panels.dtsi"
diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a9x2.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a9x2.dts
new file mode 100644
index 00000000000..fca6b2f7967
--- /dev/null
+++ b/arch/arm/boot/dts/rtsm_ve-cortex_a9x2.dts
@@ -0,0 +1,171 @@
+/*
+ * ARM Ltd. Fast Models
+ *
+ * Versatile Express (VE) system model
+ * ARMCortexA9MPx2CT
+ *
+ * RTSM_VE_Cortex_A9x2.lisa
+ */
+
+/dts-v1/;
+
+/ {
+	model = "RTSM_VE_CortexA9x2";
+	arm,vexpress,site = <0xf>;
+	compatible = "arm,rtsm_ve,cortex_a9x2", "arm,vexpress";
+	interrupt-parent = <&gic>;
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	chosen { };
+
+	aliases {
+		serial0 = &v2m_serial0;
+		serial1 = &v2m_serial1;
+		serial2 = &v2m_serial2;
+		serial3 = &v2m_serial3;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a9";
+			reg = <0>;
+		};
+
+		cpu@1 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a9";
+			reg = <1>;
+		};
+	};
+
+	memory@80000000 {
+		device_type = "memory";
+		reg = <0x80000000 0x80000000>;
+	};
+
+	scu@2c000000 {
+		compatible = "arm,cortex-a9-scu";
+		reg = <0x2c000000 0x58>;
+	};
+
+	timer@2c000600 {
+		compatible = "arm,cortex-a9-twd-timer";
+		reg = <0x2c000600 0x20>;
+		interrupts = <1 13 0xf04>;
+	};
+
+	watchdog@2c000620 {
+		compatible = "arm,cortex-a9-twd-wdt";
+		reg = <0x2c000620 0x20>;
+		interrupts = <1 14 0xf04>;
+	};
+
+	gic: interrupt-controller@2c001000 {
+		compatible = "arm,cortex-a9-gic";
+		#interrupt-cells = <3>;
+		#address-cells = <0>;
+		interrupt-controller;
+		reg = <0x2c001000 0x1000>,
+		      <0x2c000100 0x100>;
+	};
+
+	dcc {
+		compatible = "arm,vexpress,config-bus";
+		arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+		osc@0 {
+			/* ACLK clock to the AXI master port on the test chip */
+			compatible = "arm,vexpress-osc";
+			arm,vexpress-sysreg,func = <1 0>;
+			freq-range = <30000000 50000000>;
+			#clock-cells = <0>;
+			clock-output-names = "extsaxiclk";
+		};
+
+		oscclk1: osc@1 {
+			/* Reference clock for the CLCD */
+			compatible = "arm,vexpress-osc";
+			arm,vexpress-sysreg,func = <1 1>;
+			freq-range = <10000000 80000000>;
+			#clock-cells = <0>;
+			clock-output-names = "clcdclk";
+		};
+
+		smbclk: oscclk2: osc@2 {
+			/* Reference clock for the test chip internal PLLs */
+			compatible = "arm,vexpress-osc";
+			arm,vexpress-sysreg,func = <1 2>;
+			freq-range = <33000000 100000000>;
+			#clock-cells = <0>;
+			clock-output-names = "tcrefclk";
+		};
+	};
+
+	smb {
+		compatible = "simple-bus";
+
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges = <0 0 0x08000000 0x04000000>,
+			 <1 0 0x14000000 0x04000000>,
+			 <2 0 0x18000000 0x04000000>,
+			 <3 0 0x1c000000 0x04000000>,
+			 <4 0 0x0c000000 0x04000000>,
+			 <5 0 0x10000000 0x04000000>;
+
+		#interrupt-cells = <1>;
+		interrupt-map-mask = <0 0 63>;
+		interrupt-map = <0 0  0 &gic 0  0 4>,
+				<0 0  1 &gic 0  1 4>,
+				<0 0  2 &gic 0  2 4>,
+				<0 0  3 &gic 0  3 4>,
+				<0 0  4 &gic 0  4 4>,
+				<0 0  5 &gic 0  5 4>,
+				<0 0  6 &gic 0  6 4>,
+				<0 0  7 &gic 0  7 4>,
+				<0 0  8 &gic 0  8 4>,
+				<0 0  9 &gic 0  9 4>,
+				<0 0 10 &gic 0 10 4>,
+				<0 0 11 &gic 0 11 4>,
+				<0 0 12 &gic 0 12 4>,
+				<0 0 13 &gic 0 13 4>,
+				<0 0 14 &gic 0 14 4>,
+				<0 0 15 &gic 0 15 4>,
+				<0 0 16 &gic 0 16 4>,
+				<0 0 17 &gic 0 17 4>,
+				<0 0 18 &gic 0 18 4>,
+				<0 0 19 &gic 0 19 4>,
+				<0 0 20 &gic 0 20 4>,
+				<0 0 21 &gic 0 21 4>,
+				<0 0 22 &gic 0 22 4>,
+				<0 0 23 &gic 0 23 4>,
+				<0 0 24 &gic 0 24 4>,
+				<0 0 25 &gic 0 25 4>,
+				<0 0 26 &gic 0 26 4>,
+				<0 0 27 &gic 0 27 4>,
+				<0 0 28 &gic 0 28 4>,
+				<0 0 29 &gic 0 29 4>,
+				<0 0 30 &gic 0 30 4>,
+				<0 0 31 &gic 0 31 4>,
+				<0 0 32 &gic 0 32 4>,
+				<0 0 33 &gic 0 33 4>,
+				<0 0 34 &gic 0 34 4>,
+				<0 0 35 &gic 0 35 4>,
+				<0 0 36 &gic 0 36 4>,
+				<0 0 37 &gic 0 37 4>,
+				<0 0 38 &gic 0 38 4>,
+				<0 0 39 &gic 0 39 4>,
+				<0 0 40 &gic 0 40 4>,
+				<0 0 41 &gic 0 41 4>,
+				<0 0 42 &gic 0 42 4>;
+
+		/include/ "rtsm_ve-motherboard.dtsi"
+	};
+};
+
+/include/ "clcd-panels.dtsi"
diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a9x4.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a9x4.dts
new file mode 100644
index 00000000000..fd8a6ed97a0
--- /dev/null
+++ b/arch/arm/boot/dts/rtsm_ve-cortex_a9x4.dts
@@ -0,0 +1,183 @@
+/*
+ * ARM Ltd. Fast Models
+ *
+ * Versatile Express (VE) system model
+ * ARMCortexA9MPx4CT
+ *
+ * RTSM_VE_Cortex_A9x4.lisa
+ */
+
+/dts-v1/;
+
+/ {
+	model = "RTSM_VE_CortexA9x4";
+	arm,vexpress,site = <0xf>;
+	compatible = "arm,rtsm_ve,cortex_a9x4", "arm,vexpress";
+	interrupt-parent = <&gic>;
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	chosen { };
+
+	aliases {
+		serial0 = &v2m_serial0;
+		serial1 = &v2m_serial1;
+		serial2 = &v2m_serial2;
+		serial3 = &v2m_serial3;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a9";
+			reg = <0>;
+		};
+
+		cpu@1 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a9";
+			reg = <1>;
+		};
+
+		cpu@2 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a9";
+			reg = <2>;
+		};
+
+		cpu@3 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a9";
+			reg = <3>;
+		};
+	};
+
+	memory@80000000 {
+		device_type = "memory";
+		reg = <0x80000000 0x80000000>;
+	};
+
+	scu@2c000000 {
+		compatible = "arm,cortex-a9-scu";
+		reg = <0x2c000000 0x58>;
+	};
+
+	timer@2c000600 {
+		compatible = "arm,cortex-a9-twd-timer";
+		reg = <0x2c000600 0x20>;
+		interrupts = <1 13 0xf04>;
+	};
+
+	watchdog@2c000620 {
+		compatible = "arm,cortex-a9-twd-wdt";
+		reg = <0x2c000620 0x20>;
+		interrupts = <1 14 0xf04>;
+	};
+
+	gic: interrupt-controller@2c001000 {
+		compatible = "arm,cortex-a9-gic";
+		#interrupt-cells = <3>;
+		#address-cells = <0>;
+		interrupt-controller;
+		reg = <0x2c001000 0x1000>,
+		      <0x2c000100 0x100>;
+	};
+
+	dcc {
+		compatible = "arm,vexpress,config-bus";
+		arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+		osc@0 {
+			/* ACLK clock to the AXI master port on the test chip */
+			compatible = "arm,vexpress-osc";
+			arm,vexpress-sysreg,func = <1 0>;
+			freq-range = <30000000 50000000>;
+			#clock-cells = <0>;
+			clock-output-names = "extsaxiclk";
+		};
+
+		oscclk1: osc@1 {
+			/* Reference clock for the CLCD */
+			compatible = "arm,vexpress-osc";
+			arm,vexpress-sysreg,func = <1 1>;
+			freq-range = <10000000 80000000>;
+			#clock-cells = <0>;
+			clock-output-names = "clcdclk";
+		};
+
+		smbclk: oscclk2: osc@2 {
+			/* Reference clock for the test chip internal PLLs */
+			compatible = "arm,vexpress-osc";
+			arm,vexpress-sysreg,func = <1 2>;
+			freq-range = <33000000 100000000>;
+			#clock-cells = <0>;
+			clock-output-names = "tcrefclk";
+		};
+	};
+
+	smb {
+		compatible = "simple-bus";
+
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges = <0 0 0x08000000 0x04000000>,
+			 <1 0 0x14000000 0x04000000>,
+			 <2 0 0x18000000 0x04000000>,
+			 <3 0 0x1c000000 0x04000000>,
+			 <4 0 0x0c000000 0x04000000>,
+			 <5 0 0x10000000 0x04000000>;
+
+		#interrupt-cells = <1>;
+		interrupt-map-mask = <0 0 63>;
+		interrupt-map = <0 0  0 &gic 0  0 4>,
+				<0 0  1 &gic 0  1 4>,
+				<0 0  2 &gic 0  2 4>,
+				<0 0  3 &gic 0  3 4>,
+				<0 0  4 &gic 0  4 4>,
+				<0 0  5 &gic 0  5 4>,
+				<0 0  6 &gic 0  6 4>,
+				<0 0  7 &gic 0  7 4>,
+				<0 0  8 &gic 0  8 4>,
+				<0 0  9 &gic 0  9 4>,
+				<0 0 10 &gic 0 10 4>,
+				<0 0 11 &gic 0 11 4>,
+				<0 0 12 &gic 0 12 4>,
+				<0 0 13 &gic 0 13 4>,
+				<0 0 14 &gic 0 14 4>,
+				<0 0 15 &gic 0 15 4>,
+				<0 0 16 &gic 0 16 4>,
+				<0 0 17 &gic 0 17 4>,
+				<0 0 18 &gic 0 18 4>,
+				<0 0 19 &gic 0 19 4>,
+				<0 0 20 &gic 0 20 4>,
+				<0 0 21 &gic 0 21 4>,
+				<0 0 22 &gic 0 22 4>,
+				<0 0 23 &gic 0 23 4>,
+				<0 0 24 &gic 0 24 4>,
+				<0 0 25 &gic 0 25 4>,
+				<0 0 26 &gic 0 26 4>,
+				<0 0 27 &gic 0 27 4>,
+				<0 0 28 &gic 0 28 4>,
+				<0 0 29 &gic 0 29 4>,
+				<0 0 30 &gic 0 30 4>,
+				<0 0 31 &gic 0 31 4>,
+				<0 0 32 &gic 0 32 4>,
+				<0 0 33 &gic 0 33 4>,
+				<0 0 34 &gic 0 34 4>,
+				<0 0 35 &gic 0 35 4>,
+				<0 0 36 &gic 0 36 4>,
+				<0 0 37 &gic 0 37 4>,
+				<0 0 38 &gic 0 38 4>,
+				<0 0 39 &gic 0 39 4>,
+				<0 0 40 &gic 0 40 4>,
+				<0 0 41 &gic 0 41 4>,
+				<0 0 42 &gic 0 42 4>;
+
+		/include/ "rtsm_ve-motherboard.dtsi"
+	};
+};
+
+/include/ "clcd-panels.dtsi"
diff --git a/arch/arm/boot/dts/rtsm_ve-motherboard.dtsi b/arch/arm/boot/dts/rtsm_ve-motherboard.dtsi
new file mode 100644
index 00000000000..a2d895ee5fa
--- /dev/null
+++ b/arch/arm/boot/dts/rtsm_ve-motherboard.dtsi
@@ -0,0 +1,231 @@
+/*
+ * ARM Ltd. Fast Models
+ *
+ * Versatile Express (VE) system model
+ * Motherboard component
+ *
+ * VEMotherBoard.lisa
+ */
+
+	motherboard {
+		compatible = "arm,vexpress,v2m-p1", "simple-bus";
+		arm,hbi = <0x190>;
+		arm,vexpress,site = <0>;
+		arm,v2m-memory-map = "rs1";
+		#address-cells = <2>; /* SMB chipselect number and offset */
+		#size-cells = <1>;
+		#interrupt-cells = <1>;
+		ranges;
+
+		flash@0,00000000 {
+			compatible = "arm,vexpress-flash", "cfi-flash";
+			reg = <0 0x00000000 0x04000000>,
+			      <4 0x00000000 0x04000000>;
+			bank-width = <4>;
+		};
+
+		vram@2,00000000 {
+			compatible = "arm,vexpress-vram";
+			reg = <2 0x00000000 0x00800000>;
+		};
+
+		ethernet@2,02000000 {
+			compatible = "smsc,lan91c111";
+			reg = <2 0x02000000 0x10000>;
+			interrupts = <15>;
+		};
+
+		iofpga@3,00000000 {
+			compatible = "arm,amba-bus", "simple-bus";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges = <0 3 0 0x200000>;
+
+			v2m_sysreg: sysreg@010000 {
+				compatible = "arm,vexpress-sysreg";
+				reg = <0x010000 0x1000>;
+				gpio-controller;
+				#gpio-cells = <2>;
+			};
+
+			v2m_sysctl: sysctl@020000 {
+				compatible = "arm,sp810", "arm,primecell";
+				reg = <0x020000 0x1000>;
+				clocks = <&v2m_refclk32khz>, <&v2m_refclk1mhz>, <&smbclk>;
+				clock-names = "refclk", "timclk", "apb_pclk";
+				#clock-cells = <1>;
+				clock-output-names = "timerclken0", "timerclken1", "timerclken2", "timerclken3";
+			};
+
+			aaci@040000 {
+				compatible = "arm,pl041", "arm,primecell";
+				reg = <0x040000 0x1000>;
+				interrupts = <11>;
+				clocks = <&smbclk>;
+				clock-names = "apb_pclk";
+			};
+
+			mmci@050000 {
+				compatible = "arm,pl180", "arm,primecell";
+				reg = <0x050000 0x1000>;
+				interrupts = <9 10>;
+				cd-gpios = <&v2m_sysreg 0 0>;
+				wp-gpios = <&v2m_sysreg 1 0>;
+				max-frequency = <12000000>;
+				vmmc-supply = <&v2m_fixed_3v3>;
+				clocks = <&v2m_clk24mhz>, <&smbclk>;
+				clock-names = "mclk", "apb_pclk";
+			};
+
+			kmi@060000 {
+				compatible = "arm,pl050", "arm,primecell";
+				reg = <0x060000 0x1000>;
+				interrupts = <12>;
+				clocks = <&v2m_clk24mhz>, <&smbclk>;
+				clock-names = "KMIREFCLK", "apb_pclk";
+			};
+
+			kmi@070000 {
+				compatible = "arm,pl050", "arm,primecell";
+				reg = <0x070000 0x1000>;
+				interrupts = <13>;
+				clocks = <&v2m_clk24mhz>, <&smbclk>;
+				clock-names = "KMIREFCLK", "apb_pclk";
+			};
+
+			v2m_serial0: uart@090000 {
+				compatible = "arm,pl011", "arm,primecell";
+				reg = <0x090000 0x1000>;
+				interrupts = <5>;
+				clocks = <&v2m_clk24mhz>, <&smbclk>;
+				clock-names = "uartclk", "apb_pclk";
+			};
+
+			v2m_serial1: uart@0a0000 {
+				compatible = "arm,pl011", "arm,primecell";
+				reg = <0x0a0000 0x1000>;
+				interrupts = <6>;
+				clocks = <&v2m_clk24mhz>, <&smbclk>;
+				clock-names = "uartclk", "apb_pclk";
+			};
+
+			v2m_serial2: uart@0b0000 {
+				compatible = "arm,pl011", "arm,primecell";
+				reg = <0x0b0000 0x1000>;
+				interrupts = <7>;
+				clocks = <&v2m_clk24mhz>, <&smbclk>;
+				clock-names = "uartclk", "apb_pclk";
+			};
+
+			v2m_serial3: uart@0c0000 {
+				compatible = "arm,pl011", "arm,primecell";
+				reg = <0x0c0000 0x1000>;
+				interrupts = <8>;
+				clocks = <&v2m_clk24mhz>, <&smbclk>;
+				clock-names = "uartclk", "apb_pclk";
+			};
+
+			wdt@0f0000 {
+				compatible = "arm,sp805", "arm,primecell";
+				reg = <0x0f0000 0x1000>;
+				interrupts = <0>;
+				clocks = <&v2m_refclk32khz>, <&smbclk>;
+				clock-names = "wdogclk", "apb_pclk";
+			};
+
+			v2m_timer01: timer@110000 {
+				compatible = "arm,sp804", "arm,primecell";
+				reg = <0x110000 0x1000>;
+				interrupts = <2>;
+				clocks = <&v2m_sysctl 0>, <&v2m_sysctl 1>, <&smbclk>;
+				clock-names = "timclken1", "timclken2", "apb_pclk";
+			};
+
+			v2m_timer23: timer@120000 {
+				compatible = "arm,sp804", "arm,primecell";
+				reg = <0x120000 0x1000>;
+				interrupts = <3>;
+				clocks = <&v2m_sysctl 2>, <&v2m_sysctl 3>, <&smbclk>;
+				clock-names = "timclken1", "timclken2", "apb_pclk";
+			};
+
+			rtc@170000 {
+				compatible = "arm,pl031", "arm,primecell";
+				reg = <0x170000 0x1000>;
+				interrupts = <4>;
+				clocks = <&smbclk>;
+				clock-names = "apb_pclk";
+			};
+
+			clcd@1f0000 {
+				compatible = "arm,pl111", "arm,primecell";
+				reg = <0x1f0000 0x1000>;
+				interrupts = <14>;
+				clocks = <&v2m_oscclk1>, <&smbclk>;
+				clock-names = "v2m:oscclk1", "apb_pclk";
+				mode = "VGA";
+				use_dma = <0>;
+				framebuffer = <0x18000000 0x00180000>;
+			};
+
+			virtio_block@0130000 {
+				compatible = "virtio,mmio";
+				reg = <0x130000 0x200>;
+				interrupts = <42>;
+			};
+
+		};
+
+		v2m_fixed_3v3: fixedregulator@0 {
+			compatible = "regulator-fixed";
+			regulator-name = "3V3";
+			regulator-min-microvolt = <3300000>;
+			regulator-max-microvolt = <3300000>;
+			regulator-always-on;
+		};
+
+		v2m_clk24mhz: clk24mhz {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <24000000>;
+			clock-output-names = "v2m:clk24mhz";
+		};
+
+		v2m_refclk1mhz: refclk1mhz {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <1000000>;
+			clock-output-names = "v2m:refclk1mhz";
+		};
+
+		v2m_refclk32khz: refclk32khz {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <32768>;
+			clock-output-names = "v2m:refclk32khz";
+		};
+
+		mcc {
+			compatible = "simple-bus";
+			arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+			v2m_oscclk1: osc@1 {
+				/* CLCD clock */
+				compatible = "arm,vexpress-osc";
+				arm,vexpress-sysreg,func = <1 1>;
+				freq-range = <23750000 63500000>;
+				#clock-cells = <0>;
+				clock-output-names = "v2m:oscclk1";
+			};
+
+			muxfpga@0 {
+				compatible = "arm,vexpress-muxfpga";
+				arm,vexpress-sysreg,func = <7 0>;
+			};
+
+			shutdown@0 {
+				compatible = "arm,vexpress-shutdown";
+				arm,vexpress-sysreg,func = <8 0>;
+			};
+		};
+	};
diff --git a/arch/arm/boot/dts/rtsm_ve-v2p-ca15x1-ca7x1.dts b/arch/arm/boot/dts/rtsm_ve-v2p-ca15x1-ca7x1.dts
new file mode 100644
index 00000000000..fe8cf5dc857
--- /dev/null
+++ b/arch/arm/boot/dts/rtsm_ve-v2p-ca15x1-ca7x1.dts
@@ -0,0 +1,244 @@
+/*
+ * ARM Ltd. Fast Models
+ *
+ * Versatile Express (VE) system model
+ * ARMCortexA15x4CT
+ * ARMCortexA7x4CT
+ * RTSM_VE_Cortex_A15x1_A7x1.lisa
+ */
+
+/dts-v1/;
+
+/memreserve/ 0xff000000 0x01000000;
+
+/ {
+	model = "RTSM_VE_CortexA15x1-A7x1";
+	arm,vexpress,site = <0xf>;
+	compatible = "arm,rtsm_ve,cortex_a15x1_a7x1", "arm,vexpress";
+	interrupt-parent = <&gic>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	chosen { };
+
+	aliases {
+		serial0 = &v2m_serial0;
+		serial1 = &v2m_serial1;
+		serial2 = &v2m_serial2;
+		serial3 = &v2m_serial3;
+	};
+
+	clusters {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cluster0: cluster@0 {
+			reg = <0>;
+//			freqs = <500000000 600000000 700000000 800000000 900000000 1000000000 1100000000 1200000000>;
+			cores {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				core0: core@0 {
+					reg = <0>;
+				};
+
+			};
+		};
+
+		cluster1: cluster@1 {
+			reg = <1>;
+//			freqs = <350000000 400000000 500000000 600000000 700000000 800000000 900000000 1000000000>;
+			cores {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				core1: core@0 {
+					reg = <0>;
+				};
+
+			};
+		};
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu0: cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a15";
+			reg = <0>;
+			cluster = <&cluster0>;
+			core = <&core0>;
+//			clock-frequency = <1000000000>;
+			cci-control-port = <&cci_control1>;
+		};
+
+		cpu1: cpu@1 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a7";
+			reg = <0x100>;
+			cluster = <&cluster1>;
+			core = <&core1>;
+//			clock-frequency = <800000000>;
+			cci-control-port = <&cci_control2>;
+		};
+	};
+
+	memory@80000000 {
+		device_type = "memory";
+		reg = <0 0x80000000 0 0x80000000>;
+	};
+
+	cci@2c090000 {
+		compatible = "arm,cci-400", "arm,cci";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0 0x2c090000 0 0x1000>;
+		ranges = <0x0 0x0 0x2c090000 0x10000>;
+
+		cci_control1: slave-if@4000 {
+			compatible = "arm,cci-400-ctrl-if";
+			interface-type = "ace";
+			reg = <0x4000 0x1000>;
+		};
+
+		cci_control2: slave-if@5000 {
+			compatible = "arm,cci-400-ctrl-if";
+			interface-type = "ace";
+			reg = <0x5000 0x1000>;
+		};
+	};
+
+	dcscb@60000000 {
+		compatible = "arm,rtsm,dcscb";
+		reg = <0 0x60000000 0 0x1000>;
+	};
+
+	gic: interrupt-controller@2c001000 {
+		compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
+		#interrupt-cells = <3>;
+		#address-cells = <0>;
+		interrupt-controller;
+		reg = <0 0x2c001000 0 0x1000>,
+		      <0 0x2c002000 0 0x1000>,
+		      <0 0x2c004000 0 0x2000>,
+		      <0 0x2c006000 0 0x2000>;
+		interrupts = <1 9 0xf04>;
+
+		gic-cpuif@0 {
+			compatible = "arm,gic-cpuif";
+			cpuif-id = <0>;
+			cpu = <&cpu0>;
+		};
+		gic-cpuif@1 {
+			compatible = "arm,gic-cpuif";
+			cpuif-id = <1>;
+			cpu = <&cpu1>;
+		};
+	};
+
+	timer {
+		compatible = "arm,armv7-timer";
+		interrupts = <1 13 0xf08>,
+			     <1 14 0xf08>,
+			     <1 11 0xf08>,
+			     <1 10 0xf08>;
+	};
+
+	dcc {
+		compatible = "arm,vexpress,config-bus";
+		arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+		osc@0 {
+			/* ACLK clock to the AXI master port on the test chip */
+			compatible = "arm,vexpress-osc";
+			arm,vexpress-sysreg,func = <1 0>;
+			freq-range = <30000000 50000000>;
+			#clock-cells = <0>;
+			clock-output-names = "extsaxiclk";
+		};
+
+		oscclk1: osc@1 {
+			/* Reference clock for the CLCD */
+			compatible = "arm,vexpress-osc";
+			arm,vexpress-sysreg,func = <1 1>;
+			freq-range = <10000000 80000000>;
+			#clock-cells = <0>;
+			clock-output-names = "clcdclk";
+		};
+
+		smbclk: oscclk2: osc@2 {
+			/* Reference clock for the test chip internal PLLs */
+			compatible = "arm,vexpress-osc";
+			arm,vexpress-sysreg,func = <1 2>;
+			freq-range = <33000000 100000000>;
+			#clock-cells = <0>;
+			clock-output-names = "tcrefclk";
+		};
+	};
+
+	smb {
+		compatible = "simple-bus";
+
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges = <0 0 0 0x08000000 0x04000000>,
+			 <1 0 0 0x14000000 0x04000000>,
+			 <2 0 0 0x18000000 0x04000000>,
+			 <3 0 0 0x1c000000 0x04000000>,
+			 <4 0 0 0x0c000000 0x04000000>,
+			 <5 0 0 0x10000000 0x04000000>;
+
+		#interrupt-cells = <1>;
+		interrupt-map-mask = <0 0 63>;
+		interrupt-map = <0 0  0 &gic 0  0 4>,
+				<0 0  1 &gic 0  1 4>,
+				<0 0  2 &gic 0  2 4>,
+				<0 0  3 &gic 0  3 4>,
+				<0 0  4 &gic 0  4 4>,
+				<0 0  5 &gic 0  5 4>,
+				<0 0  6 &gic 0  6 4>,
+				<0 0  7 &gic 0  7 4>,
+				<0 0  8 &gic 0  8 4>,
+				<0 0  9 &gic 0  9 4>,
+				<0 0 10 &gic 0 10 4>,
+				<0 0 11 &gic 0 11 4>,
+				<0 0 12 &gic 0 12 4>,
+				<0 0 13 &gic 0 13 4>,
+				<0 0 14 &gic 0 14 4>,
+				<0 0 15 &gic 0 15 4>,
+				<0 0 16 &gic 0 16 4>,
+				<0 0 17 &gic 0 17 4>,
+				<0 0 18 &gic 0 18 4>,
+				<0 0 19 &gic 0 19 4>,
+				<0 0 20 &gic 0 20 4>,
+				<0 0 21 &gic 0 21 4>,
+				<0 0 22 &gic 0 22 4>,
+				<0 0 23 &gic 0 23 4>,
+				<0 0 24 &gic 0 24 4>,
+				<0 0 25 &gic 0 25 4>,
+				<0 0 26 &gic 0 26 4>,
+				<0 0 27 &gic 0 27 4>,
+				<0 0 28 &gic 0 28 4>,
+				<0 0 29 &gic 0 29 4>,
+				<0 0 30 &gic 0 30 4>,
+				<0 0 31 &gic 0 31 4>,
+				<0 0 32 &gic 0 32 4>,
+				<0 0 33 &gic 0 33 4>,
+				<0 0 34 &gic 0 34 4>,
+				<0 0 35 &gic 0 35 4>,
+				<0 0 36 &gic 0 36 4>,
+				<0 0 37 &gic 0 37 4>,
+				<0 0 38 &gic 0 38 4>,
+				<0 0 39 &gic 0 39 4>,
+				<0 0 40 &gic 0 40 4>,
+				<0 0 41 &gic 0 41 4>,
+				<0 0 42 &gic 0 42 4>;
+
+		/include/ "rtsm_ve-motherboard.dtsi"
+	};
+};
+
+/include/ "clcd-panels.dtsi"
diff --git a/arch/arm/boot/dts/rtsm_ve-v2p-ca15x4-ca7x4.dts b/arch/arm/boot/dts/rtsm_ve-v2p-ca15x4-ca7x4.dts
new file mode 100644
index 00000000000..f715285131d
--- /dev/null
+++ b/arch/arm/boot/dts/rtsm_ve-v2p-ca15x4-ca7x4.dts
@@ -0,0 +1,358 @@
+/*
+ * ARM Ltd. Fast Models
+ *
+ * Versatile Express (VE) system model
+ * ARMCortexA15x4CT
+ * ARMCortexA7x4CT
+ * RTSM_VE_Cortex_A15x4_A7x4.lisa
+ */
+
+/dts-v1/;
+
+/memreserve/ 0xff000000 0x01000000;
+
+/ {
+	model = "RTSM_VE_CortexA15x4-A7x4";
+	arm,vexpress,site = <0xf>;
+	compatible = "arm,rtsm_ve,cortex_a15x4_a7x4", "arm,vexpress";
+	interrupt-parent = <&gic>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	chosen { };
+
+	aliases {
+		serial0 = &v2m_serial0;
+		serial1 = &v2m_serial1;
+		serial2 = &v2m_serial2;
+		serial3 = &v2m_serial3;
+	};
+
+	clusters {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cluster0: cluster@0 {
+			reg = <0>;
+//			freqs = <500000000 600000000 700000000 800000000 900000000 1000000000 1100000000 1200000000>;
+			cores {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				core0: core@0 {
+					reg = <0>;
+				};
+
+				core1: core@1 {
+					reg = <1>;
+				};
+
+				core2: core@2 {
+					reg = <2>;
+				};
+
+				core3: core@3 {
+					reg = <3>;
+				};
+
+			};
+		};
+
+		cluster1: cluster@1 {
+			reg = <1>;
+//			freqs = <350000000 400000000 500000000 600000000 700000000 800000000 900000000 1000000000>;
+			cores {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				core4: core@0 {
+					reg = <0>;
+				};
+
+				core5: core@1 {
+					reg = <1>;
+				};
+
+				core6: core@2 {
+					reg = <2>;
+				};
+				
+				core7: core@3 {
+					reg = <3>;
+				};
+				
+			};
+		};
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu0: cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a15";
+			reg = <0>;
+			cluster = <&cluster0>;
+			core = <&core0>;
+//			clock-frequency = <1000000000>;
+			cci-control-port = <&cci_control1>;
+		};
+
+		cpu1: cpu@1 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a15";
+			reg = <1>;
+			cluster = <&cluster0>;
+			core = <&core1>;
+//			clock-frequency = <1000000000>;
+			cci-control-port = <&cci_control1>;
+		};
+
+		cpu2: cpu@2 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a15";
+			reg = <2>;
+			cluster = <&cluster0>;
+			core = <&core2>;
+//			clock-frequency = <1000000000>;
+			cci-control-port = <&cci_control1>;
+		};
+
+		cpu3: cpu@3 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a15";
+			reg = <3>;
+			cluster = <&cluster0>;
+			core = <&core3>;
+//			clock-frequency = <1000000000>;
+			cci-control-port = <&cci_control1>;
+		};
+
+		cpu4: cpu@4 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a7";
+			reg = <0x100>;
+			cluster = <&cluster1>;
+			core = <&core4>;
+//			clock-frequency = <800000000>;
+			cci-control-port = <&cci_control2>;
+		};
+
+		cpu5: cpu@5 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a7";
+			reg = <0x101>;
+			cluster = <&cluster1>;
+			core = <&core5>;
+//			clock-frequency = <800000000>;
+			cci-control-port = <&cci_control2>;
+		};
+		
+		cpu6: cpu@6 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a7";
+			reg = <0x102>;
+			cluster = <&cluster1>;
+			core = <&core6>;
+//			clock-frequency = <800000000>;
+			cci-control-port = <&cci_control2>;
+		};
+		
+		cpu7: cpu@7 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a7";
+			reg = <0x103>;
+			cluster = <&cluster1>;
+			core = <&core7>;
+//			clock-frequency = <800000000>;
+			cci-control-port = <&cci_control2>;
+		};
+	};
+
+	memory@80000000 {
+		device_type = "memory";
+		reg = <0 0x80000000 0 0x80000000>;
+	};
+
+	cci@2c090000 {
+		compatible = "arm,cci-400", "arm,cci";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0 0x2c090000 0 0x1000>;
+		ranges = <0x0 0x0 0x2c090000 0x10000>;
+
+		cci_control1: slave-if@4000 {
+			compatible = "arm,cci-400-ctrl-if";
+			interface-type = "ace";
+			reg = <0x4000 0x1000>;
+		};
+
+		cci_control2: slave-if@5000 {
+			compatible = "arm,cci-400-ctrl-if";
+			interface-type = "ace";
+			reg = <0x5000 0x1000>;
+		};
+	};
+
+	dcscb@60000000 {
+		compatible = "arm,rtsm,dcscb";
+		reg = <0 0x60000000 0 0x1000>;
+	};
+
+	gic: interrupt-controller@2c001000 {
+		compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
+		#interrupt-cells = <3>;
+		#address-cells = <0>;
+		interrupt-controller;
+		reg = <0 0x2c001000 0 0x1000>,
+		      <0 0x2c002000 0 0x1000>,
+		      <0 0x2c004000 0 0x2000>,
+		      <0 0x2c006000 0 0x2000>;
+		interrupts = <1 9 0xf04>;
+
+		gic-cpuif@0 {
+			compatible = "arm,gic-cpuif";
+			cpuif-id = <0>;
+			cpu = <&cpu0>;
+		};
+		gic-cpuif@1 {
+			compatible = "arm,gic-cpuif";
+			cpuif-id = <1>;
+			cpu = <&cpu1>;
+		};
+		gic-cpuif@2 {
+			compatible = "arm,gic-cpuif";
+			cpuif-id = <2>;
+			cpu = <&cpu2>;
+		};
+		gic-cpuif@3 {
+			compatible = "arm,gic-cpuif";
+			cpuif-id = <3>;
+			cpu = <&cpu3>;
+		};
+		gic-cpuif@4 {
+			compatible = "arm,gic-cpuif";
+			cpuif-id = <4>;
+			cpu = <&cpu4>;
+		};
+		gic-cpuif@5 {
+			compatible = "arm,gic-cpuif";
+			cpuif-id = <5>;
+			cpu = <&cpu5>;
+		};
+		gic-cpuif@6 {
+			compatible = "arm,gic-cpuif";
+			cpuif-id = <6>;
+			cpu = <&cpu6>;
+		};
+		gic-cpuif@7 {
+			compatible = "arm,gic-cpuif";
+			cpuif-id = <7>;
+			cpu = <&cpu7>;
+		};
+	};
+
+	timer {
+		compatible = "arm,armv7-timer";
+		interrupts = <1 13 0xf08>,
+			     <1 14 0xf08>,
+			     <1 11 0xf08>,
+			     <1 10 0xf08>;
+	};
+
+	dcc {
+		compatible = "arm,vexpress,config-bus";
+		arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+		osc@0 {
+			/* ACLK clock to the AXI master port on the test chip */
+			compatible = "arm,vexpress-osc";
+			arm,vexpress-sysreg,func = <1 0>;
+			freq-range = <30000000 50000000>;
+			#clock-cells = <0>;
+			clock-output-names = "extsaxiclk";
+		};
+
+		oscclk1: osc@1 {
+			/* Reference clock for the CLCD */
+			compatible = "arm,vexpress-osc";
+			arm,vexpress-sysreg,func = <1 1>;
+			freq-range = <10000000 80000000>;
+			#clock-cells = <0>;
+			clock-output-names = "clcdclk";
+		};
+
+		smbclk: oscclk2: osc@2 {
+			/* Reference clock for the test chip internal PLLs */
+			compatible = "arm,vexpress-osc";
+			arm,vexpress-sysreg,func = <1 2>;
+			freq-range = <33000000 100000000>;
+			#clock-cells = <0>;
+			clock-output-names = "tcrefclk";
+		};
+	};
+
+	smb {
+		compatible = "simple-bus";
+
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges = <0 0 0 0x08000000 0x04000000>,
+			 <1 0 0 0x14000000 0x04000000>,
+			 <2 0 0 0x18000000 0x04000000>,
+			 <3 0 0 0x1c000000 0x04000000>,
+			 <4 0 0 0x0c000000 0x04000000>,
+			 <5 0 0 0x10000000 0x04000000>;
+
+		#interrupt-cells = <1>;
+		interrupt-map-mask = <0 0 63>;
+		interrupt-map = <0 0  0 &gic 0  0 4>,
+				<0 0  1 &gic 0  1 4>,
+				<0 0  2 &gic 0  2 4>,
+				<0 0  3 &gic 0  3 4>,
+				<0 0  4 &gic 0  4 4>,
+				<0 0  5 &gic 0  5 4>,
+				<0 0  6 &gic 0  6 4>,
+				<0 0  7 &gic 0  7 4>,
+				<0 0  8 &gic 0  8 4>,
+				<0 0  9 &gic 0  9 4>,
+				<0 0 10 &gic 0 10 4>,
+				<0 0 11 &gic 0 11 4>,
+				<0 0 12 &gic 0 12 4>,
+				<0 0 13 &gic 0 13 4>,
+				<0 0 14 &gic 0 14 4>,
+				<0 0 15 &gic 0 15 4>,
+				<0 0 16 &gic 0 16 4>,
+				<0 0 17 &gic 0 17 4>,
+				<0 0 18 &gic 0 18 4>,
+				<0 0 19 &gic 0 19 4>,
+				<0 0 20 &gic 0 20 4>,
+				<0 0 21 &gic 0 21 4>,
+				<0 0 22 &gic 0 22 4>,
+				<0 0 23 &gic 0 23 4>,
+				<0 0 24 &gic 0 24 4>,
+				<0 0 25 &gic 0 25 4>,
+				<0 0 26 &gic 0 26 4>,
+				<0 0 27 &gic 0 27 4>,
+				<0 0 28 &gic 0 28 4>,
+				<0 0 29 &gic 0 29 4>,
+				<0 0 30 &gic 0 30 4>,
+				<0 0 31 &gic 0 31 4>,
+				<0 0 32 &gic 0 32 4>,
+				<0 0 33 &gic 0 33 4>,
+				<0 0 34 &gic 0 34 4>,
+				<0 0 35 &gic 0 35 4>,
+				<0 0 36 &gic 0 36 4>,
+				<0 0 37 &gic 0 37 4>,
+				<0 0 38 &gic 0 38 4>,
+				<0 0 39 &gic 0 39 4>,
+				<0 0 40 &gic 0 40 4>,
+				<0 0 41 &gic 0 41 4>,
+				<0 0 42 &gic 0 42 4>;
+
+		/include/ "rtsm_ve-motherboard.dtsi"
+	};
+};
+
+/include/ "clcd-panels.dtsi"
diff --git a/arch/arm/boot/dts/sama5d3.dtsi b/arch/arm/boot/dts/sama5d3.dtsi
index 5000e0d4284..642775d7ca6 100644
--- a/arch/arm/boot/dts/sama5d3.dtsi
+++ b/arch/arm/boot/dts/sama5d3.dtsi
@@ -35,8 +35,12 @@
 		ssc1 = &ssc1;
 	};
 	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
 		cpu@0 {
+			device_type = "cpu";
 			compatible = "arm,cortex-a5";
+			reg = <0x0>;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi
index e7ef619a70a..06ef8b625db 100644
--- a/arch/arm/boot/dts/sun4i-a10.dtsi
+++ b/arch/arm/boot/dts/sun4i-a10.dtsi
@@ -16,8 +16,12 @@
 	interrupt-parent = <&intc>;
 
 	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
 		cpu@0 {
+			device_type = "cpu";
 			compatible = "arm,cortex-a8";
+			reg = <0x0>;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi
index 31fa38f8cc9..d2852547b57 100644
--- a/arch/arm/boot/dts/sun5i-a13.dtsi
+++ b/arch/arm/boot/dts/sun5i-a13.dtsi
@@ -17,8 +17,12 @@
 	interrupt-parent = <&intc>;
 
 	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
 		cpu@0 {
+			device_type = "cpu";
 			compatible = "arm,cortex-a8";
+			reg = <0x0>;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi b/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi
index ac870fb3fa0..9584232ee6b 100644
--- a/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi
+++ b/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi
@@ -228,6 +228,7 @@
 			};
 
 			clcd@1f0000 {
+				status = "disabled";
 				compatible = "arm,pl111", "arm,primecell";
 				reg = <0x1f0000 0x1000>;
 				interrupts = <14>;
diff --git a/arch/arm/boot/dts/vexpress-v2m.dtsi b/arch/arm/boot/dts/vexpress-v2m.dtsi
index f1420368355..6593398c11a 100644
--- a/arch/arm/boot/dts/vexpress-v2m.dtsi
+++ b/arch/arm/boot/dts/vexpress-v2m.dtsi
@@ -227,6 +227,7 @@
 			};
 
 			clcd@1f000 {
+				status = "disabled";
 				compatible = "arm,pl111", "arm,primecell";
 				reg = <0x1f000 0x1000>;
 				interrupts = <14>;
diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts b/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts
index 9420053acc1..cc6a8c0cfe3 100644
--- a/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts
+++ b/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts
@@ -9,6 +9,8 @@
 
 /dts-v1/;
 
+/memreserve/ 0xbf000000 0x01000000;
+
 / {
 	model = "V2P-CA15";
 	arm,hbi = <0x237>;
@@ -57,6 +59,8 @@
 		interrupts = <0 85 4>;
 		clocks = <&oscclk5>;
 		clock-names = "pxlclk";
+		mode = "1024x768-16@60";
+		framebuffer = <0 0xff000000 0 0x01000000>;
 	};
 
 	memory-controller@2b0a0000 {
diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
index d2803be4e1a..f1dc620c5c4 100644
--- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
+++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
@@ -9,11 +9,13 @@
 
 /dts-v1/;
 
+/memreserve/ 0xff000000 0x01000000;
+
 / {
 	model = "V2P-CA15_CA7";
 	arm,hbi = <0x249>;
 	arm,vexpress,site = <0xf>;
-	compatible = "arm,vexpress,v2p-ca15_a7", "arm,vexpress";
+	compatible = "arm,vexpress,v2p-ca15_a7", "arm,vexpress", "arm,generic";
 	interrupt-parent = <&gic>;
 	#address-cells = <2>;
 	#size-cells = <2>;
@@ -29,44 +31,106 @@
 		i2c1 = &v2m_i2c_pcie;
 	};
 
-	cpus {
+	clusters {
 		#address-cells = <1>;
 		#size-cells = <0>;
 
-		cpu0: cpu@0 {
-			device_type = "cpu";
-			compatible = "arm,cortex-a15";
+		cluster0: cluster@0 {
 			reg = <0>;
+			cores {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				core0: core@0 {
+					reg = <0>;
+				};
+
+				core1: core@1 {
+					reg = <1>;
+				};
+
+			};
 		};
 
-		cpu1: cpu@1 {
-			device_type = "cpu";
-			compatible = "arm,cortex-a15";
+		cluster1: cluster@1 {
 			reg = <1>;
+			cores {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				core2: core@0 {
+					reg = <0>;
+				};
+
+				core3: core@1 {
+					reg = <1>;
+				};
+
+				core4: core@2 {
+					reg = <2>;
+				};
+			};
 		};
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
 
 		cpu2: cpu@2 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a7";
 			reg = <0x100>;
+			cluster = <&cluster1>;
+			core = <&core2>;
+			clock-frequency = <800000000>;
+			cci-control-port = <&cci_control2>;
 		};
 
 		cpu3: cpu@3 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a7";
 			reg = <0x101>;
+			cluster = <&cluster1>;
+			core = <&core3>;
+			clock-frequency = <800000000>;
+			cci-control-port = <&cci_control2>;
 		};
 
 		cpu4: cpu@4 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a7";
 			reg = <0x102>;
+			cluster = <&cluster1>;
+			core = <&core4>;
+			clock-frequency = <800000000>;
+			cci-control-port = <&cci_control2>;
+		};
+
+		cpu0: cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a15";
+			reg = <0>;
+			cluster = <&cluster0>;
+			core = <&core0>;
+			clock-frequency = <1000000000>;
+			cci-control-port = <&cci_control1>;
+		};
+
+		cpu1: cpu@1 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a15";
+			reg = <1>;
+			cluster = <&cluster0>;
+			core = <&core1>;
+			clock-frequency = <1000000000>;
+			cci-control-port = <&cci_control1>;
 		};
 	};
 
 	memory@80000000 {
 		device_type = "memory";
-		reg = <0 0x80000000 0 0x40000000>;
+		reg = <0 0x80000000 0 0x80000000>;
 	};
 
 	wdt@2a490000 {
@@ -81,6 +145,8 @@
 		compatible = "arm,hdlcd";
 		reg = <0 0x2b000000 0 0x1000>;
 		interrupts = <0 85 4>;
+		mode = "1024x768-16@60";
+		framebuffer = <0 0xff000000 0 0x01000000>;
 		clocks = <&oscclk5>;
 		clock-names = "pxlclk";
 	};
@@ -102,6 +168,64 @@
 		      <0 0x2c004000 0 0x2000>,
 		      <0 0x2c006000 0 0x2000>;
 		interrupts = <1 9 0xf04>;
+
+		gic-cpuif@0 {
+			compatible = "arm,gic-cpuif";
+			cpuif-id = <0>;
+			cpu = <&cpu0>;
+		};
+		gic-cpuif@1 {
+			compatible = "arm,gic-cpuif";
+			cpuif-id = <1>;
+			cpu = <&cpu1>;
+		};
+		gic-cpuif@2 {
+			compatible = "arm,gic-cpuif";
+			cpuif-id = <2>;
+			cpu = <&cpu2>;
+		};
+
+		gic-cpuif@3 {
+			compatible = "arm,gic-cpuif";
+			cpuif-id = <3>;
+			cpu = <&cpu3>;
+		};
+
+		gic-cpuif@4 {
+			compatible = "arm,gic-cpuif";
+			cpuif-id = <4>;
+			cpu = <&cpu4>;
+		};
+	};
+
+	cci@2c090000 {
+		compatible = "arm,cci-400";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0 0x2c090000 0 0x1000>;
+		ranges = <0x0 0x0 0x2c090000 0x10000>;
+
+		cci_control1: slave-if@4000 {
+			compatible = "arm,cci-400-ctrl-if";
+			interface-type = "ace";
+			reg = <0x4000 0x1000>;
+		};
+
+		cci_control2: slave-if@5000 {
+			compatible = "arm,cci-400-ctrl-if";
+			interface-type = "ace";
+			reg = <0x5000 0x1000>;
+		};
+	};
+
+	cci-pmu@2c099000 {
+		compatible = "arm,cci-400-pmu";
+		reg = <0 0x2c099000 0 0x6000>;
+		interrupts = <0 101 4>,
+			     <0 102 4>,
+			     <0 103 4>,
+			     <0 104 4>,
+			     <0 105 4>;
 	};
 
 	memory-controller@7ffd0000 {
@@ -125,6 +249,12 @@
 		clock-names = "apb_pclk";
 	};
 
+	spc@7fff0000 {
+		compatible = "arm,vexpress-spc,v2p-ca15_a7","arm,vexpress-spc";
+		reg = <0 0x7fff0000 0 0x1000>;
+		interrupts = <0 95 4>;
+	};
+
 	timer {
 		compatible = "arm,armv7-timer";
 		interrupts = <1 13 0xf08>,
@@ -133,12 +263,21 @@
 			     <1 10 0xf08>;
 	};
 
-	pmu {
+	pmu_a15 {
 		compatible = "arm,cortex-a15-pmu";
+		cluster  = <&cluster0>;
 		interrupts = <0 68 4>,
 			     <0 69 4>;
 	};
 
+	pmu_a7 {
+		compatible = "arm,cortex-a7-pmu";
+		cluster  = <&cluster1>;
+		interrupts = <0 128 4>,
+			     <0 129 4>,
+			     <0 130 4>;
+	};
+
 	oscclk6a: oscclk6a {
 		/* Reference 24MHz clock */
 		compatible = "fixed-clock";
@@ -147,6 +286,15 @@
 		clock-output-names = "oscclk6a";
 	};
 
+	psci {
+		compatible      = "arm,psci";
+		method          = "smc";
+		cpu_suspend     = <0x80100001>;
+		cpu_off         = <0x80100002>;
+		cpu_on          = <0x80100003>;
+		migrate         = <0x80100004>;
+	};
+
 	dcc {
 		compatible = "arm,vexpress,config-bus";
 		arm,vexpress,config-bridge = <&v2m_sysreg>;
diff --git a/arch/arm/boot/dts/vexpress-v2p-ca5s.dts b/arch/arm/boot/dts/vexpress-v2p-ca5s.dts
index c544a550459..cf633ed6a1b 100644
--- a/arch/arm/boot/dts/vexpress-v2p-ca5s.dts
+++ b/arch/arm/boot/dts/vexpress-v2p-ca5s.dts
@@ -9,6 +9,8 @@
 
 /dts-v1/;
 
+/memreserve/ 0xbf000000 0x01000000;
+
 / {
 	model = "V2P-CA5s";
 	arm,hbi = <0x225>;
@@ -59,6 +61,8 @@
 		interrupts = <0 85 4>;
 		clocks = <&oscclk3>;
 		clock-names = "pxlclk";
+		mode = "640x480-16@60";
+		framebuffer = <0xbf000000 0x01000000>;
 	};
 
 	memory-controller@2a150000 {
diff --git a/arch/arm/boot/dts/vexpress-v2p-ca9.dts b/arch/arm/boot/dts/vexpress-v2p-ca9.dts
index 62d9b225dcc..f83706bd3f9 100644
--- a/arch/arm/boot/dts/vexpress-v2p-ca9.dts
+++ b/arch/arm/boot/dts/vexpress-v2p-ca9.dts
@@ -9,6 +9,8 @@
 
 /dts-v1/;
 
+/include/ "clcd-panels.dtsi"
+
 / {
 	model = "V2P-CA9";
 	arm,hbi = <0x191>;
@@ -73,6 +75,8 @@
 		interrupts = <0 44 4>;
 		clocks = <&oscclk1>, <&oscclk2>;
 		clock-names = "clcdclk", "apb_pclk";
+		mode = "XVGA";
+		use_dma = <1>;
 	};
 
 	memory-controller@100e0000 {
diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
index 48434cbe3e8..462cd580fc2 100644
--- a/arch/arm/common/Makefile
+++ b/arch/arm/common/Makefile
@@ -14,5 +14,9 @@ obj-$(CONFIG_SHARP_SCOOP)	+= scoop.o
 obj-$(CONFIG_PCI_HOST_ITE8152)  += it8152.o
 obj-$(CONFIG_ARM_TIMER_SP804)	+= timer-sp.o
 obj-$(CONFIG_MCPM)		+= mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o
+obj-$(CONFIG_BL_SWITCHER)	+= bL_switcher.o
+obj-$(CONFIG_BL_SWITCHER_DUMMY_IF) += bL_switcher_dummy_if.o
+
 AFLAGS_mcpm_head.o		:= -march=armv7-a
 AFLAGS_vlock.o			:= -march=armv7-a
+CFLAGS_REMOVE_mcpm_entry.o	= -pg
diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
new file mode 100644
index 00000000000..8fee70dfb30
--- /dev/null
+++ b/arch/arm/common/bL_switcher.c
@@ -0,0 +1,864 @@
+/*
+ * arch/arm/common/bL_switcher.c -- big.LITTLE cluster switcher core driver
+ *
+ * Created by:	Nicolas Pitre, March 2012
+ * Copyright:	(C) 2012  Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/atomic.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/cpu_pm.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/kthread.h>
+#include <linux/wait.h>
+#include <linux/time.h>
+#include <linux/clockchips.h>
+#include <linux/hrtimer.h>
+#include <linux/tick.h>
+#include <linux/notifier.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/sysfs.h>
+#include <linux/irqchip/arm-gic.h>
+#include <linux/moduleparam.h>
+
+#include <asm/smp_plat.h>
+#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <asm/suspend.h>
+#include <asm/mcpm.h>
+#include <asm/bL_switcher.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/power_cpu_migrate.h>
+
+
+/*
+ * Use our own MPIDR accessors as the generic ones in asm/cputype.h have
+ * __attribute_const__ and we don't want the compiler to assume any
+ * constness here as the value _does_ change along some code paths.
+ */
+
+static int read_mpidr(void)
+{
+	unsigned int id;
+	asm volatile ("mrc\tp15, 0, %0, c0, c0, 5" : "=r" (id));
+	return id & MPIDR_HWID_BITMASK;
+}
+
+/*
+ * Get a global nanosecond time stamp for tracing.
+ */
+static s64 get_ns(void)
+{
+	struct timespec ts;
+	getnstimeofday(&ts);
+	return timespec_to_ns(&ts);
+}
+
+/*
+ * bL switcher core code.
+ */
+
+static void bL_do_switch(void *_arg)
+{
+	unsigned ib_mpidr, ib_cpu, ib_cluster;
+	long volatile handshake, **handshake_ptr = _arg;
+
+	pr_debug("%s\n", __func__);
+
+	ib_mpidr = cpu_logical_map(smp_processor_id());
+	ib_cpu = MPIDR_AFFINITY_LEVEL(ib_mpidr, 0);
+	ib_cluster = MPIDR_AFFINITY_LEVEL(ib_mpidr, 1);
+
+	/* Advertise our handshake location */
+	if (handshake_ptr) {
+		handshake = 0;
+		*handshake_ptr = &handshake;
+	} else
+		handshake = -1;
+
+	/*
+	 * Our state has been saved at this point.  Let's release our
+	 * inbound CPU.
+	 */
+	mcpm_set_entry_vector(ib_cpu, ib_cluster, cpu_resume);
+	sev();
+
+	/*
+	 * From this point, we must assume that our counterpart CPU might
+	 * have taken over in its parallel world already, as if execution
+	 * just returned from cpu_suspend().  It is therefore important to
+	 * be very careful not to make any change the other guy is not
+	 * expecting.  This is why we need stack isolation.
+	 *
+	 * Fancy under cover tasks could be performed here.  For now
+	 * we have none.
+	 */
+
+	/*
+	 * Let's wait until our inbound is alive.
+	 */
+	while (!handshake) {
+		wfe();
+		smp_mb();
+	}
+
+	/* Let's put ourself down. */
+	mcpm_cpu_power_down();
+
+	/* should never get here */
+	BUG();
+}
+
+/*
+ * Stack isolation.  To ensure 'current' remains valid, we just use another
+ * piece of our thread's stack space which should be fairly lightly used.
+ * The selected area starts just above the thread_info structure located
+ * at the very bottom of the stack, aligned to a cache line, and indexed
+ * with the cluster number.
+ */
+#define STACK_SIZE 512
+extern void call_with_stack(void (*fn)(void *), void *arg, void *sp);
+static int bL_switchpoint(unsigned long _arg)
+{
+	unsigned int mpidr = read_mpidr();
+	unsigned int clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+	void *stack = current_thread_info() + 1;
+	stack = PTR_ALIGN(stack, L1_CACHE_BYTES);
+	stack += clusterid * STACK_SIZE + STACK_SIZE;
+	call_with_stack(bL_do_switch, (void *)_arg, stack);
+	BUG();
+}
+
+/*
+ * Generic switcher interface
+ */
+
+static unsigned int bL_gic_id[MAX_CPUS_PER_CLUSTER][MAX_NR_CLUSTERS];
+static int bL_switcher_cpu_pairing[NR_CPUS];
+
+/*
+ * bL_switch_to - Switch to a specific cluster for the current CPU
+ * @new_cluster_id: the ID of the cluster to switch to.
+ *
+ * This function must be called on the CPU to be switched.
+ * Returns 0 on success, else a negative status code.
+ */
+static int bL_switch_to(unsigned int new_cluster_id)
+{
+	unsigned int mpidr, this_cpu, that_cpu;
+	unsigned int ob_mpidr, ob_cpu, ob_cluster, ib_mpidr, ib_cpu, ib_cluster;
+	struct completion inbound_alive;
+	struct tick_device *tdev;
+	enum clock_event_mode tdev_mode;
+	long volatile *handshake_ptr;
+	int ipi_nr, ret;
+
+	this_cpu = smp_processor_id();
+	ob_mpidr = read_mpidr();
+	ob_cpu = MPIDR_AFFINITY_LEVEL(ob_mpidr, 0);
+	ob_cluster = MPIDR_AFFINITY_LEVEL(ob_mpidr, 1);
+	BUG_ON(cpu_logical_map(this_cpu) != ob_mpidr);
+
+	if (new_cluster_id == ob_cluster)
+		return 0;
+
+	that_cpu = bL_switcher_cpu_pairing[this_cpu];
+	ib_mpidr = cpu_logical_map(that_cpu);
+	ib_cpu = MPIDR_AFFINITY_LEVEL(ib_mpidr, 0);
+	ib_cluster = MPIDR_AFFINITY_LEVEL(ib_mpidr, 1);
+
+	pr_debug("before switch: CPU %d MPIDR %#x -> %#x\n",
+		 this_cpu, ob_mpidr, ib_mpidr);
+
+	this_cpu = smp_processor_id();
+
+	/* Close the gate for our entry vectors */
+	mcpm_set_entry_vector(ob_cpu, ob_cluster, NULL);
+	mcpm_set_entry_vector(ib_cpu, ib_cluster, NULL);
+
+	/* Install our "inbound alive" notifier. */
+	init_completion(&inbound_alive);
+	ipi_nr = register_ipi_completion(&inbound_alive, this_cpu);
+	ipi_nr |= ((1 << 16) << bL_gic_id[ob_cpu][ob_cluster]);
+	mcpm_set_early_poke(ib_cpu, ib_cluster, gic_get_sgir_physaddr(), ipi_nr);
+
+	/*
+	 * Let's wake up the inbound CPU now in case it requires some delay
+	 * to come online, but leave it gated in our entry vector code.
+	 */
+	ret = mcpm_cpu_power_up(ib_cpu, ib_cluster);
+	if (ret) {
+		pr_err("%s: mcpm_cpu_power_up() returned %d\n", __func__, ret);
+		return ret;
+	}
+
+	/*
+	 * Raise a SGI on the inbound CPU to make sure it doesn't stall
+	 * in a possible WFI, such as in bL_power_down().
+	 */
+	gic_send_sgi(bL_gic_id[ib_cpu][ib_cluster], 0);
+
+	/*
+	 * Wait for the inbound to come up.  This allows for other
+	 * tasks to be scheduled in the mean time.
+	 */
+	wait_for_completion(&inbound_alive);
+	mcpm_set_early_poke(ib_cpu, ib_cluster, 0, 0);
+
+	/*
+	 * From this point we are entering the switch critical zone
+	 * and can't sleep/schedule anymore.
+	 */
+	local_irq_disable();
+	local_fiq_disable();
+	trace_cpu_migrate_begin(get_ns(), ob_mpidr);
+
+	/* redirect GIC's SGIs to our counterpart */
+	gic_migrate_target(bL_gic_id[ib_cpu][ib_cluster]);
+
+	tdev = tick_get_device(this_cpu);
+	if (tdev && !cpumask_equal(tdev->evtdev->cpumask, cpumask_of(this_cpu)))
+		tdev = NULL;
+	if (tdev) {
+		tdev_mode = tdev->evtdev->mode;
+		clockevents_set_mode(tdev->evtdev, CLOCK_EVT_MODE_SHUTDOWN);
+	}
+
+	ret = cpu_pm_enter();
+
+	/* we can not tolerate errors at this point */
+	if (ret)
+		panic("%s: cpu_pm_enter() returned %d\n", __func__, ret);
+
+	/*
+	 * Swap the physical CPUs in the logical map for this logical CPU.
+	 * This must be flushed to RAM as the resume code
+	 * needs to access it while the caches are still disabled.
+	 */
+	cpu_logical_map(this_cpu) = ib_mpidr;
+	cpu_logical_map(that_cpu) = ob_mpidr;
+	sync_cache_w(&cpu_logical_map(this_cpu));
+
+	/* Let's do the actual CPU switch. */
+	ret = cpu_suspend((unsigned long)&handshake_ptr, bL_switchpoint);
+	if (ret > 0)
+		panic("%s: cpu_suspend() returned %d\n", __func__, ret);
+
+	/* We are executing on the inbound CPU at this point */
+	mpidr = read_mpidr();
+	pr_debug("after switch: CPU %d MPIDR %#x\n", this_cpu, mpidr);
+	BUG_ON(mpidr != ib_mpidr);
+
+	mcpm_cpu_powered_up();
+
+	ret = cpu_pm_exit();
+
+	if (tdev) {
+		clockevents_set_mode(tdev->evtdev, tdev_mode);
+		clockevents_program_event(tdev->evtdev,
+					  tdev->evtdev->next_event, 1);
+	}
+
+	trace_cpu_migrate_finish(get_ns(), ib_mpidr);
+	local_fiq_enable();
+	local_irq_enable();
+
+	*handshake_ptr = 1;
+	dsb_sev();
+
+	if (ret)
+		pr_err("%s exiting with error %d\n", __func__, ret);
+	return ret;
+}
+
+struct bL_thread {
+	spinlock_t lock;
+	struct task_struct *task;
+	wait_queue_head_t wq;
+	int wanted_cluster;
+	struct completion started;
+	bL_switch_completion_handler completer;
+	void *completer_cookie;
+};
+
+static struct bL_thread bL_threads[NR_CPUS];
+
+static int bL_switcher_thread(void *arg)
+{
+	struct bL_thread *t = arg;
+	struct sched_param param = { .sched_priority = 1 };
+	int cluster;
+	bL_switch_completion_handler completer;
+	void *completer_cookie;
+
+	sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
+	complete(&t->started);
+
+	do {
+		if (signal_pending(current))
+			flush_signals(current);
+		wait_event_interruptible(t->wq,
+				t->wanted_cluster != -1 ||
+				kthread_should_stop());
+
+		spin_lock(&t->lock);
+		cluster = t->wanted_cluster;
+		completer = t->completer;
+		completer_cookie = t->completer_cookie;
+		t->wanted_cluster = -1;
+		t->completer = NULL;
+		spin_unlock(&t->lock);
+
+		if (cluster != -1) {
+			bL_switch_to(cluster);
+
+			if (completer)
+				completer(completer_cookie);
+		}
+	} while (!kthread_should_stop());
+
+	return 0;
+}
+
+static struct task_struct * bL_switcher_thread_create(int cpu, void *arg)
+{
+	struct task_struct *task;
+
+	task = kthread_create_on_node(bL_switcher_thread, arg,
+				      cpu_to_node(cpu), "kswitcher_%d", cpu);
+	if (!IS_ERR(task)) {
+		kthread_bind(task, cpu);
+		wake_up_process(task);
+	} else
+		pr_err("%s failed for CPU %d\n", __func__, cpu);
+	return task;
+}
+
+/*
+ * bL_switch_request_cb - Switch to a specific cluster for the given CPU,
+ *      with completion notification via a callback
+ *
+ * @cpu: the CPU to switch
+ * @new_cluster_id: the ID of the cluster to switch to.
+ * @completer: switch completion callback.  if non-NULL,
+ *	@completer(@completer_cookie) will be called on completion of
+ *	the switch, in non-atomic context.
+ * @completer_cookie: opaque context argument for @completer.
+ *
+ * This function causes a cluster switch on the given CPU by waking up
+ * the appropriate switcher thread.  This function may or may not return
+ * before the switch has occurred.
+ *
+ * If a @completer callback function is supplied, it will be called when
+ * the switch is complete.  This can be used to determine asynchronously
+ * when the switch is complete, regardless of when bL_switch_request()
+ * returns.  When @completer is supplied, no new switch request is permitted
+ * for the affected CPU until after the switch is complete, and @completer
+ * has returned.
+ */
+int bL_switch_request_cb(unsigned int cpu, unsigned int new_cluster_id,
+			 bL_switch_completion_handler completer,
+			 void *completer_cookie)
+{
+	struct bL_thread *t;
+
+	if (cpu >= ARRAY_SIZE(bL_threads)) {
+		pr_err("%s: cpu %d out of bounds\n", __func__, cpu);
+		return -EINVAL;
+	}
+
+	t = &bL_threads[cpu];
+
+	if (IS_ERR(t->task))
+		return PTR_ERR(t->task);
+	if (!t->task)
+		return -ESRCH;
+
+	spin_lock(&t->lock);
+	if (t->completer) {
+		spin_unlock(&t->lock);
+		return -EBUSY;
+	}
+	t->completer = completer;
+	t->completer_cookie = completer_cookie;
+	t->wanted_cluster = new_cluster_id;
+	spin_unlock(&t->lock);
+	wake_up(&t->wq);
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(bL_switch_request_cb);
+
+/*
+ * Detach an outstanding switch request.
+ *
+ * The switcher will continue with the switch request in the background,
+ * but the completer function will not be called.
+ *
+ * This may be necessary if the completer is in a kernel module which is
+ * about to be unloaded.
+ */
+void bL_switch_request_detach(unsigned int cpu,
+			      bL_switch_completion_handler completer)
+{
+	struct bL_thread *t;
+
+	if (cpu >= ARRAY_SIZE(bL_threads)) {
+		pr_err("%s: cpu %d out of bounds\n", __func__, cpu);
+		return;
+	}
+
+	t = &bL_threads[cpu];
+
+	if (IS_ERR(t->task) || !t->task)
+		return;
+
+	spin_lock(&t->lock);
+	if (t->completer == completer)
+		t->completer = NULL;
+	spin_unlock(&t->lock);
+}
+
+EXPORT_SYMBOL_GPL(bL_switch_request_detach);
+
+/*
+ * Activation and configuration code.
+ */
+
+static DEFINE_MUTEX(bL_switcher_activation_lock);
+static BLOCKING_NOTIFIER_HEAD(bL_activation_notifier);
+static unsigned int bL_switcher_active;
+static unsigned int bL_switcher_cpu_original_cluster[NR_CPUS];
+static cpumask_t bL_switcher_removed_logical_cpus;
+
+int bL_switcher_register_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&bL_activation_notifier, nb);
+}
+EXPORT_SYMBOL_GPL(bL_switcher_register_notifier);
+
+int bL_switcher_unregister_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&bL_activation_notifier, nb);
+}
+EXPORT_SYMBOL_GPL(bL_switcher_unregister_notifier);
+
+static int bL_activation_notify(unsigned long val)
+{
+	int ret;
+       
+	ret = blocking_notifier_call_chain(&bL_activation_notifier, val, NULL);
+	if (ret & NOTIFY_STOP_MASK)
+		pr_err("%s: notifier chain failed with status 0x%x\n",
+			__func__, ret);
+	return notifier_to_errno(ret);
+}
+
+static void bL_switcher_restore_cpus(void)
+{
+	int i;
+
+	for_each_cpu(i, &bL_switcher_removed_logical_cpus)
+		cpu_up(i);
+}
+
+static int bL_switcher_halve_cpus(void)
+{
+	int i, j, cluster_0, gic_id, ret;
+	unsigned int cpu, cluster, mask;
+	cpumask_t available_cpus;
+
+	/* First pass to validate what we have */
+	mask = 0;
+	for_each_online_cpu(i) {
+		cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0);
+		cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1);
+		if (cluster >= 2) {
+			pr_err("%s: only dual cluster systems are supported\n", __func__);
+			return -EINVAL;
+		}
+		if (WARN_ON(cpu >= MAX_CPUS_PER_CLUSTER))
+			return -EINVAL;
+		mask |= (1 << cluster);
+	}
+	if (mask != 3) {
+		pr_err("%s: no CPU pairing possible\n", __func__);
+		return -EINVAL;
+	}
+
+	/*
+	 * Now let's do the pairing.  We match each CPU with another CPU
+	 * from a different cluster.  To get a uniform scheduling behavior
+	 * without fiddling with CPU topology and compute capacity data,
+	 * we'll use logical CPUs initially belonging to the same cluster.
+	 */
+	memset(bL_switcher_cpu_pairing, -1, sizeof(bL_switcher_cpu_pairing));
+	cpumask_copy(&available_cpus, cpu_online_mask);
+	cluster_0 = -1;
+	for_each_cpu(i, &available_cpus) {
+		int match = -1;
+		cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1);
+		if (cluster_0 == -1)
+			cluster_0 = cluster;
+		if (cluster != cluster_0)
+			continue;
+		cpumask_clear_cpu(i, &available_cpus);
+		for_each_cpu(j, &available_cpus) {
+			cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(j), 1);
+			/*
+			 * Let's remember the last match to create "odd"
+			 * pairing on purpose in order for other code not
+			 * to assume any relation between physical and
+			 * logical CPU numbers.
+			 */
+			if (cluster != cluster_0)
+				match = j;
+		}
+		if (match != -1) {
+			bL_switcher_cpu_pairing[i] = match;
+			cpumask_clear_cpu(match, &available_cpus);
+			pr_info("CPU%d paired with CPU%d\n", i, match);
+		}
+	}
+
+	/*
+	 * Now we disable the unwanted CPUs i.e. everything that has no
+	 * pairing information (that includes the pairing counterparts).
+	 */ 
+	cpumask_clear(&bL_switcher_removed_logical_cpus);
+	for_each_online_cpu(i) {
+		cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0);
+		cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1);
+
+		/* Let's take note of the GIC ID for this CPU */
+		gic_id = gic_get_cpu_id(i);
+		if (gic_id < 0) {
+			pr_err("%s: bad GIC ID for CPU %d\n", __func__, i);
+			bL_switcher_restore_cpus();
+			return -EINVAL;
+		}
+		bL_gic_id[cpu][cluster] = gic_id;
+		pr_info("GIC ID for CPU %u cluster %u is %u\n",
+			cpu, cluster, gic_id);
+
+		if (bL_switcher_cpu_pairing[i] != -1) {
+			bL_switcher_cpu_original_cluster[i] = cluster;
+			continue;
+		}
+
+		ret = cpu_down(i);
+		if (ret) {
+			bL_switcher_restore_cpus();
+			return ret;
+		}
+		cpumask_set_cpu(i, &bL_switcher_removed_logical_cpus);
+	}
+
+	return 0;
+}
+
+/* Determine the logical CPU a given physical CPU is grouped on. */
+int bL_switcher_get_logical_index(u32 mpidr)
+{
+	int cpu;
+
+	if (!bL_switcher_active)
+		return -EUNATCH;
+
+	mpidr &= MPIDR_HWID_BITMASK;
+	for_each_online_cpu(cpu) {
+		int pairing = bL_switcher_cpu_pairing[cpu];
+		if (pairing == -1)
+			continue;
+		if ((mpidr == cpu_logical_map(cpu)) ||
+		    (mpidr == cpu_logical_map(pairing)))
+			return cpu;
+	}
+	return -EINVAL;
+}
+
+static void bL_switcher_trace_trigger_cpu(void *__always_unused info)
+{
+	trace_cpu_migrate_current(get_ns(), read_mpidr());
+}
+
+int bL_switcher_trace_trigger(void)
+{
+	int ret;
+
+	preempt_disable();
+
+	bL_switcher_trace_trigger_cpu(NULL);
+	ret = smp_call_function(bL_switcher_trace_trigger_cpu, NULL, true);
+
+	preempt_enable();
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(bL_switcher_trace_trigger);
+
+static int bL_switcher_enable(void)
+{
+	int cpu, ret;
+
+	mutex_lock(&bL_switcher_activation_lock);
+	cpu_hotplug_driver_lock();
+	if (bL_switcher_active) {
+		cpu_hotplug_driver_unlock();
+		mutex_unlock(&bL_switcher_activation_lock);
+		return 0;
+	}
+
+	pr_info("big.LITTLE switcher initializing\n");
+
+	ret = bL_activation_notify(BL_NOTIFY_PRE_ENABLE);
+	if (ret)
+		goto error;
+
+	ret = bL_switcher_halve_cpus();
+	if (ret)
+		goto error;
+
+	bL_switcher_trace_trigger();
+
+	for_each_online_cpu(cpu) {
+		struct bL_thread *t = &bL_threads[cpu];
+		spin_lock_init(&t->lock);
+		init_waitqueue_head(&t->wq);
+		init_completion(&t->started);
+		t->wanted_cluster = -1;
+		t->task = bL_switcher_thread_create(cpu, t);
+	}
+
+	bL_switcher_active = 1;
+	bL_activation_notify(BL_NOTIFY_POST_ENABLE);
+	pr_info("big.LITTLE switcher initialized\n");
+	goto out;
+
+error:
+	pr_warning("big.LITTLE switcher initialization failed\n");
+	bL_activation_notify(BL_NOTIFY_POST_DISABLE);
+
+out:
+	cpu_hotplug_driver_unlock();
+	mutex_unlock(&bL_switcher_activation_lock);
+	return ret;
+}
+
+#ifdef CONFIG_SYSFS
+
+static void bL_switcher_disable(void)
+{
+	unsigned int cpu, cluster;
+	struct bL_thread *t;
+	struct task_struct *task;
+
+	mutex_lock(&bL_switcher_activation_lock);
+	cpu_hotplug_driver_lock();
+
+	if (!bL_switcher_active)
+		goto out;
+
+	if (bL_activation_notify(BL_NOTIFY_PRE_DISABLE) != 0) {
+		bL_activation_notify(BL_NOTIFY_POST_ENABLE);
+		goto out;
+	}
+
+	bL_switcher_active = 0;
+
+	/*
+	 * To deactivate the switcher, we must shut down the switcher
+	 * threads to prevent any other requests from being accepted.
+	 * Then, if the final cluster for given logical CPU is not the
+	 * same as the original one, we'll recreate a switcher thread
+	 * just for the purpose of switching the CPU back without any
+	 * possibility for interference from external requests.
+	 */
+	for_each_online_cpu(cpu) {
+		t = &bL_threads[cpu];
+		task = t->task;
+		t->task = NULL;
+		if (!task || IS_ERR(task))
+			continue;
+		kthread_stop(task);
+		/* no more switch may happen on this CPU at this point */
+		cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1);
+		if (cluster == bL_switcher_cpu_original_cluster[cpu])
+			continue;
+		init_completion(&t->started);
+		t->wanted_cluster = bL_switcher_cpu_original_cluster[cpu];
+		task = bL_switcher_thread_create(cpu, t);
+		if (!IS_ERR(task)) {
+			wait_for_completion(&t->started);
+			kthread_stop(task);
+			cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1);
+			if (cluster == bL_switcher_cpu_original_cluster[cpu])
+				continue;
+		}
+		/* If execution gets here, we're in trouble. */
+		pr_crit("%s: unable to restore original cluster for CPU %d\n",
+			__func__, cpu);
+		pr_crit("%s: CPU %d can't be restored\n",
+			__func__, bL_switcher_cpu_pairing[cpu]);
+		cpumask_clear_cpu(bL_switcher_cpu_pairing[cpu],
+				  &bL_switcher_removed_logical_cpus);
+	}
+
+	bL_switcher_restore_cpus();
+	bL_switcher_trace_trigger();
+
+	bL_activation_notify(BL_NOTIFY_POST_DISABLE);
+
+out:
+	cpu_hotplug_driver_unlock();
+	mutex_unlock(&bL_switcher_activation_lock);
+}
+
+static ssize_t bL_switcher_active_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", bL_switcher_active);
+}
+
+static ssize_t bL_switcher_active_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+
+	switch (buf[0]) {
+	case '0':
+		bL_switcher_disable();
+		ret = 0;
+		break;
+	case '1':
+		ret = bL_switcher_enable();
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return (ret >= 0) ? count : ret;
+}
+
+static ssize_t bL_switcher_trace_trigger_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	int ret = bL_switcher_trace_trigger();
+
+	return ret ? ret : count;
+}
+
+static struct kobj_attribute bL_switcher_active_attr =
+	__ATTR(active, 0644, bL_switcher_active_show, bL_switcher_active_store);
+
+static struct kobj_attribute bL_switcher_trace_trigger_attr =
+	__ATTR(trace_trigger, 0200, NULL, bL_switcher_trace_trigger_store);
+
+static struct attribute *bL_switcher_attrs[] = {
+	&bL_switcher_active_attr.attr,
+	&bL_switcher_trace_trigger_attr.attr,
+	NULL,
+};
+
+static struct attribute_group bL_switcher_attr_group = {
+	.attrs = bL_switcher_attrs,
+};
+
+static struct kobject *bL_switcher_kobj;
+
+static int __init bL_switcher_sysfs_init(void)
+{
+	int ret;
+
+	bL_switcher_kobj = kobject_create_and_add("bL_switcher", kernel_kobj);
+	if (!bL_switcher_kobj)
+		return -ENOMEM;
+	ret = sysfs_create_group(bL_switcher_kobj, &bL_switcher_attr_group);
+	if (ret)
+		kobject_put(bL_switcher_kobj);
+	return ret;
+}
+
+#endif  /* CONFIG_SYSFS */
+
+bool bL_switcher_get_enabled(void)
+{
+	mutex_lock(&bL_switcher_activation_lock);
+
+	return bL_switcher_active;
+}
+EXPORT_SYMBOL_GPL(bL_switcher_get_enabled);
+
+void bL_switcher_put_enabled(void)
+{
+	mutex_unlock(&bL_switcher_activation_lock);
+}
+EXPORT_SYMBOL_GPL(bL_switcher_put_enabled);
+
+/*
+ * Veto any CPU hotplug operation while the switcher is active.
+ * We're just not ready to deal with that given the trickery involved.
+ */
+static int bL_switcher_hotplug_callback(struct notifier_block *nfb,
+					unsigned long action, void *hcpu)
+{
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_DOWN_PREPARE:
+		if (bL_switcher_active)
+			return NOTIFY_BAD;
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block bL_switcher_hotplug_notifier =
+        { &bL_switcher_hotplug_callback, NULL, 0 };
+
+#ifdef CONFIG_SCHED_HMP
+static bool no_bL_switcher = true;
+#else
+static bool no_bL_switcher;
+#endif
+core_param(no_bL_switcher, no_bL_switcher, bool, 0644);
+
+static int __init bL_switcher_init(void)
+{
+	int ret;
+
+	if (MAX_NR_CLUSTERS != 2) {
+		pr_err("%s: only dual cluster systems are supported\n", __func__);
+		return -EINVAL;
+	}
+
+	register_cpu_notifier(&bL_switcher_hotplug_notifier);
+
+	if (!no_bL_switcher) {
+		ret = bL_switcher_enable();
+		if (ret)
+			return ret;
+	}
+
+#ifdef CONFIG_SYSFS
+	ret = bL_switcher_sysfs_init();
+	if (ret)
+		pr_err("%s: unable to create sysfs entry\n", __func__);
+#endif
+
+	return 0;
+}
+
+late_initcall(bL_switcher_init);
diff --git a/arch/arm/common/bL_switcher_dummy_if.c b/arch/arm/common/bL_switcher_dummy_if.c
new file mode 100644
index 00000000000..5e2dd197e72
--- /dev/null
+++ b/arch/arm/common/bL_switcher_dummy_if.c
@@ -0,0 +1,71 @@
+/*
+ * arch/arm/common/bL_switcher_dummy_if.c -- b.L switcher dummy interface
+ *
+ * Created by:	Nicolas Pitre, November 2012
+ * Copyright:	(C) 2012  Linaro Limited
+ *
+ * Dummy interface to user space for debugging purpose only.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <asm/uaccess.h>
+#include <asm/bL_switcher.h>
+
+static ssize_t bL_switcher_write(struct file *file, const char __user *buf,
+			size_t len, loff_t *pos)
+{
+	unsigned char val[3];
+	unsigned int cpu, cluster;
+	int ret;
+
+	pr_debug("%s\n", __func__);
+
+	if (len < 3)
+		return -EINVAL;
+
+	if (copy_from_user(val, buf, 3))
+		return -EFAULT;
+
+	/* format: <cpu#>,<cluster#> */
+	if (val[0] < '0' || val[0] > '4' ||
+	    val[1] != ',' ||
+	    val[2] < '0' || val[2] > '1')
+		return -EINVAL;
+
+	cpu = val[0] - '0';
+	cluster = val[2] - '0';
+	ret = bL_switch_request(cpu, cluster);
+
+	return ret ? : len;
+}
+
+static const struct file_operations bL_switcher_fops = {
+	.write		= bL_switcher_write,
+	.owner	= THIS_MODULE,
+};
+
+static struct miscdevice bL_switcher_device = {
+        MISC_DYNAMIC_MINOR,
+        "b.L_switcher",
+        &bL_switcher_fops
+};
+
+static int __init bL_switcher_dummy_if_init(void)
+{
+	return misc_register(&bL_switcher_device);
+}
+
+static void __exit bL_switcher_dummy_if_exit(void)
+{
+	misc_deregister(&bL_switcher_device);
+}
+
+module_init(bL_switcher_dummy_if_init);
+module_exit(bL_switcher_dummy_if_exit);
diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c
index 370236dd1a0..4a2b32fd53a 100644
--- a/arch/arm/common/mcpm_entry.c
+++ b/arch/arm/common/mcpm_entry.c
@@ -27,6 +27,18 @@ void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr)
 	sync_cache_w(&mcpm_entry_vectors[cluster][cpu]);
 }
 
+extern unsigned long mcpm_entry_early_pokes[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER][2];
+
+void mcpm_set_early_poke(unsigned cpu, unsigned cluster,
+			 unsigned long poke_phys_addr, unsigned long poke_val)
+{
+	unsigned long *poke = &mcpm_entry_early_pokes[cluster][cpu][0];
+	poke[0] = poke_phys_addr;
+	poke[1] = poke_val;
+	__cpuc_flush_dcache_area((void *)poke, 8);
+	outer_clean_range(__pa(poke), __pa(poke + 2));
+}
+
 static const struct mcpm_platform_ops *platform_ops;
 
 int __init mcpm_platform_register(const struct mcpm_platform_ops *ops)
diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S
index 8178705c4b2..057e9c5a9e1 100644
--- a/arch/arm/common/mcpm_head.S
+++ b/arch/arm/common/mcpm_head.S
@@ -71,12 +71,19 @@ ENTRY(mcpm_entry_point)
 	 * position independent way.
 	 */
 	adr	r5, 3f
-	ldmia	r5, {r6, r7, r8, r11}
+	ldmia	r5, {r0, r6, r7, r8, r11}
+	add	r0, r5, r0			@ r0 = mcpm_entry_early_pokes
 	add	r6, r5, r6			@ r6 = mcpm_entry_vectors
 	ldr	r7, [r5, r7]			@ r7 = mcpm_power_up_setup_phys
 	add	r8, r5, r8			@ r8 = mcpm_sync
 	add	r11, r5, r11			@ r11 = first_man_locks
 
+	@ Perform an early poke, if any
+	add	r0, r0, r4, lsl #3
+	ldmia	r0, {r0, r1}
+	teq	r0, #0
+	strne	r1, [r0]
+
 	mov	r0, #MCPM_SYNC_CLUSTER_SIZE
 	mla	r8, r0, r10, r8			@ r8 = sync cluster base
 
@@ -195,7 +202,8 @@ mcpm_entry_gated:
 
 	.align	2
 
-3:	.word	mcpm_entry_vectors - .
+3:	.word	mcpm_entry_early_pokes - .
+	.word	mcpm_entry_vectors - 3b
 	.word	mcpm_power_up_setup_phys - 3b
 	.word	mcpm_sync - 3b
 	.word	first_man_locks - 3b
@@ -214,6 +222,10 @@ first_man_locks:
 ENTRY(mcpm_entry_vectors)
 	.space	4 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER
 
+	.type	mcpm_entry_early_pokes, #object
+ENTRY(mcpm_entry_early_pokes)
+	.space	8 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER
+
 	.type	mcpm_power_up_setup_phys, #object
 ENTRY(mcpm_power_up_setup_phys)
 	.space  4		@ set by mcpm_sync_init()
diff --git a/arch/arm/crypto/aes-armv4.S b/arch/arm/crypto/aes-armv4.S
index 19d6cd6f29f..3a14ea8fe97 100644
--- a/arch/arm/crypto/aes-armv4.S
+++ b/arch/arm/crypto/aes-armv4.S
@@ -148,7 +148,7 @@ AES_Te:
 @ 		 const AES_KEY *key) {
 .align	5
 ENTRY(AES_encrypt)
-	sub	r3,pc,#8		@ AES_encrypt
+	adr	r3,AES_encrypt
 	stmdb   sp!,{r1,r4-r12,lr}
 	mov	r12,r0		@ inp
 	mov	r11,r2
@@ -381,7 +381,7 @@ _armv4_AES_encrypt:
 .align	5
 ENTRY(private_AES_set_encrypt_key)
 _armv4_AES_set_encrypt_key:
-	sub	r3,pc,#8		@ AES_set_encrypt_key
+	adr	r3,_armv4_AES_set_encrypt_key
 	teq	r0,#0
 	moveq	r0,#-1
 	beq	.Labrt
@@ -843,7 +843,7 @@ AES_Td:
 @ 		 const AES_KEY *key) {
 .align	5
 ENTRY(AES_decrypt)
-	sub	r3,pc,#8		@ AES_decrypt
+	adr	r3,AES_decrypt
 	stmdb   sp!,{r1,r4-r12,lr}
 	mov	r12,r0		@ inp
 	mov	r11,r2
diff --git a/arch/arm/include/asm/a.out-core.h b/arch/arm/include/asm/a.out-core.h
deleted file mode 100644
index 92f10cb5c70..00000000000
--- a/arch/arm/include/asm/a.out-core.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/* a.out coredump register dumper
- *
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-
-#ifndef _ASM_A_OUT_CORE_H
-#define _ASM_A_OUT_CORE_H
-
-#ifdef __KERNEL__
-
-#include <linux/user.h>
-#include <linux/elfcore.h>
-
-/*
- * fill in the user structure for an a.out core dump
- */
-static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
-{
-	struct task_struct *tsk = current;
-
-	dump->magic = CMAGIC;
-	dump->start_code = tsk->mm->start_code;
-	dump->start_stack = regs->ARM_sp & ~(PAGE_SIZE - 1);
-
-	dump->u_tsize = (tsk->mm->end_code - tsk->mm->start_code) >> PAGE_SHIFT;
-	dump->u_dsize = (tsk->mm->brk - tsk->mm->start_data + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	dump->u_ssize = 0;
-
-	memset(dump->u_debugreg, 0, sizeof(dump->u_debugreg));
-
-	if (dump->start_stack < 0x04000000)
-		dump->u_ssize = (0x04000000 - dump->start_stack) >> PAGE_SHIFT;
-
-	dump->regs = *regs;
-	dump->u_fpvalid = dump_fpu (regs, &dump->u_fp);
-}
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_A_OUT_CORE_H */
diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h
index 7c1bfc0aea0..f693038b888 100644
--- a/arch/arm/include/asm/arch_timer.h
+++ b/arch/arm/include/asm/arch_timer.h
@@ -80,15 +80,6 @@ static inline u32 arch_timer_get_cntfrq(void)
 	return val;
 }
 
-static inline u64 arch_counter_get_cntpct(void)
-{
-	u64 cval;
-
-	isb();
-	asm volatile("mrrc p15, 0, %Q0, %R0, c14" : "=r" (cval));
-	return cval;
-}
-
 static inline u64 arch_counter_get_cntvct(void)
 {
 	u64 cval;
@@ -105,7 +96,7 @@ static inline void __cpuinit arch_counter_set_user_access(void)
 	asm volatile("mrc p15, 0, %0, c14, c1, 0" : "=r" (cntkctl));
 
 	/* disable user access to everything */
-	cntkctl &= ~((3 << 8) | (7 << 0));
+	cntkctl &= ~((3 << 8) | (3 << 0));
 
 	asm volatile("mcr p15, 0, %0, c14, c1, 0" : : "r" (cntkctl));
 }
diff --git a/arch/arm/include/asm/bL_switcher.h b/arch/arm/include/asm/bL_switcher.h
new file mode 100644
index 00000000000..482383b45c9
--- /dev/null
+++ b/arch/arm/include/asm/bL_switcher.h
@@ -0,0 +1,83 @@
+/*
+ * arch/arm/include/asm/bL_switcher.h
+ *
+ * Created by:  Nicolas Pitre, April 2012
+ * Copyright:   (C) 2012  Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef ASM_BL_SWITCHER_H
+#define ASM_BL_SWITCHER_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+typedef void (*bL_switch_completion_handler)(void *cookie);
+
+int bL_switch_request_cb(unsigned int cpu, unsigned int new_cluster_id,
+			 bL_switch_completion_handler completer,
+			 void *completer_cookie);
+static inline int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id)
+{
+	return bL_switch_request_cb(cpu, new_cluster_id, NULL, NULL);
+}
+
+/*
+ * Register here to be notified about runtime enabling/disabling of
+ * the switcher.
+ *
+ * The notifier chain is called with the switcher activation lock held:
+ * the switcher will not be enabled or disabled during callbacks.
+ * Callbacks must not call bL_switcher_{get,put}_enabled().
+ */
+#define BL_NOTIFY_PRE_ENABLE	0
+#define BL_NOTIFY_POST_ENABLE	1
+#define BL_NOTIFY_PRE_DISABLE	2
+#define BL_NOTIFY_POST_DISABLE	3
+
+#ifdef CONFIG_BL_SWITCHER
+
+void bL_switch_request_detach(unsigned int cpu,
+			      bL_switch_completion_handler completer);
+
+int bL_switcher_register_notifier(struct notifier_block *nb);
+int bL_switcher_unregister_notifier(struct notifier_block *nb);
+
+/*
+ * Use these functions to temporarily prevent enabling/disabling of
+ * the switcher.
+ * bL_switcher_get_enabled() returns true if the switcher is currently
+ * enabled.  Each call to bL_switcher_get_enabled() must be followed
+ * by a call to bL_switcher_put_enabled().  These functions are not
+ * recursive.
+ */
+bool bL_switcher_get_enabled(void);
+void bL_switcher_put_enabled(void);
+
+int bL_switcher_trace_trigger(void);
+int bL_switcher_get_logical_index(u32 mpidr);
+
+#else
+static void bL_switch_request_detach(unsigned int cpu,
+				     bL_switch_completion_handler completer) { }
+
+static inline int bL_switcher_register_notifier(struct notifier_block *nb)
+{
+	return 0;
+}
+
+static inline int bL_switcher_unregister_notifier(struct notifier_block *nb)
+{
+	return 0;
+}
+
+static inline bool bL_switcher_get_enabled(void) { return false; }
+static inline void bL_switcher_put_enabled(void) { }
+static inline int bL_switcher_trace_trigger(void) { return 0; }
+static inline int bL_switcher_get_logical_index(u32 mpidr) { return -EUNATCH; }
+#endif /* CONFIG_BL_SWITCHER */
+
+#endif
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 17d0ae8672f..beb87022178 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -436,4 +436,50 @@ static inline void __sync_cache_range_r(volatile void *p, size_t size)
 #define sync_cache_w(ptr) __sync_cache_range_w(ptr, sizeof *(ptr))
 #define sync_cache_r(ptr) __sync_cache_range_r(ptr, sizeof *(ptr))
 
+/*
+ * Disabling cache access for one CPU in an ARMv7 SMP system is tricky.
+ * To do so we must:
+ *
+ * - Clear the SCTLR.C bit to prevent further cache allocations
+ * - Flush the desired level of cache
+ * - Clear the ACTLR "SMP" bit to disable local coherency
+ *
+ * ... and so without any intervening memory access in between those steps,
+ * not even to the stack.
+ *
+ * WARNING -- After this has been called:
+ *
+ * - No ldrex/strex (and similar) instructions must be used.
+ * - The CPU is obviously no longer coherent with the other CPUs.
+ * - This is unlikely to work as expected if Linux is running non-secure.
+ *
+ * Note:
+ *
+ * - This is known to apply to several ARMv7 processor implementations,
+ *   however some exceptions may exist.  Caveat emptor.
+ *
+ * - The clobber list is dictated by the call to v7_flush_dcache_*.
+ *   fp is preserved to the stack explicitly prior disabling the cache
+ *   since adding it to the clobber list is incompatible with having
+ *   CONFIG_FRAME_POINTER=y.  ip is saved as well if ever r12-clobbering
+ *   trampoline are inserted by the linker and to keep sp 64-bit aligned.
+ */
+#define v7_exit_coherency_flush(level) \
+	asm volatile( \
+	"stmfd	sp!, {fp, ip} \n\t" \
+	"mrc	p15, 0, r0, c1, c0, 0	@ get SCTLR \n\t" \
+	"bic	r0, r0, #"__stringify(CR_C)" \n\t" \
+	"mcr	p15, 0, r0, c1, c0, 0	@ set SCTLR \n\t" \
+	"isb	\n\t" \
+	"bl	v7_flush_dcache_"__stringify(level)" \n\t" \
+	"clrex	\n\t" \
+	"mrc	p15, 0, r0, c1, c0, 1	@ get ACTLR \n\t" \
+	"bic	r0, r0, #(1 << 6)	@ disable local coherency \n\t" \
+	"mcr	p15, 0, r0, c1, c0, 1	@ set ACTLR \n\t" \
+	"isb	\n\t" \
+	"dsb	\n\t" \
+	"ldmfd	sp!, {fp, ip}" \
+	: : : "r0","r1","r2","r3","r4","r5","r6","r7", \
+	      "r9","r10","lr","memory" )
+
 #endif
diff --git a/arch/arm/include/asm/cp15.h b/arch/arm/include/asm/cp15.h
index 1f3262e99d8..cedd3721318 100644
--- a/arch/arm/include/asm/cp15.h
+++ b/arch/arm/include/asm/cp15.h
@@ -61,6 +61,20 @@ static inline void set_cr(unsigned int val)
 	isb();
 }
 
+static inline unsigned int get_auxcr(void)
+{
+	unsigned int val;
+	asm("mrc p15, 0, %0, c1, c0, 1	@ get AUXCR" : "=r" (val));
+	return val;
+}
+
+static inline void set_auxcr(unsigned int val)
+{
+	asm volatile("mcr p15, 0, %0, c1, c0, 1	@ set AUXCR"
+	  : : "r" (val));
+	isb();
+}
+
 #ifndef CONFIG_SMP
 extern void adjust_cr(unsigned long mask, unsigned long set);
 #endif
diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h
index 38050b1c480..56211f2084e 100644
--- a/arch/arm/include/asm/elf.h
+++ b/arch/arm/include/asm/elf.h
@@ -130,4 +130,10 @@ struct mm_struct;
 extern unsigned long arch_randomize_brk(struct mm_struct *mm);
 #define arch_randomize_brk arch_randomize_brk
 
+#ifdef CONFIG_MMU
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
+struct linux_binprm;
+int arch_setup_additional_pages(struct linux_binprm *, int);
+#endif
+
 #endif
diff --git a/arch/arm/include/asm/hardirq.h b/arch/arm/include/asm/hardirq.h
index 2740c2a2df6..3d7351c844a 100644
--- a/arch/arm/include/asm/hardirq.h
+++ b/arch/arm/include/asm/hardirq.h
@@ -5,7 +5,7 @@
 #include <linux/threads.h>
 #include <asm/irq.h>
 
-#define NR_IPI	6
+#define NR_IPI	7
 
 typedef struct {
 	unsigned int __softirq_pending;
diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h
index bfc198c7591..863c892b4aa 100644
--- a/arch/arm/include/asm/jump_label.h
+++ b/arch/arm/include/asm/jump_label.h
@@ -16,7 +16,7 @@
 
 static __always_inline bool arch_static_branch(struct static_key *key)
 {
-	asm goto("1:\n\t"
+	asm_volatile_goto("1:\n\t"
 		 JUMP_LABEL_NOP "\n\t"
 		 ".pushsection __jump_table,  \"aw\"\n\t"
 		 ".word 1b, %l[l_yes], %c0\n\t"
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 18d50322a9e..4bb08e3e52b 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -37,16 +37,18 @@
 #define c5_AIFSR	15	/* Auxilary Instrunction Fault Status R */
 #define c6_DFAR		16	/* Data Fault Address Register */
 #define c6_IFAR		17	/* Instruction Fault Address Register */
-#define c9_L2CTLR	18	/* Cortex A15 L2 Control Register */
-#define c10_PRRR	19	/* Primary Region Remap Register */
-#define c10_NMRR	20	/* Normal Memory Remap Register */
-#define c12_VBAR	21	/* Vector Base Address Register */
-#define c13_CID		22	/* Context ID Register */
-#define c13_TID_URW	23	/* Thread ID, User R/W */
-#define c13_TID_URO	24	/* Thread ID, User R/O */
-#define c13_TID_PRIV	25	/* Thread ID, Privileged */
-#define c14_CNTKCTL	26	/* Timer Control Register (PL1) */
-#define NR_CP15_REGS	27	/* Number of regs (incl. invalid) */
+#define c7_PAR		18	/* Physical Address Register */
+#define c7_PAR_high	19	/* PAR top 32 bits */
+#define c9_L2CTLR	20	/* Cortex A15 L2 Control Register */
+#define c10_PRRR	21	/* Primary Region Remap Register */
+#define c10_NMRR	22	/* Normal Memory Remap Register */
+#define c12_VBAR	23	/* Vector Base Address Register */
+#define c13_CID		24	/* Context ID Register */
+#define c13_TID_URW	25	/* Thread ID, User R/W */
+#define c13_TID_URO	26	/* Thread ID, User R/O */
+#define c13_TID_PRIV	27	/* Thread ID, Privileged */
+#define c14_CNTKCTL	28	/* Timer Control Register (PL1) */
+#define NR_CP15_REGS	29	/* Number of regs (incl. invalid) */
 
 #define ARM_EXCEPTION_RESET	  0
 #define ARM_EXCEPTION_UNDEFINED   1
diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h
index 308ad7d6f98..75bf07910b8 100644
--- a/arch/arm/include/asm/mach/arch.h
+++ b/arch/arm/include/asm/mach/arch.h
@@ -8,6 +8,8 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/types.h>
+
 #ifndef __ASSEMBLY__
 
 struct tag;
@@ -16,8 +18,10 @@ struct pt_regs;
 struct smp_operations;
 #ifdef CONFIG_SMP
 #define smp_ops(ops) (&(ops))
+#define smp_init_ops(ops) (&(ops))
 #else
 #define smp_ops(ops) (struct smp_operations *)NULL
+#define smp_init_ops(ops) (bool (*)(void))NULL
 #endif
 
 struct machine_desc {
@@ -41,6 +45,7 @@ struct machine_desc {
 	unsigned char		reserve_lp2 :1;	/* never has lp2	*/
 	char			restart_mode;	/* default restart mode	*/
 	struct smp_operations	*smp;		/* SMP operations	*/
+	bool			(*smp_init)(void);
 	void			(*fixup)(struct tag *, char **,
 					 struct meminfo *);
 	void			(*reserve)(void);/* reserve mem blocks	*/
diff --git a/arch/arm/include/asm/mcpm.h b/arch/arm/include/asm/mcpm.h
index 0f7b7620e9a..7626a7fd493 100644
--- a/arch/arm/include/asm/mcpm.h
+++ b/arch/arm/include/asm/mcpm.h
@@ -42,6 +42,14 @@ extern void mcpm_entry_point(void);
 void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr);
 
 /*
+ * This sets an early poke i.e a value to be poked into some address
+ * from very early assembly code before the CPU is ungated.  The
+ * address must be physical, and if 0 then nothing will happen.
+ */
+void mcpm_set_early_poke(unsigned cpu, unsigned cluster,
+			 unsigned long poke_phys_addr, unsigned long poke_val);
+
+/*
  * CPU/cluster power operations API for higher subsystems to use.
  */
 
diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h
index e3d55547e75..6f18da09668 100644
--- a/arch/arm/include/asm/mmu.h
+++ b/arch/arm/include/asm/mmu.h
@@ -6,8 +6,11 @@
 typedef struct {
 #ifdef CONFIG_CPU_HAS_ASID
 	atomic64_t	id;
+#else
+	int		switch_pending;
 #endif
 	unsigned int	vmalloc_seq;
+	unsigned long	sigpage;
 } mm_context_t;
 
 #ifdef CONFIG_CPU_HAS_ASID
diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h
index a7b85e0d0cc..e0b10f19d67 100644
--- a/arch/arm/include/asm/mmu_context.h
+++ b/arch/arm/include/asm/mmu_context.h
@@ -27,7 +27,15 @@ void __check_vmalloc_seq(struct mm_struct *mm);
 void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk);
 #define init_new_context(tsk,mm)	({ atomic64_set(&mm->context.id, 0); 0; })
 
-DECLARE_PER_CPU(atomic64_t, active_asids);
+#ifdef CONFIG_ARM_ERRATA_798181
+void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm,
+			     cpumask_t *mask);
+#else  /* !CONFIG_ARM_ERRATA_798181 */
+static inline void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm,
+					   cpumask_t *mask)
+{
+}
+#endif /* CONFIG_ARM_ERRATA_798181 */
 
 #else	/* !CONFIG_CPU_HAS_ASID */
 
@@ -47,7 +55,7 @@ static inline void check_and_switch_context(struct mm_struct *mm,
 		 * on non-ASID CPUs, the old mm will remain valid until the
 		 * finish_arch_post_lock_switch() call.
 		 */
-		set_ti_thread_flag(task_thread_info(tsk), TIF_SWITCH_MM);
+		mm->context.switch_pending = 1;
 	else
 		cpu_switch_mm(mm->pgd, mm);
 }
@@ -56,9 +64,21 @@ static inline void check_and_switch_context(struct mm_struct *mm,
 	finish_arch_post_lock_switch
 static inline void finish_arch_post_lock_switch(void)
 {
-	if (test_and_clear_thread_flag(TIF_SWITCH_MM)) {
-		struct mm_struct *mm = current->mm;
-		cpu_switch_mm(mm->pgd, mm);
+	struct mm_struct *mm = current->mm;
+
+	if (mm && mm->context.switch_pending) {
+		/*
+		 * Preemption must be disabled during cpu_switch_mm() as we
+		 * have some stateful cache flush implementations. Check
+		 * switch_pending again in case we were preempted and the
+		 * switch to this mm was already done.
+		 */
+		preempt_disable();
+		if (mm->context.switch_pending) {
+			mm->context.switch_pending = 0;
+			cpu_switch_mm(mm->pgd, mm);
+		}
+		preempt_enable_no_resched();
 	}
 }
 
diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
index 812a4944e78..cbdc7a21f86 100644
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -142,7 +142,9 @@ extern void __cpu_copy_user_highpage(struct page *to, struct page *from,
 #define clear_page(page)	memset((void *)(page), 0, PAGE_SIZE)
 extern void copy_page(void *to, const void *from);
 
+#ifdef CONFIG_KUSER_HELPERS
 #define __HAVE_ARCH_GATE_AREA 1
+#endif
 
 #ifdef CONFIG_ARM_LPAE
 #include <asm/pgtable-3level-types.h>
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 9bcd262a900..5aac06fcc97 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -58,7 +58,7 @@ extern void __pgd_error(const char *file, int line, pgd_t);
  * mapping to be mapped at.  This is particularly important for
  * non-high vector CPUs.
  */
-#define FIRST_USER_ADDRESS	PAGE_SIZE
+#define FIRST_USER_ADDRESS	(PAGE_SIZE * 2)
 
 /*
  * Use TASK_SIZE as the ceiling argument for free_pgtables() and
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index f24edad26c7..0cd7824ca76 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -62,9 +62,19 @@ struct pmu_hw_events {
 	raw_spinlock_t		pmu_lock;
 };
 
+struct cpupmu_regs {
+	u32 pmc;
+	u32 pmcntenset;
+	u32 pmuseren;
+	u32 pmintenset;
+	u32 pmxevttype[8];
+	u32 pmxevtcnt[8];
+};
+
 struct arm_pmu {
 	struct pmu	pmu;
 	cpumask_t	active_irqs;
+	cpumask_t	valid_cpus;
 	char		*name;
 	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
 	void		(*enable)(struct perf_event *event);
@@ -81,6 +91,8 @@ struct arm_pmu {
 	int		(*request_irq)(struct arm_pmu *, irq_handler_t handler);
 	void		(*free_irq)(struct arm_pmu *);
 	int		(*map_event)(struct perf_event *event);
+	void		(*save_regs)(struct arm_pmu *, struct cpupmu_regs *);
+	void		(*restore_regs)(struct arm_pmu *, struct cpupmu_regs *);
 	int		num_events;
 	atomic_t	active_events;
 	struct mutex	reserve_mutex;
diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h
index 06e7d509eaa..413f3876341 100644
--- a/arch/arm/include/asm/processor.h
+++ b/arch/arm/include/asm/processor.h
@@ -54,7 +54,6 @@ struct thread_struct {
 
 #define start_thread(regs,pc,sp)					\
 ({									\
-	unsigned long *stack = (unsigned long *)sp;			\
 	memset(regs->uregs, 0, sizeof(regs->uregs));			\
 	if (current->personality & ADDR_LIMIT_32BIT)			\
 		regs->ARM_cpsr = USR_MODE;				\
@@ -65,9 +64,6 @@ struct thread_struct {
 	regs->ARM_cpsr |= PSR_ENDSTATE;					\
 	regs->ARM_pc = pc & ~1;		/* pc */			\
 	regs->ARM_sp = sp;		/* sp */			\
-	regs->ARM_r2 = stack[2];	/* r2 (envp) */			\
-	regs->ARM_r1 = stack[1];	/* r1 (argv) */			\
-	regs->ARM_r0 = stack[0];	/* r0 (argc) */			\
 	nommu_start_thread(regs);					\
 })
 
diff --git a/arch/arm/include/asm/psci.h b/arch/arm/include/asm/psci.h
index ce0dbe7c162..f0a8627c9f1 100644
--- a/arch/arm/include/asm/psci.h
+++ b/arch/arm/include/asm/psci.h
@@ -16,6 +16,10 @@
 
 #define PSCI_POWER_STATE_TYPE_STANDBY		0
 #define PSCI_POWER_STATE_TYPE_POWER_DOWN	1
+#define PSCI_POWER_STATE_AFFINITY_LEVEL0	0
+#define PSCI_POWER_STATE_AFFINITY_LEVEL1	1
+#define PSCI_POWER_STATE_AFFINITY_LEVEL2	2
+#define PSCI_POWER_STATE_AFFINITY_LEVEL3	3
 
 struct psci_power_state {
 	u16	id;
@@ -32,5 +36,22 @@ struct psci_operations {
 };
 
 extern struct psci_operations psci_ops;
+extern struct smp_operations psci_smp_ops;
 
+#ifdef CONFIG_ARM_PSCI
+void psci_init(void);
+bool psci_smp_available(void);
+#else
+static inline void psci_init(void) { }
+static inline bool psci_smp_available(void) { return false; }
+#endif
+
+#ifdef CONFIG_ARM_PSCI
+extern int __init psci_probe(void);
+#else
+static inline int psci_probe(void)
+{
+	return -ENODEV;
+}
+#endif
 #endif /* __ASM_ARM_PSCI_H */
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index d3a22bebe6c..610ccf33f5e 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -81,6 +81,8 @@ extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask);
 
+extern int register_ipi_completion(struct completion *completion, int cpu);
+
 struct smp_operations {
 #ifdef CONFIG_SMP
 	/*
diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h
index f1d96d4e809..73ddd7239b3 100644
--- a/arch/arm/include/asm/syscall.h
+++ b/arch/arm/include/asm/syscall.h
@@ -57,6 +57,9 @@ static inline void syscall_get_arguments(struct task_struct *task,
 					 unsigned int i, unsigned int n,
 					 unsigned long *args)
 {
+	if (n == 0)
+		return;
+
 	if (i + n > SYSCALL_MAX_ARGS) {
 		unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i;
 		unsigned int n_bad = n + i - SYSCALL_MAX_ARGS;
@@ -81,6 +84,9 @@ static inline void syscall_set_arguments(struct task_struct *task,
 					 unsigned int i, unsigned int n,
 					 const unsigned long *args)
 {
+	if (n == 0)
+		return;
+
 	if (i + n > SYSCALL_MAX_ARGS) {
 		pr_warning("%s called with max args %d, handling only %d\n",
 			   __func__, i + n, SYSCALL_MAX_ARGS);
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 1995d1a8406..f00b5692cd9 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -156,7 +156,6 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
 #define TIF_USING_IWMMXT	17
 #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
 #define TIF_RESTORE_SIGMASK	20
-#define TIF_SWITCH_MM		22	/* deferred switch_mm */
 
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index bdf2b8458ec..aa9b4ac3fdf 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -43,6 +43,7 @@ struct mmu_gather {
 	struct mm_struct	*mm;
 	unsigned int		fullmm;
 	struct vm_area_struct	*vma;
+	unsigned long		start, end;
 	unsigned long		range_start;
 	unsigned long		range_end;
 	unsigned int		nr;
@@ -107,10 +108,12 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 }
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int fullmm)
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
-	tlb->fullmm = fullmm;
+	tlb->fullmm = !(start | (end+1));
+	tlb->start = start;
+	tlb->end = end;
 	tlb->vma = NULL;
 	tlb->max = ARRAY_SIZE(tlb->local);
 	tlb->pages = tlb->local;
diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h
index 58b8b84adcd..983fa7c153a 100644
--- a/arch/arm/include/asm/topology.h
+++ b/arch/arm/include/asm/topology.h
@@ -26,11 +26,45 @@ extern struct cputopo_arm cpu_topology[NR_CPUS];
 void init_cpu_topology(void);
 void store_cpu_topology(unsigned int cpuid);
 const struct cpumask *cpu_coregroup_mask(int cpu);
+int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask);
+
+#ifdef CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE
+/* Common values for CPUs */
+#ifndef SD_CPU_INIT
+#define SD_CPU_INIT (struct sched_domain) {				\
+	.min_interval		= 1,					\
+	.max_interval		= 4,					\
+	.busy_factor		= 64,					\
+	.imbalance_pct		= 125,					\
+	.cache_nice_tries	= 1,					\
+	.busy_idx		= 2,					\
+	.idle_idx		= 1,					\
+	.newidle_idx		= 0,					\
+	.wake_idx		= 0,					\
+	.forkexec_idx		= 0,					\
+									\
+	.flags			= 0*SD_LOAD_BALANCE			\
+				| 1*SD_BALANCE_NEWIDLE			\
+				| 1*SD_BALANCE_EXEC			\
+				| 1*SD_BALANCE_FORK			\
+				| 0*SD_BALANCE_WAKE			\
+				| 1*SD_WAKE_AFFINE			\
+				| 0*SD_SHARE_CPUPOWER			\
+				| 0*SD_SHARE_PKG_RESOURCES		\
+				| 0*SD_SERIALIZE			\
+				,					\
+	.last_balance		 = jiffies,				\
+	.balance_interval	= 1,					\
+}
+#endif
+#endif /* CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE */
 
 #else
 
 static inline void init_cpu_topology(void) { }
 static inline void store_cpu_topology(unsigned int cpuid) { }
+static inline int cluster_to_logical_mask(unsigned int socket_id,
+	cpumask_t *cluster_mask) { return -EINVAL; }
 
 #endif
 
diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild
index 47bcb2d254a..18d76fd5a2a 100644
--- a/arch/arm/include/uapi/asm/Kbuild
+++ b/arch/arm/include/uapi/asm/Kbuild
@@ -1,7 +1,6 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
-header-y += a.out.h
 header-y += byteorder.h
 header-y += fcntl.h
 header-y += hwcap.h
diff --git a/arch/arm/include/uapi/asm/a.out.h b/arch/arm/include/uapi/asm/a.out.h
deleted file mode 100644
index 083894b2e3b..00000000000
--- a/arch/arm/include/uapi/asm/a.out.h
+++ /dev/null
@@ -1,34 +0,0 @@
-#ifndef __ARM_A_OUT_H__
-#define __ARM_A_OUT_H__
-
-#include <linux/personality.h>
-#include <linux/types.h>
-
-struct exec
-{
-  __u32 a_info;		/* Use macros N_MAGIC, etc for access */
-  __u32 a_text;		/* length of text, in bytes */
-  __u32 a_data;		/* length of data, in bytes */
-  __u32 a_bss;		/* length of uninitialized data area for file, in bytes */
-  __u32 a_syms;		/* length of symbol table data in file, in bytes */
-  __u32 a_entry;	/* start address */
-  __u32 a_trsize;	/* length of relocation info for text, in bytes */
-  __u32 a_drsize;	/* length of relocation info for data, in bytes */
-};
-
-/*
- * This is always the same
- */
-#define N_TXTADDR(a)	(0x00008000)
-
-#define N_TRSIZE(a)	((a).a_trsize)
-#define N_DRSIZE(a)	((a).a_drsize)
-#define N_SYMSIZE(a)	((a).a_syms)
-
-#define M_ARM 103
-
-#ifndef LIBRARY_START_TEXT
-#define LIBRARY_START_TEXT	(0x00c00000)
-#endif
-
-#endif /* __A_OUT_GNU_H__ */
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 5f3338eacad..dd9d90ab65d 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -82,6 +82,9 @@ obj-$(CONFIG_DEBUG_LL)	+= debug.o
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 
 obj-$(CONFIG_ARM_VIRT_EXT)	+= hyp-stub.o
-obj-$(CONFIG_ARM_PSCI)		+= psci.o
+ifeq ($(CONFIG_ARM_PSCI),y)
+obj-y				+= psci.o
+obj-$(CONFIG_SMP)		+= psci_smp.o
+endif
 
 extra-y := $(head-y) vmlinux.lds
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 582b405befc..32640ae7750 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -192,6 +192,7 @@ __dabt_svc:
 	svc_entry
 	mov	r2, sp
 	dabt_helper
+ THUMB(	ldr	r5, [sp, #S_PSR]	)	@ potentially updated CPSR
 	svc_exit r5				@ return from exception
  UNWIND(.fnend		)
 ENDPROC(__dabt_svc)
@@ -741,6 +742,18 @@ ENDPROC(__switch_to)
 #endif
 	.endm
 
+	.macro	kuser_pad, sym, size
+	.if	(. - \sym) & 3
+	.rept	4 - (. - \sym) & 3
+	.byte	0
+	.endr
+	.endif
+	.rept	(\size - (. - \sym)) / 4
+	.word	0xe7fddef1
+	.endr
+	.endm
+
+#ifdef CONFIG_KUSER_HELPERS
 	.align	5
 	.globl	__kuser_helper_start
 __kuser_helper_start:
@@ -831,18 +844,13 @@ kuser_cmpxchg64_fixup:
 #error "incoherent kernel configuration"
 #endif
 
-	/* pad to next slot */
-	.rept	(16 - (. - __kuser_cmpxchg64)/4)
-	.word	0
-	.endr
-
-	.align	5
+	kuser_pad __kuser_cmpxchg64, 64
 
 __kuser_memory_barrier:				@ 0xffff0fa0
 	smp_dmb	arm
 	usr_ret	lr
 
-	.align	5
+	kuser_pad __kuser_memory_barrier, 32
 
 __kuser_cmpxchg:				@ 0xffff0fc0
 
@@ -915,13 +923,14 @@ kuser_cmpxchg32_fixup:
 
 #endif
 
-	.align	5
+	kuser_pad __kuser_cmpxchg, 32
 
 __kuser_get_tls:				@ 0xffff0fe0
 	ldr	r0, [pc, #(16 - 8)]	@ read TLS, set in kuser_get_tls_init
 	usr_ret	lr
 	mrc	p15, 0, r0, c13, c0, 3	@ 0xffff0fe8 hardware TLS code
-	.rep	4
+	kuser_pad __kuser_get_tls, 16
+	.rep	3
 	.word	0			@ 0xffff0ff0 software TLS value, then
 	.endr				@ pad up to __kuser_helper_version
 
@@ -931,14 +940,16 @@ __kuser_helper_version:				@ 0xffff0ffc
 	.globl	__kuser_helper_end
 __kuser_helper_end:
 
+#endif
+
  THUMB(	.thumb	)
 
 /*
  * Vector stubs.
  *
- * This code is copied to 0xffff0200 so we can use branches in the
- * vectors, rather than ldr's.  Note that this code must not
- * exceed 0x300 bytes.
+ * This code is copied to 0xffff1000 so we can use branches in the
+ * vectors, rather than ldr's.  Note that this code must not exceed
+ * a page size.
  *
  * Common stub entry macro:
  *   Enter in IRQ mode, spsr = SVC/USR CPSR, lr = SVC/USR PC
@@ -985,8 +996,17 @@ ENDPROC(vector_\name)
 1:
 	.endm
 
-	.globl	__stubs_start
+	.section .stubs, "ax", %progbits
 __stubs_start:
+	@ This must be the first word
+	.word	vector_swi
+
+vector_rst:
+ ARM(	swi	SYS_ERROR0	)
+ THUMB(	svc	#0		)
+ THUMB(	nop			)
+	b	vector_und
+
 /*
  * Interrupt dispatcher
  */
@@ -1081,6 +1101,16 @@ __stubs_start:
 	.align	5
 
 /*=============================================================================
+ * Address exception handler
+ *-----------------------------------------------------------------------------
+ * These aren't too critical.
+ * (they're not supposed to happen, and won't happen in 32-bit data mode).
+ */
+
+vector_addrexcptn:
+	b	vector_addrexcptn
+
+/*=============================================================================
  * Undefined FIQs
  *-----------------------------------------------------------------------------
  * Enter in FIQ mode, spsr = ANY CPSR, lr = ANY PC
@@ -1093,45 +1123,19 @@ __stubs_start:
 vector_fiq:
 	subs	pc, lr, #4
 
-/*=============================================================================
- * Address exception handler
- *-----------------------------------------------------------------------------
- * These aren't too critical.
- * (they're not supposed to happen, and won't happen in 32-bit data mode).
- */
-
-vector_addrexcptn:
-	b	vector_addrexcptn
-
-/*
- * We group all the following data together to optimise
- * for CPUs with separate I & D caches.
- */
-	.align	5
-
-.LCvswi:
-	.word	vector_swi
-
-	.globl	__stubs_end
-__stubs_end:
-
-	.equ	stubs_offset, __vectors_start + 0x200 - __stubs_start
+	.globl	vector_fiq_offset
+	.equ	vector_fiq_offset, vector_fiq
 
-	.globl	__vectors_start
+	.section .vectors, "ax", %progbits
 __vectors_start:
- ARM(	swi	SYS_ERROR0	)
- THUMB(	svc	#0		)
- THUMB(	nop			)
-	W(b)	vector_und + stubs_offset
-	W(ldr)	pc, .LCvswi + stubs_offset
-	W(b)	vector_pabt + stubs_offset
-	W(b)	vector_dabt + stubs_offset
-	W(b)	vector_addrexcptn + stubs_offset
-	W(b)	vector_irq + stubs_offset
-	W(b)	vector_fiq + stubs_offset
-
-	.globl	__vectors_end
-__vectors_end:
+	W(b)	vector_rst
+	W(b)	vector_und
+	W(ldr)	pc, __vectors_start + 0x1000
+	W(b)	vector_pabt
+	W(b)	vector_dabt
+	W(b)	vector_addrexcptn
+	W(b)	vector_irq
+	W(b)	vector_fiq
 
 	.data
 
diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c
index 2adda11f712..25442f45114 100644
--- a/arch/arm/kernel/fiq.c
+++ b/arch/arm/kernel/fiq.c
@@ -47,6 +47,11 @@
 #include <asm/irq.h>
 #include <asm/traps.h>
 
+#define FIQ_OFFSET ({					\
+		extern void *vector_fiq_offset;		\
+		(unsigned)&vector_fiq_offset;		\
+	})
+
 static unsigned long no_fiq_insn;
 
 /* Default reacquire function
@@ -80,13 +85,16 @@ int show_fiq_list(struct seq_file *p, int prec)
 void set_fiq_handler(void *start, unsigned int length)
 {
 #if defined(CONFIG_CPU_USE_DOMAINS)
-	memcpy((void *)0xffff001c, start, length);
+	void *base = (void *)0xffff0000;
 #else
-	memcpy(vectors_page + 0x1c, start, length);
+	void *base = vectors_page;
 #endif
-	flush_icache_range(0xffff001c, 0xffff001c + length);
+	unsigned offset = FIQ_OFFSET;
+
+	memcpy(base + offset, start, length);
+	flush_icache_range(0xffff0000 + offset, 0xffff0000 + offset + length);
 	if (!vectors_high())
-		flush_icache_range(0x1c, 0x1c + length);
+		flush_icache_range(offset, offset + length);
 }
 
 int claim_fiq(struct fiq_handler *f)
@@ -144,6 +152,7 @@ EXPORT_SYMBOL(disable_fiq);
 
 void __init init_FIQ(int start)
 {
-	no_fiq_insn = *(unsigned long *)0xffff001c;
+	unsigned offset = FIQ_OFFSET;
+	no_fiq_insn = *(unsigned long *)(0xffff0000 + offset);
 	fiq_start = start;
 }
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 8bac553fe21..2725c87fade 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -342,7 +342,6 @@ __turn_mmu_on_loc:
 	.long	__turn_mmu_on_end
 
 #if defined(CONFIG_SMP)
-	__CPUINIT
 ENTRY(secondary_startup)
 	/*
 	 * Common entry point for secondary CPUs.
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 1fd749ee4a1..1b803117ed9 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -1049,7 +1049,8 @@ static struct notifier_block dbg_cpu_pm_nb = {
 
 static void __init pm_init(void)
 {
-	cpu_pm_register_notifier(&dbg_cpu_pm_nb);
+	if (has_ossr)
+		cpu_pm_register_notifier(&dbg_cpu_pm_nb);
 }
 #else
 static inline void pm_init(void)
diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S
index 1315c4ccfa5..dbe21107945 100644
--- a/arch/arm/kernel/hyp-stub.S
+++ b/arch/arm/kernel/hyp-stub.S
@@ -153,6 +153,8 @@ THUMB(	orr	r7, #(1 << 30)	)	@ HSCTLR.TE
 	mrc	p15, 4, r7, c14, c1, 0	@ CNTHCTL
 	orr	r7, r7, #3		@ PL1PCEN | PL1PCTEN
 	mcr	p15, 4, r7, c14, c1, 0	@ CNTHCTL
+	mov	r7, #0
+	mcrr	p15, 4, r7, r7, c14	@ CNTVOFF
 1:
 #endif
 
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 8c3094d0f7b..3a9d627cab6 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -12,6 +12,7 @@
  */
 #define pr_fmt(fmt) "hw perfevents: " fmt
 
+#include <linux/cpumask.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
@@ -53,7 +54,12 @@ armpmu_map_cache_event(const unsigned (*cache_map)
 static int
 armpmu_map_hw_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
 {
-	int mapping = (*event_map)[config];
+	int mapping;
+
+	if (config >= PERF_COUNT_HW_MAX)
+		return -ENOENT;
+
+	mapping = (*event_map)[config];
 	return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
 }
 
@@ -81,6 +87,9 @@ armpmu_map_event(struct perf_event *event,
 		return armpmu_map_cache_event(cache_map, config);
 	case PERF_TYPE_RAW:
 		return armpmu_map_raw_event(raw_event_mask, config);
+	default:
+		if (event->attr.type >= PERF_TYPE_MAX)
+			return armpmu_map_raw_event(raw_event_mask, config);
 	}
 
 	return -ENOENT;
@@ -158,6 +167,8 @@ armpmu_stop(struct perf_event *event, int flags)
 	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
 
+	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+		return;
 	/*
 	 * ARM pmu always has to update the counter, so ignore
 	 * PERF_EF_UPDATE, see comments in armpmu_start().
@@ -174,6 +185,8 @@ static void armpmu_start(struct perf_event *event, int flags)
 	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
 
+	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+		return;
 	/*
 	 * ARM pmu always has to reprogram the period, so ignore
 	 * PERF_EF_RELOAD, see the comment below.
@@ -201,6 +214,9 @@ armpmu_del(struct perf_event *event, int flags)
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 
+	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+		return;
+
 	armpmu_stop(event, PERF_EF_UPDATE);
 	hw_events->events[idx] = NULL;
 	clear_bit(idx, hw_events->used_mask);
@@ -217,6 +233,10 @@ armpmu_add(struct perf_event *event, int flags)
 	int idx;
 	int err = 0;
 
+	/* An event following a process won't be stopped earlier */
+	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+		return 0;
+
 	perf_pmu_disable(event->pmu);
 
 	/* If we don't have a space for the counter then finish early. */
@@ -253,6 +273,9 @@ validate_event(struct pmu_hw_events *hw_events,
 	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct pmu *leader_pmu = event->group_leader->pmu;
 
+	if (is_software_event(event))
+		return 1;
+
 	if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF)
 		return 1;
 
@@ -416,6 +439,10 @@ static int armpmu_event_init(struct perf_event *event)
 	int err = 0;
 	atomic_t *active_events = &armpmu->active_events;
 
+	if (event->cpu != -1 &&
+		!cpumask_test_cpu(event->cpu, &armpmu->valid_cpus))
+		return -ENOENT;
+
 	/* does not support taken branch sampling */
 	if (has_branch_stack(event))
 		return -EOPNOTSUPP;
@@ -569,6 +596,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 		return;
 	}
 
+	perf_callchain_store(entry, regs->ARM_pc);
 	tail = (struct frame_tail __user *)regs->ARM_fp - 1;
 
 	while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 1f2740e3dbc..0b48a38e3cf 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -19,6 +19,7 @@
 #define pr_fmt(fmt) "CPU PMU: " fmt
 
 #include <linux/bitmap.h>
+#include <linux/cpu_pm.h>
 #include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/of.h>
@@ -31,33 +32,36 @@
 #include <asm/pmu.h>
 
 /* Set at runtime when we know what CPU type we are. */
-static struct arm_pmu *cpu_pmu;
+static DEFINE_PER_CPU(struct arm_pmu *, cpu_pmu);
 
 static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events);
 static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask);
 static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
 
+static DEFINE_PER_CPU(struct cpupmu_regs, cpu_pmu_regs);
+
 /*
  * Despite the names, these two functions are CPU-specific and are used
  * by the OProfile/perf code.
  */
 const char *perf_pmu_name(void)
 {
-	if (!cpu_pmu)
+	struct arm_pmu *pmu = per_cpu(cpu_pmu, 0);
+	if (!pmu)
 		return NULL;
 
-	return cpu_pmu->name;
+	return pmu->name;
 }
 EXPORT_SYMBOL_GPL(perf_pmu_name);
 
 int perf_num_counters(void)
 {
-	int max_events = 0;
+	struct arm_pmu *pmu = per_cpu(cpu_pmu, 0);
 
-	if (cpu_pmu != NULL)
-		max_events = cpu_pmu->num_events;
+	if (!pmu)
+		return 0;
 
-	return max_events;
+	return pmu->num_events;
 }
 EXPORT_SYMBOL_GPL(perf_num_counters);
 
@@ -75,11 +79,13 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
 {
 	int i, irq, irqs;
 	struct platform_device *pmu_device = cpu_pmu->plat_device;
+	int cpu = -1;
 
 	irqs = min(pmu_device->num_resources, num_possible_cpus());
 
 	for (i = 0; i < irqs; ++i) {
-		if (!cpumask_test_and_clear_cpu(i, &cpu_pmu->active_irqs))
+		cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus);
+		if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
 			continue;
 		irq = platform_get_irq(pmu_device, i);
 		if (irq >= 0)
@@ -91,6 +97,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 {
 	int i, err, irq, irqs;
 	struct platform_device *pmu_device = cpu_pmu->plat_device;
+	int cpu = -1;
 
 	if (!pmu_device)
 		return -ENODEV;
@@ -103,6 +110,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 
 	for (i = 0; i < irqs; ++i) {
 		err = 0;
+		cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus);
 		irq = platform_get_irq(pmu_device, i);
 		if (irq < 0)
 			continue;
@@ -112,7 +120,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 		 * assume that we're running on a uniprocessor machine and
 		 * continue. Otherwise, continue without this interrupt.
 		 */
-		if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) {
+		if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
 			pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
 				    irq, i);
 			continue;
@@ -126,7 +134,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 			return err;
 		}
 
-		cpumask_set_cpu(i, &cpu_pmu->active_irqs);
+		cpumask_set_cpu(cpu, &cpu_pmu->active_irqs);
 	}
 
 	return 0;
@@ -135,7 +143,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
 {
 	int cpu;
-	for_each_possible_cpu(cpu) {
+	for_each_cpu_mask(cpu, cpu_pmu->valid_cpus) {
 		struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
 		events->events = per_cpu(hw_events, cpu);
 		events->used_mask = per_cpu(used_mask, cpu);
@@ -148,7 +156,7 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
 
 	/* Ensure the PMU has sane values out of reset. */
 	if (cpu_pmu->reset)
-		on_each_cpu(cpu_pmu->reset, cpu_pmu, 1);
+		on_each_cpu_mask(&cpu_pmu->valid_cpus, cpu_pmu->reset, cpu_pmu, 1);
 }
 
 /*
@@ -160,21 +168,46 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
 static int __cpuinit cpu_pmu_notify(struct notifier_block *b,
 				    unsigned long action, void *hcpu)
 {
+	struct arm_pmu *pmu = per_cpu(cpu_pmu, (long)hcpu);
+
 	if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
 		return NOTIFY_DONE;
 
-	if (cpu_pmu && cpu_pmu->reset)
-		cpu_pmu->reset(cpu_pmu);
+	if (pmu && pmu->reset)
+		pmu->reset(pmu);
 	else
 		return NOTIFY_DONE;
 
 	return NOTIFY_OK;
 }
 
+static int cpu_pmu_pm_notify(struct notifier_block *b,
+				    unsigned long action, void *hcpu)
+{
+	int cpu = smp_processor_id();
+	struct arm_pmu *pmu = per_cpu(cpu_pmu, cpu);
+	struct cpupmu_regs *pmuregs = &per_cpu(cpu_pmu_regs, cpu);
+
+	if (!pmu)
+		return NOTIFY_DONE;
+
+	if (action == CPU_PM_ENTER && pmu->save_regs) {
+		pmu->save_regs(pmu, pmuregs);
+	} else if (action == CPU_PM_EXIT && pmu->restore_regs) {
+		pmu->restore_regs(pmu, pmuregs);
+	}
+
+	return NOTIFY_OK;
+}
+
 static struct notifier_block __cpuinitdata cpu_pmu_hotplug_notifier = {
 	.notifier_call = cpu_pmu_notify,
 };
 
+static struct notifier_block __cpuinitdata cpu_pmu_pm_notifier = {
+	.notifier_call = cpu_pmu_pm_notify,
+};
+
 /*
  * PMU platform driver and devicetree bindings.
  */
@@ -246,6 +279,9 @@ static int probe_current_pmu(struct arm_pmu *pmu)
 		}
 	}
 
+	/* assume PMU support all the CPUs in this case */
+	cpumask_setall(&pmu->valid_cpus);
+
 	put_cpu();
 	return ret;
 }
@@ -253,15 +289,10 @@ static int probe_current_pmu(struct arm_pmu *pmu)
 static int cpu_pmu_device_probe(struct platform_device *pdev)
 {
 	const struct of_device_id *of_id;
-	int (*init_fn)(struct arm_pmu *);
 	struct device_node *node = pdev->dev.of_node;
 	struct arm_pmu *pmu;
-	int ret = -ENODEV;
-
-	if (cpu_pmu) {
-		pr_info("attempt to register multiple PMU devices!");
-		return -ENOSPC;
-	}
+	int ret = 0;
+	int cpu;
 
 	pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL);
 	if (!pmu) {
@@ -270,8 +301,28 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
 	}
 
 	if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) {
-		init_fn = of_id->data;
-		ret = init_fn(pmu);
+		smp_call_func_t init_fn = (smp_call_func_t)of_id->data;
+		struct device_node *ncluster;
+		int cluster = -1;
+		cpumask_t sibling_mask;
+
+		ncluster = of_parse_phandle(node, "cluster", 0);
+		if (ncluster) {
+			int len;
+			const u32 *hwid;
+			hwid = of_get_property(ncluster, "reg", &len);
+			if (hwid && len == 4)
+				cluster = be32_to_cpup(hwid);
+		}
+		/* set sibling mask to all cpu mask if socket is not specified */
+		if (cluster == -1 ||
+			cluster_to_logical_mask(cluster, &sibling_mask))
+			cpumask_setall(&sibling_mask);
+
+		smp_call_function_any(&sibling_mask, init_fn, pmu, 1);
+
+		/* now set the valid_cpus after init */
+		cpumask_copy(&pmu->valid_cpus, &sibling_mask);
 	} else {
 		ret = probe_current_pmu(pmu);
 	}
@@ -281,10 +332,12 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
 		goto out_free;
 	}
 
-	cpu_pmu = pmu;
-	cpu_pmu->plat_device = pdev;
-	cpu_pmu_init(cpu_pmu);
-	ret = armpmu_register(cpu_pmu, PERF_TYPE_RAW);
+	for_each_cpu_mask(cpu, pmu->valid_cpus)
+		per_cpu(cpu_pmu, cpu) = pmu;
+
+	pmu->plat_device = pdev;
+	cpu_pmu_init(pmu);
+	ret = armpmu_register(pmu, -1);
 
 	if (!ret)
 		return 0;
@@ -313,9 +366,17 @@ static int __init register_pmu_driver(void)
 	if (err)
 		return err;
 
+	err = cpu_pm_register_notifier(&cpu_pmu_pm_notifier);
+	if (err) {
+		unregister_cpu_notifier(&cpu_pmu_hotplug_notifier);
+		return err;
+	}
+
 	err = platform_driver_register(&cpu_pmu_driver);
-	if (err)
+	if (err) {
+		cpu_pm_unregister_notifier(&cpu_pmu_pm_notifier);
 		unregister_cpu_notifier(&cpu_pmu_hotplug_notifier);
+	}
 
 	return err;
 }
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 039cffb053a..654db5030c3 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -950,6 +950,51 @@ static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu)
 }
 #endif
 
+static void armv7pmu_save_regs(struct arm_pmu *cpu_pmu,
+					struct cpupmu_regs *regs)
+{
+	unsigned int cnt;
+	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (regs->pmc));
+	if (!(regs->pmc & ARMV7_PMNC_E))
+		return;
+
+	asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (regs->pmcntenset));
+	asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r" (regs->pmuseren));
+	asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (regs->pmintenset));
+	asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (regs->pmxevtcnt[0]));
+	for (cnt = ARMV7_IDX_COUNTER0;
+			cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) {
+		armv7_pmnc_select_counter(cnt);
+		asm volatile("mrc p15, 0, %0, c9, c13, 1"
+					: "=r"(regs->pmxevttype[cnt]));
+		asm volatile("mrc p15, 0, %0, c9, c13, 2"
+					: "=r"(regs->pmxevtcnt[cnt]));
+	}
+	return;
+}
+
+static void armv7pmu_restore_regs(struct arm_pmu *cpu_pmu,
+					struct cpupmu_regs *regs)
+{
+	unsigned int cnt;
+	if (!(regs->pmc & ARMV7_PMNC_E))
+		return;
+
+	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (regs->pmcntenset));
+	asm volatile("mcr p15, 0, %0, c9, c14, 0" : : "r" (regs->pmuseren));
+	asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (regs->pmintenset));
+	asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (regs->pmxevtcnt[0]));
+	for (cnt = ARMV7_IDX_COUNTER0;
+			cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) {
+		armv7_pmnc_select_counter(cnt);
+		asm volatile("mcr p15, 0, %0, c9, c13, 1"
+					: : "r"(regs->pmxevttype[cnt]));
+		asm volatile("mcr p15, 0, %0, c9, c13, 2"
+					: : "r"(regs->pmxevtcnt[cnt]));
+	}
+	asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (regs->pmc));
+}
+
 static void armv7pmu_enable_event(struct perf_event *event)
 {
 	unsigned long flags;
@@ -1223,6 +1268,8 @@ static void armv7pmu_init(struct arm_pmu *cpu_pmu)
 	cpu_pmu->start		= armv7pmu_start;
 	cpu_pmu->stop		= armv7pmu_stop;
 	cpu_pmu->reset		= armv7pmu_reset;
+	cpu_pmu->save_regs	= armv7pmu_save_regs;
+	cpu_pmu->restore_regs	= armv7pmu_restore_regs;
 	cpu_pmu->max_period	= (1LLU << 32) - 1;
 };
 
@@ -1240,7 +1287,7 @@ static u32 armv7_read_num_pmnc_events(void)
 static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
 {
 	armv7pmu_init(cpu_pmu);
-	cpu_pmu->name		= "ARMv7 Cortex-A8";
+	cpu_pmu->name		= "ARMv7_Cortex_A8";
 	cpu_pmu->map_event	= armv7_a8_map_event;
 	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
 	return 0;
@@ -1249,7 +1296,7 @@ static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
 static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
 {
 	armv7pmu_init(cpu_pmu);
-	cpu_pmu->name		= "ARMv7 Cortex-A9";
+	cpu_pmu->name		= "ARMv7_Cortex_A9";
 	cpu_pmu->map_event	= armv7_a9_map_event;
 	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
 	return 0;
@@ -1258,7 +1305,7 @@ static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
 static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
 {
 	armv7pmu_init(cpu_pmu);
-	cpu_pmu->name		= "ARMv7 Cortex-A5";
+	cpu_pmu->name		= "ARMv7_Cortex_A5";
 	cpu_pmu->map_event	= armv7_a5_map_event;
 	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
 	return 0;
@@ -1267,7 +1314,7 @@ static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
 static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
 {
 	armv7pmu_init(cpu_pmu);
-	cpu_pmu->name		= "ARMv7 Cortex-A15";
+	cpu_pmu->name		= "ARMv7_Cortex_A15";
 	cpu_pmu->map_event	= armv7_a15_map_event;
 	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
 	cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
@@ -1277,7 +1324,7 @@ static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
 static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu)
 {
 	armv7pmu_init(cpu_pmu);
-	cpu_pmu->name		= "ARMv7 Cortex-A7";
+	cpu_pmu->name		= "ARMv7_Cortex_A7";
 	cpu_pmu->map_event	= armv7_a7_map_event;
 	cpu_pmu->num_events	= armv7_read_num_pmnc_events();
 	cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 6e8931ccf13..ac4c2e5e17e 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -408,6 +408,7 @@ EXPORT_SYMBOL(dump_fpu);
 unsigned long get_wchan(struct task_struct *p)
 {
 	struct stackframe frame;
+	unsigned long stack_page;
 	int count = 0;
 	if (!p || p == current || p->state == TASK_RUNNING)
 		return 0;
@@ -416,9 +417,11 @@ unsigned long get_wchan(struct task_struct *p)
 	frame.sp = thread_saved_sp(p);
 	frame.lr = 0;			/* recovered from the stack */
 	frame.pc = thread_saved_pc(p);
+	stack_page = (unsigned long)task_stack_page(p);
 	do {
-		int ret = unwind_frame(&frame);
-		if (ret < 0)
+		if (frame.sp < stack_page ||
+		    frame.sp >= stack_page + THREAD_SIZE ||
+		    unwind_frame(&frame) < 0)
 			return 0;
 		if (!in_sched_functions(frame.pc))
 			return frame.pc;
@@ -433,10 +436,11 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
 }
 
 #ifdef CONFIG_MMU
+#ifdef CONFIG_KUSER_HELPERS
 /*
  * The vectors page is always readable from user space for the
- * atomic helpers and the signal restart code. Insert it into the
- * gate_vma so that it is visible through ptrace and /proc/<pid>/mem.
+ * atomic helpers. Insert it into the gate_vma so that it is visible
+ * through ptrace and /proc/<pid>/mem.
  */
 static struct vm_area_struct gate_vma = {
 	.vm_start	= 0xffff0000,
@@ -465,9 +469,48 @@ int in_gate_area_no_mm(unsigned long addr)
 {
 	return in_gate_area(NULL, addr);
 }
+#define is_gate_vma(vma)	((vma) == &gate_vma)
+#else
+#define is_gate_vma(vma)	0
+#endif
 
 const char *arch_vma_name(struct vm_area_struct *vma)
 {
-	return (vma == &gate_vma) ? "[vectors]" : NULL;
+	return is_gate_vma(vma) ? "[vectors]" :
+		(vma->vm_mm && vma->vm_start == vma->vm_mm->context.sigpage) ?
+		 "[sigpage]" : NULL;
+}
+
+static struct page *signal_page;
+extern struct page *get_signal_page(void);
+
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long addr;
+	int ret;
+
+	if (!signal_page)
+		signal_page = get_signal_page();
+	if (!signal_page)
+		return -ENOMEM;
+
+	down_write(&mm->mmap_sem);
+	addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
+	if (IS_ERR_VALUE(addr)) {
+		ret = addr;
+		goto up_fail;
+	}
+
+	ret = install_special_mapping(mm, addr, PAGE_SIZE,
+		VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
+		&signal_page);
+
+	if (ret == 0)
+		mm->context.sigpage = addr;
+
+ up_fail:
+	up_write(&mm->mmap_sem);
+	return ret;
 }
 #endif
diff --git a/arch/arm/kernel/psci.c b/arch/arm/kernel/psci.c
index 36531643cc2..0daf4f25228 100644
--- a/arch/arm/kernel/psci.c
+++ b/arch/arm/kernel/psci.c
@@ -17,6 +17,7 @@
 
 #include <linux/init.h>
 #include <linux/of.h>
+#include <linux/string.h>
 
 #include <asm/compiler.h>
 #include <asm/errno.h>
@@ -26,6 +27,11 @@
 
 struct psci_operations psci_ops;
 
+/* Type of psci support. Currently can only be enabled or disabled */
+#define PSCI_SUP_DISABLED		0
+#define PSCI_SUP_ENABLED		1
+
+static unsigned int psci;
 static int (*invoke_psci_fn)(u32, u32, u32, u32);
 
 enum psci_function {
@@ -42,6 +48,7 @@ static u32 psci_function_id[PSCI_FN_MAX];
 #define PSCI_RET_EOPNOTSUPP		-1
 #define PSCI_RET_EINVAL			-2
 #define PSCI_RET_EPERM			-3
+#define PSCI_RET_EALREADYON		-4
 
 static int psci_to_linux_errno(int errno)
 {
@@ -54,6 +61,8 @@ static int psci_to_linux_errno(int errno)
 		return -EINVAL;
 	case PSCI_RET_EPERM:
 		return -EPERM;
+	case PSCI_RET_EALREADYON:
+		return -EAGAIN;
 	};
 
 	return -EINVAL;
@@ -158,15 +167,18 @@ static const struct of_device_id psci_of_match[] __initconst = {
 	{},
 };
 
-static int __init psci_init(void)
+void __init psci_init(void)
 {
 	struct device_node *np;
 	const char *method;
 	u32 id;
 
+	if (psci == PSCI_SUP_DISABLED)
+		return;
+
 	np = of_find_matching_node(NULL, psci_of_match);
 	if (!np)
-		return 0;
+		return;
 
 	pr_info("probing function IDs from device-tree\n");
 
@@ -206,6 +218,35 @@ static int __init psci_init(void)
 
 out_put_node:
 	of_node_put(np);
-	return 0;
+	return;
+}
+
+int __init psci_probe(void)
+{
+	struct device_node *np;
+	int ret = -ENODEV;
+
+	if (psci == PSCI_SUP_ENABLED) {
+		np = of_find_matching_node(NULL, psci_of_match);
+		if (np)
+			ret = 0;
+	}
+
+	of_node_put(np);
+	return ret;
+}
+
+static int __init early_psci(char *val)
+{
+	int ret = 0;
+
+	if (strcmp(val, "enable") == 0)
+		psci = PSCI_SUP_ENABLED;
+	else if (strcmp(val, "disable") == 0)
+		psci = PSCI_SUP_DISABLED;
+	else
+		ret = -EINVAL;
+
+	return ret;
 }
-early_initcall(psci_init);
+early_param("psci", early_psci);
diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c
new file mode 100644
index 00000000000..23a11424c56
--- /dev/null
+++ b/arch/arm/kernel/psci_smp.c
@@ -0,0 +1,84 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) 2012 ARM Limited
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+#include <linux/init.h>
+#include <linux/irqchip/arm-gic.h>
+#include <linux/smp.h>
+#include <linux/of.h>
+
+#include <asm/psci.h>
+#include <asm/smp_plat.h>
+
+/*
+ * psci_smp assumes that the following is true about PSCI:
+ *
+ * cpu_suspend   Suspend the execution on a CPU
+ * @state        we don't currently describe affinity levels, so just pass 0.
+ * @entry_point  the first instruction to be executed on return
+ * returns 0  success, < 0 on failure
+ *
+ * cpu_off       Power down a CPU
+ * @state        we don't currently describe affinity levels, so just pass 0.
+ * no return on successful call
+ *
+ * cpu_on        Power up a CPU
+ * @cpuid        cpuid of target CPU, as from MPIDR
+ * @entry_point  the first instruction to be executed on return
+ * returns 0  success, < 0 on failure
+ *
+ * migrate       Migrate the context to a different CPU
+ * @cpuid        cpuid of target CPU, as from MPIDR
+ * returns 0  success, < 0 on failure
+ *
+ */
+
+extern void secondary_startup(void);
+
+static int __cpuinit psci_boot_secondary(unsigned int cpu,
+					 struct task_struct *idle)
+{
+	if (psci_ops.cpu_on)
+		return psci_ops.cpu_on(cpu_logical_map(cpu),
+				       __pa(secondary_startup));
+	return -ENODEV;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+void __ref psci_cpu_die(unsigned int cpu)
+{
+       const struct psci_power_state ps = {
+               .type = PSCI_POWER_STATE_TYPE_POWER_DOWN,
+       };
+
+       if (psci_ops.cpu_off)
+               psci_ops.cpu_off(ps);
+
+       /* We should never return */
+       panic("psci: cpu %d failed to shutdown\n", cpu);
+}
+#else
+#define psci_cpu_die NULL
+#endif
+
+bool __init psci_smp_available(void)
+{
+	/* is cpu_on available at least? */
+	return (psci_ops.cpu_on != NULL);
+}
+
+struct smp_operations __initdata psci_smp_ops = {
+	.smp_boot_secondary	= psci_boot_secondary,
+	.cpu_die		= psci_cpu_die,
+};
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index b4b1d397592..6002fe06b12 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -37,6 +37,7 @@
 #include <asm/cputype.h>
 #include <asm/elf.h>
 #include <asm/procinfo.h>
+#include <asm/psci.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
 #include <asm/smp_plat.h>
@@ -261,6 +262,19 @@ static int cpu_has_aliasing_icache(unsigned int arch)
 	int aliasing_icache;
 	unsigned int id_reg, num_sets, line_size;
 
+#ifdef CONFIG_BIG_LITTLE
+	/*
+	 * We expect a combination of Cortex-A15 and Cortex-A7 cores.
+	 * A7 = VIPT aliasing I-cache
+	 * A15 = PIPT (non-aliasing) I-cache
+	 * To cater for this discrepancy, let's assume aliasing I-cache
+	 * all the time.  This means unneeded extra work on the A15 but
+	 * only ptrace is affected which is not performance critical.
+	 */
+	if ((read_cpuid_id() & 0xff0ffff0) == 0x410fc0f0)
+		return 1;
+#endif
+
 	/* PIPT caches never alias. */
 	if (icache_is_pipt())
 		return 0;
@@ -796,9 +810,15 @@ void __init setup_arch(char **cmdline_p)
 	unflatten_device_tree();
 
 	arm_dt_init_cpu_maps();
+	psci_init();
 #ifdef CONFIG_SMP
 	if (is_smp()) {
-		smp_set_ops(mdesc->smp);
+		if (!mdesc->smp_init || !mdesc->smp_init()) {
+			if (psci_smp_available())
+				smp_set_ops(&psci_smp_ops);
+			else if (mdesc->smp)
+				smp_set_ops(mdesc->smp);
+		}
 		smp_init_cpus();
 	}
 #endif
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 296786bdbb7..5a42c12767a 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -8,6 +8,7 @@
  * published by the Free Software Foundation.
  */
 #include <linux/errno.h>
+#include <linux/random.h>
 #include <linux/signal.h>
 #include <linux/personality.h>
 #include <linux/uaccess.h>
@@ -15,12 +16,11 @@
 
 #include <asm/elf.h>
 #include <asm/cacheflush.h>
+#include <asm/traps.h>
 #include <asm/ucontext.h>
 #include <asm/unistd.h>
 #include <asm/vfp.h>
 
-#include "signal.h"
-
 /*
  * For ARM syscalls, we encode the syscall number into the instruction.
  */
@@ -40,11 +40,13 @@
 #define SWI_THUMB_SIGRETURN	(0xdf00 << 16 | 0x2700 | (__NR_sigreturn - __NR_SYSCALL_BASE))
 #define SWI_THUMB_RT_SIGRETURN	(0xdf00 << 16 | 0x2700 | (__NR_rt_sigreturn - __NR_SYSCALL_BASE))
 
-const unsigned long sigreturn_codes[7] = {
+static const unsigned long sigreturn_codes[7] = {
 	MOV_R7_NR_SIGRETURN,    SWI_SYS_SIGRETURN,    SWI_THUMB_SIGRETURN,
 	MOV_R7_NR_RT_SIGRETURN, SWI_SYS_RT_SIGRETURN, SWI_THUMB_RT_SIGRETURN,
 };
 
+static unsigned long signal_return_offset;
+
 #ifdef CONFIG_CRUNCH
 static int preserve_crunch_context(struct crunch_sigframe __user *frame)
 {
@@ -396,13 +398,19 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig,
 		    __put_user(sigreturn_codes[idx+1], rc+1))
 			return 1;
 
+#ifdef CONFIG_MMU
 		if (cpsr & MODE32_BIT) {
+			struct mm_struct *mm = current->mm;
 			/*
-			 * 32-bit code can use the new high-page
-			 * signal return code support.
+			 * 32-bit code can use the signal return page
+			 * except when the MPU has protected the vectors
+			 * page from PL0
 			 */
-			retcode = KERN_SIGRETURN_CODE + (idx << 2) + thumb;
-		} else {
+			retcode = mm->context.sigpage + signal_return_offset +
+				  (idx << 2) + thumb;
+		} else
+#endif
+		{
 			/*
 			 * Ensure that the instruction cache sees
 			 * the return code written onto the stack.
@@ -603,3 +611,33 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
 	} while (thread_flags & _TIF_WORK_MASK);
 	return 0;
 }
+
+struct page *get_signal_page(void)
+{
+	unsigned long ptr;
+	unsigned offset;
+	struct page *page;
+	void *addr;
+
+	page = alloc_pages(GFP_KERNEL, 0);
+
+	if (!page)
+		return NULL;
+
+	addr = page_address(page);
+
+	/* Give the signal return code some randomness */
+	offset = 0x200 + (get_random_int() & 0x7fc);
+	signal_return_offset = offset;
+
+	/*
+	 * Copy signal return handlers into the vector page, and
+	 * set sigreturn to be a pointer to these.
+	 */
+	memcpy(addr + offset, sigreturn_codes, sizeof(sigreturn_codes));
+
+	ptr = (unsigned long)addr + offset;
+	flush_icache_range(ptr, ptr + sizeof(sigreturn_codes));
+
+	return page;
+}
diff --git a/arch/arm/kernel/signal.h b/arch/arm/kernel/signal.h
deleted file mode 100644
index 5ff067b7c75..00000000000
--- a/arch/arm/kernel/signal.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- *  linux/arch/arm/kernel/signal.h
- *
- *  Copyright (C) 2005-2009 Russell King.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#define KERN_SIGRETURN_CODE	(CONFIG_VECTORS_BASE + 0x00000500)
-
-extern const unsigned long sigreturn_codes[7];
diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index 987dcf33415..b5c1e636ed8 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -4,6 +4,7 @@
 #include <asm/assembler.h>
 #include <asm/glue-cache.h>
 #include <asm/glue-proc.h>
+#include "entry-header.S"
 	.text
 
 /*
@@ -30,9 +31,8 @@ ENTRY(__cpu_suspend)
 	mov	r2, r5			@ virtual SP
 	ldr	r3, =sleep_save_sp
 #ifdef CONFIG_SMP
-	ALT_SMP(mrc p15, 0, lr, c0, c0, 5)
-	ALT_UP(mov lr, #0)
-	and	lr, lr, #15
+	get_thread_info	r5
+	ldr	lr, [r5, #TI_CPU] 	@ cpu logical index
 	add	r3, r3, lr, lsl #2
 #endif
 	bl	__cpu_suspend_save
@@ -82,10 +82,13 @@ ENDPROC(cpu_resume_after_mmu)
 	.align
 ENTRY(cpu_resume)
 #ifdef CONFIG_SMP
+	mov	r1, #0			@ fall-back logical index for UP
+	ALT_SMP(mrc p15, 0, r0, c0, c0, 5)
+	ALT_UP_B(1f)
+	bic	r0, #0xff000000
+	bl	cpu_logical_index 	@ return logical index in r1
+1:
 	adr	r0, sleep_save_sp
-	ALT_SMP(mrc p15, 0, r1, c0, c0, 5)
-	ALT_UP(mov r1, #0)
-	and	r1, r1, #15
 	ldr	r0, [r0, r1, lsl #2]	@ stack phys addr
 #else
 	ldr	r0, sleep_save_sp	@ stack phys addr
@@ -102,3 +105,20 @@ sleep_save_sp:
 	.rept	CONFIG_NR_CPUS
 	.long	0				@ preserve stack phys ptr here
 	.endr
+
+#ifdef CONFIG_SMP
+cpu_logical_index:
+	adr	r3, cpu_map_ptr
+	ldr	r2, [r3]
+	add	r3, r3, r2		@ virt_to_phys(__cpu_logical_map)
+	mov	r1, #0
+1:
+	ldr	r2, [r3, r1, lsl #2]
+	cmp	r2, r0
+	moveq	pc, lr
+	add	r1, r1, #1
+	b	1b
+
+cpu_map_ptr:
+	.long __cpu_logical_map - .
+#endif
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 5919eb451bb..dc2843f337a 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -46,6 +46,9 @@
 #include <asm/virt.h>
 #include <asm/mach/arch.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/arm-ipi.h>
+
 /*
  * as from 2.5, kernels no longer have an init_tasks structure
  * so we need some other way of telling a new secondary core
@@ -57,7 +60,7 @@ struct secondary_data secondary_data;
  * control for which core is the next to come out of the secondary
  * boot "holding pen"
  */
-volatile int __cpuinitdata pen_release = -1;
+volatile int pen_release = -1;
 
 enum ipi_msg_type {
 	IPI_WAKEUP,
@@ -66,6 +69,7 @@ enum ipi_msg_type {
 	IPI_CALL_FUNC,
 	IPI_CALL_FUNC_SINGLE,
 	IPI_CPU_STOP,
+	IPI_COMPLETION,
 };
 
 static DECLARE_COMPLETION(cpu_running);
@@ -463,6 +467,7 @@ static const char *ipi_types[NR_IPI] = {
 	S(IPI_CALL_FUNC, "Function call interrupts"),
 	S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"),
 	S(IPI_CPU_STOP, "CPU stop interrupts"),
+	S(IPI_COMPLETION, "completion interrupts"),
 };
 
 void show_ipi_list(struct seq_file *p, int prec)
@@ -588,6 +593,19 @@ static void ipi_cpu_stop(unsigned int cpu)
 		cpu_relax();
 }
 
+static DEFINE_PER_CPU(struct completion *, cpu_completion);
+
+int register_ipi_completion(struct completion *completion, int cpu)
+{
+	per_cpu(cpu_completion, cpu) = completion;
+	return IPI_COMPLETION;
+}
+
+static void ipi_complete(unsigned int cpu)
+{
+	complete(per_cpu(cpu_completion, cpu));
+}
+
 /*
  * Main handler for inter-processor interrupts
  */
@@ -604,6 +622,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
 	if (ipinr < NR_IPI)
 		__inc_irq_stat(cpu, ipi_irqs[ipinr]);
 
+	trace_arm_ipi_entry(ipinr);
 	switch (ipinr) {
 	case IPI_WAKEUP:
 		break;
@@ -638,11 +657,18 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
 		irq_exit();
 		break;
 
+	case IPI_COMPLETION:
+		irq_enter();
+		ipi_complete(cpu);
+		irq_exit();
+		break;
+
 	default:
 		printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n",
 		       cpu, ipinr);
 		break;
 	}
+	trace_arm_ipi_exit(ipinr);
 	set_irq_regs(old_regs);
 }
 
diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c
index 9a52a07aa40..a98b62dca2f 100644
--- a/arch/arm/kernel/smp_tlb.c
+++ b/arch/arm/kernel/smp_tlb.c
@@ -103,7 +103,7 @@ static void broadcast_tlb_a15_erratum(void)
 
 static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm)
 {
-	int cpu, this_cpu;
+	int this_cpu;
 	cpumask_t mask = { CPU_BITS_NONE };
 
 	if (!erratum_a15_798181())
@@ -111,21 +111,7 @@ static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm)
 
 	dummy_flush_tlb_a15_erratum();
 	this_cpu = get_cpu();
-	for_each_online_cpu(cpu) {
-		if (cpu == this_cpu)
-			continue;
-		/*
-		 * We only need to send an IPI if the other CPUs are running
-		 * the same ASID as the one being invalidated. There is no
-		 * need for locking around the active_asids check since the
-		 * switch_mm() function has at least one dmb() (as required by
-		 * this workaround) in case a context switch happens on
-		 * another CPU after the condition below.
-		 */
-		if (atomic64_read(&mm->context.id) ==
-		    atomic64_read(&per_cpu(active_asids, cpu)))
-			cpumask_set_cpu(cpu, &mask);
-	}
+	a15_erratum_get_cpumask(this_cpu, mm, &mask);
 	smp_call_function_many(&mask, ipi_flush_tlb_a15_erratum, NULL, 1);
 	put_cpu();
 }
diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c
index 90525d9d290..f6fd1d4398c 100644
--- a/arch/arm/kernel/smp_twd.c
+++ b/arch/arm/kernel/smp_twd.c
@@ -120,7 +120,7 @@ static int twd_rate_change(struct notifier_block *nb,
 	 * changing cpu.
 	 */
 	if (flags == POST_RATE_CHANGE)
-		smp_call_function(twd_update_frequency,
+		on_each_cpu(twd_update_frequency,
 				  (void *)&cnd->new_rate, 1);
 
 	return NOTIFY_OK;
diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c
index 00f79e59985..af4e8c8a542 100644
--- a/arch/arm/kernel/stacktrace.c
+++ b/arch/arm/kernel/stacktrace.c
@@ -31,7 +31,7 @@ int notrace unwind_frame(struct stackframe *frame)
 	high = ALIGN(low, THREAD_SIZE);
 
 	/* check current frame pointer is within bounds */
-	if (fp < (low + 12) || fp + 4 >= high)
+	if (fp < low + 12 || fp > high - 4)
 		return -EINVAL;
 
 	/* restore the registers from the stack frame */
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index c5a59546a25..677da58d9e8 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -23,6 +23,7 @@
 #include <linux/slab.h>
 
 #include <asm/cputype.h>
+#include <asm/smp_plat.h>
 #include <asm/topology.h>
 
 /*
@@ -289,6 +290,140 @@ void store_cpu_topology(unsigned int cpuid)
 		cpu_topology[cpuid].socket_id, mpidr);
 }
 
+
+#ifdef CONFIG_SCHED_HMP
+
+static const char * const little_cores[] = {
+	"arm,cortex-a7",
+	NULL,
+};
+
+static bool is_little_cpu(struct device_node *cn)
+{
+	const char * const *lc;
+	for (lc = little_cores; *lc; lc++)
+		if (of_device_is_compatible(cn, *lc))
+			return true;
+	return false;
+}
+
+void __init arch_get_fast_and_slow_cpus(struct cpumask *fast,
+					struct cpumask *slow)
+{
+	struct device_node *cn = NULL;
+	int cpu;
+
+	cpumask_clear(fast);
+	cpumask_clear(slow);
+
+	/*
+	 * Use the config options if they are given. This helps testing
+	 * HMP scheduling on systems without a big.LITTLE architecture.
+	 */
+	if (strlen(CONFIG_HMP_FAST_CPU_MASK) && strlen(CONFIG_HMP_SLOW_CPU_MASK)) {
+		if (cpulist_parse(CONFIG_HMP_FAST_CPU_MASK, fast))
+			WARN(1, "Failed to parse HMP fast cpu mask!\n");
+		if (cpulist_parse(CONFIG_HMP_SLOW_CPU_MASK, slow))
+			WARN(1, "Failed to parse HMP slow cpu mask!\n");
+		return;
+	}
+
+	/*
+	 * Else, parse device tree for little cores.
+	 */
+	while ((cn = of_find_node_by_type(cn, "cpu"))) {
+
+		const u32 *mpidr;
+		int len;
+
+		mpidr = of_get_property(cn, "reg", &len);
+		if (!mpidr || len != 4) {
+			pr_err("* %s missing reg property\n", cn->full_name);
+			continue;
+		}
+
+		cpu = get_logical_index(be32_to_cpup(mpidr));
+		if (cpu == -EINVAL) {
+			pr_err("couldn't get logical index for mpidr %x\n",
+							be32_to_cpup(mpidr));
+			break;
+		}
+
+		if (is_little_cpu(cn))
+			cpumask_set_cpu(cpu, slow);
+		else
+			cpumask_set_cpu(cpu, fast);
+	}
+
+	if (!cpumask_empty(fast) && !cpumask_empty(slow))
+		return;
+
+	/*
+	 * We didn't find both big and little cores so let's call all cores
+	 * fast as this will keep the system running, with all cores being
+	 * treated equal.
+	 */
+	cpumask_setall(fast);
+	cpumask_clear(slow);
+}
+
+struct cpumask hmp_slow_cpu_mask;
+
+void __init arch_get_hmp_domains(struct list_head *hmp_domains_list)
+{
+	struct cpumask hmp_fast_cpu_mask;
+	struct hmp_domain *domain;
+
+	arch_get_fast_and_slow_cpus(&hmp_fast_cpu_mask, &hmp_slow_cpu_mask);
+
+	/*
+	 * Initialize hmp_domains
+	 * Must be ordered with respect to compute capacity.
+	 * Fastest domain at head of list.
+	 */
+	if(!cpumask_empty(&hmp_slow_cpu_mask)) {
+		domain = (struct hmp_domain *)
+			kmalloc(sizeof(struct hmp_domain), GFP_KERNEL);
+		cpumask_copy(&domain->possible_cpus, &hmp_slow_cpu_mask);
+		cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus);
+		list_add(&domain->hmp_domains, hmp_domains_list);
+	}
+	domain = (struct hmp_domain *)
+		kmalloc(sizeof(struct hmp_domain), GFP_KERNEL);
+	cpumask_copy(&domain->possible_cpus, &hmp_fast_cpu_mask);
+	cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus);
+	list_add(&domain->hmp_domains, hmp_domains_list);
+}
+#endif /* CONFIG_SCHED_HMP */
+
+
+/*
+ * cluster_to_logical_mask - return cpu logical mask of CPUs in a cluster
+ * @socket_id:		cluster HW identifier
+ * @cluster_mask:	the cpumask location to be initialized, modified by the
+ *			function only if return value == 0
+ *
+ * Return:
+ *
+ * 0 on success
+ * -EINVAL if cluster_mask is NULL or there is no record matching socket_id
+ */
+int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask)
+{
+	int cpu;
+
+	if (!cluster_mask)
+		return -EINVAL;
+
+	for_each_online_cpu(cpu)
+		if (socket_id == topology_physical_package_id(cpu)) {
+			cpumask_copy(cluster_mask, topology_core_cpumask(cpu));
+			return 0;
+		}
+
+	return -EINVAL;
+}
+
 /*
  * init_cpu_topology is called at boot when only one cpu is running
  * which prevent simultaneous write access to cpu_topology array
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 18b32e8e449..d6a0fdb6c2e 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -35,9 +35,13 @@
 #include <asm/tls.h>
 #include <asm/system_misc.h>
 
-#include "signal.h"
-
-static const char *handler[]= { "prefetch abort", "data abort", "address exception", "interrupt" };
+static const char *handler[]= {
+	"prefetch abort",
+	"data abort",
+	"address exception",
+	"interrupt",
+	"undefined instruction",
+};
 
 void *vectors_page;
 
@@ -800,47 +804,55 @@ void __init trap_init(void)
 	return;
 }
 
-static void __init kuser_get_tls_init(unsigned long vectors)
+#ifdef CONFIG_KUSER_HELPERS
+static void __init kuser_init(void *vectors)
 {
+	extern char __kuser_helper_start[], __kuser_helper_end[];
+	int kuser_sz = __kuser_helper_end - __kuser_helper_start;
+
+	memcpy(vectors + 0x1000 - kuser_sz, __kuser_helper_start, kuser_sz);
+
 	/*
 	 * vectors + 0xfe0 = __kuser_get_tls
 	 * vectors + 0xfe8 = hardware TLS instruction at 0xffff0fe8
 	 */
 	if (tls_emu || has_tls_reg)
-		memcpy((void *)vectors + 0xfe0, (void *)vectors + 0xfe8, 4);
+		memcpy(vectors + 0xfe0, vectors + 0xfe8, 4);
 }
+#else
+static void __init kuser_init(void *vectors)
+{
+}
+#endif
 
 void __init early_trap_init(void *vectors_base)
 {
 	unsigned long vectors = (unsigned long)vectors_base;
 	extern char __stubs_start[], __stubs_end[];
 	extern char __vectors_start[], __vectors_end[];
-	extern char __kuser_helper_start[], __kuser_helper_end[];
-	int kuser_sz = __kuser_helper_end - __kuser_helper_start;
+	unsigned i;
 
 	vectors_page = vectors_base;
 
 	/*
+	 * Poison the vectors page with an undefined instruction.  This
+	 * instruction is chosen to be undefined for both ARM and Thumb
+	 * ISAs.  The Thumb version is an undefined instruction with a
+	 * branch back to the undefined instruction.
+	 */
+	for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
+		((u32 *)vectors_base)[i] = 0xe7fddef1;
+
+	/*
 	 * Copy the vectors, stubs and kuser helpers (in entry-armv.S)
 	 * into the vector page, mapped at 0xffff0000, and ensure these
 	 * are visible to the instruction stream.
 	 */
 	memcpy((void *)vectors, __vectors_start, __vectors_end - __vectors_start);
-	memcpy((void *)vectors + 0x200, __stubs_start, __stubs_end - __stubs_start);
-	memcpy((void *)vectors + 0x1000 - kuser_sz, __kuser_helper_start, kuser_sz);
+	memcpy((void *)vectors + 0x1000, __stubs_start, __stubs_end - __stubs_start);
 
-	/*
-	 * Do processor specific fixups for the kuser helpers
-	 */
-	kuser_get_tls_init(vectors);
-
-	/*
-	 * Copy signal return handlers into the vector page, and
-	 * set sigreturn to be a pointer to these.
-	 */
-	memcpy((void *)(vectors + KERN_SIGRETURN_CODE - CONFIG_VECTORS_BASE),
-	       sigreturn_codes, sizeof(sigreturn_codes));
+	kuser_init(vectors_base);
 
-	flush_icache_range(vectors, vectors + PAGE_SIZE);
+	flush_icache_range(vectors, vectors + PAGE_SIZE * 2);
 	modify_domain(DOMAIN_USER, DOMAIN_CLIENT);
 }
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index a871b8e00fc..33f2ea32f5a 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -152,6 +152,23 @@ SECTIONS
 	. = ALIGN(PAGE_SIZE);
 	__init_begin = .;
 #endif
+	/*
+	 * The vectors and stubs are relocatable code, and the
+	 * only thing that matters is their relative offsets
+	 */
+	__vectors_start = .;
+	.vectors 0 : AT(__vectors_start) {
+		*(.vectors)
+	}
+	. = __vectors_start + SIZEOF(.vectors);
+	__vectors_end = .;
+
+	__stubs_start = .;
+	.stubs 0x1000 : AT(__stubs_start) {
+		*(.stubs)
+	}
+	. = __stubs_start + SIZEOF(.stubs);
+	__stubs_end = .;
 
 	INIT_TEXT_SECTION(8)
 	.exit.text : {
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 8eea97be1ed..db9cf692d4d 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -146,7 +146,11 @@ static bool pm_fake(struct kvm_vcpu *vcpu,
 #define access_pmintenclr pm_fake
 
 /* Architected CP15 registers.
- * Important: Must be sorted ascending by CRn, CRM, Op1, Op2
+ * CRn denotes the primary register number, but is copied to the CRm in the
+ * user space API for 64-bit register access in line with the terminology used
+ * in the ARM ARM.
+ * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit
+ *            registers preceding 32-bit ones.
  */
 static const struct coproc_reg cp15_regs[] = {
 	/* CSSELR: swapped by interrupt.S. */
@@ -154,8 +158,8 @@ static const struct coproc_reg cp15_regs[] = {
 			NULL, reset_unknown, c0_CSSELR },
 
 	/* TTBR0/TTBR1: swapped by interrupt.S. */
-	{ CRm( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 },
-	{ CRm( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 },
+	{ CRm64( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 },
+	{ CRm64( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 },
 
 	/* TTBCR: swapped by interrupt.S. */
 	{ CRn( 2), CRm( 0), Op1( 0), Op2( 2), is32,
@@ -180,6 +184,10 @@ static const struct coproc_reg cp15_regs[] = {
 			NULL, reset_unknown, c6_DFAR },
 	{ CRn( 6), CRm( 0), Op1( 0), Op2( 2), is32,
 			NULL, reset_unknown, c6_IFAR },
+
+	/* PAR swapped by interrupt.S */
+	{ CRm64( 7), Op1( 0), is64, NULL, reset_unknown64, c7_PAR },
+
 	/*
 	 * DC{C,I,CI}SW operations:
 	 */
@@ -395,12 +403,13 @@ static bool index_to_params(u64 id, struct coproc_params *params)
 			      | KVM_REG_ARM_OPC1_MASK))
 			return false;
 		params->is_64bit = true;
-		params->CRm = ((id & KVM_REG_ARM_CRM_MASK)
+		/* CRm to CRn: see cp15_to_index for details */
+		params->CRn = ((id & KVM_REG_ARM_CRM_MASK)
 			       >> KVM_REG_ARM_CRM_SHIFT);
 		params->Op1 = ((id & KVM_REG_ARM_OPC1_MASK)
 			       >> KVM_REG_ARM_OPC1_SHIFT);
 		params->Op2 = 0;
-		params->CRn = 0;
+		params->CRm = 0;
 		return true;
 	default:
 		return false;
@@ -894,7 +903,14 @@ static u64 cp15_to_index(const struct coproc_reg *reg)
 	if (reg->is_64) {
 		val |= KVM_REG_SIZE_U64;
 		val |= (reg->Op1 << KVM_REG_ARM_OPC1_SHIFT);
-		val |= (reg->CRm << KVM_REG_ARM_CRM_SHIFT);
+		/*
+		 * CRn always denotes the primary coproc. reg. nr. for the
+		 * in-kernel representation, but the user space API uses the
+		 * CRm for the encoding, because it is modelled after the
+		 * MRRC/MCRR instructions: see the ARM ARM rev. c page
+		 * B3-1445
+		 */
+		val |= (reg->CRn << KVM_REG_ARM_CRM_SHIFT);
 	} else {
 		val |= KVM_REG_SIZE_U32;
 		val |= (reg->Op1 << KVM_REG_ARM_OPC1_SHIFT);
diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h
index b7301d3e479..0461d5c8d3d 100644
--- a/arch/arm/kvm/coproc.h
+++ b/arch/arm/kvm/coproc.h
@@ -135,6 +135,8 @@ static inline int cmp_reg(const struct coproc_reg *i1,
 		return -1;
 	if (i1->CRn != i2->CRn)
 		return i1->CRn - i2->CRn;
+	if (i1->is_64 != i2->is_64)
+		return i2->is_64 - i1->is_64;
 	if (i1->CRm != i2->CRm)
 		return i1->CRm - i2->CRm;
 	if (i1->Op1 != i2->Op1)
@@ -145,6 +147,7 @@ static inline int cmp_reg(const struct coproc_reg *i1,
 
 #define CRn(_x)		.CRn = _x
 #define CRm(_x) 	.CRm = _x
+#define CRm64(_x)       .CRn = _x, .CRm = 0
 #define Op1(_x) 	.Op1 = _x
 #define Op2(_x) 	.Op2 = _x
 #define is64		.is_64 = true
diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c
index 685063a6d0c..cf93472b9dd 100644
--- a/arch/arm/kvm/coproc_a15.c
+++ b/arch/arm/kvm/coproc_a15.c
@@ -114,7 +114,11 @@ static bool access_l2ectlr(struct kvm_vcpu *vcpu,
 
 /*
  * A15-specific CP15 registers.
- * Important: Must be sorted ascending by CRn, CRM, Op1, Op2
+ * CRn denotes the primary register number, but is copied to the CRm in the
+ * user space API for 64-bit register access in line with the terminology used
+ * in the ARM ARM.
+ * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit
+ *            registers preceding 32-bit ones.
  */
 static const struct coproc_reg a15_regs[] = {
 	/* MPIDR: we use VMPIDR for guest access. */
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index f7793df62f5..16cd4ba5d7f 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -49,6 +49,7 @@ __kvm_hyp_code_start:
 ENTRY(__kvm_tlb_flush_vmid_ipa)
 	push	{r2, r3}
 
+	dsb	ishst
 	add	r0, r0, #KVM_VTTBR
 	ldrd	r2, r3, [r0]
 	mcrr	p15, 6, r2, r3, c2	@ Write VTTBR
@@ -291,6 +292,7 @@ THUMB(	orr	r2, r2, #PSR_T_BIT	)
 	ldr	r2, =BSYM(panic)
 	msr	ELR_hyp, r2
 	ldr	r0, =\panic_str
+	clrex				@ Clear exclusive monitor
 	eret
 .endm
 
@@ -414,6 +416,10 @@ guest_trap:
 	mrcne	p15, 4, r2, c6, c0, 4	@ HPFAR
 	bne	3f
 
+	/* Preserve PAR */
+	mrrc	p15, 0, r0, r1, c7	@ PAR
+	push	{r0, r1}
+
 	/* Resolve IPA using the xFAR */
 	mcr	p15, 0, r2, c7, c8, 0	@ ATS1CPR
 	isb
@@ -424,13 +430,20 @@ guest_trap:
 	lsl	r2, r2, #4
 	orr	r2, r2, r1, lsl #24
 
+	/* Restore PAR */
+	pop	{r0, r1}
+	mcrr	p15, 0, r0, r1, c7	@ PAR
+
 3:	load_vcpu			@ Load VCPU pointer to r0
 	str	r2, [r0, #VCPU_HPFAR]
 
 1:	mov	r1, #ARM_EXCEPTION_HVC
 	b	__kvm_vcpu_return
 
-4:	pop	{r0, r1, r2}		@ Failed translation, return to guest
+4:	pop	{r0, r1}		@ Failed translation, return to guest
+	mcrr	p15, 0, r0, r1, c7	@ PAR
+	clrex
+	pop	{r0, r1, r2}
 	eret
 
 /*
@@ -456,6 +469,7 @@ switch_to_guest_vfp:
 
 	pop	{r3-r7}
 	pop	{r0-r2}
+	clrex
 	eret
 #endif
 
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 3c8f2f0b4c5..6f18695a09c 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -302,11 +302,14 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 	.endif
 
 	mrc	p15, 0, r2, c14, c1, 0	@ CNTKCTL
+	mrrc	p15, 0, r4, r5, c7	@ PAR
 
 	.if \store_to_vcpu == 0
-	push	{r2}
+	push	{r2,r4-r5}
 	.else
 	str	r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
+	add	r12, vcpu, #CP15_OFFSET(c7_PAR)
+	strd	r4, r5, [r12]
 	.endif
 .endm
 
@@ -319,12 +322,15 @@ vcpu	.req	r0		@ vcpu pointer always in r0
  */
 .macro write_cp15_state read_from_vcpu
 	.if \read_from_vcpu == 0
-	pop	{r2}
+	pop	{r2,r4-r5}
 	.else
 	ldr	r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
+	add	r12, vcpu, #CP15_OFFSET(c7_PAR)
+	ldrd	r4, r5, [r12]
 	.endif
 
 	mcr	p15, 0, r2, c14, c1, 0	@ CNTKCTL
+	mcrr	p15, 0, r4, r5, c7	@ PAR
 
 	.if \read_from_vcpu == 0
 	pop	{r2-r12}
@@ -497,6 +503,10 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 	add	r5, vcpu, r4
 	strd	r2, r3, [r5]
 
+	@ Ensure host CNTVCT == CNTPCT
+	mov	r2, #0
+	mcrr	p15, 4, r2, r2, c14	@ CNTVOFF
+
 1:
 #endif
 	@ Allow physical timer/counter access for the host
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 84ba67b982c..e04613906f1 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -313,6 +313,17 @@ out:
 	return err;
 }
 
+static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
+{
+	if (!is_vmalloc_addr(kaddr)) {
+		BUG_ON(!virt_addr_valid(kaddr));
+		return __pa(kaddr);
+	} else {
+		return page_to_phys(vmalloc_to_page(kaddr)) +
+		       offset_in_page(kaddr);
+	}
+}
+
 /**
  * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
  * @from:	The virtual kernel start address of the range
@@ -324,16 +335,27 @@ out:
  */
 int create_hyp_mappings(void *from, void *to)
 {
-	unsigned long phys_addr = virt_to_phys(from);
+	phys_addr_t phys_addr;
+	unsigned long virt_addr;
 	unsigned long start = KERN_TO_HYP((unsigned long)from);
 	unsigned long end = KERN_TO_HYP((unsigned long)to);
 
-	/* Check for a valid kernel memory mapping */
-	if (!virt_addr_valid(from) || !virt_addr_valid(to - 1))
-		return -EINVAL;
+	start = start & PAGE_MASK;
+	end = PAGE_ALIGN(end);
 
-	return __create_hyp_mappings(hyp_pgd, start, end,
-				     __phys_to_pfn(phys_addr), PAGE_HYP);
+	for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) {
+		int err;
+
+		phys_addr = kvm_kaddr_to_phys(from + virt_addr - start);
+		err = __create_hyp_mappings(hyp_pgd, virt_addr,
+					    virt_addr + PAGE_SIZE,
+					    __phys_to_pfn(phys_addr),
+					    PAGE_HYP);
+		if (err)
+			return err;
+	}
+
+	return 0;
 }
 
 /**
diff --git a/arch/arm/mach-at91/Makefile b/arch/arm/mach-at91/Makefile
index 788562dccb4..c336efdf8f1 100644
--- a/arch/arm/mach-at91/Makefile
+++ b/arch/arm/mach-at91/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the linux kernel.
 #
 
-obj-y		:= irq.o gpio.o setup.o
+obj-y		:= irq.o gpio.o setup.o sysirq_mask.o
 obj-m		:=
 obj-n		:=
 obj-		:=
diff --git a/arch/arm/mach-at91/at91sam9260.c b/arch/arm/mach-at91/at91sam9260.c
index a8ce24538da..cdb1fb695ed 100644
--- a/arch/arm/mach-at91/at91sam9260.c
+++ b/arch/arm/mach-at91/at91sam9260.c
@@ -351,6 +351,8 @@ static void __init at91sam9260_initialize(void)
 	at91_extern_irq = (1 << AT91SAM9260_ID_IRQ0) | (1 << AT91SAM9260_ID_IRQ1)
 			| (1 << AT91SAM9260_ID_IRQ2);
 
+	at91_sysirq_mask_rtt(AT91SAM9260_BASE_RTT);
+
 	/* Register GPIO subsystem */
 	at91_gpio_init(at91sam9260_gpio, 3);
 }
diff --git a/arch/arm/mach-at91/at91sam9261.c b/arch/arm/mach-at91/at91sam9261.c
index 25efb5ac30f..7c9d2ea5bb8 100644
--- a/arch/arm/mach-at91/at91sam9261.c
+++ b/arch/arm/mach-at91/at91sam9261.c
@@ -293,6 +293,8 @@ static void __init at91sam9261_initialize(void)
 	at91_extern_irq = (1 << AT91SAM9261_ID_IRQ0) | (1 << AT91SAM9261_ID_IRQ1)
 			| (1 << AT91SAM9261_ID_IRQ2);
 
+	at91_sysirq_mask_rtt(AT91SAM9261_BASE_RTT);
+
 	/* Register GPIO subsystem */
 	at91_gpio_init(at91sam9261_gpio, 3);
 }
diff --git a/arch/arm/mach-at91/at91sam9263.c b/arch/arm/mach-at91/at91sam9263.c
index f44ffd2105a..c6b2f477c71 100644
--- a/arch/arm/mach-at91/at91sam9263.c
+++ b/arch/arm/mach-at91/at91sam9263.c
@@ -329,6 +329,9 @@ static void __init at91sam9263_initialize(void)
 	arm_pm_restart = at91sam9_alt_restart;
 	at91_extern_irq = (1 << AT91SAM9263_ID_IRQ0) | (1 << AT91SAM9263_ID_IRQ1);
 
+	at91_sysirq_mask_rtt(AT91SAM9263_BASE_RTT0);
+	at91_sysirq_mask_rtt(AT91SAM9263_BASE_RTT1);
+
 	/* Register GPIO subsystem */
 	at91_gpio_init(at91sam9263_gpio, 5);
 }
diff --git a/arch/arm/mach-at91/at91sam9g45.c b/arch/arm/mach-at91/at91sam9g45.c
index 8b7fce06765..e381fa12501 100644
--- a/arch/arm/mach-at91/at91sam9g45.c
+++ b/arch/arm/mach-at91/at91sam9g45.c
@@ -376,6 +376,9 @@ static void __init at91sam9g45_initialize(void)
 	arm_pm_restart = at91sam9g45_restart;
 	at91_extern_irq = (1 << AT91SAM9G45_ID_IRQ0);
 
+	at91_sysirq_mask_rtc(AT91SAM9G45_BASE_RTC);
+	at91_sysirq_mask_rtt(AT91SAM9G45_BASE_RTT);
+
 	/* Register GPIO subsystem */
 	at91_gpio_init(at91sam9g45_gpio, 5);
 }
diff --git a/arch/arm/mach-at91/at91sam9n12.c b/arch/arm/mach-at91/at91sam9n12.c
index c7d670d1180..4d6001c355f 100644
--- a/arch/arm/mach-at91/at91sam9n12.c
+++ b/arch/arm/mach-at91/at91sam9n12.c
@@ -223,7 +223,13 @@ static void __init at91sam9n12_map_io(void)
 	at91_init_sram(0, AT91SAM9N12_SRAM_BASE, AT91SAM9N12_SRAM_SIZE);
 }
 
+static void __init at91sam9n12_initialize(void)
+{
+	at91_sysirq_mask_rtc(AT91SAM9N12_BASE_RTC);
+}
+
 AT91_SOC_START(at91sam9n12)
 	.map_io = at91sam9n12_map_io,
 	.register_clocks = at91sam9n12_register_clocks,
+	.init = at91sam9n12_initialize,
 AT91_SOC_END
diff --git a/arch/arm/mach-at91/at91sam9rl.c b/arch/arm/mach-at91/at91sam9rl.c
index f77fae5591b..5615d28a5b9 100644
--- a/arch/arm/mach-at91/at91sam9rl.c
+++ b/arch/arm/mach-at91/at91sam9rl.c
@@ -295,6 +295,9 @@ static void __init at91sam9rl_initialize(void)
 	arm_pm_restart = at91sam9_alt_restart;
 	at91_extern_irq = (1 << AT91SAM9RL_ID_IRQ0);
 
+	at91_sysirq_mask_rtc(AT91SAM9RL_BASE_RTC);
+	at91_sysirq_mask_rtt(AT91SAM9RL_BASE_RTT);
+
 	/* Register GPIO subsystem */
 	at91_gpio_init(at91sam9rl_gpio, 4);
 }
diff --git a/arch/arm/mach-at91/at91sam9x5.c b/arch/arm/mach-at91/at91sam9x5.c
index e631fec040c..7b4f848312e 100644
--- a/arch/arm/mach-at91/at91sam9x5.c
+++ b/arch/arm/mach-at91/at91sam9x5.c
@@ -318,6 +318,11 @@ static void __init at91sam9x5_map_io(void)
 	at91_init_sram(0, AT91SAM9X5_SRAM_BASE, AT91SAM9X5_SRAM_SIZE);
 }
 
+static void __init at91sam9x5_initialize(void)
+{
+	at91_sysirq_mask_rtc(AT91SAM9X5_BASE_RTC);
+}
+
 /* --------------------------------------------------------------------
  *  Interrupt initialization
  * -------------------------------------------------------------------- */
@@ -325,4 +330,5 @@ static void __init at91sam9x5_map_io(void)
 AT91_SOC_START(at91sam9x5)
 	.map_io = at91sam9x5_map_io,
 	.register_clocks = at91sam9x5_register_clocks,
+	.init = at91sam9x5_initialize,
 AT91_SOC_END
diff --git a/arch/arm/mach-at91/generic.h b/arch/arm/mach-at91/generic.h
index 78ab0654865..d949ab4e26f 100644
--- a/arch/arm/mach-at91/generic.h
+++ b/arch/arm/mach-at91/generic.h
@@ -33,6 +33,8 @@ extern int  __init at91_aic_of_init(struct device_node *node,
 				    struct device_node *parent);
 extern int  __init at91_aic5_of_init(struct device_node *node,
 				    struct device_node *parent);
+extern void __init at91_sysirq_mask_rtc(u32 rtc_base);
+extern void __init at91_sysirq_mask_rtt(u32 rtt_base);
 
 
  /* Timer */
diff --git a/arch/arm/mach-at91/include/mach/at91sam9n12.h b/arch/arm/mach-at91/include/mach/at91sam9n12.h
index d374b87c045..0151bcf6163 100644
--- a/arch/arm/mach-at91/include/mach/at91sam9n12.h
+++ b/arch/arm/mach-at91/include/mach/at91sam9n12.h
@@ -49,6 +49,11 @@
 #define AT91SAM9N12_BASE_USART3	0xf8028000
 
 /*
+ * System Peripherals
+ */
+#define AT91SAM9N12_BASE_RTC	0xfffffeb0
+
+/*
  * Internal Memory.
  */
 #define AT91SAM9N12_SRAM_BASE	0x00300000	/* Internal SRAM base address */
diff --git a/arch/arm/mach-at91/include/mach/at91sam9x5.h b/arch/arm/mach-at91/include/mach/at91sam9x5.h
index c75ee19b58d..2fc76c49e97 100644
--- a/arch/arm/mach-at91/include/mach/at91sam9x5.h
+++ b/arch/arm/mach-at91/include/mach/at91sam9x5.h
@@ -55,6 +55,11 @@
 #define AT91SAM9X5_BASE_USART2	0xf8024000
 
 /*
+ * System Peripherals
+ */
+#define AT91SAM9X5_BASE_RTC	0xfffffeb0
+
+/*
  * Internal Memory.
  */
 #define AT91SAM9X5_SRAM_BASE	0x00300000	/* Internal SRAM base address */
diff --git a/arch/arm/mach-at91/include/mach/sama5d3.h b/arch/arm/mach-at91/include/mach/sama5d3.h
index 6dc81ee3804..3abbc428687 100644
--- a/arch/arm/mach-at91/include/mach/sama5d3.h
+++ b/arch/arm/mach-at91/include/mach/sama5d3.h
@@ -65,6 +65,11 @@
 #define SAMA5D3_ID_IRQ0		47	/* Advanced Interrupt Controller (IRQ0) */
 
 /*
+ * System Peripherals
+ */
+#define SAMA5D3_BASE_RTC	0xfffffeb0
+
+/*
  * Internal Memory
  */
 #define SAMA5D3_SRAM_BASE	0x00300000	/* Internal SRAM base address */
diff --git a/arch/arm/mach-at91/sama5d3.c b/arch/arm/mach-at91/sama5d3.c
index 401279715ab..a28873fe304 100644
--- a/arch/arm/mach-at91/sama5d3.c
+++ b/arch/arm/mach-at91/sama5d3.c
@@ -95,19 +95,19 @@ static struct clk twi0_clk = {
 	.name		= "twi0_clk",
 	.pid		= SAMA5D3_ID_TWI0,
 	.type		= CLK_TYPE_PERIPHERAL,
-	.div		= AT91_PMC_PCR_DIV2,
+	.div		= AT91_PMC_PCR_DIV8,
 };
 static struct clk twi1_clk = {
 	.name		= "twi1_clk",
 	.pid		= SAMA5D3_ID_TWI1,
 	.type		= CLK_TYPE_PERIPHERAL,
-	.div		= AT91_PMC_PCR_DIV2,
+	.div		= AT91_PMC_PCR_DIV8,
 };
 static struct clk twi2_clk = {
 	.name		= "twi2_clk",
 	.pid		= SAMA5D3_ID_TWI2,
 	.type		= CLK_TYPE_PERIPHERAL,
-	.div		= AT91_PMC_PCR_DIV2,
+	.div		= AT91_PMC_PCR_DIV8,
 };
 static struct clk mmc0_clk = {
 	.name		= "mci0_clk",
@@ -371,7 +371,13 @@ static void __init sama5d3_map_io(void)
 	at91_init_sram(0, SAMA5D3_SRAM_BASE, SAMA5D3_SRAM_SIZE);
 }
 
+static void __init sama5d3_initialize(void)
+{
+	at91_sysirq_mask_rtc(SAMA5D3_BASE_RTC);
+}
+
 AT91_SOC_START(sama5d3)
 	.map_io = sama5d3_map_io,
 	.register_clocks = sama5d3_register_clocks,
+	.init = sama5d3_initialize,
 AT91_SOC_END
diff --git a/arch/arm/mach-at91/sysirq_mask.c b/arch/arm/mach-at91/sysirq_mask.c
new file mode 100644
index 00000000000..2ba694f9626
--- /dev/null
+++ b/arch/arm/mach-at91/sysirq_mask.c
@@ -0,0 +1,71 @@
+/*
+ * sysirq_mask.c - System-interrupt masking
+ *
+ * Copyright (C) 2013 Johan Hovold <jhovold@gmail.com>
+ *
+ * Functions to disable system interrupts from backup-powered peripherals.
+ *
+ * The RTC and RTT-peripherals are generally powered by backup power (VDDBU)
+ * and are not reset on wake-up, user, watchdog or software reset. This means
+ * that their interrupts may be enabled during early boot (e.g. after a user
+ * reset).
+ *
+ * As the RTC and RTT share the system-interrupt line with the PIT, an
+ * interrupt occurring before a handler has been installed would lead to the
+ * system interrupt being disabled and prevent the system from booting.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/io.h>
+#include <mach/at91_rtt.h>
+
+#include "generic.h"
+
+#define AT91_RTC_IDR	0x24	/* Interrupt Disable Register */
+#define AT91_RTC_IMR	0x28	/* Interrupt Mask Register */
+
+void __init at91_sysirq_mask_rtc(u32 rtc_base)
+{
+	void __iomem *base;
+	u32 mask;
+
+	base = ioremap(rtc_base, 64);
+	if (!base)
+		return;
+
+	mask = readl_relaxed(base + AT91_RTC_IMR);
+	if (mask) {
+		pr_info("AT91: Disabling rtc irq\n");
+		writel_relaxed(mask, base + AT91_RTC_IDR);
+		(void)readl_relaxed(base + AT91_RTC_IMR);	/* flush */
+	}
+
+	iounmap(base);
+}
+
+void __init at91_sysirq_mask_rtt(u32 rtt_base)
+{
+	void __iomem *base;
+	void __iomem *reg;
+	u32 mode;
+
+	base = ioremap(rtt_base, 16);
+	if (!base)
+		return;
+
+	reg = base + AT91_RTT_MR;
+
+	mode = readl_relaxed(reg);
+	if (mode & (AT91_RTT_ALMIEN | AT91_RTT_RTTINCIEN)) {
+		pr_info("AT91: Disabling rtt irq\n");
+		mode &= ~(AT91_RTT_ALMIEN | AT91_RTT_RTTINCIEN);
+		writel_relaxed(mode, reg);
+		(void)readl_relaxed(reg);			/* flush */
+	}
+
+	iounmap(base);
+}
diff --git a/arch/arm/mach-davinci/board-dm355-leopard.c b/arch/arm/mach-davinci/board-dm355-leopard.c
index dff4ddc5ef8..139e42da25f 100644
--- a/arch/arm/mach-davinci/board-dm355-leopard.c
+++ b/arch/arm/mach-davinci/board-dm355-leopard.c
@@ -75,6 +75,7 @@ static struct davinci_nand_pdata davinci_nand_data = {
 	.parts			= davinci_nand_partitions,
 	.nr_parts		= ARRAY_SIZE(davinci_nand_partitions),
 	.ecc_mode		= NAND_ECC_HW_SYNDROME,
+	.ecc_bits		= 4,
 	.bbt_options		= NAND_BBT_USE_FLASH,
 };
 
diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c
index a33686a6fbb..fa4bfaf952d 100644
--- a/arch/arm/mach-davinci/board-dm644x-evm.c
+++ b/arch/arm/mach-davinci/board-dm644x-evm.c
@@ -153,6 +153,7 @@ static struct davinci_nand_pdata davinci_evm_nandflash_data = {
 	.parts		= davinci_evm_nandflash_partition,
 	.nr_parts	= ARRAY_SIZE(davinci_evm_nandflash_partition),
 	.ecc_mode	= NAND_ECC_HW,
+	.ecc_bits	= 1,
 	.bbt_options	= NAND_BBT_USE_FLASH,
 	.timing		= &davinci_evm_nandflash_timing,
 };
diff --git a/arch/arm/mach-davinci/board-dm646x-evm.c b/arch/arm/mach-davinci/board-dm646x-evm.c
index fbb8e5ab1dc..0c005e876ca 100644
--- a/arch/arm/mach-davinci/board-dm646x-evm.c
+++ b/arch/arm/mach-davinci/board-dm646x-evm.c
@@ -90,6 +90,7 @@ static struct davinci_nand_pdata davinci_nand_data = {
 	.parts			= davinci_nand_partitions,
 	.nr_parts		= ARRAY_SIZE(davinci_nand_partitions),
 	.ecc_mode		= NAND_ECC_HW,
+	.ecc_bits		= 1,
 	.options		= 0,
 };
 
diff --git a/arch/arm/mach-davinci/board-neuros-osd2.c b/arch/arm/mach-davinci/board-neuros-osd2.c
index 2bc112adf56..808233b60e3 100644
--- a/arch/arm/mach-davinci/board-neuros-osd2.c
+++ b/arch/arm/mach-davinci/board-neuros-osd2.c
@@ -88,6 +88,7 @@ static struct davinci_nand_pdata davinci_ntosd2_nandflash_data = {
 	.parts		= davinci_ntosd2_nandflash_partition,
 	.nr_parts	= ARRAY_SIZE(davinci_ntosd2_nandflash_partition),
 	.ecc_mode	= NAND_ECC_HW,
+	.ecc_bits	= 1,
 	.bbt_options	= NAND_BBT_USE_FLASH,
 };
 
diff --git a/arch/arm/mach-exynos/mach-exynos5-dt.c b/arch/arm/mach-exynos/mach-exynos5-dt.c
index 753b94f3fca..d88234e14f9 100644
--- a/arch/arm/mach-exynos/mach-exynos5-dt.c
+++ b/arch/arm/mach-exynos/mach-exynos5-dt.c
@@ -14,6 +14,7 @@
 #include <linux/memblock.h>
 #include <linux/io.h>
 #include <linux/clocksource.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/mach/arch.h>
 #include <mach/regs-pmu.h>
@@ -23,11 +24,31 @@
 
 #include "common.h"
 
+static u64 dma_mask64 = DMA_BIT_MASK(64);
+
 static void __init exynos5_dt_map_io(void)
 {
 	exynos_init_io(NULL, 0);
 }
 
+static int exynos5250_platform_notifier(struct notifier_block *nb,
+				  unsigned long event, void *__dev)
+{
+	struct device *dev = __dev;
+
+	if (event != BUS_NOTIFY_ADD_DEVICE)
+		return NOTIFY_DONE;
+
+	dev->dma_mask = &dma_mask64;
+	dev->coherent_dma_mask = DMA_BIT_MASK(64);
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block exynos5250_platform_nb = {
+	.notifier_call = exynos5250_platform_notifier,
+};
+
 static void __init exynos5_dt_machine_init(void)
 {
 	struct device_node *i2c_np;
@@ -52,6 +73,11 @@ static void __init exynos5_dt_machine_init(void)
 		}
 	}
 
+	if (config_enabled(CONFIG_ARM_LPAE) &&
+			of_machine_is_compatible("samsung,exynos5250"))
+		bus_register_notifier(&platform_bus_type,
+				&exynos5250_platform_nb);
+
 	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
 }
 
diff --git a/arch/arm/mach-footbridge/common.c b/arch/arm/mach-footbridge/common.c
index a42b369bc43..95d0b6e6438 100644
--- a/arch/arm/mach-footbridge/common.c
+++ b/arch/arm/mach-footbridge/common.c
@@ -15,6 +15,7 @@
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/spinlock.h>
+#include <video/vga.h>
 
 #include <asm/pgtable.h>
 #include <asm/page.h>
@@ -196,6 +197,8 @@ void __init footbridge_map_io(void)
 		iotable_init(ebsa285_host_io_desc, ARRAY_SIZE(ebsa285_host_io_desc));
 		pci_map_io_early(__phys_to_pfn(DC21285_PCI_IO));
 	}
+
+	vga_base = PCIMEM_BASE;
 }
 
 void footbridge_restart(char mode, const char *cmd)
diff --git a/arch/arm/mach-footbridge/dc21285-timer.c b/arch/arm/mach-footbridge/dc21285-timer.c
index 9ee78f7b499..782f6c71fa0 100644
--- a/arch/arm/mach-footbridge/dc21285-timer.c
+++ b/arch/arm/mach-footbridge/dc21285-timer.c
@@ -96,11 +96,12 @@ static struct irqaction footbridge_timer_irq = {
 void __init footbridge_timer_init(void)
 {
 	struct clock_event_device *ce = &ckevt_dc21285;
+	unsigned rate = DIV_ROUND_CLOSEST(mem_fclk_21285, 16);
 
-	clocksource_register_hz(&cksrc_dc21285, (mem_fclk_21285 + 8) / 16);
+	clocksource_register_hz(&cksrc_dc21285, rate);
 
 	setup_irq(ce->irq, &footbridge_timer_irq);
 
 	ce->cpumask = cpumask_of(smp_processor_id());
-	clockevents_config_and_register(ce, mem_fclk_21285, 0x4, 0xffffff);
+	clockevents_config_and_register(ce, rate, 0x4, 0xffffff);
 }
diff --git a/arch/arm/mach-footbridge/dc21285.c b/arch/arm/mach-footbridge/dc21285.c
index a7cd2cf5e08..7c2fdae9a38 100644
--- a/arch/arm/mach-footbridge/dc21285.c
+++ b/arch/arm/mach-footbridge/dc21285.c
@@ -18,7 +18,6 @@
 #include <linux/irq.h>
 #include <linux/io.h>
 #include <linux/spinlock.h>
-#include <video/vga.h>
 
 #include <asm/irq.h>
 #include <asm/mach/pci.h>
@@ -276,8 +275,6 @@ int __init dc21285_setup(int nr, struct pci_sys_data *sys)
 
 	sys->mem_offset  = DC21285_PCI_MEM;
 
-	pci_ioremap_io(0, DC21285_PCI_IO);
-
 	pci_add_resource_offset(&sys->resources, &res[0], sys->mem_offset);
 	pci_add_resource_offset(&sys->resources, &res[1], sys->mem_offset);
 
@@ -293,7 +290,6 @@ void __init dc21285_preinit(void)
 	int cfn_mode;
 
 	pcibios_min_mem = 0x81000000;
-	vga_base = PCIMEM_BASE;
 
 	mem_size = (unsigned int)high_memory - PAGE_OFFSET;
 	for (mem_mask = 0x00100000; mem_mask < 0x10000000; mem_mask <<= 1)
diff --git a/arch/arm/mach-footbridge/ebsa285.c b/arch/arm/mach-footbridge/ebsa285.c
index b08243500e2..1a7235fb52a 100644
--- a/arch/arm/mach-footbridge/ebsa285.c
+++ b/arch/arm/mach-footbridge/ebsa285.c
@@ -30,21 +30,24 @@ static const struct {
 	const char *name;
 	const char *trigger;
 } ebsa285_leds[] = {
-	{ "ebsa285:amber", "heartbeat", },
-	{ "ebsa285:green", "cpu0", },
+	{ "ebsa285:amber", "cpu0", },
+	{ "ebsa285:green", "heartbeat", },
 	{ "ebsa285:red",},
 };
 
+static unsigned char hw_led_state;
+
 static void ebsa285_led_set(struct led_classdev *cdev,
 		enum led_brightness b)
 {
 	struct ebsa285_led *led = container_of(cdev,
 			struct ebsa285_led, cdev);
 
-	if (b != LED_OFF)
-		*XBUS_LEDS |= led->mask;
+	if (b == LED_OFF)
+		hw_led_state |= led->mask;
 	else
-		*XBUS_LEDS &= ~led->mask;
+		hw_led_state &= ~led->mask;
+	*XBUS_LEDS = hw_led_state;
 }
 
 static enum led_brightness ebsa285_led_get(struct led_classdev *cdev)
@@ -52,18 +55,19 @@ static enum led_brightness ebsa285_led_get(struct led_classdev *cdev)
 	struct ebsa285_led *led = container_of(cdev,
 			struct ebsa285_led, cdev);
 
-	return (*XBUS_LEDS & led->mask) ? LED_FULL : LED_OFF;
+	return hw_led_state & led->mask ? LED_OFF : LED_FULL;
 }
 
 static int __init ebsa285_leds_init(void)
 {
 	int i;
 
-	if (machine_is_ebsa285())
+	if (!machine_is_ebsa285())
 		return -ENODEV;
 
-	/* 3 LEDS All ON */
-	*XBUS_LEDS |= XBUS_LED_AMBER | XBUS_LED_GREEN | XBUS_LED_RED;
+	/* 3 LEDS all off */
+	hw_led_state = XBUS_LED_AMBER | XBUS_LED_GREEN | XBUS_LED_RED;
+	*XBUS_LEDS = hw_led_state;
 
 	for (i = 0; i < ARRAY_SIZE(ebsa285_leds); i++) {
 		struct ebsa285_led *led;
diff --git a/arch/arm/mach-imx/clk-imx6q.c b/arch/arm/mach-imx/clk-imx6q.c
index 4e3148ce852..0b9e437719b 100644
--- a/arch/arm/mach-imx/clk-imx6q.c
+++ b/arch/arm/mach-imx/clk-imx6q.c
@@ -424,7 +424,7 @@ int __init mx6q_clocks_init(void)
 	clk[asrc_podf]        = imx_clk_divider("asrc_podf",        "asrc_pred",         base + 0x30, 9,  3);
 	clk[spdif_pred]       = imx_clk_divider("spdif_pred",       "spdif_sel",         base + 0x30, 25, 3);
 	clk[spdif_podf]       = imx_clk_divider("spdif_podf",       "spdif_pred",        base + 0x30, 22, 3);
-	clk[can_root]         = imx_clk_divider("can_root",         "pll3_usb_otg",      base + 0x20, 2,  6);
+	clk[can_root]         = imx_clk_divider("can_root",         "pll3_60m",          base + 0x20, 2,  6);
 	clk[ecspi_root]       = imx_clk_divider("ecspi_root",       "pll3_60m",          base + 0x38, 19, 6);
 	clk[gpu2d_core_podf]  = imx_clk_divider("gpu2d_core_podf",  "gpu2d_core_sel",    base + 0x18, 23, 3);
 	clk[gpu3d_core_podf]  = imx_clk_divider("gpu3d_core_podf",  "gpu3d_core_sel",    base + 0x18, 26, 3);
diff --git a/arch/arm/mach-integrator/integrator_cp.c b/arch/arm/mach-integrator/integrator_cp.c
index 8c60fcb08a9..2f4c92486cf 100644
--- a/arch/arm/mach-integrator/integrator_cp.c
+++ b/arch/arm/mach-integrator/integrator_cp.c
@@ -199,7 +199,8 @@ static struct mmci_platform_data mmc_data = {
 static void cp_clcd_enable(struct clcd_fb *fb)
 {
 	struct fb_var_screeninfo *var = &fb->fb.var;
-	u32 val = CM_CTRL_STATIC1 | CM_CTRL_STATIC2;
+	u32 val = CM_CTRL_STATIC1 | CM_CTRL_STATIC2
+			| CM_CTRL_LCDEN0 | CM_CTRL_LCDEN1;
 
 	if (var->bits_per_pixel <= 8 ||
 	    (var->bits_per_pixel == 16 && var->green.length == 5))
diff --git a/arch/arm/mach-mxs/pm.h b/arch/arm/mach-mxs/pm.h
index f57e7cdece2..09d77b00a96 100644
--- a/arch/arm/mach-mxs/pm.h
+++ b/arch/arm/mach-mxs/pm.h
@@ -9,6 +9,10 @@
 #ifndef __ARCH_MXS_PM_H
 #define __ARCH_MXS_PM_H
 
+#ifdef CONFIG_PM
 void mxs_pm_init(void);
+#else
+#define mxs_pm_init NULL
+#endif
 
 #endif
diff --git a/arch/arm/mach-omap2/irq.c b/arch/arm/mach-omap2/irq.c
index 3926f370448..e022a869bff 100644
--- a/arch/arm/mach-omap2/irq.c
+++ b/arch/arm/mach-omap2/irq.c
@@ -233,7 +233,7 @@ static inline void omap_intc_handle_irq(void __iomem *base_addr, struct pt_regs
 			goto out;
 
 		irqnr = readl_relaxed(base_addr + 0xd8);
-#ifdef CONFIG_SOC_TI81XX
+#if IS_ENABLED(CONFIG_SOC_TI81XX) || IS_ENABLED(CONFIG_SOC_AM33XX)
 		if (irqnr)
 			goto out;
 		irqnr = readl_relaxed(base_addr + 0xf8);
diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index 7341eff63f5..44c609a1ec5 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -399,7 +399,7 @@ static int _set_clockactivity(struct omap_hwmod *oh, u8 clockact, u32 *v)
 }
 
 /**
- * _set_softreset: set OCP_SYSCONFIG.CLOCKACTIVITY bits in @v
+ * _set_softreset: set OCP_SYSCONFIG.SOFTRESET bit in @v
  * @oh: struct omap_hwmod *
  * @v: pointer to register contents to modify
  *
@@ -427,6 +427,36 @@ static int _set_softreset(struct omap_hwmod *oh, u32 *v)
 }
 
 /**
+ * _clear_softreset: clear OCP_SYSCONFIG.SOFTRESET bit in @v
+ * @oh: struct omap_hwmod *
+ * @v: pointer to register contents to modify
+ *
+ * Clear the SOFTRESET bit in @v for hwmod @oh.  Returns -EINVAL upon
+ * error or 0 upon success.
+ */
+static int _clear_softreset(struct omap_hwmod *oh, u32 *v)
+{
+	u32 softrst_mask;
+
+	if (!oh->class->sysc ||
+	    !(oh->class->sysc->sysc_flags & SYSC_HAS_SOFTRESET))
+		return -EINVAL;
+
+	if (!oh->class->sysc->sysc_fields) {
+		WARN(1,
+		     "omap_hwmod: %s: sysc_fields absent for sysconfig class\n",
+		     oh->name);
+		return -EINVAL;
+	}
+
+	softrst_mask = (0x1 << oh->class->sysc->sysc_fields->srst_shift);
+
+	*v &= ~softrst_mask;
+
+	return 0;
+}
+
+/**
  * _wait_softreset_complete - wait for an OCP softreset to complete
  * @oh: struct omap_hwmod * to wait on
  *
@@ -1909,6 +1939,12 @@ static int _ocp_softreset(struct omap_hwmod *oh)
 	ret = _set_softreset(oh, &v);
 	if (ret)
 		goto dis_opt_clks;
+
+	_write_sysconfig(v, oh);
+	ret = _clear_softreset(oh, &v);
+	if (ret)
+		goto dis_opt_clks;
+
 	_write_sysconfig(v, oh);
 
 	if (oh->class->sysc->srst_udelay)
@@ -3148,6 +3184,11 @@ int omap_hwmod_softreset(struct omap_hwmod *oh)
 		goto error;
 	_write_sysconfig(v, oh);
 
+	ret = _clear_softreset(oh, &v);
+	if (ret)
+		goto error;
+	_write_sysconfig(v, oh);
+
 error:
 	return ret;
 }
diff --git a/arch/arm/mach-omap2/omap_hwmod_2xxx_ipblock_data.c b/arch/arm/mach-omap2/omap_hwmod_2xxx_ipblock_data.c
index d05fc7b5456..83735b72895 100644
--- a/arch/arm/mach-omap2/omap_hwmod_2xxx_ipblock_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_2xxx_ipblock_data.c
@@ -796,7 +796,7 @@ struct omap_hwmod omap2xxx_counter_32k_hwmod = {
 
 /* gpmc */
 static struct omap_hwmod_irq_info omap2xxx_gpmc_irqs[] = {
-	{ .irq = 20 },
+	{ .irq = 20 + OMAP_INTC_START, },
 	{ .irq = -1 }
 };
 
@@ -841,7 +841,7 @@ static struct omap_hwmod_class omap2_rng_hwmod_class = {
 };
 
 static struct omap_hwmod_irq_info omap2_rng_mpu_irqs[] = {
-	{ .irq = 52 },
+	{ .irq = 52 + OMAP_INTC_START, },
 	{ .irq = -1 }
 };
 
diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index 31c7126eb3b..9f6238c9dfc 100644
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -1930,7 +1930,8 @@ static struct omap_hwmod_class_sysconfig omap3xxx_usb_host_hs_sysc = {
 	.syss_offs	= 0x0014,
 	.sysc_flags	= (SYSC_HAS_MIDLEMODE | SYSC_HAS_CLOCKACTIVITY |
 			   SYSC_HAS_SIDLEMODE | SYSC_HAS_ENAWAKEUP |
-			   SYSC_HAS_SOFTRESET | SYSC_HAS_AUTOIDLE),
+			   SYSC_HAS_SOFTRESET | SYSC_HAS_AUTOIDLE |
+			   SYSS_HAS_RESET_STATUS),
 	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
 			   MSTANDBY_FORCE | MSTANDBY_NO | MSTANDBY_SMART),
 	.sysc_fields	= &omap_hwmod_sysc_type1,
@@ -2008,15 +2009,7 @@ static struct omap_hwmod omap3xxx_usb_host_hs_hwmod = {
 	 * hence HWMOD_SWSUP_MSTANDBY
 	 */
 
-	/*
-	 * During system boot; If the hwmod framework resets the module
-	 * the module will have smart idle settings; which can lead to deadlock
-	 * (above Errata Id:i660); so, dont reset the module during boot;
-	 * Use HWMOD_INIT_NO_RESET.
-	 */
-
-	.flags		= HWMOD_SWSUP_SIDLE | HWMOD_SWSUP_MSTANDBY |
-			  HWMOD_INIT_NO_RESET,
+	.flags		= HWMOD_SWSUP_SIDLE | HWMOD_SWSUP_MSTANDBY,
 };
 
 /*
@@ -2159,7 +2152,7 @@ static struct omap_hwmod_class omap3xxx_gpmc_hwmod_class = {
 };
 
 static struct omap_hwmod_irq_info omap3xxx_gpmc_irqs[] = {
-	{ .irq = 20 },
+	{ .irq = 20 + OMAP_INTC_START, },
 	{ .irq = -1 }
 };
 
@@ -2993,7 +2986,7 @@ static struct omap_mmu_dev_attr mmu_isp_dev_attr = {
 
 static struct omap_hwmod omap3xxx_mmu_isp_hwmod;
 static struct omap_hwmod_irq_info omap3xxx_mmu_isp_irqs[] = {
-	{ .irq = 24 },
+	{ .irq = 24 + OMAP_INTC_START, },
 	{ .irq = -1 }
 };
 
@@ -3035,7 +3028,7 @@ static struct omap_mmu_dev_attr mmu_iva_dev_attr = {
 
 static struct omap_hwmod omap3xxx_mmu_iva_hwmod;
 static struct omap_hwmod_irq_info omap3xxx_mmu_iva_irqs[] = {
-	{ .irq = 28 },
+	{ .irq = 28 + OMAP_INTC_START, },
 	{ .irq = -1 }
 };
 
diff --git a/arch/arm/mach-pxa/reset.c b/arch/arm/mach-pxa/reset.c
index 3fab583755d..0616390b857 100644
--- a/arch/arm/mach-pxa/reset.c
+++ b/arch/arm/mach-pxa/reset.c
@@ -13,6 +13,7 @@
 
 #include <mach/regs-ost.h>
 #include <mach/reset.h>
+#include <mach/smemc.h>
 
 unsigned int reset_status;
 EXPORT_SYMBOL(reset_status);
@@ -81,6 +82,12 @@ static void do_hw_reset(void)
 	writel_relaxed(OSSR_M3, OSSR);
 	/* ... in 100 ms */
 	writel_relaxed(readl_relaxed(OSCR) + 368640, OSMR3);
+	/*
+	 * SDRAM hangs on watchdog reset on Marvell PXA270 (erratum 71)
+	 * we put SDRAM into self-refresh to prevent that
+	 */
+	while (1)
+		writel_relaxed(MDREFR_SLFRSH, MDREFR);
 }
 
 void pxa_restart(char mode, const char *cmd)
@@ -104,4 +111,3 @@ void pxa_restart(char mode, const char *cmd)
 		break;
 	}
 }
-
diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c
index 3d91d2e5bf3..d91fcf403ce 100644
--- a/arch/arm/mach-pxa/tosa.c
+++ b/arch/arm/mach-pxa/tosa.c
@@ -424,57 +424,57 @@ static struct platform_device tosa_power_device = {
  * Tosa Keyboard
  */
 static const uint32_t tosakbd_keymap[] = {
-	KEY(0, 2, KEY_W),
-	KEY(0, 6, KEY_K),
-	KEY(0, 7, KEY_BACKSPACE),
-	KEY(0, 8, KEY_P),
-	KEY(1, 1, KEY_Q),
-	KEY(1, 2, KEY_E),
-	KEY(1, 3, KEY_T),
-	KEY(1, 4, KEY_Y),
-	KEY(1, 6, KEY_O),
-	KEY(1, 7, KEY_I),
-	KEY(1, 8, KEY_COMMA),
-	KEY(2, 1, KEY_A),
-	KEY(2, 2, KEY_D),
-	KEY(2, 3, KEY_G),
-	KEY(2, 4, KEY_U),
-	KEY(2, 6, KEY_L),
-	KEY(2, 7, KEY_ENTER),
-	KEY(2, 8, KEY_DOT),
-	KEY(3, 1, KEY_Z),
-	KEY(3, 2, KEY_C),
-	KEY(3, 3, KEY_V),
-	KEY(3, 4, KEY_J),
-	KEY(3, 5, TOSA_KEY_ADDRESSBOOK),
-	KEY(3, 6, TOSA_KEY_CANCEL),
-	KEY(3, 7, TOSA_KEY_CENTER),
-	KEY(3, 8, TOSA_KEY_OK),
-	KEY(3, 9, KEY_LEFTSHIFT),
-	KEY(4, 1, KEY_S),
-	KEY(4, 2, KEY_R),
-	KEY(4, 3, KEY_B),
-	KEY(4, 4, KEY_N),
-	KEY(4, 5, TOSA_KEY_CALENDAR),
-	KEY(4, 6, TOSA_KEY_HOMEPAGE),
-	KEY(4, 7, KEY_LEFTCTRL),
-	KEY(4, 8, TOSA_KEY_LIGHT),
-	KEY(4, 10, KEY_RIGHTSHIFT),
-	KEY(5, 1, KEY_TAB),
-	KEY(5, 2, KEY_SLASH),
-	KEY(5, 3, KEY_H),
-	KEY(5, 4, KEY_M),
-	KEY(5, 5, TOSA_KEY_MENU),
-	KEY(5, 7, KEY_UP),
-	KEY(5, 11, TOSA_KEY_FN),
-	KEY(6, 1, KEY_X),
-	KEY(6, 2, KEY_F),
-	KEY(6, 3, KEY_SPACE),
-	KEY(6, 4, KEY_APOSTROPHE),
-	KEY(6, 5, TOSA_KEY_MAIL),
-	KEY(6, 6, KEY_LEFT),
-	KEY(6, 7, KEY_DOWN),
-	KEY(6, 8, KEY_RIGHT),
+	KEY(0, 1, KEY_W),
+	KEY(0, 5, KEY_K),
+	KEY(0, 6, KEY_BACKSPACE),
+	KEY(0, 7, KEY_P),
+	KEY(1, 0, KEY_Q),
+	KEY(1, 1, KEY_E),
+	KEY(1, 2, KEY_T),
+	KEY(1, 3, KEY_Y),
+	KEY(1, 5, KEY_O),
+	KEY(1, 6, KEY_I),
+	KEY(1, 7, KEY_COMMA),
+	KEY(2, 0, KEY_A),
+	KEY(2, 1, KEY_D),
+	KEY(2, 2, KEY_G),
+	KEY(2, 3, KEY_U),
+	KEY(2, 5, KEY_L),
+	KEY(2, 6, KEY_ENTER),
+	KEY(2, 7, KEY_DOT),
+	KEY(3, 0, KEY_Z),
+	KEY(3, 1, KEY_C),
+	KEY(3, 2, KEY_V),
+	KEY(3, 3, KEY_J),
+	KEY(3, 4, TOSA_KEY_ADDRESSBOOK),
+	KEY(3, 5, TOSA_KEY_CANCEL),
+	KEY(3, 6, TOSA_KEY_CENTER),
+	KEY(3, 7, TOSA_KEY_OK),
+	KEY(3, 8, KEY_LEFTSHIFT),
+	KEY(4, 0, KEY_S),
+	KEY(4, 1, KEY_R),
+	KEY(4, 2, KEY_B),
+	KEY(4, 3, KEY_N),
+	KEY(4, 4, TOSA_KEY_CALENDAR),
+	KEY(4, 5, TOSA_KEY_HOMEPAGE),
+	KEY(4, 6, KEY_LEFTCTRL),
+	KEY(4, 7, TOSA_KEY_LIGHT),
+	KEY(4, 9, KEY_RIGHTSHIFT),
+	KEY(5, 0, KEY_TAB),
+	KEY(5, 1, KEY_SLASH),
+	KEY(5, 2, KEY_H),
+	KEY(5, 3, KEY_M),
+	KEY(5, 4, TOSA_KEY_MENU),
+	KEY(5, 6, KEY_UP),
+	KEY(5, 10, TOSA_KEY_FN),
+	KEY(6, 0, KEY_X),
+	KEY(6, 1, KEY_F),
+	KEY(6, 2, KEY_SPACE),
+	KEY(6, 3, KEY_APOSTROPHE),
+	KEY(6, 4, TOSA_KEY_MAIL),
+	KEY(6, 5, KEY_LEFT),
+	KEY(6, 6, KEY_DOWN),
+	KEY(6, 7, KEY_RIGHT),
 };
 
 static struct matrix_keymap_data tosakbd_keymap_data = {
diff --git a/arch/arm/mach-s3c24xx/clock-s3c2410.c b/arch/arm/mach-s3c24xx/clock-s3c2410.c
index 34fffdf6fc1..564553694b5 100644
--- a/arch/arm/mach-s3c24xx/clock-s3c2410.c
+++ b/arch/arm/mach-s3c24xx/clock-s3c2410.c
@@ -119,66 +119,101 @@ static struct clk init_clocks_off[] = {
 	}
 };
 
-static struct clk init_clocks[] = {
-	{
-		.name		= "lcd",
-		.parent		= &clk_h,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_LCDC,
-	}, {
-		.name		= "gpio",
-		.parent		= &clk_p,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_GPIO,
-	}, {
-		.name		= "usb-host",
-		.parent		= &clk_h,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_USBH,
-	}, {
-		.name		= "usb-device",
-		.parent		= &clk_h,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_USBD,
-	}, {
-		.name		= "timers",
-		.parent		= &clk_p,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_PWMT,
-	}, {
-		.name		= "uart",
-		.devname	= "s3c2410-uart.0",
-		.parent		= &clk_p,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_UART0,
-	}, {
-		.name		= "uart",
-		.devname	= "s3c2410-uart.1",
-		.parent		= &clk_p,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_UART1,
-	}, {
-		.name		= "uart",
-		.devname	= "s3c2410-uart.2",
-		.parent		= &clk_p,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_UART2,
-	}, {
-		.name		= "rtc",
-		.parent		= &clk_p,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_RTC,
-	}, {
-		.name		= "watchdog",
-		.parent		= &clk_p,
-		.ctrlbit	= 0,
-	}, {
-		.name		= "usb-bus-host",
-		.parent		= &clk_usb_bus,
-	}, {
-		.name		= "usb-bus-gadget",
-		.parent		= &clk_usb_bus,
-	},
+static struct clk clk_lcd = {
+	.name		= "lcd",
+	.parent		= &clk_h,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_LCDC,
+};
+
+static struct clk clk_gpio = {
+	.name		= "gpio",
+	.parent		= &clk_p,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_GPIO,
+};
+
+static struct clk clk_usb_host = {
+	.name		= "usb-host",
+	.parent		= &clk_h,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_USBH,
+};
+
+static struct clk clk_usb_device = {
+	.name		= "usb-device",
+	.parent		= &clk_h,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_USBD,
+};
+
+static struct clk clk_timers = {
+	.name		= "timers",
+	.parent		= &clk_p,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_PWMT,
+};
+
+struct clk s3c24xx_clk_uart0 = {
+	.name		= "uart",
+	.devname	= "s3c2410-uart.0",
+	.parent		= &clk_p,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_UART0,
+};
+
+struct clk s3c24xx_clk_uart1 = {
+	.name		= "uart",
+	.devname	= "s3c2410-uart.1",
+	.parent		= &clk_p,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_UART1,
+};
+
+struct clk s3c24xx_clk_uart2 = {
+	.name		= "uart",
+	.devname	= "s3c2410-uart.2",
+	.parent		= &clk_p,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_UART2,
+};
+
+static struct clk clk_rtc = {
+	.name		= "rtc",
+	.parent		= &clk_p,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_RTC,
+};
+
+static struct clk clk_watchdog = {
+	.name		= "watchdog",
+	.parent		= &clk_p,
+	.ctrlbit	= 0,
+};
+
+static struct clk clk_usb_bus_host = {
+	.name		= "usb-bus-host",
+	.parent		= &clk_usb_bus,
+};
+
+static struct clk clk_usb_bus_gadget = {
+	.name		= "usb-bus-gadget",
+	.parent		= &clk_usb_bus,
+};
+
+static struct clk *init_clocks[] = {
+	&clk_lcd,
+	&clk_gpio,
+	&clk_usb_host,
+	&clk_usb_device,
+	&clk_timers,
+	&s3c24xx_clk_uart0,
+	&s3c24xx_clk_uart1,
+	&s3c24xx_clk_uart2,
+	&clk_rtc,
+	&clk_watchdog,
+	&clk_usb_bus_host,
+	&clk_usb_bus_gadget,
 };
 
 /* s3c2410_baseclk_add()
@@ -195,7 +230,6 @@ int __init s3c2410_baseclk_add(void)
 {
 	unsigned long clkslow = __raw_readl(S3C2410_CLKSLOW);
 	unsigned long clkcon  = __raw_readl(S3C2410_CLKCON);
-	struct clk *clkp;
 	struct clk *xtal;
 	int ret;
 	int ptr;
@@ -207,8 +241,9 @@ int __init s3c2410_baseclk_add(void)
 
 	/* register clocks from clock array */
 
-	clkp = init_clocks;
-	for (ptr = 0; ptr < ARRAY_SIZE(init_clocks); ptr++, clkp++) {
+	for (ptr = 0; ptr < ARRAY_SIZE(init_clocks); ptr++) {
+		struct clk *clkp = init_clocks[ptr];
+
 		/* ensure that we note the clock state */
 
 		clkp->usage = clkcon & clkp->ctrlbit ? 1 : 0;
diff --git a/arch/arm/mach-s3c24xx/clock-s3c2440.c b/arch/arm/mach-s3c24xx/clock-s3c2440.c
index 1069b568082..aaf006d1d6d 100644
--- a/arch/arm/mach-s3c24xx/clock-s3c2440.c
+++ b/arch/arm/mach-s3c24xx/clock-s3c2440.c
@@ -166,6 +166,9 @@ static struct clk_lookup s3c2440_clk_lookup[] = {
 	CLKDEV_INIT(NULL, "clk_uart_baud1", &s3c24xx_uclk),
 	CLKDEV_INIT(NULL, "clk_uart_baud2", &clk_p),
 	CLKDEV_INIT(NULL, "clk_uart_baud3", &s3c2440_clk_fclk_n),
+	CLKDEV_INIT("s3c2440-uart.0", "uart", &s3c24xx_clk_uart0),
+	CLKDEV_INIT("s3c2440-uart.1", "uart", &s3c24xx_clk_uart1),
+	CLKDEV_INIT("s3c2440-uart.2", "uart", &s3c24xx_clk_uart2),
 	CLKDEV_INIT("s3c2440-camif", "camera", &s3c2440_clk_cam_upll),
 };
 
diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c
index e838ba27e44..c9808c68415 100644
--- a/arch/arm/mach-sa1100/assabet.c
+++ b/arch/arm/mach-sa1100/assabet.c
@@ -512,6 +512,9 @@ static void __init assabet_map_io(void)
 	 * Its called GPCLKR0 in my SA1110 manual.
 	 */
 	Ser1SDCR0 |= SDCR0_SUS;
+	MSC1 = (MSC1 & ~0xffff) |
+		MSC_NonBrst | MSC_32BitStMem |
+		MSC_RdAcc(2) | MSC_WrAcc(2) | MSC_Rec(0);
 
 	if (!machine_has_neponset())
 		sa1100_register_uart_fns(&assabet_port_fns);
diff --git a/arch/arm/mach-shmobile/board-armadillo800eva.c b/arch/arm/mach-shmobile/board-armadillo800eva.c
index b85b2882dbd..803c9fcfb35 100644
--- a/arch/arm/mach-shmobile/board-armadillo800eva.c
+++ b/arch/arm/mach-shmobile/board-armadillo800eva.c
@@ -437,7 +437,7 @@ static struct platform_device lcdc0_device = {
 	.id		= 0,
 	.dev	= {
 		.platform_data	= &lcdc0_info,
-		.coherent_dma_mask = ~0,
+		.coherent_dma_mask = DMA_BIT_MASK(32),
 	},
 };
 
@@ -534,7 +534,7 @@ static struct platform_device hdmi_lcdc_device = {
 	.id		= 1,
 	.dev	= {
 		.platform_data	= &hdmi_lcdc_info,
-		.coherent_dma_mask = ~0,
+		.coherent_dma_mask = DMA_BIT_MASK(32),
 	},
 };
 
diff --git a/arch/arm/mach-shmobile/board-kzm9g.c b/arch/arm/mach-shmobile/board-kzm9g.c
index e6b775a10aa..4d610e6ea98 100644
--- a/arch/arm/mach-shmobile/board-kzm9g.c
+++ b/arch/arm/mach-shmobile/board-kzm9g.c
@@ -332,7 +332,7 @@ static struct platform_device lcdc_device = {
 	.resource	= lcdc_resources,
 	.dev	= {
 		.platform_data	= &lcdc_info,
-		.coherent_dma_mask = ~0,
+		.coherent_dma_mask = DMA_BIT_MASK(32),
 	},
 };
 
diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c
index fa3407da682..3b917bc76a4 100644
--- a/arch/arm/mach-shmobile/board-mackerel.c
+++ b/arch/arm/mach-shmobile/board-mackerel.c
@@ -421,7 +421,7 @@ static struct platform_device lcdc_device = {
 	.resource	= lcdc_resources,
 	.dev	= {
 		.platform_data	= &lcdc_info,
-		.coherent_dma_mask = ~0,
+		.coherent_dma_mask = DMA_BIT_MASK(32),
 	},
 };
 
@@ -497,7 +497,7 @@ static struct platform_device hdmi_lcdc_device = {
 	.id		= 1,
 	.dev	= {
 		.platform_data	= &hdmi_lcdc_info,
-		.coherent_dma_mask = ~0,
+		.coherent_dma_mask = DMA_BIT_MASK(32),
 	},
 };
 
diff --git a/arch/arm/mach-shmobile/setup-emev2.c b/arch/arm/mach-shmobile/setup-emev2.c
index 899a86c31ec..1ccddd22811 100644
--- a/arch/arm/mach-shmobile/setup-emev2.c
+++ b/arch/arm/mach-shmobile/setup-emev2.c
@@ -287,14 +287,14 @@ static struct gpio_em_config gio3_config = {
 static struct resource gio3_resources[] = {
 	[0] = {
 		.name	= "GIO_096",
-		.start	= 0xe0050100,
-		.end	= 0xe005012b,
+		.start	= 0xe0050180,
+		.end	= 0xe00501ab,
 		.flags	= IORESOURCE_MEM,
 	},
 	[1] = {
 		.name	= "GIO_096",
-		.start	= 0xe0050140,
-		.end	= 0xe005015f,
+		.start	= 0xe00501c0,
+		.end	= 0xe00501df,
 		.flags	= IORESOURCE_MEM,
 	},
 	[2] = {
diff --git a/arch/arm/mach-shmobile/setup-r8a73a4.c b/arch/arm/mach-shmobile/setup-r8a73a4.c
index c5a75a7a508..7f45c2edbca 100644
--- a/arch/arm/mach-shmobile/setup-r8a73a4.c
+++ b/arch/arm/mach-shmobile/setup-r8a73a4.c
@@ -62,7 +62,7 @@ enum { SCIFA0, SCIFA1, SCIFB0, SCIFB1, SCIFB2, SCIFB3 };
 static const struct plat_sci_port scif[] = {
 	SCIFA_DATA(SCIFA0, 0xe6c40000, gic_spi(144)), /* SCIFA0 */
 	SCIFA_DATA(SCIFA1, 0xe6c50000, gic_spi(145)), /* SCIFA1 */
-	SCIFB_DATA(SCIFB0, 0xe6c50000, gic_spi(145)), /* SCIFB0 */
+	SCIFB_DATA(SCIFB0, 0xe6c20000, gic_spi(148)), /* SCIFB0 */
 	SCIFB_DATA(SCIFB1, 0xe6c30000, gic_spi(149)), /* SCIFB1 */
 	SCIFB_DATA(SCIFB2, 0xe6ce0000, gic_spi(150)), /* SCIFB2 */
 	SCIFB_DATA(SCIFB3, 0xe6cf0000, gic_spi(151)), /* SCIFB3 */
diff --git a/arch/arm/mach-versatile/include/mach/platform.h b/arch/arm/mach-versatile/include/mach/platform.h
index ec087407b16..6f938ccb0c5 100644
--- a/arch/arm/mach-versatile/include/mach/platform.h
+++ b/arch/arm/mach-versatile/include/mach/platform.h
@@ -231,12 +231,14 @@
 /* PCI space */
 #define VERSATILE_PCI_BASE             0x41000000	/* PCI Interface */
 #define VERSATILE_PCI_CFG_BASE	       0x42000000
+#define VERSATILE_PCI_IO_BASE          0x43000000
 #define VERSATILE_PCI_MEM_BASE0        0x44000000
 #define VERSATILE_PCI_MEM_BASE1        0x50000000
 #define VERSATILE_PCI_MEM_BASE2        0x60000000
 /* Sizes of above maps */
 #define VERSATILE_PCI_BASE_SIZE	       0x01000000
 #define VERSATILE_PCI_CFG_BASE_SIZE    0x02000000
+#define VERSATILE_PCI_IO_BASE_SIZE     0x01000000
 #define VERSATILE_PCI_MEM_BASE0_SIZE   0x0c000000	/* 32Mb */
 #define VERSATILE_PCI_MEM_BASE1_SIZE   0x10000000	/* 256Mb */
 #define VERSATILE_PCI_MEM_BASE2_SIZE   0x10000000	/* 256Mb */
diff --git a/arch/arm/mach-versatile/pci.c b/arch/arm/mach-versatile/pci.c
index e92e5e0705b..c97be4ea76d 100644
--- a/arch/arm/mach-versatile/pci.c
+++ b/arch/arm/mach-versatile/pci.c
@@ -43,9 +43,9 @@
 #define PCI_IMAP0		__IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x0)
 #define PCI_IMAP1		__IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x4)
 #define PCI_IMAP2		__IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x8)
-#define PCI_SMAP0		__IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x10)
-#define PCI_SMAP1		__IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x14)
-#define PCI_SMAP2		__IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x18)
+#define PCI_SMAP0		__IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x14)
+#define PCI_SMAP1		__IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x18)
+#define PCI_SMAP2		__IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x1c)
 #define PCI_SELFID		__IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0xc)
 
 #define DEVICE_ID_OFFSET		0x00
@@ -170,8 +170,8 @@ static struct pci_ops pci_versatile_ops = {
 	.write	= versatile_write_config,
 };
 
-static struct resource io_mem = {
-	.name	= "PCI I/O space",
+static struct resource unused_mem = {
+	.name	= "PCI unused",
 	.start	= VERSATILE_PCI_MEM_BASE0,
 	.end	= VERSATILE_PCI_MEM_BASE0+VERSATILE_PCI_MEM_BASE0_SIZE-1,
 	.flags	= IORESOURCE_MEM,
@@ -195,9 +195,9 @@ static int __init pci_versatile_setup_resources(struct pci_sys_data *sys)
 {
 	int ret = 0;
 
-	ret = request_resource(&iomem_resource, &io_mem);
+	ret = request_resource(&iomem_resource, &unused_mem);
 	if (ret) {
-		printk(KERN_ERR "PCI: unable to allocate I/O "
+		printk(KERN_ERR "PCI: unable to allocate unused "
 		       "memory region (%d)\n", ret);
 		goto out;
 	}
@@ -205,7 +205,7 @@ static int __init pci_versatile_setup_resources(struct pci_sys_data *sys)
 	if (ret) {
 		printk(KERN_ERR "PCI: unable to allocate non-prefetchable "
 		       "memory region (%d)\n", ret);
-		goto release_io_mem;
+		goto release_unused_mem;
 	}
 	ret = request_resource(&iomem_resource, &pre_mem);
 	if (ret) {
@@ -225,8 +225,8 @@ static int __init pci_versatile_setup_resources(struct pci_sys_data *sys)
 
  release_non_mem:
 	release_resource(&non_mem);
- release_io_mem:
-	release_resource(&io_mem);
+ release_unused_mem:
+	release_resource(&unused_mem);
  out:
 	return ret;
 }
@@ -246,7 +246,7 @@ int __init pci_versatile_setup(int nr, struct pci_sys_data *sys)
 		goto out;
 	}
 
-	ret = pci_ioremap_io(0, VERSATILE_PCI_MEM_BASE0);
+	ret = pci_ioremap_io(0, VERSATILE_PCI_IO_BASE);
 	if (ret)
 		goto out;
 
@@ -295,6 +295,19 @@ int __init pci_versatile_setup(int nr, struct pci_sys_data *sys)
 	__raw_writel(PHYS_OFFSET, local_pci_cfg_base + PCI_BASE_ADDRESS_2);
 
 	/*
+	 * For many years the kernel and QEMU were symbiotically buggy
+	 * in that they both assumed the same broken IRQ mapping.
+	 * QEMU therefore attempts to auto-detect old broken kernels
+	 * so that they still work on newer QEMU as they did on old
+	 * QEMU. Since we now use the correct (ie matching-hardware)
+	 * IRQ mapping we write a definitely different value to a
+	 * PCI_INTERRUPT_LINE register to tell QEMU that we expect
+	 * real hardware behaviour and it need not be backwards
+	 * compatible for us. This write is harmless on real hardware.
+	 */
+	__raw_writel(0, VERSATILE_PCI_VIRT_BASE+PCI_INTERRUPT_LINE);
+
+	/*
 	 * Do not to map Versatile FPGA PCI device into memory space
 	 */
 	pci_slot_ignore |= (1 << myslot);
@@ -327,13 +340,13 @@ static int __init versatile_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	int irq;
 
-	/* slot,  pin,	irq
-	 *  24     1     IRQ_SIC_PCI0
-	 *  25     1     IRQ_SIC_PCI1
-	 *  26     1     IRQ_SIC_PCI2
-	 *  27     1     IRQ_SIC_PCI3
+	/*
+	 * Slot	INTA	INTB	INTC	INTD
+	 * 31	PCI1	PCI2	PCI3	PCI0
+	 * 30	PCI0	PCI1	PCI2	PCI3
+	 * 29	PCI3	PCI0	PCI1	PCI2
 	 */
-	irq = IRQ_SIC_PCI0 + ((slot - 24 + pin - 1) & 3);
+	irq = IRQ_SIC_PCI0 + ((slot + 2 + pin - 1) & 3);
 
 	return irq;
 }
diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig
index 5907e10c37f..dd3d5975a5c 100644
--- a/arch/arm/mach-vexpress/Kconfig
+++ b/arch/arm/mach-vexpress/Kconfig
@@ -1,5 +1,7 @@
 config ARCH_VEXPRESS
 	bool "ARM Ltd. Versatile Express family" if ARCH_MULTI_V7
+	select ARCH_HAS_CPUFREQ
+	select ARCH_HAS_OPP
 	select ARCH_REQUIRE_GPIOLIB
 	select ARM_AMBA
 	select ARM_GIC
@@ -56,5 +58,23 @@ config ARCH_VEXPRESS_CORTEX_A5_A9_ERRATA
 
 config ARCH_VEXPRESS_CA9X4
 	bool "Versatile Express Cortex-A9x4 tile"
+	select ARM_ERRATA_643719
+
+config ARCH_VEXPRESS_DCSCB
+	bool "Dual Cluster System Control Block (DCSCB) support"
+	depends on MCPM
+	select ARM_CCI
+	help
+	  Support for the Dual Cluster System Configuration Block (DCSCB).
+	  This is needed to provide CPU and cluster power management
+	  on RTSM implementing big.LITTLE.
+
+config ARCH_VEXPRESS_TC2
+	bool "TC2 cluster management"
+	depends on MCPM
+	select VEXPRESS_SPC
+	select ARM_CCI
+	help
+	  Support for CPU and cluster power management on TC2.
 
 endmenu
diff --git a/arch/arm/mach-vexpress/Makefile b/arch/arm/mach-vexpress/Makefile
index 42703e8b4d3..14193dc7e6e 100644
--- a/arch/arm/mach-vexpress/Makefile
+++ b/arch/arm/mach-vexpress/Makefile
@@ -6,5 +6,13 @@ ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \
 
 obj-y					:= v2m.o
 obj-$(CONFIG_ARCH_VEXPRESS_CA9X4)	+= ct-ca9x4.o
+obj-$(CONFIG_ARCH_VEXPRESS_DCSCB)	+= dcscb.o	dcscb_setup.o
+CFLAGS_REMOVE_dcscb.o			= -pg
+obj-$(CONFIG_ARCH_VEXPRESS_TC2)		+= tc2_pm.o tc2_pm_setup.o
+CFLAGS_REMOVE_tc2_pm.o			= -pg
+ifeq ($(CONFIG_ARCH_VEXPRESS_TC2),y)
+obj-$(CONFIG_ARM_PSCI)			+= tc2_pm_psci.o
+CFLAGS_REMOVE_tc2_pm_psci.o		= -pg
+endif
 obj-$(CONFIG_SMP)			+= platsmp.o
 obj-$(CONFIG_HOTPLUG_CPU)		+= hotplug.o
diff --git a/arch/arm/mach-vexpress/core.h b/arch/arm/mach-vexpress/core.h
index f134cd4a85f..bde4374ab6d 100644
--- a/arch/arm/mach-vexpress/core.h
+++ b/arch/arm/mach-vexpress/core.h
@@ -6,6 +6,8 @@
 
 void vexpress_dt_smp_map_io(void);
 
+bool vexpress_smp_init_ops(void);
+
 extern struct smp_operations	vexpress_smp_ops;
 
 extern void vexpress_cpu_die(unsigned int cpu);
diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c
new file mode 100644
index 00000000000..b35700f8e01
--- /dev/null
+++ b/arch/arm/mach-vexpress/dcscb.c
@@ -0,0 +1,236 @@
+/*
+ * arch/arm/mach-vexpress/dcscb.c - Dual Cluster System Configuration Block
+ *
+ * Created by:	Nicolas Pitre, May 2012
+ * Copyright:	(C) 2012-2013  Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/of_address.h>
+#include <linux/vexpress.h>
+#include <linux/arm-cci.h>
+
+#include <asm/mcpm.h>
+#include <asm/proc-fns.h>
+#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <asm/cp15.h>
+#include <asm/psci.h>
+
+
+#define RST_HOLD0	0x0
+#define RST_HOLD1	0x4
+#define SYS_SWRESET	0x8
+#define RST_STAT0	0xc
+#define RST_STAT1	0x10
+#define EAG_CFG_R	0x20
+#define EAG_CFG_W	0x24
+#define KFC_CFG_R	0x28
+#define KFC_CFG_W	0x2c
+#define DCS_CFG_R	0x30
+
+/*
+ * We can't use regular spinlocks. In the switcher case, it is possible
+ * for an outbound CPU to call power_down() while its inbound counterpart
+ * is already live using the same logical CPU number which trips lockdep
+ * debugging.
+ */
+static arch_spinlock_t dcscb_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+
+static void __iomem *dcscb_base;
+static int dcscb_use_count[4][2];
+static int dcscb_allcpus_mask[2];
+
+static int dcscb_power_up(unsigned int cpu, unsigned int cluster)
+{
+	unsigned int rst_hold, cpumask = (1 << cpu);
+	unsigned int all_mask = dcscb_allcpus_mask[cluster];
+
+	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+	if (cpu >= 4 || cluster >= 2)
+		return -EINVAL;
+
+	/*
+	 * Since this is called with IRQs enabled, and no arch_spin_lock_irq
+	 * variant exists, we need to disable IRQs manually here.
+	 */
+	local_irq_disable();
+	arch_spin_lock(&dcscb_lock);
+
+	dcscb_use_count[cpu][cluster]++;
+	if (dcscb_use_count[cpu][cluster] == 1) {
+		rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4);
+		if (rst_hold & (1 << 8)) {
+			/* remove cluster reset and add individual CPU's reset */
+			rst_hold &= ~(1 << 8);
+			rst_hold |= all_mask;
+		}
+		rst_hold &= ~(cpumask | (cpumask << 4));
+		writel_relaxed(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4);
+	} else if (dcscb_use_count[cpu][cluster] != 2) {
+		/*
+		 * The only possible values are:
+		 * 0 = CPU down
+		 * 1 = CPU (still) up
+		 * 2 = CPU requested to be up before it had a chance
+		 *     to actually make itself down.
+		 * Any other value is a bug.
+		 */
+		BUG();
+	}
+
+	arch_spin_unlock(&dcscb_lock);
+	local_irq_enable();
+
+	return 0;
+}
+
+static void dcscb_power_down(void)
+{
+	unsigned int mpidr, cpu, cluster, rst_hold, cpumask, all_mask;
+	bool last_man = false, skip_wfi = false;
+
+	mpidr = read_cpuid_mpidr();
+	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+	cpumask = (1 << cpu);
+	all_mask = dcscb_allcpus_mask[cluster];
+
+	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+	BUG_ON(cpu >= 4 || cluster >= 2);
+
+	__mcpm_cpu_going_down(cpu, cluster);
+
+	arch_spin_lock(&dcscb_lock);
+	BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
+	dcscb_use_count[cpu][cluster]--;
+	if (dcscb_use_count[cpu][cluster] == 0) {
+		rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4);
+		rst_hold |= cpumask;
+		if (((rst_hold | (rst_hold >> 4)) & all_mask) == all_mask) {
+			rst_hold |= (1 << 8);
+			last_man = true;
+		}
+		writel_relaxed(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4);
+	} else if (dcscb_use_count[cpu][cluster] == 1) {
+		/*
+		 * A power_up request went ahead of us.
+		 * Even if we do not want to shut this CPU down,
+		 * the caller expects a certain state as if the WFI
+		 * was aborted.  So let's continue with cache cleaning.
+		 */
+		skip_wfi = true;
+	} else
+		BUG();
+
+	if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
+		arch_spin_unlock(&dcscb_lock);
+
+		/* Flush all cache levels for this cluster. */
+		v7_exit_coherency_flush(all);
+
+		/*
+		 * This is a harmless no-op.  On platforms with a real
+		 * outer cache this might either be needed or not,
+		 * depending on where the outer cache sits.
+		 */
+		outer_flush_all();
+
+		/*
+		 * Disable cluster-level coherency by masking
+		 * incoming snoops and DVM messages:
+		 */
+		cci_disable_port_by_cpu(mpidr);
+
+		__mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
+	} else {
+		arch_spin_unlock(&dcscb_lock);
+
+		/* Disable and flush the local CPU cache. */
+		v7_exit_coherency_flush(louis);
+	}
+
+	__mcpm_cpu_down(cpu, cluster);
+
+	/* Now we are prepared for power-down, do it: */
+	dsb();
+	if (!skip_wfi)
+		wfi();
+
+	/* Not dead at this point?  Let our caller cope. */
+}
+
+static const struct mcpm_platform_ops dcscb_power_ops = {
+	.power_up	= dcscb_power_up,
+	.power_down	= dcscb_power_down,
+};
+
+static void __init dcscb_usage_count_init(void)
+{
+	unsigned int mpidr, cpu, cluster;
+
+	mpidr = read_cpuid_mpidr();
+	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+	BUG_ON(cpu >= 4 || cluster >= 2);
+	dcscb_use_count[cpu][cluster] = 1;
+}
+
+extern void dcscb_power_up_setup(unsigned int affinity_level);
+
+static int __init dcscb_init(void)
+{
+	struct device_node *node;
+	unsigned int cfg;
+	int ret;
+
+	ret = psci_probe();
+	if (!ret) {
+		pr_debug("psci found. Aborting native init\n");
+		return -ENODEV;
+	}
+
+	if (!cci_probed())
+		return -ENODEV;
+
+	node = of_find_compatible_node(NULL, NULL, "arm,rtsm,dcscb");
+	if (!node)
+		return -ENODEV;
+	dcscb_base = of_iomap(node, 0);
+	if (!dcscb_base)
+		return -EADDRNOTAVAIL;
+	cfg = readl_relaxed(dcscb_base + DCS_CFG_R);
+	dcscb_allcpus_mask[0] = (1 << (((cfg >> 16) >> (0 << 2)) & 0xf)) - 1;
+	dcscb_allcpus_mask[1] = (1 << (((cfg >> 16) >> (1 << 2)) & 0xf)) - 1;
+	dcscb_usage_count_init();
+
+	ret = mcpm_platform_register(&dcscb_power_ops);
+	if (!ret)
+		ret = mcpm_sync_init(dcscb_power_up_setup);
+	if (ret) {
+		iounmap(dcscb_base);
+		return ret;
+	}
+
+	pr_info("VExpress DCSCB support installed\n");
+
+	/*
+	 * Future entries into the kernel can now go
+	 * through the cluster entry vectors.
+	 */
+	vexpress_flags_set(virt_to_phys(mcpm_entry_point));
+
+	return 0;
+}
+
+early_initcall(dcscb_init);
diff --git a/arch/arm/mach-vexpress/dcscb_setup.S b/arch/arm/mach-vexpress/dcscb_setup.S
new file mode 100644
index 00000000000..4bb7fbe0f62
--- /dev/null
+++ b/arch/arm/mach-vexpress/dcscb_setup.S
@@ -0,0 +1,38 @@
+/*
+ * arch/arm/include/asm/dcscb_setup.S
+ *
+ * Created by:  Dave Martin, 2012-06-22
+ * Copyright:   (C) 2012-2013  Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+
+ENTRY(dcscb_power_up_setup)
+
+	cmp	r0, #0			@ check affinity level
+	beq	2f
+
+/*
+ * Enable cluster-level coherency, in preparation for turning on the MMU.
+ * The ACTLR SMP bit does not need to be set here, because cpu_resume()
+ * already restores that.
+ *
+ * A15/A7 may not require explicit L2 invalidation on reset, dependent
+ * on hardware integration decisions.
+ * For now, this code assumes that L2 is either already invalidated,
+ * or invalidation is not required.
+ */
+
+	b	cci_enable_port_for_self
+
+2:	@ Implementation-specific local CPU setup operations should go here,
+	@ if any.  In this case, there is nothing to do.
+
+	bx	lr
+
+ENDPROC(dcscb_power_up_setup)
diff --git a/arch/arm/mach-vexpress/include/mach/tc2.h b/arch/arm/mach-vexpress/include/mach/tc2.h
new file mode 100644
index 00000000000..d3b5a2225a0
--- /dev/null
+++ b/arch/arm/mach-vexpress/include/mach/tc2.h
@@ -0,0 +1,10 @@
+#ifndef __MACH_TC2_H
+#define __MACH_TC2_H
+
+/*
+ * cpu and cluster limits
+ */
+#define TC2_MAX_CPUS		3
+#define TC2_MAX_CLUSTERS	2
+
+#endif
diff --git a/arch/arm/mach-vexpress/platsmp.c b/arch/arm/mach-vexpress/platsmp.c
index dc1ace55d55..993c9ae5dc5 100644
--- a/arch/arm/mach-vexpress/platsmp.c
+++ b/arch/arm/mach-vexpress/platsmp.c
@@ -12,9 +12,11 @@
 #include <linux/errno.h>
 #include <linux/smp.h>
 #include <linux/io.h>
+#include <linux/of.h>
 #include <linux/of_fdt.h>
 #include <linux/vexpress.h>
 
+#include <asm/mcpm.h>
 #include <asm/smp_scu.h>
 #include <asm/mach/map.h>
 
@@ -203,3 +205,21 @@ struct smp_operations __initdata vexpress_smp_ops = {
 	.cpu_die		= vexpress_cpu_die,
 #endif
 };
+
+bool __init vexpress_smp_init_ops(void)
+{
+#ifdef CONFIG_MCPM
+	/*
+	 * The best way to detect a multi-cluster configuration at the moment
+	 * is to look for the presence of a CCI in the system.
+	 * Override the default vexpress_smp_ops if so.
+	 */
+	struct device_node *node;
+	node = of_find_compatible_node(NULL, NULL, "arm,cci-400");
+	if (node && of_device_is_available(node)) {
+		mcpm_smp_set_ops();
+		return true;
+	}
+#endif
+	return false;
+}
diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c
new file mode 100644
index 00000000000..9fc264a3bad
--- /dev/null
+++ b/arch/arm/mach-vexpress/tc2_pm.c
@@ -0,0 +1,277 @@
+/*
+ * arch/arm/mach-vexpress/tc2_pm.c - TC2 power management support
+ *
+ * Created by:	Nicolas Pitre, October 2012
+ * Copyright:	(C) 2012  Linaro Limited
+ *
+ * Some portions of this file were originally written by Achin Gupta
+ * Copyright:   (C) 2012  ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/irqchip/arm-gic.h>
+
+#include <asm/mcpm.h>
+#include <asm/proc-fns.h>
+#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <asm/cp15.h>
+#include <asm/psci.h>
+
+#include <mach/motherboard.h>
+#include <mach/tc2.h>
+
+#include <linux/vexpress.h>
+#include <linux/arm-cci.h>
+
+/*
+ * We can't use regular spinlocks. In the switcher case, it is possible
+ * for an outbound CPU to call power_down() after its inbound counterpart
+ * is already live using the same logical CPU number which trips lockdep
+ * debugging.
+ */
+static arch_spinlock_t tc2_pm_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+
+static int tc2_pm_use_count[TC2_MAX_CPUS][TC2_MAX_CLUSTERS];
+
+static int tc2_pm_power_up(unsigned int cpu, unsigned int cluster)
+{
+	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+	if (cluster >= TC2_MAX_CLUSTERS ||
+	    cpu >= vexpress_spc_get_nb_cpus(cluster))
+		return -EINVAL;
+
+	/*
+	 * Since this is called with IRQs enabled, and no arch_spin_lock_irq
+	 * variant exists, we need to disable IRQs manually here.
+	 */
+	local_irq_disable();
+	arch_spin_lock(&tc2_pm_lock);
+
+	if (!tc2_pm_use_count[0][cluster] &&
+	    !tc2_pm_use_count[1][cluster] &&
+	    !tc2_pm_use_count[2][cluster])
+		vexpress_spc_powerdown_enable(cluster, 0);
+
+	tc2_pm_use_count[cpu][cluster]++;
+	if (tc2_pm_use_count[cpu][cluster] == 1) {
+		vexpress_spc_write_resume_reg(cluster, cpu,
+					      virt_to_phys(mcpm_entry_point));
+		vexpress_spc_set_cpu_wakeup_irq(cpu, cluster, 1);
+	} else if (tc2_pm_use_count[cpu][cluster] != 2) {
+		/*
+		 * The only possible values are:
+		 * 0 = CPU down
+		 * 1 = CPU (still) up
+		 * 2 = CPU requested to be up before it had a chance
+		 *     to actually make itself down.
+		 * Any other value is a bug.
+		 */
+		BUG();
+	}
+
+	arch_spin_unlock(&tc2_pm_lock);
+	local_irq_enable();
+
+	return 0;
+}
+
+static void tc2_pm_down(u64 residency)
+{
+	unsigned int mpidr, cpu, cluster;
+	bool last_man = false, skip_wfi = false;
+
+	mpidr = read_cpuid_mpidr();
+	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+	BUG_ON(cluster >= TC2_MAX_CLUSTERS ||
+	       cpu >= vexpress_spc_get_nb_cpus(cluster));
+
+	__mcpm_cpu_going_down(cpu, cluster);
+
+	arch_spin_lock(&tc2_pm_lock);
+	BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
+	tc2_pm_use_count[cpu][cluster]--;
+	if (tc2_pm_use_count[cpu][cluster] == 0) {
+		vexpress_spc_set_cpu_wakeup_irq(cpu, cluster, 1);
+		if (!tc2_pm_use_count[0][cluster] &&
+		    !tc2_pm_use_count[1][cluster] &&
+		    !tc2_pm_use_count[2][cluster] &&
+		    (!residency || residency > 5000)) {
+			vexpress_spc_powerdown_enable(cluster, 1);
+			vexpress_spc_set_global_wakeup_intr(1);
+			last_man = true;
+		}
+	} else if (tc2_pm_use_count[cpu][cluster] == 1) {
+		/*
+		 * A power_up request went ahead of us.
+		 * Even if we do not want to shut this CPU down,
+		 * the caller expects a certain state as if the WFI
+		 * was aborted.  So let's continue with cache cleaning.
+		 */
+		skip_wfi = true;
+	} else
+		BUG();
+
+	/*
+	 * If the CPU is committed to power down, make sure
+	 * the power controller will be in charge of waking it
+	 * up upon IRQ, ie IRQ lines are cut from GIC CPU IF
+	 * to the CPU by disabling the GIC CPU IF to prevent wfi
+	 * from completing execution behind power controller back
+	 */
+	if (!skip_wfi)
+		gic_cpu_if_down();
+
+	if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
+		arch_spin_unlock(&tc2_pm_lock);
+
+		if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A15) {
+			/*
+			 * On the Cortex-A15 we need to disable
+			 * L2 prefetching before flushing the cache.
+			 */
+			asm volatile(
+			"mcr	p15, 1, %0, c15, c0, 3 \n\t"
+			"isb	\n\t"
+			"dsb	"
+			: : "r" (0x400) );
+		}
+
+		v7_exit_coherency_flush(all);
+
+		cci_disable_port_by_cpu(mpidr);
+
+		__mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
+	} else {
+		/*
+		 * If last man then undo any setup done previously.
+		 */
+		if (last_man) {
+			vexpress_spc_powerdown_enable(cluster, 0);
+			vexpress_spc_set_global_wakeup_intr(0);
+		}
+
+		arch_spin_unlock(&tc2_pm_lock);
+
+		v7_exit_coherency_flush(louis);
+	}
+
+	__mcpm_cpu_down(cpu, cluster);
+
+	/* Now we are prepared for power-down, do it: */
+	if (!skip_wfi)
+		wfi();
+
+	/* Not dead at this point?  Let our caller cope. */
+}
+
+static void tc2_pm_power_down(void)
+{
+	tc2_pm_down(0);
+}
+
+static void tc2_pm_suspend(u64 residency)
+{
+	extern void tc2_resume(void);
+	unsigned int mpidr, cpu, cluster;
+
+	mpidr = read_cpuid_mpidr();
+	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+	vexpress_spc_write_resume_reg(cluster, cpu,
+				      virt_to_phys(tc2_resume));
+
+	tc2_pm_down(residency);
+}
+
+static void tc2_pm_powered_up(void)
+{
+	unsigned int mpidr, cpu, cluster;
+	unsigned long flags;
+
+	mpidr = read_cpuid_mpidr();
+	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+	BUG_ON(cluster >= TC2_MAX_CLUSTERS ||
+	       cpu >= vexpress_spc_get_nb_cpus(cluster));
+
+	local_irq_save(flags);
+	arch_spin_lock(&tc2_pm_lock);
+
+	if (!tc2_pm_use_count[0][cluster] &&
+	    !tc2_pm_use_count[1][cluster] &&
+	    !tc2_pm_use_count[2][cluster]) {
+		vexpress_spc_powerdown_enable(cluster, 0);
+		vexpress_spc_set_global_wakeup_intr(0);
+	}
+
+	if (!tc2_pm_use_count[cpu][cluster])
+		tc2_pm_use_count[cpu][cluster] = 1;
+
+	vexpress_spc_set_cpu_wakeup_irq(cpu, cluster, 0);
+	vexpress_spc_write_resume_reg(cluster, cpu, 0);
+
+	arch_spin_unlock(&tc2_pm_lock);
+	local_irq_restore(flags);
+}
+
+static const struct mcpm_platform_ops tc2_pm_power_ops = {
+	.power_up	= tc2_pm_power_up,
+	.power_down	= tc2_pm_power_down,
+	.suspend	= tc2_pm_suspend,
+	.powered_up	= tc2_pm_powered_up,
+};
+
+static void __init tc2_pm_usage_count_init(void)
+{
+	unsigned int mpidr, cpu, cluster;
+
+	mpidr = read_cpuid_mpidr();
+	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+	BUG_ON(cluster >= TC2_MAX_CLUSTERS ||
+	       cpu >= vexpress_spc_get_nb_cpus(cluster));
+
+	tc2_pm_use_count[cpu][cluster] = 1;
+}
+
+extern void tc2_pm_power_up_setup(unsigned int affinity_level);
+
+static int __init tc2_pm_init(void)
+{
+	int ret;
+
+	ret = psci_probe();
+	if (!ret) {
+		pr_debug("psci found. Aborting native init\n");
+		return -ENODEV;
+	}
+
+	if (!vexpress_spc_check_loaded())
+		return -ENODEV;
+
+	tc2_pm_usage_count_init();
+
+	ret = mcpm_platform_register(&tc2_pm_power_ops);
+	if (!ret)
+		ret = mcpm_sync_init(tc2_pm_power_up_setup);
+	if (!ret)
+		pr_info("TC2 power management initialized\n");
+	return ret;
+}
+
+early_initcall(tc2_pm_init);
diff --git a/arch/arm/mach-vexpress/tc2_pm_psci.c b/arch/arm/mach-vexpress/tc2_pm_psci.c
new file mode 100644
index 00000000000..c2fdc22e4c0
--- /dev/null
+++ b/arch/arm/mach-vexpress/tc2_pm_psci.c
@@ -0,0 +1,173 @@
+/*
+ * arch/arm/mach-vexpress/tc2_pm_psci.c - TC2 PSCI support
+ *
+ * Created by: Achin Gupta, December 2012
+ * Copyright:  (C) 2012  ARM Limited
+ *
+ * Some portions of this file were originally written by Nicolas Pitre
+ * Copyright:   (C) 2012  Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+
+#include <asm/mcpm.h>
+#include <asm/proc-fns.h>
+#include <asm/cacheflush.h>
+#include <asm/psci.h>
+#include <asm/atomic.h>
+#include <asm/cputype.h>
+#include <asm/cp15.h>
+
+#include <mach/motherboard.h>
+#include <mach/tc2.h>
+
+#include <linux/vexpress.h>
+
+/*
+ * Platform specific state id understood by the firmware and used to
+ * program the power controller
+ */
+#define PSCI_POWER_STATE_ID           0
+
+static atomic_t tc2_pm_use_count[TC2_MAX_CPUS][TC2_MAX_CLUSTERS];
+
+static int tc2_pm_psci_power_up(unsigned int cpu, unsigned int cluster)
+{
+	unsigned int mpidr = (cluster << 8) | cpu;
+	int ret = 0;
+
+	BUG_ON(!psci_ops.cpu_on);
+
+	switch (atomic_inc_return(&tc2_pm_use_count[cpu][cluster])) {
+	case 1:
+		/*
+		 * This is a request to power up a cpu that linux thinks has
+		 * been powered down. Retries are needed if the firmware has
+		 * seen the power down request as yet.
+		 */
+		do
+			ret = psci_ops.cpu_on(mpidr,
+					      virt_to_phys(mcpm_entry_point));
+		while (ret == -EAGAIN);
+
+		return ret;
+	case 2:
+		/* This power up request has overtaken a power down request */
+		return ret;
+	default:
+		/* Any other value is a bug */
+		BUG();
+	}
+}
+
+static void tc2_pm_psci_power_down(void)
+{
+	struct psci_power_state power_state;
+	unsigned int mpidr, cpu, cluster;
+
+	mpidr = read_cpuid_mpidr();
+	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+	BUG_ON(!psci_ops.cpu_off);
+
+	switch (atomic_dec_return(&tc2_pm_use_count[cpu][cluster])) {
+	case 1:
+		/*
+		 * Overtaken by a power up. Flush caches, exit coherency,
+		 * return & fake a reset
+		 */
+		set_cr(get_cr() & ~CR_C);
+
+		flush_cache_louis();
+
+		asm volatile ("clrex");
+		set_auxcr(get_auxcr() & ~(1 << 6));
+
+		return;
+	case 0:
+		/* A normal request to possibly power down the cluster */
+		power_state.id = PSCI_POWER_STATE_ID;
+		power_state.type = PSCI_POWER_STATE_TYPE_POWER_DOWN;
+		power_state.affinity_level = PSCI_POWER_STATE_AFFINITY_LEVEL1;
+
+		psci_ops.cpu_off(power_state);
+
+		/* On success this function never returns */
+	default:
+		/* Any other value is a bug */
+		BUG();
+	}
+}
+
+static void tc2_pm_psci_suspend(u64 unused)
+{
+	struct psci_power_state power_state;
+
+	BUG_ON(!psci_ops.cpu_suspend);
+
+	/* On TC2 always attempt to power down the cluster */
+	power_state.id = PSCI_POWER_STATE_ID;
+	power_state.type = PSCI_POWER_STATE_TYPE_POWER_DOWN;
+	power_state.affinity_level = PSCI_POWER_STATE_AFFINITY_LEVEL1;
+
+	psci_ops.cpu_suspend(power_state, virt_to_phys(mcpm_entry_point));
+
+	/* On success this function never returns */
+	BUG();
+}
+
+static const struct mcpm_platform_ops tc2_pm_power_ops = {
+	.power_up      = tc2_pm_psci_power_up,
+	.power_down    = tc2_pm_psci_power_down,
+	.suspend       = tc2_pm_psci_suspend,
+};
+
+static void __init tc2_pm_usage_count_init(void)
+{
+	unsigned int mpidr, cpu, cluster;
+
+	mpidr = read_cpuid_mpidr();
+	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+	BUG_ON(cluster >= TC2_MAX_CLUSTERS ||
+	       cpu >= vexpress_spc_get_nb_cpus(cluster));
+
+	atomic_set(&tc2_pm_use_count[cpu][cluster], 1);
+}
+
+static int __init tc2_pm_psci_init(void)
+{
+	int ret;
+
+	ret = psci_probe();
+	if (ret) {
+		pr_debug("psci not found. Aborting psci init\n");
+		return -ENODEV;
+	}
+
+	if (!vexpress_spc_check_loaded()) {
+		pr_debug("spc not found. Aborting psci init\n");
+		return -ENODEV;
+	}
+
+	tc2_pm_usage_count_init();
+
+	ret = mcpm_platform_register(&tc2_pm_power_ops);
+	if (!ret)
+		ret = mcpm_sync_init(NULL);
+	if (!ret)
+		pr_info("TC2 power management initialized\n");
+	return ret;
+}
+
+early_initcall(tc2_pm_psci_init);
diff --git a/arch/arm/mach-vexpress/tc2_pm_setup.S b/arch/arm/mach-vexpress/tc2_pm_setup.S
new file mode 100644
index 00000000000..a18dafeeb0e
--- /dev/null
+++ b/arch/arm/mach-vexpress/tc2_pm_setup.S
@@ -0,0 +1,68 @@
+/*
+ * arch/arm/include/asm/tc2_pm_setup.S
+ *
+ * Created by: Nicolas Pitre, October 2012
+ (             (based on dcscb_setup.S by Dave Martin)
+ * Copyright:  (C) 2012  Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+
+#include <linux/linkage.h>
+#include <asm/mcpm.h>
+
+
+#define SPC_PHYS_BASE		0x7FFF0000
+#define SPC_WAKE_INT_STAT	0xb2c
+
+#define SNOOP_CTL_A15		0x404
+#define SNOOP_CTL_A7		0x504
+
+#define A15_SNOOP_MASK		(0x3 << 7)
+#define A7_SNOOP_MASK		(0x1 << 13)
+
+#define A15_BX_ADDR0		0xB68
+
+
+ENTRY(tc2_resume)
+	mrc	p15, 0, r0, c0, c0, 5
+	ubfx	r1, r0, #0, #4		@ r1 = cpu
+	ubfx	r2, r0, #8, #4		@ r2 = cluster
+	add	r1, r1, r2, lsl #2	@ r1 = index of CPU in WAKE_INT_STAT
+	ldr	r3, =SPC_PHYS_BASE + SPC_WAKE_INT_STAT
+	ldr	r3, [r3]
+	lsr	r3, r1
+	tst	r3, #1
+	wfieq				@ if no pending IRQ reenters wfi
+	b	mcpm_entry_point
+ENDPROC(tc2_resume)
+
+/*
+ * Enable cluster-level coherency, in preparation for turning on the MMU.
+ * The ACTLR SMP bit does not need to be set here, because cpu_resume()
+ * already restores that.
+ */
+
+ENTRY(tc2_pm_power_up_setup)
+
+	cmp	r0, #0
+	beq	2f
+
+	b cci_enable_port_for_self
+
+2:	@ Clear the BX addr register
+	ldr	r3, =SPC_PHYS_BASE + A15_BX_ADDR0
+	mrc	p15, 0, r0, c0, c0, 5	@ MPIDR
+	ubfx	r1, r0, #8, #4		@ cluster
+	ubfx	r0, r0, #0, #4		@ cpu
+	add	r3, r3, r1, lsl #4
+	mov	r1, #0
+	str	r1, [r3, r0, lsl #2]
+	dsb
+
+	bx	lr
+
+ENDPROC(tc2_pm_power_up_setup)
diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c
index 8802030df98..057f99b62ea 100644
--- a/arch/arm/mach-vexpress/v2m.c
+++ b/arch/arm/mach-vexpress/v2m.c
@@ -10,6 +10,7 @@
 #include <linux/smp.h>
 #include <linux/init.h>
 #include <linux/irqchip.h>
+#include <linux/memblock.h>
 #include <linux/of_address.h>
 #include <linux/of_fdt.h>
 #include <linux/of_irq.h>
@@ -373,6 +374,31 @@ MACHINE_START(VEXPRESS, "ARM-Versatile Express")
 	.init_machine	= v2m_init,
 MACHINE_END
 
+static void __init v2m_dt_hdlcd_init(void)
+{
+	struct device_node *node;
+	int len, na, ns;
+	const __be32 *prop;
+	phys_addr_t fb_base, fb_size;
+
+	node = of_find_compatible_node(NULL, NULL, "arm,hdlcd");
+	if (!node)
+		return;
+
+	na = of_n_addr_cells(node);
+	ns = of_n_size_cells(node);
+
+	prop = of_get_property(node, "framebuffer", &len);
+	if (WARN_ON(!prop || len < (na + ns) * sizeof(*prop)))
+		return;
+
+	fb_base = of_read_number(prop, na);
+	fb_size = of_read_number(prop + na, ns);
+
+	if (WARN_ON(memblock_remove(fb_base, fb_size)))
+		return;
+};
+
 static struct map_desc v2m_rs1_io_desc __initdata = {
 	.virtual	= V2M_PERIPH,
 	.pfn		= __phys_to_pfn(0x1c000000),
@@ -423,6 +449,8 @@ void __init v2m_dt_init_early(void)
 			pr_warning("vexpress: DT HBI (%x) is not matching "
 					"hardware (%x)!\n", dt_hbi, hbi);
 	}
+
+	v2m_dt_hdlcd_init();
 }
 
 static void __init v2m_dt_timer_init(void)
@@ -456,6 +484,7 @@ static const char * const v2m_dt_match[] __initconst = {
 DT_MACHINE_START(VEXPRESS_DT, "ARM-Versatile Express")
 	.dt_compat	= v2m_dt_match,
 	.smp		= smp_ops(vexpress_smp_ops),
+	.smp_init	= smp_init_ops(vexpress_smp_init_ops),
 	.map_io		= v2m_dt_map_io,
 	.init_early	= v2m_dt_init_early,
 	.init_irq	= irqchip_init,
diff --git a/arch/arm/mach-virt/Makefile b/arch/arm/mach-virt/Makefile
index 042afc1f8c4..7ddbfa60227 100644
--- a/arch/arm/mach-virt/Makefile
+++ b/arch/arm/mach-virt/Makefile
@@ -3,4 +3,3 @@
 #
 
 obj-y					:= virt.o
-obj-$(CONFIG_SMP)			+= platsmp.o
diff --git a/arch/arm/mach-virt/platsmp.c b/arch/arm/mach-virt/platsmp.c
deleted file mode 100644
index f4143f5bfa5..00000000000
--- a/arch/arm/mach-virt/platsmp.c
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Dummy Virtual Machine - does what it says on the tin.
- *
- * Copyright (C) 2012 ARM Ltd
- * Author: Will Deacon <will.deacon@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/of.h>
-
-#include <asm/psci.h>
-#include <asm/smp_plat.h>
-
-extern void secondary_startup(void);
-
-static void __init virt_smp_init_cpus(void)
-{
-}
-
-static void __init virt_smp_prepare_cpus(unsigned int max_cpus)
-{
-}
-
-static int __cpuinit virt_boot_secondary(unsigned int cpu,
-					 struct task_struct *idle)
-{
-	if (psci_ops.cpu_on)
-		return psci_ops.cpu_on(cpu_logical_map(cpu),
-				       __pa(secondary_startup));
-	return -ENODEV;
-}
-
-struct smp_operations __initdata virt_smp_ops = {
-	.smp_init_cpus		= virt_smp_init_cpus,
-	.smp_prepare_cpus	= virt_smp_prepare_cpus,
-	.smp_boot_secondary	= virt_boot_secondary,
-};
diff --git a/arch/arm/mach-virt/virt.c b/arch/arm/mach-virt/virt.c
index 061f283f579..a67d2dd5bb6 100644
--- a/arch/arm/mach-virt/virt.c
+++ b/arch/arm/mach-virt/virt.c
@@ -36,11 +36,8 @@ static const char *virt_dt_match[] = {
 	NULL
 };
 
-extern struct smp_operations virt_smp_ops;
-
 DT_MACHINE_START(VIRT, "Dummy Virtual Machine")
 	.init_irq	= irqchip_init,
 	.init_machine	= virt_init,
-	.smp		= smp_ops(virt_smp_ops),
 	.dt_compat	= virt_dt_match,
 MACHINE_END
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 35955b54944..08c9fe917d1 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -411,24 +411,28 @@ config CPU_32v3
 	select CPU_USE_DOMAINS if MMU
 	select NEEDS_SYSCALL_FOR_CMPXCHG if SMP
 	select TLS_REG_EMUL if SMP || !MMU
+	select NEED_KUSER_HELPERS
 
 config CPU_32v4
 	bool
 	select CPU_USE_DOMAINS if MMU
 	select NEEDS_SYSCALL_FOR_CMPXCHG if SMP
 	select TLS_REG_EMUL if SMP || !MMU
+	select NEED_KUSER_HELPERS
 
 config CPU_32v4T
 	bool
 	select CPU_USE_DOMAINS if MMU
 	select NEEDS_SYSCALL_FOR_CMPXCHG if SMP
 	select TLS_REG_EMUL if SMP || !MMU
+	select NEED_KUSER_HELPERS
 
 config CPU_32v5
 	bool
 	select CPU_USE_DOMAINS if MMU
 	select NEEDS_SYSCALL_FOR_CMPXCHG if SMP
 	select TLS_REG_EMUL if SMP || !MMU
+	select NEED_KUSER_HELPERS
 
 config CPU_32v6
 	bool
@@ -756,6 +760,7 @@ config CPU_BPREDICT_DISABLE
 
 config TLS_REG_EMUL
 	bool
+	select NEED_KUSER_HELPERS
 	help
 	  An SMP system using a pre-ARMv6 processor (there are apparently
 	  a few prototypes like that in existence) and therefore access to
@@ -763,11 +768,43 @@ config TLS_REG_EMUL
 
 config NEEDS_SYSCALL_FOR_CMPXCHG
 	bool
+	select NEED_KUSER_HELPERS
 	help
 	  SMP on a pre-ARMv6 processor?  Well OK then.
 	  Forget about fast user space cmpxchg support.
 	  It is just not possible.
 
+config NEED_KUSER_HELPERS
+	bool
+
+config KUSER_HELPERS
+	bool "Enable kuser helpers in vector page" if !NEED_KUSER_HELPERS
+	default y
+	help
+	  Warning: disabling this option may break user programs.
+
+	  Provide kuser helpers in the vector page.  The kernel provides
+	  helper code to userspace in read only form at a fixed location
+	  in the high vector page to allow userspace to be independent of
+	  the CPU type fitted to the system.  This permits binaries to be
+	  run on ARMv4 through to ARMv7 without modification.
+
+	  See Documentation/arm/kernel_user_helpers.txt for details.
+
+	  However, the fixed address nature of these helpers can be used
+	  by ROP (return orientated programming) authors when creating
+	  exploits.
+
+	  If all of the binaries and libraries which run on your platform
+	  are built specifically for your platform, and make no use of
+	  these helpers, then you can turn this option off to hinder
+	  such exploits. However, in that case, if a binary or library
+	  relying on those helpers is run, it will receive a SIGILL signal,
+	  which will terminate the program.
+
+	  Say N here only if you are absolutely certain that you do not
+	  need these helpers; otherwise, the safe option is to say Y.
+
 config DMA_CACHE_RWFO
 	bool "Enable read/write for ownership DMA cache maintenance"
 	depends on CPU_V6K && SMP
diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index 2ac37372ef5..eeab06ebd06 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -39,19 +39,43 @@
  * non 64-bit operations.
  */
 #define ASID_FIRST_VERSION	(1ULL << ASID_BITS)
-#define NUM_USER_ASIDS		(ASID_FIRST_VERSION - 1)
-
-#define ASID_TO_IDX(asid)	((asid & ~ASID_MASK) - 1)
-#define IDX_TO_ASID(idx)	((idx + 1) & ~ASID_MASK)
+#define NUM_USER_ASIDS		ASID_FIRST_VERSION
 
 static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
 static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION);
 static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS);
 
-DEFINE_PER_CPU(atomic64_t, active_asids);
+static DEFINE_PER_CPU(atomic64_t, active_asids);
 static DEFINE_PER_CPU(u64, reserved_asids);
 static cpumask_t tlb_flush_pending;
 
+#ifdef CONFIG_ARM_ERRATA_798181
+void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm,
+			     cpumask_t *mask)
+{
+	int cpu;
+	unsigned long flags;
+	u64 context_id, asid;
+
+	raw_spin_lock_irqsave(&cpu_asid_lock, flags);
+	context_id = mm->context.id.counter;
+	for_each_online_cpu(cpu) {
+		if (cpu == this_cpu)
+			continue;
+		/*
+		 * We only need to send an IPI if the other CPUs are
+		 * running the same ASID as the one being invalidated.
+		 */
+		asid = per_cpu(active_asids, cpu).counter;
+		if (asid == 0)
+			asid = per_cpu(reserved_asids, cpu);
+		if (context_id == asid)
+			cpumask_set_cpu(cpu, mask);
+	}
+	raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
+}
+#endif
+
 #ifdef CONFIG_ARM_LPAE
 static void cpu_set_reserved_ttbr0(void)
 {
@@ -128,7 +152,16 @@ static void flush_context(unsigned int cpu)
 			asid = 0;
 		} else {
 			asid = atomic64_xchg(&per_cpu(active_asids, i), 0);
-			__set_bit(ASID_TO_IDX(asid), asid_map);
+			/*
+			 * If this CPU has already been through a
+			 * rollover, but hasn't run another task in
+			 * the meantime, we must preserve its reserved
+			 * ASID, as this is the only trace we have of
+			 * the process it is still running.
+			 */
+			if (asid == 0)
+				asid = per_cpu(reserved_asids, i);
+			__set_bit(asid & ~ASID_MASK, asid_map);
 		}
 		per_cpu(reserved_asids, i) = asid;
 	}
@@ -167,17 +200,19 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
 		/*
 		 * Allocate a free ASID. If we can't find one, take a
 		 * note of the currently active ASIDs and mark the TLBs
-		 * as requiring flushes.
+		 * as requiring flushes. We always count from ASID #1,
+		 * as we reserve ASID #0 to switch via TTBR0 and indicate
+		 * rollover events.
 		 */
-		asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS);
+		asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
 		if (asid == NUM_USER_ASIDS) {
 			generation = atomic64_add_return(ASID_FIRST_VERSION,
 							 &asid_generation);
 			flush_context(cpu);
-			asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS);
+			asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
 		}
 		__set_bit(asid, asid_map);
-		asid = generation | IDX_TO_ASID(asid);
+		asid |= generation;
 		cpumask_clear(mm_cpumask(mm));
 	}
 
diff --git a/arch/arm/mm/extable.c b/arch/arm/mm/extable.c
index 9d285626bc7..312e15e6d00 100644
--- a/arch/arm/mm/extable.c
+++ b/arch/arm/mm/extable.c
@@ -9,8 +9,13 @@ int fixup_exception(struct pt_regs *regs)
 	const struct exception_table_entry *fixup;
 
 	fixup = search_exception_tables(instruction_pointer(regs));
-	if (fixup)
+	if (fixup) {
 		regs->ARM_pc = fixup->fixup;
+#ifdef CONFIG_THUMB2_KERNEL
+		/* Clear the IT state to avoid nasty surprises in the fixup */
+		regs->ARM_cpsr &= ~PSR_IT_MASK;
+#endif
+	}
 
 	return fixup != NULL;
 }
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 5dbf13f954f..e207aa5f846 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -446,8 +446,16 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
 
 	if (pud_none(*pud_k))
 		goto bad_area;
-	if (!pud_present(*pud))
+	if (!pud_present(*pud)) {
 		set_pud(pud, *pud_k);
+		/*
+		 * There is a small window during free_pgtables() where the
+		 * user *pud entry is 0 but the TLB has not been invalidated
+		 * and we get a level 2 (pmd) translation fault caused by the
+		 * intermediate TLB caching of the old level 1 (pud) entry.
+		 */
+		flush_tlb_kernel_page(addr);
+	}
 
 	pmd = pmd_offset(pud, addr);
 	pmd_k = pmd_offset(pud_k, addr);
@@ -470,8 +478,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
 #endif
 	if (pmd_none(pmd_k[index]))
 		goto bad_area;
+	if (!pmd_present(pmd[index]))
+		copy_pmd(pmd, pmd_k);
 
-	copy_pmd(pmd, pmd_k);
 	return 0;
 
 bad_area:
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 9a5cdc01fcd..0ecc43fd622 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -600,7 +600,7 @@ void __init mem_init(void)
 
 #ifdef CONFIG_SA1111
 	/* now that our DMA memory is actually so designated, we can free it */
-	free_reserved_area(__va(PHYS_PFN_OFFSET), swapper_pg_dir, 0, NULL);
+	free_reserved_area(__va(PHYS_OFFSET), swapper_pg_dir, 0, NULL);
 #endif
 
 	free_highpages();
diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index 10062ceadd1..f0ef2f7d4ad 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -146,7 +146,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 
 	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
 	info.length = len;
-	info.low_limit = PAGE_SIZE;
+	info.low_limit = FIRST_USER_ADDRESS;
 	info.high_limit = mm->mmap_base;
 	info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0;
 	info.align_offset = pgoff << PAGE_SHIFT;
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 4d409e6a552..daf336fe0be 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1175,7 +1175,7 @@ static void __init devicemaps_init(struct machine_desc *mdesc)
 	/*
 	 * Allocate the vector page early.
 	 */
-	vectors = early_alloc(PAGE_SIZE);
+	vectors = early_alloc(PAGE_SIZE * 2);
 
 	early_trap_init(vectors);
 
@@ -1220,15 +1220,27 @@ static void __init devicemaps_init(struct machine_desc *mdesc)
 	map.pfn = __phys_to_pfn(virt_to_phys(vectors));
 	map.virtual = 0xffff0000;
 	map.length = PAGE_SIZE;
+#ifdef CONFIG_KUSER_HELPERS
 	map.type = MT_HIGH_VECTORS;
+#else
+	map.type = MT_LOW_VECTORS;
+#endif
 	create_mapping(&map);
 
 	if (!vectors_high()) {
 		map.virtual = 0;
+		map.length = PAGE_SIZE * 2;
 		map.type = MT_LOW_VECTORS;
 		create_mapping(&map);
 	}
 
+	/* Now create a kernel read-only mapping */
+	map.pfn += 1;
+	map.virtual = 0xffff0000 + PAGE_SIZE;
+	map.length = PAGE_SIZE;
+	map.type = MT_LOW_VECTORS;
+	create_mapping(&map);
+
 	/*
 	 * Ask the machine support to map in the statically mapped devices.
 	 */
diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c
index 0acb089d0f7..1046b373d1a 100644
--- a/arch/arm/mm/pgd.c
+++ b/arch/arm/mm/pgd.c
@@ -87,7 +87,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 		init_pud = pud_offset(init_pgd, 0);
 		init_pmd = pmd_offset(init_pud, 0);
 		init_pte = pte_offset_map(init_pmd, 0);
-		set_pte_ext(new_pte, *init_pte, 0);
+		set_pte_ext(new_pte + 0, init_pte[0], 0);
+		set_pte_ext(new_pte + 1, init_pte[1], 0);
 		pte_unmap(init_pte);
 		pte_unmap(new_pte);
 	}
diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S
index 9704097c450..b3997c70af3 100644
--- a/arch/arm/mm/proc-v7-2level.S
+++ b/arch/arm/mm/proc-v7-2level.S
@@ -110,7 +110,7 @@ ENTRY(cpu_v7_set_pte_ext)
  ARM(	str	r3, [r0, #2048]! )
  THUMB(	add	r0, r0, #2048 )
  THUMB(	str	r3, [r0] )
-	ALT_SMP(mov	pc,lr)
+	ALT_SMP(W(nop))
 	ALT_UP (mcr	p15, 0, r0, c7, c10, 1)		@ flush_pte
 #endif
 	mov	pc, lr
diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S
index 363027e811d..6ba4bd9118f 100644
--- a/arch/arm/mm/proc-v7-3level.S
+++ b/arch/arm/mm/proc-v7-3level.S
@@ -73,7 +73,7 @@ ENTRY(cpu_v7_set_pte_ext)
 	tst	r3, #1 << (55 - 32)		@ L_PTE_DIRTY
 	orreq	r2, #L_PTE_RDONLY
 1:	strd	r2, r3, [r0]
-	ALT_SMP(mov	pc, lr)
+	ALT_SMP(W(nop))
 	ALT_UP (mcr	p15, 0, r0, c7, c10, 1)		@ flush_pte
 #endif
 	mov	pc, lr
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index e35fec34453..5fbccee5f64 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -75,13 +75,14 @@ ENTRY(cpu_v7_do_idle)
 ENDPROC(cpu_v7_do_idle)
 
 ENTRY(cpu_v7_dcache_clean_area)
-	ALT_SMP(mov	pc, lr)			@ MP extensions imply L1 PTW
-	ALT_UP(W(nop))
-	dcache_line_size r2, r3
-1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	ALT_SMP(W(nop))			@ MP extensions imply L1 PTW
+	ALT_UP_B(1f)
+	mov	pc, lr
+1:	dcache_line_size r2, r3
+2:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
 	add	r0, r0, r2
 	subs	r1, r1, r2
-	bhi	1b
+	bhi	2b
 	dsb
 	mov	pc, lr
 ENDPROC(cpu_v7_dcache_clean_area)
diff --git a/arch/arm/plat-samsung/include/plat/clock.h b/arch/arm/plat-samsung/include/plat/clock.h
index a62753dc15b..df45d6edc98 100644
--- a/arch/arm/plat-samsung/include/plat/clock.h
+++ b/arch/arm/plat-samsung/include/plat/clock.h
@@ -83,6 +83,11 @@ extern struct clk clk_ext;
 extern struct clksrc_clk clk_epllref;
 extern struct clksrc_clk clk_esysclk;
 
+/* S3C24XX UART clocks */
+extern struct clk s3c24xx_clk_uart0;
+extern struct clk s3c24xx_clk_uart1;
+extern struct clk s3c24xx_clk_uart2;
+
 /* S3C64XX specific clocks */
 extern struct clk clk_h2;
 extern struct clk clk_27m;
diff --git a/arch/arm/plat-versatile/headsmp.S b/arch/arm/plat-versatile/headsmp.S
index b178d44e9ea..2677bc3762d 100644
--- a/arch/arm/plat-versatile/headsmp.S
+++ b/arch/arm/plat-versatile/headsmp.S
@@ -11,8 +11,6 @@
 #include <linux/linkage.h>
 #include <linux/init.h>
 
-	__INIT
-
 /*
  * Realview/Versatile Express specific entry point for secondary CPUs.
  * This provides a "holding pen" into which all secondary cores are held
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 13609e01f4b..81edd31bb4a 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -170,6 +170,7 @@ static void __init xen_percpu_init(void *unused)
 	per_cpu(xen_vcpu, cpu) = vcpup;
 
 	enable_percpu_irq(xen_events_irq, 0);
+	put_cpu();
 }
 
 static void xen_restart(char str, const char *cmd)
@@ -272,12 +273,15 @@ core_initcall(xen_guest_init);
 
 static int __init xen_pm_init(void)
 {
+	if (!xen_domain())
+		return -ENODEV;
+
 	pm_power_off = xen_power_off;
 	arm_pm_restart = xen_restart;
 
 	return 0;
 }
-subsys_initcall(xen_pm_init);
+late_initcall(xen_pm_init);
 
 static irqreturn_t xen_arm_callback(int irq, void *arg)
 {