12 files changed, 961 insertions, 29 deletions
diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig
index 52d315b792c8..7b2ebc29ce3a 100644
--- a/arch/arm/mach-vexpress/Kconfig
+++ b/arch/arm/mach-vexpress/Kconfig
@@ -1,5 +1,6 @@
 config ARCH_VEXPRESS
 	bool "ARM Ltd. Versatile Express family" if ARCH_MULTI_V7
+	select ARCH_HAS_CPUFREQ
 	select ARCH_REQUIRE_GPIOLIB
 	select ARM_AMBA
 	select ARM_GIC
@@ -52,4 +53,21 @@ config ARCH_VEXPRESS_CORTEX_A5_A9_ERRATA
 config ARCH_VEXPRESS_CA9X4
 	bool "Versatile Express Cortex-A9x4 tile"
 
+config ARCH_VEXPRESS_DCSCB
+	bool "Dual Cluster System Configuration Block (DCSCB) support"
+	depends on MCPM
+	select ARM_CCI
+	help
+	  Support for the Dual Cluster System Configuration Block (DCSCB).
+	  This is needed to provide CPU and cluster power management
+	  on RTSM.
+
+config ARCH_VEXPRESS_TC2
+	bool "TC2 cluster management"
+	depends on MCPM
+	select ARM_SPC
+	select ARM_CCI
+	help
+	  Support for CPU and cluster power management on TC2.
+
 endmenu
diff --git a/arch/arm/mach-vexpress/Makefile b/arch/arm/mach-vexpress/Makefile
index 80b64971fbdd..b4117dbfe85a 100644
--- a/arch/arm/mach-vexpress/Makefile
+++ b/arch/arm/mach-vexpress/Makefile
@@ -6,5 +6,13 @@ ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \
 
 obj-y					:= v2m.o reset.o
 obj-$(CONFIG_ARCH_VEXPRESS_CA9X4)	+= ct-ca9x4.o
+obj-$(CONFIG_ARCH_VEXPRESS_DCSCB)	+= dcscb.o dcscb_setup.o
+obj-$(CONFIG_ARCH_VEXPRESS_TC2)		+= tc2_pm.o tc2_pm_setup.o
+ifeq ($(CONFIG_ARCH_VEXPRESS_TC2),y)
+obj-$(CONFIG_ARM_PSCI)			+= tc2_pm_psci.o
+endif
+CFLAGS_REMOVE_dcscb.o			= -pg
+CFLAGS_REMOVE_tc2_pm.o			= -pg
+CFLAGS_REMOVE_tc2_pm_psci.o		= -pg
 obj-$(CONFIG_SMP)			+= platsmp.o
 obj-$(CONFIG_HOTPLUG_CPU)		+= hotplug.o
diff --git a/arch/arm/mach-vexpress/core.h b/arch/arm/mach-vexpress/core.h
index f134cd4a85f1..bde4374ab6d5 100644
--- a/arch/arm/mach-vexpress/core.h
+++ b/arch/arm/mach-vexpress/core.h
@@ -6,6 +6,8 @@
 
 void vexpress_dt_smp_map_io(void);
 
+bool vexpress_smp_init_ops(void);
+
 extern struct smp_operations	vexpress_smp_ops;
 
 extern void vexpress_cpu_die(unsigned int cpu);
diff --git a/arch/arm/mach-vexpress/ct-ca9x4.c b/arch/arm/mach-vexpress/ct-ca9x4.c
index 60838ddb8564..6f34497a4245 100644
--- a/arch/arm/mach-vexpress/ct-ca9x4.c
+++ b/arch/arm/mach-vexpress/ct-ca9x4.c
@@ -10,10 +10,10 @@
 #include <linux/amba/clcd.h>
 #include <linux/clkdev.h>
 #include <linux/vexpress.h>
+#include <linux/irqchip/arm-gic.h>
 
 #include <asm/hardware/arm_timer.h>
 #include <asm/hardware/cache-l2x0.h>
-#include <asm/hardware/gic.h>
 #include <asm/smp_scu.h>
 #include <asm/smp_twd.h>
 
@@ -182,8 +182,6 @@ static void __init ct_ca9x4_init_cpu_map(void)
 
 	for (i = 0; i < ncores; ++i)
 		set_cpu_possible(i, true);
-
-	set_smp_cross_call(gic_raise_softirq);
 }
 
 static void __init ct_ca9x4_smp_enable(unsigned int max_cpus)
diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c
new file mode 100644
index 000000000000..0dc3caca227b
--- /dev/null
+++ b/arch/arm/mach-vexpress/dcscb.c
@@ -0,0 +1,256 @@
+/*
+ * arch/arm/mach-vexpress/dcscb.c - Dual Cluster System Configuration Block
+ *
+ * Created by:	Nicolas Pitre, May 2012
+ * Copyright:	(C) 2012-2013  Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/of_address.h>
+#include <linux/vexpress.h>
+#include <linux/arm-cci.h>
+
+#include <asm/mcpm.h>
+#include <asm/proc-fns.h>
+#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <asm/cp15.h>
+#include <asm/psci.h>
+
+
+#define RST_HOLD0	0x0
+#define RST_HOLD1	0x4
+#define SYS_SWRESET	0x8
+#define RST_STAT0	0xc
+#define RST_STAT1	0x10
+#define EAG_CFG_R	0x20
+#define EAG_CFG_W	0x24
+#define KFC_CFG_R	0x28
+#define KFC_CFG_W	0x2c
+#define DCS_CFG_R	0x30
+
+/*
+ * We can't use regular spinlocks. In the switcher case, it is possible
+ * for an outbound CPU to call power_down() after its inbound counterpart
+ * is already live using the same logical CPU number which trips lockdep
+ * debugging.
+ */
+static arch_spinlock_t dcscb_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+
+static void __iomem *dcscb_base;
+static int dcscb_use_count[4][2];
+static int dcscb_mcpm_cpu_mask[2];
+
+/*
+ * Take @cpu in @cluster out of reset on first use; -EINVAL if out of range.
+ */
+static int dcscb_power_up(unsigned int cpu, unsigned int cluster)
+{
+	unsigned int rst_hold, cpumask, mcpm_mask;
+
+	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+	if (cpu >= 4 || cluster >= 2)
+		return -EINVAL;
+
+	/* Derive the masks only once cpu and cluster are known in range. */
+	cpumask = 1 << cpu;
+	mcpm_mask = dcscb_mcpm_cpu_mask[cluster];
+
+	/* Called with IRQs on; no arch_spin_lock_irq variant exists. */
+	local_irq_disable();
+	arch_spin_lock(&dcscb_lock);
+
+	dcscb_use_count[cpu][cluster]++;
+	if (dcscb_use_count[cpu][cluster] == 1) {
+		rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4);
+		if (rst_hold & (1 << 8)) {
+			/* remove cluster reset and add individual CPU's reset */
+			rst_hold &= ~(1 << 8);
+			rst_hold |= mcpm_mask;
+		}
+		rst_hold &= ~(cpumask | (cpumask << 4));
+		writel_relaxed(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4);
+	} else if (dcscb_use_count[cpu][cluster] != 2) {
+		/*
+		 * Valid counts: 0 = CPU down, 1 = CPU (still) up,
+		 * 2 = CPU requested to be up before it had a chance to
+		 * actually make itself down.  Any other value is a bug.
+		 */
+		BUG();
+	}
+
+	arch_spin_unlock(&dcscb_lock);
+	local_irq_enable();
+
+	return 0;
+}
+
+/*
+ * Power down the calling CPU, and its cluster too if it is the last one up.
+ */
+static void dcscb_power_down(void)
+{
+	unsigned int mpidr, cpu, cluster, rst_hold, cpumask, mcpm_mask;
+	bool last_man = false, skip_wfi = false;
+
+	mpidr = read_cpuid_mpidr();
+	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+	BUG_ON(cpu >= 4 || cluster >= 2);
+	cpumask = (1 << cpu);	/* derived only after the range check above */
+	mcpm_mask = dcscb_mcpm_cpu_mask[cluster];
+
+	__mcpm_cpu_going_down(cpu, cluster);
+
+	arch_spin_lock(&dcscb_lock);
+	BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
+	dcscb_use_count[cpu][cluster]--;
+	if (dcscb_use_count[cpu][cluster] == 0) {
+		rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4);
+		rst_hold |= cpumask;
+		if (((rst_hold | (rst_hold >> 4)) & mcpm_mask) == mcpm_mask) {
+			rst_hold |= (1 << 8);
+			last_man = true;
+		}
+		writel_relaxed(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4);
+	} else if (dcscb_use_count[cpu][cluster] == 1) {
+		/*
+		 * A power_up request went ahead of us.
+		 * Even if we do not want to shut this CPU down,
+		 * the caller expects a certain state as if the WFI
+		 * was aborted.  So let's continue with cache cleaning.
+		 */
+		skip_wfi = true;
+	} else
+		BUG();
+
+	if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
+		arch_spin_unlock(&dcscb_lock);
+
+		/*
+		 * Flush all cache levels for this cluster.
+		 *
+		 * A15/A7 can hit in the cache with SCTLR.C=0, so we don't need
+		 * a preliminary flush here for those CPUs.  At least, that's
+		 * the theory -- without the extra flush, Linux explodes on
+		 * RTSM (maybe not needed anymore, to be investigated).
+		 */
+		flush_cache_all();
+		set_cr(get_cr() & ~CR_C);
+		flush_cache_all();
+
+		/*
+		 * This is a harmless no-op.  On platforms with a real
+		 * outer cache this might either be needed or not,
+		 * depending on where the outer cache sits.
+		 */
+		outer_flush_all();
+
+		/* Disable local coherency by clearing the ACTLR "SMP" bit: */
+		set_auxcr(get_auxcr() & ~(1 << 6));
+
+		/*
+		 * Disable cluster-level coherency by masking
+		 * incoming snoops and DVM messages:
+		 */
+		disable_cci(cluster);
+
+		__mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
+	} else {
+		arch_spin_unlock(&dcscb_lock);
+
+		/*
+		 * Flush the local CPU cache.  As in the cluster case, the
+		 * flush before clearing SCTLR.C should be redundant on
+		 * A15/A7, but Linux explodes on RTSM without it.
+		 */
+		flush_cache_louis();
+		set_cr(get_cr() & ~CR_C);
+		flush_cache_louis();
+
+		/* Disable local coherency by clearing the ACTLR "SMP" bit: */
+		set_auxcr(get_auxcr() & ~(1 << 6));
+	}
+
+	__mcpm_cpu_down(cpu, cluster);
+
+	/* Now we are prepared for power-down, do it: */
+	if (!skip_wfi) {
+		dsb();
+		wfi();
+	}
+
+	/* Not dead at this point?  Let our caller cope. */
+}
+
+static const struct mcpm_platform_ops dcscb_power_ops = {
+	.power_up	= dcscb_power_up,	/* clears a CPU's RST_HOLD bits */
+	.power_down	= dcscb_power_down,	/* sets the caller's RST_HOLD bit */
+};
+
+/* Mark the CPU executing this function as in use (use count 1). */
+static void __init dcscb_usage_count_init(void)
+{
+	unsigned int id = read_cpuid_mpidr();
+	unsigned int self = MPIDR_AFFINITY_LEVEL(id, 0);
+	unsigned int grp = MPIDR_AFFINITY_LEVEL(id, 1);
+
+	pr_debug("%s: cpu %u cluster %u\n", __func__, self, grp);
+	BUG_ON(grp >= 2 || self >= 4);
+
+	dcscb_use_count[self][grp] = 1;
+}
+
+extern void dcscb_power_up_setup(unsigned int affinity_level);
+
+/* Map the DCSCB and register it with MCPM, unless PSCI is found. */
+static int __init dcscb_init(void)
+{
+	struct device_node *node;
+	unsigned int cfg;
+	int ret;
+
+	ret = psci_probe();
+	if (!ret) {
+		pr_debug("psci found. Aborting native init\n");
+		return -ENODEV;
+	}
+
+	node = of_find_compatible_node(NULL, NULL, "arm,rtsm,dcscb");
+	if (!node)
+		return -ENODEV;
+	dcscb_base = of_iomap(node, 0);
+	of_node_put(node);	/* node is only needed for the mapping */
+	if (!dcscb_base)
+		return -EADDRNOTAVAIL;
+
+	cfg = readl_relaxed(dcscb_base + DCS_CFG_R);
+	dcscb_mcpm_cpu_mask[0] = (1 << (((cfg >> 16) >> (0 << 2)) & 0xf)) - 1;
+	dcscb_mcpm_cpu_mask[1] = (1 << (((cfg >> 16) >> (1 << 2)) & 0xf)) - 1;
+	dcscb_usage_count_init();
+
+	ret = mcpm_platform_register(&dcscb_power_ops);
+	if (!ret)
+		ret = mcpm_sync_init(dcscb_power_up_setup);
+	if (ret) {
+		iounmap(dcscb_base);
+		return ret;
+	}
+
+	/* Kernel entries can from now on use the cluster entry vectors. */
+	vexpress_flags_set(virt_to_phys(mcpm_entry_point));
+
+	return 0;
+}
+
+early_initcall(dcscb_init);
diff --git a/arch/arm/mach-vexpress/dcscb_setup.S b/arch/arm/mach-vexpress/dcscb_setup.S
new file mode 100644
index 000000000000..93bd13f458aa
--- /dev/null
+++ b/arch/arm/mach-vexpress/dcscb_setup.S
@@ -0,0 +1,80 @@
+/*
+ * arch/arm/mach-vexpress/dcscb_setup.S
+ *
+ * Created by:  Dave Martin, 2012-06-22
+ * Copyright:   (C) 2012-2013  Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+
+#include <linux/linkage.h>
+#include <asm/mcpm.h>
+
+
+#define SLAVE_SNOOPCTL_OFFSET	0
+#define SNOOPCTL_SNOOP_ENABLE	(1 << 0)
+#define SNOOPCTL_DVM_ENABLE	(1 << 1)
+
+#define CCI_STATUS_OFFSET	0xc
+#define STATUS_CHANGE_PENDING	(1 << 0)
+
+#define CCI_SLAVE_OFFSET(n)	(0x1000 + 0x1000 * (n))
+
+#define RTSM_CCI_PHYS_BASE	0x2c090000
+#define RTSM_CCI_SLAVE_A15	3
+#define RTSM_CCI_SLAVE_A7	4
+
+#define RTSM_CCI_A15_OFFSET	CCI_SLAVE_OFFSET(RTSM_CCI_SLAVE_A15)
+#define RTSM_CCI_A7_OFFSET	CCI_SLAVE_OFFSET(RTSM_CCI_SLAVE_A7)
+
+
+ENTRY(dcscb_power_up_setup)
+	@ r0 = affinity level: 0 takes the CPU-local path at 2:, else cluster setup
+	cmp	r0, #0			@ check affinity level
+	beq	2f
+
+/*
+ * Enable cluster-level coherency, in preparation for turning on the MMU.
+ * The ACTLR SMP bit does not need to be set here, because cpu_resume()
+ * already restores that.
+ */
+
+	mrc	p15, 0, r0, c0, c0, 5	@ MPIDR
+	ubfx	r0, r0, #8, #4		@ cluster
+
+	@ A15/A7 may not require explicit L2 invalidation on reset, dependent
+	@ on hardware integration decisions.
+	@ For now, this code assumes that L2 is either already invalidated, or
+	@ invalidation is not required.
+
+	ldr	r3, =RTSM_CCI_PHYS_BASE + RTSM_CCI_A15_OFFSET
+	cmp	r0, #0		@ A15 cluster?
+	addne	r3, r3, #RTSM_CCI_A7_OFFSET - RTSM_CCI_A15_OFFSET
+
+	@ r3 now points to the correct CCI slave register block
+
+	ldr	r0, [r3, #SLAVE_SNOOPCTL_OFFSET]
+	orr	r0, r0, #SNOOPCTL_SNOOP_ENABLE | SNOOPCTL_DVM_ENABLE
+	str	r0, [r3, #SLAVE_SNOOPCTL_OFFSET]	@ enable CCI snoops
+
+	@ Wait for snoop control change to complete:
+
+	ldr	r3, =RTSM_CCI_PHYS_BASE
+
+1:	ldr	r0, [r3, #CCI_STATUS_OFFSET]
+	tst	r0, #STATUS_CHANGE_PENDING
+	bne	1b
+
+	dsb		@ Synchronise side-effects of enabling CCI
+
+	bx	lr
+
+2:	@ Implementation-specific local CPU setup operations should go here,
+	@ if any.  In this case, there is nothing to do.
+
+	bx	lr
+
+ENDPROC(dcscb_power_up_setup)
diff --git a/arch/arm/mach-vexpress/include/mach/tc2.h b/arch/arm/mach-vexpress/include/mach/tc2.h
new file mode 100644
index 000000000000..d3b5a2225a0e
--- /dev/null
+++ b/arch/arm/mach-vexpress/include/mach/tc2.h
@@ -0,0 +1,10 @@
+#ifndef __MACH_TC2_H
+#define __MACH_TC2_H
+
+/*
+ * Upper bounds on CPUs per cluster and on clusters for TC2
+ */
+#define TC2_MAX_CPUS		3
+#define TC2_MAX_CLUSTERS	2
+
+#endif
diff --git a/arch/arm/mach-vexpress/platsmp.c b/arch/arm/mach-vexpress/platsmp.c
index c5d70de9bb4e..21368ba6ca2f 100644
--- a/arch/arm/mach-vexpress/platsmp.c
+++ b/arch/arm/mach-vexpress/platsmp.c
@@ -12,11 +12,12 @@
 #include <linux/errno.h>
 #include <linux/smp.h>
 #include <linux/io.h>
+#include <linux/of.h>
 #include <linux/of_fdt.h>
 #include <linux/vexpress.h>
 
+#include <asm/mcpm.h>
 #include <asm/smp_scu.h>
-#include <asm/hardware/gic.h>
 #include <asm/mach/map.h>
 
 #include <mach/motherboard.h>
@@ -128,8 +129,6 @@ static void __init vexpress_dt_smp_init_cpus(void)
 
 	for (i = 0; i < ncores; ++i)
 		set_cpu_possible(i, true);
-
-	set_smp_cross_call(gic_raise_softirq);
 }
 
 static void __init vexpress_dt_smp_prepare_cpus(unsigned int max_cpus)
@@ -206,3 +205,23 @@ struct smp_operations __initdata vexpress_smp_ops = {
 	.cpu_die		= vexpress_cpu_die,
 #endif
 };
+
+/*
+ * Switch to the MCPM SMP operations when the device tree has an "arm,cci"
+ * node.  Returns true if mcpm_smp_set_ops() was called, false otherwise.
+ */
+bool __init vexpress_smp_init_ops(void)
+{
+#ifdef CONFIG_MCPM
+	struct device_node *cci;
+
+	cci = of_find_compatible_node(NULL, NULL, "arm,cci");
+	if (cci) {
+		/* Only the node's presence matters: drop the reference. */
+		of_node_put(cci);
+		mcpm_smp_set_ops();
+		return true;
+	}
+#endif
+	return false;
+}
diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c
new file mode 100644
index 000000000000..f2e9959fb26c
--- /dev/null
+++ b/arch/arm/mach-vexpress/tc2_pm.c
@@ -0,0 +1,271 @@
+/*
+ * arch/arm/mach-vexpress/tc2_pm.c - TC2 power management support
+ *
+ * Created by:	Nicolas Pitre, October 2012
+ * Copyright:	(C) 2012  Linaro Limited
+ *
+ * Some portions of this file were originally written by Achin Gupta
+ * Copyright:   (C) 2012  ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/irqchip/arm-gic.h>
+
+#include <asm/mcpm.h>
+#include <asm/proc-fns.h>
+#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <asm/cp15.h>
+#include <asm/psci.h>
+
+#include <mach/motherboard.h>
+#include <mach/tc2.h>
+
+#include <linux/vexpress.h>
+#include <linux/arm-cci.h>
+
+/*
+ * We can't use regular spinlocks. In the switcher case, it is possible
+ * for an outbound CPU to call power_down() after its inbound counterpart
+ * is already live using the same logical CPU number which trips lockdep
+ * debugging.
+ */
+static arch_spinlock_t tc2_pm_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+
+static int tc2_pm_use_count[TC2_MAX_CPUS][TC2_MAX_CLUSTERS];
+
+/* Power up @cpu in @cluster; returns 0, or -EINVAL for an invalid CPU. */
+static int tc2_pm_power_up(unsigned int cpu, unsigned int cluster)
+{
+	bool cluster_idle;
+
+	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+	if (cluster >= TC2_MAX_CLUSTERS)
+		return -EINVAL;
+	if (cpu >= vexpress_spc_get_nb_cpus(cluster))
+		return -EINVAL;
+
+	/* Entered with IRQs on; no arch_spin_lock_irq variant exists. */
+	local_irq_disable();
+	arch_spin_lock(&tc2_pm_lock);
+
+	cluster_idle = !(tc2_pm_use_count[0][cluster] ||
+			 tc2_pm_use_count[1][cluster] ||
+			 tc2_pm_use_count[2][cluster]);
+	if (cluster_idle)
+		vexpress_spc_powerdown_enable(cluster, 0);
+
+	switch (++tc2_pm_use_count[cpu][cluster]) {
+	case 1:
+		/* 0 -> 1: the CPU was down */
+		vexpress_spc_write_bxaddr_reg(cluster, cpu,
+					      virt_to_phys(mcpm_entry_point));
+		vexpress_spc_set_cpu_wakeup_irq(cpu, cluster, 1);
+		break;
+	case 2:
+		/* requested up before it had a chance to make itself down */
+		break;
+	default:
+		/* 0 (down), 1 (still up) and 2 are the only legal counts */
+		BUG();
+	}
+
+	arch_spin_unlock(&tc2_pm_lock);
+	local_irq_enable();
+
+	return 0;
+}
+
+/* Power down the calling CPU, and its cluster if @residency permits it. */
+static void tc2_pm_down(u64 residency)
+{
+	unsigned int mpidr, cpu, cluster;
+	bool last_man = false, skip_wfi = false;
+
+	mpidr = read_cpuid_mpidr();
+	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+	BUG_ON(cluster >= TC2_MAX_CLUSTERS ||
+	       cpu >= vexpress_spc_get_nb_cpus(cluster));
+
+	__mcpm_cpu_going_down(cpu, cluster);
+
+	arch_spin_lock(&tc2_pm_lock);
+	BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
+	tc2_pm_use_count[cpu][cluster]--;
+	if (tc2_pm_use_count[cpu][cluster] == 0) {
+		vexpress_spc_set_cpu_wakeup_irq(cpu, cluster, 1);
+		/* Last man: no CPU in use and residency is 0 or > 5000 */
+		if (!tc2_pm_use_count[0][cluster] &&
+		    !tc2_pm_use_count[1][cluster] &&
+		    !tc2_pm_use_count[2][cluster] &&
+		    (!residency || residency > 5000)) {
+			vexpress_spc_powerdown_enable(cluster, 1);
+			vexpress_spc_set_global_wakeup_intr(1);
+			last_man = true;
+		}
+	} else if (tc2_pm_use_count[cpu][cluster] == 1) {
+		/*
+		 * A power_up request went ahead of us.
+		 * Even if we do not want to shut this CPU down,
+		 * the caller expects a certain state as if the WFI
+		 * was aborted.  So let's continue with cache cleaning.
+		 */
+		skip_wfi = true;
+	} else
+		BUG();
+
+	gic_cpu_if_down();
+
+	if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
+		arch_spin_unlock(&tc2_pm_lock);
+
+		set_cr(get_cr() & ~CR_C);
+		flush_cache_all();
+		asm volatile ("clrex");
+		set_auxcr(get_auxcr() & ~(1 << 6));
+
+		disable_cci(cluster);
+
+		/*
+		 * Ensure that both C & I bits are disabled in the SCTLR
+		 * so that no coherency traffic will originate from this
+		 * cpu once ACE snoops are off.  NOTE(review): this runs
+		 * after disable_cci(), not before it -- confirm ordering.
+		 */
+		cpu_proc_fin();
+
+		__mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
+	} else {
+		/* If last man then undo any setup done previously. */
+		if (last_man) {
+			vexpress_spc_powerdown_enable(cluster, 0);
+			vexpress_spc_set_global_wakeup_intr(0);
+		}
+
+		arch_spin_unlock(&tc2_pm_lock);
+
+		set_cr(get_cr() & ~CR_C);
+		flush_cache_louis();
+		asm volatile ("clrex");
+		set_auxcr(get_auxcr() & ~(1 << 6));
+	}
+
+	__mcpm_cpu_down(cpu, cluster);
+
+	/* Now we are prepared for power-down, do it: */
+	if (!skip_wfi)
+		wfi();
+
+	/* Not dead at this point?  Let our caller cope. */
+}
+
+static void tc2_pm_power_down(void)
+{
+	tc2_pm_down(0);	/* residency 0 never blocks cluster power-down */
+}
+
+/* Write tc2_resume's address via the SPC bxaddr helper, then power down. */
+static void tc2_pm_suspend(u64 residency)
+{
+	extern void tc2_resume(void);
+	unsigned int id = read_cpuid_mpidr();
+	unsigned int self = MPIDR_AFFINITY_LEVEL(id, 0);
+	unsigned int grp = MPIDR_AFFINITY_LEVEL(id, 1);
+
+	vexpress_spc_write_bxaddr_reg(grp, self,
+				      virt_to_phys(tc2_resume));
+
+	tc2_pm_down(residency);
+}
+
+/* Post power-up bookkeeping for the calling CPU. */
+static void tc2_pm_powered_up(void)
+{
+	unsigned int id = read_cpuid_mpidr();
+	unsigned int self = MPIDR_AFFINITY_LEVEL(id, 0);
+	unsigned int cluster = MPIDR_AFFINITY_LEVEL(id, 1);
+	unsigned long flags;
+
+	pr_debug("%s: cpu %u cluster %u\n", __func__, self, cluster);
+	BUG_ON(cluster >= TC2_MAX_CLUSTERS ||
+	       self >= vexpress_spc_get_nb_cpus(cluster));
+
+	local_irq_save(flags);
+	arch_spin_lock(&tc2_pm_lock);
+
+	if (!(tc2_pm_use_count[0][cluster] ||
+	      tc2_pm_use_count[1][cluster] ||
+	      tc2_pm_use_count[2][cluster])) {
+		vexpress_spc_powerdown_enable(cluster, 0);
+		vexpress_spc_set_global_wakeup_intr(0);
+	}
+
+	/* a count of 0 here means this CPU was not accounted for yet */
+	if (tc2_pm_use_count[self][cluster] == 0)
+		tc2_pm_use_count[self][cluster] = 1;
+
+	vexpress_spc_set_cpu_wakeup_irq(self, cluster, 0);
+	vexpress_spc_write_bxaddr_reg(cluster, self, 0);
+
+	arch_spin_unlock(&tc2_pm_lock);
+	local_irq_restore(flags);
+}
+
+static const struct mcpm_platform_ops tc2_pm_power_ops = {
+	.power_up	= tc2_pm_power_up,
+	.power_down	= tc2_pm_power_down,	/* tc2_pm_down() with residency 0 */
+	.suspend	= tc2_pm_suspend,	/* tc2_pm_down() with given residency */
+	.powered_up	= tc2_pm_powered_up,
+};
+
+/* Give the CPU executing this function an initial use count of 1. */
+static void __init tc2_pm_usage_count_init(void)
+{
+	unsigned int id = read_cpuid_mpidr();
+	unsigned int self = MPIDR_AFFINITY_LEVEL(id, 0);
+	unsigned int grp = MPIDR_AFFINITY_LEVEL(id, 1);
+
+	pr_debug("%s: cpu %u cluster %u\n", __func__, self, grp);
+
+	BUG_ON(grp >= TC2_MAX_CLUSTERS ||
+	       self >= vexpress_spc_get_nb_cpus(grp));
+
+	tc2_pm_use_count[self][grp] = 1;
+}
+
+extern void tc2_pm_power_up_setup(unsigned int affinity_level);
+
+/* Set up native (non-PSCI) TC2 power management; 0 or an error code. */
+static int __init tc2_pm_init(void)
+{
+	int err;
+
+	if (psci_probe() == 0) {
+		pr_debug("psci found. Aborting native init\n");
+		return -ENODEV;
+	}
+	if (!vexpress_spc_check_loaded())
+		return -ENODEV;
+	tc2_pm_usage_count_init();
+
+	err = mcpm_platform_register(&tc2_pm_power_ops);
+	if (err)
+		return err;
+	err = mcpm_sync_init(tc2_pm_power_up_setup);
+	if (err)
+		return err;
+	pr_info("TC2 power management initialized\n");
+	return 0;
+}
+
+early_initcall(tc2_pm_init);
diff --git a/arch/arm/mach-vexpress/tc2_pm_psci.c b/arch/arm/mach-vexpress/tc2_pm_psci.c
new file mode 100644
index 000000000000..5a5e4f568497
--- /dev/null
+++ b/arch/arm/mach-vexpress/tc2_pm_psci.c
@@ -0,0 +1,168 @@
+/*
+ * arch/arm/mach-vexpress/tc2_pm_psci.c - TC2 PSCI support
+ *
+ * Created by: Achin Gupta, December 2012
+ * Copyright:  (C) 2012  ARM Limited
+ *
+ * Some portions of this file were originally written by Nicolas Pitre
+ * Copyright:   (C) 2012  Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+
+#include <asm/mcpm.h>
+#include <asm/proc-fns.h>
+#include <asm/cacheflush.h>
+#include <asm/psci.h>
+#include <asm/atomic.h>
+#include <asm/cputype.h>
+#include <asm/cp15.h>
+
+#include <mach/motherboard.h>
+#include <mach/tc2.h>
+
+#include <linux/vexpress.h>
+
+/*
+ * Platform specific state id understood by the firmware and used to
+ * program the power controller
+ */
+#define PSCI_POWER_STATE_ID           0
+
+static atomic_t tc2_pm_use_count[TC2_MAX_CPUS][TC2_MAX_CLUSTERS];
+
+/* Power up @cpu in @cluster via psci_ops.cpu_on; 0 or an error code. */
+static int tc2_pm_psci_power_up(unsigned int cpu, unsigned int cluster)
+{
+	unsigned int mpidr = (cluster << 8) | cpu;
+	int ret = 0;
+
+	BUG_ON(!psci_ops.cpu_on);
+	if (cluster >= TC2_MAX_CLUSTERS || cpu >= TC2_MAX_CPUS)
+		return -EINVAL;	/* would index outside tc2_pm_use_count */
+	switch (atomic_inc_return(&tc2_pm_use_count[cpu][cluster])) {
+	case 1:
+		/*
+		 * Linux thinks this cpu is powered down.  Retry while the
+		 * firmware has not yet seen the power down request.
+		 */
+		do
+			ret = psci_ops.cpu_on(mpidr,
+					      virt_to_phys(mcpm_entry_point));
+		while (ret == -EAGAIN);
+		return ret;
+	case 2:
+		/* This power up request has overtaken a power down request */
+		return ret;
+	default:
+		/* Any other value is a bug */
+		BUG();
+	}
+}
+
+/* Power down the calling CPU via psci_ops.cpu_off unless a power up raced. */
+static void tc2_pm_psci_power_down(void)
+{
+	struct psci_power_state power_state;
+	unsigned int mpidr, cpu, cluster;
+
+	mpidr = read_cpuid_mpidr();
+	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+	BUG_ON(!psci_ops.cpu_off);
+	/* The affinity fields index tc2_pm_use_count: never run past it. */
+	BUG_ON(cluster >= TC2_MAX_CLUSTERS || cpu >= TC2_MAX_CPUS);
+
+	switch (atomic_dec_return(&tc2_pm_use_count[cpu][cluster])) {
+	case 1:
+		/*
+		 * Overtaken by a power up. Flush caches, exit coherency,
+		 * return & fake a reset
+		 */
+		set_cr(get_cr() & ~CR_C);
+		flush_cache_louis();
+		asm volatile ("clrex");
+		set_auxcr(get_auxcr() & ~(1 << 6));
+		return;
+	case 0:
+		/* A normal request to possibly power down the cluster */
+		power_state.id = PSCI_POWER_STATE_ID;
+		power_state.type = PSCI_POWER_STATE_TYPE_POWER_DOWN;
+		power_state.affinity_level = PSCI_POWER_STATE_AFFINITY_LEVEL1;
+
+		psci_ops.cpu_off(power_state);
+
+		/* cpu_off only returns on failure: fall through to BUG() */
+	default:
+		/* Any other value is a bug */
+		BUG();
+	}
+}
+
+/* Ask PSCI to suspend the calling CPU; BUG() if the call ever returns. */
+static void tc2_pm_psci_suspend(u64 unused)
+{
+	/* On TC2 always attempt to power down the cluster */
+	struct psci_power_state state = {
+		.id		= PSCI_POWER_STATE_ID,
+		.type		= PSCI_POWER_STATE_TYPE_POWER_DOWN,
+		.affinity_level	= PSCI_POWER_STATE_AFFINITY_LEVEL1,
+	};
+
+	BUG_ON(!psci_ops.cpu_suspend);
+	psci_ops.cpu_suspend(state, virt_to_phys(mcpm_entry_point));
+
+	/* On success this function never returns */
+	BUG();
+}
+
+static const struct mcpm_platform_ops tc2_pm_power_ops = {
+	.power_up      = tc2_pm_psci_power_up,		/* uses psci_ops.cpu_on */
+	.power_down    = tc2_pm_psci_power_down,	/* uses psci_ops.cpu_off */
+	.suspend       = tc2_pm_psci_suspend,		/* uses psci_ops.cpu_suspend */
+};
+
+/* Give the CPU executing this function an initial use count of 1. */
+static void __init tc2_pm_usage_count_init(void)
+{
+	unsigned int id = read_cpuid_mpidr();
+	unsigned int self = MPIDR_AFFINITY_LEVEL(id, 0);
+	unsigned int grp = MPIDR_AFFINITY_LEVEL(id, 1);
+
+	pr_debug("%s: cpu %u cluster %u\n", __func__, self, grp);
+
+	BUG_ON(grp >= TC2_MAX_CLUSTERS ||
+	       self >= vexpress_spc_get_nb_cpus(grp));
+
+	atomic_set(&tc2_pm_use_count[self][grp], 1);
+}
+
+/* Register the PSCI based TC2 MCPM ops; -ENODEV when PSCI is not found. */
+static int __init tc2_pm_psci_init(void)
+{
+	int err;
+
+	if (psci_probe() != 0) {
+		pr_debug("psci not found. Aborting psci init\n");
+		return -ENODEV;
+	}
+	tc2_pm_usage_count_init();
+
+	err = mcpm_platform_register(&tc2_pm_power_ops);
+	if (!err)
+		err = mcpm_sync_init(NULL);
+	if (err)
+		return err;
+	pr_info("TC2 power management initialized\n");
+	return 0;
+}
+
+early_initcall(tc2_pm_psci_init);
diff --git a/arch/arm/mach-vexpress/tc2_pm_setup.S b/arch/arm/mach-vexpress/tc2_pm_setup.S
new file mode 100644
index 000000000000..4728f83731a4
--- /dev/null
+++ b/arch/arm/mach-vexpress/tc2_pm_setup.S
@@ -0,0 +1,102 @@
+/*
+ * arch/arm/mach-vexpress/tc2_pm_setup.S
+ *
+ * Created by: Nicolas Pitre, October 2012
+ *             (based on dcscb_setup.S by Dave Martin)
+ * Copyright:  (C) 2012  Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+
+#include <linux/linkage.h>
+#include <asm/mcpm.h>
+
+
+#define SPC_PHYS_BASE		0x7FFF0000
+#define SPC_WAKE_INT_STAT	0xb2c
+
+#define SNOOP_CTL_A15		0x404
+#define SNOOP_CTL_A7		0x504
+
+#define A15_SNOOP_MASK		(0x3 << 7)
+#define A7_SNOOP_MASK		(0x1 << 13)
+
+#define A15_BX_ADDR0		0xB68
+
+
+#define CCI_PHYS_BASE		0x2c090000
+
+#define SLAVE_SNOOPCTL_OFFSET	0
+#define SNOOPCTL_SNOOP_ENABLE	(1 << 0)
+#define SNOOPCTL_DVM_ENABLE	(1 << 1)
+
+#define CCI_STATUS_OFFSET	0xc
+#define STATUS_CHANGE_PENDING	(1 << 0)
+
+#define CCI_SLAVE_OFFSET(n)	(0x1000 + 0x1000 * (n))
+#define CCI_SLAVE_A15		3
+#define CCI_SLAVE_A7		4
+#define CCI_A15_OFFSET		CCI_SLAVE_OFFSET(CCI_SLAVE_A15)
+#define CCI_A7_OFFSET		CCI_SLAVE_OFFSET(CCI_SLAVE_A7)
+
+
+ENTRY(tc2_resume)
+	mrc	p15, 0, r0, c0, c0, 5	@ MPIDR
+	ubfx	r1, r0, #0, #4		@ r1 = cpu
+	ubfx	r2, r0, #8, #4		@ r2 = cluster
+	add	r1, r1, r2, lsl #2	@ r1 = index of CPU in WAKE_INT_STAT
+	ldr	r3, =SPC_PHYS_BASE + SPC_WAKE_INT_STAT
+	ldr	r3, [r3]		@ r3 = WAKE_INT_STAT value
+	lsr	r3, r1			@ move this CPU's bit down to bit 0
+	tst	r3, #1
+	wfieq				@ if no pending IRQ reenters wfi
+	b	mcpm_entry_point
+ENDPROC(tc2_resume)
+
+/*
+ * Enable cluster-level coherency, in preparation for turning on the MMU.
+ * The ACTLR SMP bit does not need to be set here, because cpu_resume()
+ * already restores that.
+ */
+
+ENTRY(tc2_pm_power_up_setup)
+	@ r0 = affinity level: 0 clears the BX addr at 2:, else enables CCI snoops
+	cmp	r0, #0			@ check affinity level
+	beq	2f
+
+	@ Enable CCI snoops
+	mrc	p15, 0, r0, c0, c0, 5	@ MPIDR
+	ubfx	r0, r0, #8, #4		@ cluster
+	ldr	r3, =CCI_PHYS_BASE + CCI_A15_OFFSET
+	cmp	r0, #0		@ A15 cluster?
+	addne	r3, r3, #CCI_A7_OFFSET - CCI_A15_OFFSET
+
+	@ r3 now points to the correct CCI slave register block
+	ldr	r0, [r3, #SLAVE_SNOOPCTL_OFFSET]
+	orr	r0, r0, #SNOOPCTL_SNOOP_ENABLE | SNOOPCTL_DVM_ENABLE
+	str	r0, [r3, #SLAVE_SNOOPCTL_OFFSET]	@ enable CCI snoops
+
+	@ Wait for snoop control change to complete:
+	ldr	r3, =CCI_PHYS_BASE
+1:	ldr	r0, [r3, #CCI_STATUS_OFFSET]
+	tst	r0, #STATUS_CHANGE_PENDING
+	bne	1b
+	dsb		@ Synchronise side-effects of enabling CCI
+	bx	lr
+
+2:	@ Clear the BX addr register
+	ldr	r3, =SPC_PHYS_BASE + A15_BX_ADDR0
+	mrc	p15, 0, r0, c0, c0, 5	@ MPIDR
+	ubfx	r1, r0, #8, #4		@ cluster
+	ubfx	r0, r0, #0, #4		@ cpu
+	add	r3, r3, r1, lsl #4
+	mov	r1, #0
+	str	r1, [r3, r0, lsl #2]
+	dsb
+
+	bx	lr
+
+ENDPROC(tc2_pm_power_up_setup)
diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c
index 011661a6c5cb..772b7a179dde 100644
--- a/arch/arm/mach-vexpress/v2m.c
+++ b/arch/arm/mach-vexpress/v2m.c
@@ -7,6 +7,8 @@
 #include <linux/io.h>
 #include <linux/smp.h>
 #include <linux/init.h>
+#include <linux/irqchip.h>
+#include <linux/memblock.h>
 #include <linux/of_address.h>
 #include <linux/of_fdt.h>
 #include <linux/of_irq.h>
@@ -30,7 +32,6 @@
 #include <asm/mach/time.h>
 #include <asm/hardware/arm_timer.h>
 #include <asm/hardware/cache-l2x0.h>
-#include <asm/hardware/gic.h>
 #include <asm/hardware/timer-sp.h>
 
 #include <mach/ct-ca9x4.h>
@@ -291,10 +292,6 @@ static void __init v2m_timer_init(void)
 	v2m_sp804_init(ioremap(V2M_TIMER01, SZ_4K), IRQ_V2M_TIMER0);
 }
 
-static struct sys_timer v2m_timer = {
-	.init	= v2m_timer_init,
-};
-
 static void __init v2m_init_early(void)
 {
 	if (ct_desc->init_early)
@@ -376,12 +373,36 @@ MACHINE_START(VEXPRESS, "ARM-Versatile Express")
 	.map_io		= v2m_map_io,
 	.init_early	= v2m_init_early,
 	.init_irq	= v2m_init_irq,
-	.timer		= &v2m_timer,
-	.handle_irq	= gic_handle_irq,
+	.init_time	= v2m_timer_init,
 	.init_machine	= v2m_init,
 	.restart	= vexpress_restart,
 MACHINE_END
 
+/* Remove the HDLCD "framebuffer" region named in the DT from memblock. */
+static void __init v2m_dt_hdlcd_init(void)
+{
+	struct device_node *node;
+	int len, na, ns;
+	const __be32 *prop;
+	phys_addr_t fb_base, fb_size;
+
+	node = of_find_compatible_node(NULL, NULL, "arm,hdlcd");
+	if (!node)
+		return;
+
+	na = of_n_addr_cells(node);
+	ns = of_n_size_cells(node);
+
+	prop = of_get_property(node, "framebuffer", &len);
+	if (!WARN_ON(!prop || len < (na + ns) * sizeof(*prop))) {
+		fb_base = of_read_number(prop, na);
+		fb_size = of_read_number(prop + na, ns);
+		WARN_ON(memblock_remove(fb_base, fb_size));
+	}
+
+	of_node_put(node);	/* balance of_find_compatible_node() */
+}
+
 static struct map_desc v2m_rs1_io_desc __initdata = {
 	.virtual	= V2M_PERIPH,
 	.pfn		= __phys_to_pfn(0x1c000000),
@@ -432,16 +453,8 @@ void __init v2m_dt_init_early(void)
 			pr_warning("vexpress: DT HBI (%x) is not matching "
 					"hardware (%x)!\n", dt_hbi, hbi);
 	}
-}
-
-static  struct of_device_id vexpress_irq_match[] __initdata = {
-	{ .compatible = "arm,cortex-a9-gic", .data = gic_of_init, },
-	{}
-};
 
-static void __init v2m_dt_init_irq(void)
-{
-	of_irq_init(vexpress_irq_match);
+	v2m_dt_hdlcd_init();
 }
 
 static void __init v2m_dt_timer_init(void)
@@ -468,10 +481,6 @@ static void __init v2m_dt_timer_init(void)
 				24000000);
 }
 
-static struct sys_timer v2m_dt_timer = {
-	.init = v2m_dt_timer_init,
-};
-
 static const struct of_device_id v2m_dt_bus_match[] __initconst = {
 	{ .compatible = "simple-bus", },
 	{ .compatible = "arm,amba-bus", },
@@ -495,11 +504,11 @@ static const char * const v2m_dt_match[] __initconst = {
 DT_MACHINE_START(VEXPRESS_DT, "ARM-Versatile Express")
 	.dt_compat	= v2m_dt_match,
 	.smp		= smp_ops(vexpress_smp_ops),
+	.smp_init	= smp_init_ops(vexpress_smp_init_ops),
 	.map_io		= v2m_dt_map_io,
 	.init_early	= v2m_dt_init_early,
-	.init_irq	= v2m_dt_init_irq,
-	.timer		= &v2m_dt_timer,
+	.init_irq	= irqchip_init,
+	.init_time	= v2m_dt_timer_init,
 	.init_machine	= v2m_dt_init,
-	.handle_irq	= gic_handle_irq,
 	.restart	= vexpress_restart,
 MACHINE_END