7 files changed, 718 insertions, 6 deletions
diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
index dc8dd0de5c0..bd48ab52544 100644
--- a/arch/arm/common/Makefile
+++ b/arch/arm/common/Makefile
@@ -11,3 +11,5 @@ obj-$(CONFIG_SHARP_PARAM)	+= sharpsl_param.o
 obj-$(CONFIG_SHARP_SCOOP)	+= scoop.o
 obj-$(CONFIG_PCI_HOST_ITE8152)  += it8152.o
 obj-$(CONFIG_ARM_TIMER_SP804)	+= timer-sp.o
+obj-$(CONFIG_MCPM)		+= mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o
+CFLAGS_REMOVE_mcpm_entry.o	= -pg
diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c
new file mode 100644
index 00000000000..370236dd1a0
--- /dev/null
+++ b/arch/arm/common/mcpm_entry.c
@@ -0,0 +1,263 @@
+/*
+ * arch/arm/common/mcpm_entry.c -- entry point for multi-cluster PM
+ *
+ * Created by:  Nicolas Pitre, March 2012
+ * Copyright:   (C) 2012-2013  Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/irqflags.h>
+
+#include <asm/mcpm.h>
+#include <asm/cacheflush.h>
+#include <asm/idmap.h>
+#include <asm/cputype.h>
+
+extern unsigned long mcpm_entry_vectors[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER];
+
+void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr)
+{
+	unsigned long val = ptr ? virt_to_phys(ptr) : 0;
+	mcpm_entry_vectors[cluster][cpu] = val;
+	sync_cache_w(&mcpm_entry_vectors[cluster][cpu]);
+}
+
+static const struct mcpm_platform_ops *platform_ops;
+
+int __init mcpm_platform_register(const struct mcpm_platform_ops *ops)
+{
+	if (platform_ops)
+		return -EBUSY;
+	platform_ops = ops;
+	return 0;
+}
+
+int mcpm_cpu_power_up(unsigned int cpu, unsigned int cluster)
+{
+	if (!platform_ops)
+		return -EUNATCH; /* try not to shadow power_up errors */
+	might_sleep();
+	return platform_ops->power_up(cpu, cluster);
+}
+
+typedef void (*phys_reset_t)(unsigned long);
+
+void mcpm_cpu_power_down(void)
+{
+	phys_reset_t phys_reset;
+
+	BUG_ON(!platform_ops);
+	BUG_ON(!irqs_disabled());
+
+	/*
+	 * Do this before calling into the power_down method,
+	 * as it might not always be safe to do afterwards.
+	 */
+	setup_mm_for_reboot();
+
+	platform_ops->power_down();
+
+	/*
+	 * It is possible for a power_up request to happen concurrently
+	 * with a power_down request for the same CPU. In this case the
+	 * power_down method might not be able to actually enter a
+	 * powered down state with the WFI instruction if the power_up
+	 * method has removed the required reset condition.  The
+	 * power_down method is then allowed to return. We must perform
+	 * a re-entry in the kernel as if the power_up method just had
+	 * deasserted reset on the CPU.
+	 *
+	 * To simplify race issues, the platform specific implementation
+	 * must accommodate for the possibility of unordered calls to
+	 * power_down and power_up with a usage count. Therefore, if a
+	 * call to power_up is issued for a CPU that is not down, then
+	 * the next call to power_down must not attempt a full shutdown
+	 * but only do the minimum (normally disabling L1 cache and CPU
+	 * coherency) and return just as if a concurrent power_up request
+	 * had happened as described above.
+	 */
+
+	phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
+	phys_reset(virt_to_phys(mcpm_entry_point));
+
+	/* should never get here */
+	BUG();
+}
+
+void mcpm_cpu_suspend(u64 expected_residency)
+{
+	phys_reset_t phys_reset;
+
+	BUG_ON(!platform_ops);
+	BUG_ON(!irqs_disabled());
+
+	/* Very similar to mcpm_cpu_power_down() */
+	setup_mm_for_reboot();
+	platform_ops->suspend(expected_residency);
+	phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
+	phys_reset(virt_to_phys(mcpm_entry_point));
+	BUG();
+}
+
+int mcpm_cpu_powered_up(void)
+{
+	if (!platform_ops)
+		return -EUNATCH;
+	if (platform_ops->powered_up)
+		platform_ops->powered_up();
+	return 0;
+}
+
+struct sync_struct mcpm_sync;
+
+/*
+ * __mcpm_cpu_going_down: Indicates that the cpu is being torn down.
+ *    This must be called at the point of committing to teardown of a CPU.
+ *    The CPU cache (SCTRL.C bit) is expected to still be active.
+ */
+void __mcpm_cpu_going_down(unsigned int cpu, unsigned int cluster)
+{
+	mcpm_sync.clusters[cluster].cpus[cpu].cpu = CPU_GOING_DOWN;
+	sync_cache_w(&mcpm_sync.clusters[cluster].cpus[cpu].cpu);
+}
+
+/*
+ * __mcpm_cpu_down: Indicates that cpu teardown is complete and that the
+ *    cluster can be torn down without disrupting this CPU.
+ *    To avoid deadlocks, this must be called before a CPU is powered down.
+ *    The CPU cache (SCTRL.C bit) is expected to be off.
+ *    However L2 cache might or might not be active.
+ */
+void __mcpm_cpu_down(unsigned int cpu, unsigned int cluster)
+{
+	dmb();
+	mcpm_sync.clusters[cluster].cpus[cpu].cpu = CPU_DOWN;
+	sync_cache_w(&mcpm_sync.clusters[cluster].cpus[cpu].cpu);
+	dsb_sev();
+}
+
+/*
+ * __mcpm_outbound_leave_critical: Leave the cluster teardown critical section.
+ * @state: the final state of the cluster:
+ *     CLUSTER_UP: no destructive teardown was done and the cluster has been
+ *         restored to the previous state (CPU cache still active); or
+ *     CLUSTER_DOWN: the cluster has been torn-down, ready for power-off
+ *         (CPU cache disabled, L2 cache either enabled or disabled).
+ */
+void __mcpm_outbound_leave_critical(unsigned int cluster, int state)
+{
+	dmb();
+	mcpm_sync.clusters[cluster].cluster = state;
+	sync_cache_w(&mcpm_sync.clusters[cluster].cluster);
+	dsb_sev();
+}
+
+/*
+ * __mcpm_outbound_enter_critical: Enter the cluster teardown critical section.
+ * This function should be called by the last man, after local CPU teardown
+ * is complete.  CPU cache expected to be active.
+ *
+ * Returns:
+ *     false: the critical section was not entered because an inbound CPU was
+ *         observed, or the cluster is already being set up;
+ *     true: the critical section was entered: it is now safe to tear down the
+ *         cluster.
+ */
+bool __mcpm_outbound_enter_critical(unsigned int cpu, unsigned int cluster)
+{
+	unsigned int i;
+	struct mcpm_sync_struct *c = &mcpm_sync.clusters[cluster];
+
+	/* Warn inbound CPUs that the cluster is being torn down: */
+	c->cluster = CLUSTER_GOING_DOWN;
+	sync_cache_w(&c->cluster);
+
+	/* Back out if the inbound cluster is already in the critical region: */
+	sync_cache_r(&c->inbound);
+	if (c->inbound == INBOUND_COMING_UP)
+		goto abort;
+
+	/*
+	 * Wait for all CPUs to get out of the GOING_DOWN state, so that local
+	 * teardown is complete on each CPU before tearing down the cluster.
+	 *
+	 * If any CPU has been woken up again from the DOWN state, then we
+	 * shouldn't be taking the cluster down at all: abort in that case.
+	 */
+	sync_cache_r(&c->cpus);
+	for (i = 0; i < MAX_CPUS_PER_CLUSTER; i++) {
+		int cpustate;
+
+		if (i == cpu)
+			continue;
+
+		while (1) {
+			cpustate = c->cpus[i].cpu;
+			if (cpustate != CPU_GOING_DOWN)
+				break;
+
+			wfe();
+			sync_cache_r(&c->cpus[i].cpu);
+		}
+
+		switch (cpustate) {
+		case CPU_DOWN:
+			continue;
+
+		default:
+			goto abort;
+		}
+	}
+
+	return true;
+
+abort:
+	__mcpm_outbound_leave_critical(cluster, CLUSTER_UP);
+	return false;
+}
+
+int __mcpm_cluster_state(unsigned int cluster)
+{
+	sync_cache_r(&mcpm_sync.clusters[cluster].cluster);
+	return mcpm_sync.clusters[cluster].cluster;
+}
+
+extern unsigned long mcpm_power_up_setup_phys;
+
+int __init mcpm_sync_init(
+	void (*power_up_setup)(unsigned int affinity_level))
+{
+	unsigned int i, j, mpidr, this_cluster;
+
+	BUILD_BUG_ON(MCPM_SYNC_CLUSTER_SIZE * MAX_NR_CLUSTERS != sizeof mcpm_sync);
+	BUG_ON((unsigned long)&mcpm_sync & (__CACHE_WRITEBACK_GRANULE - 1));
+
+	/*
+	 * Set initial CPU and cluster states.
+	 * Only one cluster is assumed to be active at this point.
+	 */
+	for (i = 0; i < MAX_NR_CLUSTERS; i++) {
+		mcpm_sync.clusters[i].cluster = CLUSTER_DOWN;
+		mcpm_sync.clusters[i].inbound = INBOUND_NOT_COMING_UP;
+		for (j = 0; j < MAX_CPUS_PER_CLUSTER; j++)
+			mcpm_sync.clusters[i].cpus[j].cpu = CPU_DOWN;
+	}
+	mpidr = read_cpuid_mpidr();
+	this_cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+	for_each_online_cpu(i)
+		mcpm_sync.clusters[this_cluster].cpus[i].cpu = CPU_UP;
+	mcpm_sync.clusters[this_cluster].cluster = CLUSTER_UP;
+	sync_cache_w(&mcpm_sync);
+
+	if (power_up_setup) {
+		mcpm_power_up_setup_phys = virt_to_phys(power_up_setup);
+		sync_cache_w(&mcpm_power_up_setup_phys);
+	}
+
+	return 0;
+}
diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S
new file mode 100644
index 00000000000..8178705c4b2
--- /dev/null
+++ b/arch/arm/common/mcpm_head.S
@@ -0,0 +1,219 @@
+/*
+ * arch/arm/common/mcpm_head.S -- kernel entry point for multi-cluster PM
+ *
+ * Created by:  Nicolas Pitre, March 2012
+ * Copyright:   (C) 2012-2013  Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *
+ * Refer to Documentation/arm/cluster-pm-race-avoidance.txt
+ * for details of the synchronisation algorithms used here.
+ */
+
+#include <linux/linkage.h>
+#include <asm/mcpm.h>
+
+#include "vlock.h"
+
+.if MCPM_SYNC_CLUSTER_CPUS
+.error "cpus must be the first member of struct mcpm_sync_struct"
+.endif
+
+	.macro	pr_dbg	string
+#if defined(CONFIG_DEBUG_LL) && defined(DEBUG)
+	b	1901f
+1902:	.asciz	"CPU"
+1903:	.asciz	" cluster"
+1904:	.asciz	": \string"
+	.align
+1901:	adr	r0, 1902b
+	bl	printascii
+	mov	r0, r9
+	bl	printhex8
+	adr	r0, 1903b
+	bl	printascii
+	mov	r0, r10
+	bl	printhex8
+	adr	r0, 1904b
+	bl	printascii
+#endif
+	.endm
+
+	.arm
+	.align
+
+ENTRY(mcpm_entry_point)
+
+ THUMB(	adr	r12, BSYM(1f)	)
+ THUMB(	bx	r12		)
+ THUMB(	.thumb			)
+1:
+	mrc	p15, 0, r0, c0, c0, 5		@ MPIDR
+	ubfx	r9, r0, #0, #8			@ r9 = cpu
+	ubfx	r10, r0, #8, #8			@ r10 = cluster
+	mov	r3, #MAX_CPUS_PER_CLUSTER
+	mla	r4, r3, r10, r9			@ r4 = canonical CPU index
+	cmp	r4, #(MAX_CPUS_PER_CLUSTER * MAX_NR_CLUSTERS)
+	blo	2f
+
+	/* We didn't expect this CPU.  Try to cheaply make it quiet. */
+1:	wfi
+	wfe
+	b	1b
+
+2:	pr_dbg	"kernel mcpm_entry_point\n"
+
+	/*
+	 * MMU is off so we need to get to various variables in a
+	 * position independent way.
+	 */
+	adr	r5, 3f
+	ldmia	r5, {r6, r7, r8, r11}
+	add	r6, r5, r6			@ r6 = mcpm_entry_vectors
+	ldr	r7, [r5, r7]			@ r7 = mcpm_power_up_setup_phys
+	add	r8, r5, r8			@ r8 = mcpm_sync
+	add	r11, r5, r11			@ r11 = first_man_locks
+
+	mov	r0, #MCPM_SYNC_CLUSTER_SIZE
+	mla	r8, r0, r10, r8			@ r8 = sync cluster base
+
+	@ Signal that this CPU is coming UP:
+	mov	r0, #CPU_COMING_UP
+	mov	r5, #MCPM_SYNC_CPU_SIZE
+	mla	r5, r9, r5, r8			@ r5 = sync cpu address
+	strb	r0, [r5]
+
+	@ At this point, the cluster cannot unexpectedly enter the GOING_DOWN
+	@ state, because there is at least one active CPU (this CPU).
+
+	mov	r0, #VLOCK_SIZE
+	mla	r11, r0, r10, r11		@ r11 = cluster first man lock
+	mov	r0, r11
+	mov	r1, r9				@ cpu
+	bl	vlock_trylock			@ implies DMB
+
+	cmp	r0, #0				@ failed to get the lock?
+	bne	mcpm_setup_wait		@ wait for cluster setup if so
+
+	ldrb	r0, [r8, #MCPM_SYNC_CLUSTER_CLUSTER]
+	cmp	r0, #CLUSTER_UP			@ cluster already up?
+	bne	mcpm_setup			@ if not, set up the cluster
+
+	@ Otherwise, release the first man lock and skip setup:
+	mov	r0, r11
+	bl	vlock_unlock
+	b	mcpm_setup_complete
+
+mcpm_setup:
+	@ Control dependency implies strb not observable before previous ldrb.
+
+	@ Signal that the cluster is being brought up:
+	mov	r0, #INBOUND_COMING_UP
+	strb	r0, [r8, #MCPM_SYNC_CLUSTER_INBOUND]
+	dmb
+
+	@ Any CPU trying to take the cluster into CLUSTER_GOING_DOWN from this
+	@ point onwards will observe INBOUND_COMING_UP and abort.
+
+	@ Wait for any previously-pending cluster teardown operations to abort
+	@ or complete:
+mcpm_teardown_wait:
+	ldrb	r0, [r8, #MCPM_SYNC_CLUSTER_CLUSTER]
+	cmp	r0, #CLUSTER_GOING_DOWN
+	bne	first_man_setup
+	wfe
+	b	mcpm_teardown_wait
+
+first_man_setup:
+	dmb
+
+	@ If the outbound gave up before teardown started, skip cluster setup:
+
+	cmp	r0, #CLUSTER_UP
+	beq	mcpm_setup_leave
+
+	@ power_up_setup is now responsible for setting up the cluster:
+
+	cmp	r7, #0
+	mov	r0, #1		@ second (cluster) affinity level
+	blxne	r7		@ Call power_up_setup if defined
+	dmb
+
+	mov	r0, #CLUSTER_UP
+	strb	r0, [r8, #MCPM_SYNC_CLUSTER_CLUSTER]
+	dmb
+
+mcpm_setup_leave:
+	@ Leave the cluster setup critical section:
+
+	mov	r0, #INBOUND_NOT_COMING_UP
+	strb	r0, [r8, #MCPM_SYNC_CLUSTER_INBOUND]
+	dsb
+	sev
+
+	mov	r0, r11
+	bl	vlock_unlock	@ implies DMB
+	b	mcpm_setup_complete
+
+	@ In the contended case, non-first men wait here for cluster setup
+	@ to complete:
+mcpm_setup_wait:
+	ldrb	r0, [r8, #MCPM_SYNC_CLUSTER_CLUSTER]
+	cmp	r0, #CLUSTER_UP
+	wfene
+	bne	mcpm_setup_wait
+	dmb
+
+mcpm_setup_complete:
+	@ If a platform-specific CPU setup hook is needed, it is
+	@ called from here.
+
+	cmp	r7, #0
+	mov	r0, #0		@ first (CPU) affinity level
+	blxne	r7		@ Call power_up_setup if defined
+	dmb
+
+	@ Mark the CPU as up:
+
+	mov	r0, #CPU_UP
+	strb	r0, [r5]
+
+	@ Observability order of CPU_UP and opening of the gate does not matter.
+
+mcpm_entry_gated:
+	ldr	r5, [r6, r4, lsl #2]		@ r5 = CPU entry vector
+	cmp	r5, #0
+	wfeeq
+	beq	mcpm_entry_gated
+	dmb
+
+	pr_dbg	"released\n"
+	bx	r5
+
+	.align	2
+
+3:	.word	mcpm_entry_vectors - .
+	.word	mcpm_power_up_setup_phys - 3b
+	.word	mcpm_sync - 3b
+	.word	first_man_locks - 3b
+
+ENDPROC(mcpm_entry_point)
+
+	.bss
+
+	.align	CACHE_WRITEBACK_ORDER
+	.type	first_man_locks, #object
+first_man_locks:
+	.space	VLOCK_SIZE * MAX_NR_CLUSTERS
+	.align	CACHE_WRITEBACK_ORDER
+
+	.type	mcpm_entry_vectors, #object
+ENTRY(mcpm_entry_vectors)
+	.space	4 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER
+
+	.type	mcpm_power_up_setup_phys, #object
+ENTRY(mcpm_power_up_setup_phys)
+	.space  4		@ set by mcpm_sync_init()
diff --git a/arch/arm/common/mcpm_platsmp.c b/arch/arm/common/mcpm_platsmp.c
new file mode 100644
index 00000000000..3caed0db698
--- /dev/null
+++ b/arch/arm/common/mcpm_platsmp.c
@@ -0,0 +1,89 @@
+/*
+ * linux/arch/arm/mach-vexpress/mcpm_platsmp.c
+ *
+ * Created by:  Nicolas Pitre, November 2012
+ * Copyright:   (C) 2012-2013  Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Code to handle secondary CPU bringup and hotplug for the cluster power API.
+ */
+
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+
+#include <asm/mcpm.h>
+#include <asm/smp.h>
+#include <asm/smp_plat.h>
+
+static void __init simple_smp_init_cpus(void)
+{
+}
+
+static int __cpuinit mcpm_boot_secondary(unsigned int cpu, struct task_struct *idle)
+{
+	unsigned int mpidr, pcpu, pcluster, ret;
+	extern void secondary_startup(void);
+
+	mpidr = cpu_logical_map(cpu);
+	pcpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	pcluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+	pr_debug("%s: logical CPU %d is physical CPU %d cluster %d\n",
+		 __func__, cpu, pcpu, pcluster);
+
+	mcpm_set_entry_vector(pcpu, pcluster, NULL);
+	ret = mcpm_cpu_power_up(pcpu, pcluster);
+	if (ret)
+		return ret;
+	mcpm_set_entry_vector(pcpu, pcluster, secondary_startup);
+	arch_send_wakeup_ipi_mask(cpumask_of(cpu));
+	dsb_sev();
+	return 0;
+}
+
+static void __cpuinit mcpm_secondary_init(unsigned int cpu)
+{
+	mcpm_cpu_powered_up();
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+static int mcpm_cpu_disable(unsigned int cpu)
+{
+	/*
+	 * We assume all CPUs may be shut down.
+	 * This would be the hook to use for eventual Secure
+	 * OS migration requests as described in the PSCI spec.
+	 */
+	return 0;
+}
+
+static void mcpm_cpu_die(unsigned int cpu)
+{
+	unsigned int mpidr, pcpu, pcluster;
+	mpidr = read_cpuid_mpidr();
+	pcpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	pcluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+	mcpm_set_entry_vector(pcpu, pcluster, NULL);
+	mcpm_cpu_power_down();
+}
+
+#endif
+
+static struct smp_operations __initdata mcpm_smp_ops = {
+	.smp_init_cpus		= simple_smp_init_cpus,
+	.smp_boot_secondary	= mcpm_boot_secondary,
+	.smp_secondary_init	= mcpm_secondary_init,
+#ifdef CONFIG_HOTPLUG_CPU
+	.cpu_disable		= mcpm_cpu_disable,
+	.cpu_die		= mcpm_cpu_die,
+#endif
+};
+
+void __init mcpm_smp_set_ops(void)
+{
+	smp_set_ops(&mcpm_smp_ops);
+}
diff --git a/arch/arm/common/timer-sp.c b/arch/arm/common/timer-sp.c
index 9d2d3ba339f..2c64c4bc6d5 100644
--- a/arch/arm/common/timer-sp.c
+++ b/arch/arm/common/timer-sp.c
@@ -29,13 +29,14 @@
 #include <asm/sched_clock.h>
 #include <asm/hardware/arm_timer.h>
 
-static long __init sp804_get_clock_rate(const char *name)
+static long __init sp804_get_clock_rate(struct clk *clk,
+		const char *name)
 {
-	struct clk *clk;
 	long rate;
 	int err;
 
-	clk = clk_get_sys("sp804", name);
+	if (!clk)
+		clk = clk_get_sys("sp804", name);
 	if (IS_ERR(clk)) {
 		pr_err("sp804: %s clock not found: %d\n", name,
 			(int)PTR_ERR(clk));
@@ -77,9 +78,10 @@ static u32 sp804_read(void)
 
 void __init __sp804_clocksource_and_sched_clock_init(void __iomem *base,
 						     const char *name,
+						     struct clk *clk,
 						     int use_sched_clock)
 {
-	long rate = sp804_get_clock_rate(name);
+	long rate = sp804_get_clock_rate(clk, name);
 
 	if (rate < 0)
 		return;
@@ -172,10 +174,10 @@ static struct irqaction sp804_timer_irq = {
 };
 
 void __init sp804_clockevents_init(void __iomem *base, unsigned int irq,
-	const char *name)
+	const char *name, struct clk *clk)
 {
 	struct clock_event_device *evt = &sp804_clockevent;
-	long rate = sp804_get_clock_rate(name);
+	long rate = sp804_get_clock_rate(clk, name);
 
 	if (rate < 0)
 		return;
diff --git a/arch/arm/common/vlock.S b/arch/arm/common/vlock.S
new file mode 100644
index 00000000000..ff198583f68
--- /dev/null
+++ b/arch/arm/common/vlock.S
@@ -0,0 +1,108 @@
+/*
+ * vlock.S - simple voting lock implementation for ARM
+ *
+ * Created by:	Dave Martin, 2012-08-16
+ * Copyright:	(C) 2012-2013  Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ *
+ * This algorithm is described in more detail in
+ * Documentation/arm/vlocks.txt.
+ */
+
+#include <linux/linkage.h>
+#include "vlock.h"
+
+/* Select different code if voting flags  can fit in a single word. */
+#if VLOCK_VOTING_SIZE > 4
+#define FEW(x...)
+#define MANY(x...) x
+#else
+#define FEW(x...) x
+#define MANY(x...)
+#endif
+
+@ voting lock for first-man coordination
+
+.macro voting_begin rbase:req, rcpu:req, rscratch:req
+	mov	\rscratch, #1
+	strb	\rscratch, [\rbase, \rcpu]
+	dmb
+.endm
+
+.macro voting_end rbase:req, rcpu:req, rscratch:req
+	dmb
+	mov	\rscratch, #0
+	strb	\rscratch, [\rbase, \rcpu]
+	dsb
+	sev
+.endm
+
+/*
+ * The vlock structure must reside in Strongly-Ordered or Device memory.
+ * This implementation deliberately eliminates most of the barriers which
+ * would be required for other memory types, and assumes that independent
+ * writes to neighbouring locations within a cacheline do not interfere
+ * with one another.
+ */
+
+@ r0: lock structure base
+@ r1: CPU ID (0-based index within cluster)
+ENTRY(vlock_trylock)
+	add	r1, r1, #VLOCK_VOTING_OFFSET
+
+	voting_begin	r0, r1, r2
+
+	ldrb	r2, [r0, #VLOCK_OWNER_OFFSET]	@ check whether lock is held
+	cmp	r2, #VLOCK_OWNER_NONE
+	bne	trylock_fail			@ fail if so
+
+	@ Control dependency implies strb not observable before previous ldrb.
+
+	strb	r1, [r0, #VLOCK_OWNER_OFFSET]	@ submit my vote
+
+	voting_end	r0, r1, r2		@ implies DMB
+
+	@ Wait for the current round of voting to finish:
+
+ MANY(	mov	r3, #VLOCK_VOTING_OFFSET			)
+0:
+ MANY(	ldr	r2, [r0, r3]					)
+ FEW(	ldr	r2, [r0, #VLOCK_VOTING_OFFSET]			)
+	cmp	r2, #0
+	wfene
+	bne	0b
+ MANY(	add	r3, r3, #4					)
+ MANY(	cmp	r3, #VLOCK_VOTING_OFFSET + VLOCK_VOTING_SIZE	)
+ MANY(	bne	0b						)
+
+	@ Check who won:
+
+	dmb
+	ldrb	r2, [r0, #VLOCK_OWNER_OFFSET]
+	eor	r0, r1, r2			@ zero if I won, else nonzero
+	bx	lr
+
+trylock_fail:
+	voting_end	r0, r1, r2
+	mov	r0, #1				@ nonzero indicates that I lost
+	bx	lr
+ENDPROC(vlock_trylock)
+
+@ r0: lock structure base
+ENTRY(vlock_unlock)
+	dmb
+	mov	r1, #VLOCK_OWNER_NONE
+	strb	r1, [r0, #VLOCK_OWNER_OFFSET]
+	dsb
+	sev
+	bx	lr
+ENDPROC(vlock_unlock)
diff --git a/arch/arm/common/vlock.h b/arch/arm/common/vlock.h
new file mode 100644
index 00000000000..3b441475a59
--- /dev/null
+++ b/arch/arm/common/vlock.h
@@ -0,0 +1,29 @@
+/*
+ * vlock.h - simple voting lock implementation
+ *
+ * Created by:	Dave Martin, 2012-08-16
+ * Copyright:	(C) 2012-2013  Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __VLOCK_H
+#define __VLOCK_H
+
+#include <asm/mcpm.h>
+
+/* Offsets and sizes are rounded to a word (4 bytes) */
+#define VLOCK_OWNER_OFFSET	0
+#define VLOCK_VOTING_OFFSET	4
+#define VLOCK_VOTING_SIZE	((MAX_CPUS_PER_CLUSTER + 3) / 4 * 4)
+#define VLOCK_SIZE		(VLOCK_VOTING_OFFSET + VLOCK_VOTING_SIZE)
+#define VLOCK_OWNER_NONE	0
+
+#endif /* ! __VLOCK_H */