Automatically merging tracking-iks into merge-manifest

Conflicting files:
author: Mark Brown <broonie@sirena.org.uk> 2013-06-14 12:08:31 +0100
committer: Mark Brown <broonie@sirena.org.uk> 2013-06-14 12:08:31 +0100
commit: d3c2ad71e5f3fcffa55ba9eeb8121b3f179ae249 (patch)
tree: 27229267b3816cb00660fa5a7c57604de8c2a3e5
parent: bdc04bac744054924dd5017e17fbcfdd87fda292 (diff)
parent: 2be12549c900303c418fcbb7044979e190fd4e7e (diff)
45 files changed, 1866 insertions, 433 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index b67e45a98306..49edb6bea6b6 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1706,6 +1706,24 @@ config BIG_LITTLE
 	help
 	  This option enables support for the big.LITTLE architecture.
 
+config BL_SWITCHER
+	bool "big.LITTLE switcher support"
+	depends on BIG_LITTLE && MCPM && HOTPLUG_CPU
+	select CPU_PM
+	select ARM_CPU_SUSPEND
+	help
+	  The big.LITTLE "switcher" provides the core functionality to
+	  transparently handle transition between a cluster of A15's
+	  and a cluster of A7's in a big.LITTLE system.
+
+config BL_SWITCHER_DUMMY_IF
+	tristate "Simple big.LITTLE switcher user interface"
+	depends on BL_SWITCHER && DEBUG_KERNEL
+	help
+	  This is a simple and dummy char dev interface to control
+	  the big.LITTLE switcher core code.  It is meant for
+	  debugging purposes only.
+
 choice
 	prompt "Memory split"
 	default VMSPLIT_3G
@@ -1793,8 +1811,9 @@ config SCHED_HRTICK
 	def_bool HIGH_RES_TIMERS
 
 config THUMB2_KERNEL
-	bool "Compile the kernel in Thumb-2 mode"
+	bool "Compile the kernel in Thumb-2 mode" if !CPU_THUMBONLY
 	depends on CPU_V7 && !CPU_V6 && !CPU_V6K
+	default y if CPU_THUMBONLY
 	select AEABI
 	select ARM_ASM_UNIFIED
 	select ARM_UNWIND
diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
index bd48ab525443..d412ff0b15a5 100644
--- a/arch/arm/common/Makefile
+++ b/arch/arm/common/Makefile
@@ -13,3 +13,7 @@ obj-$(CONFIG_PCI_HOST_ITE8152)  += it8152.o
 obj-$(CONFIG_ARM_TIMER_SP804)	+= timer-sp.o
 obj-$(CONFIG_MCPM)		+= mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o
 CFLAGS_REMOVE_mcpm_entry.o	= -pg
+AFLAGS_mcpm_head.o		:= -march=armv7-a
+AFLAGS_vlock.o			:= -march=armv7-a
+obj-$(CONFIG_BL_SWITCHER)	+= bL_switcher.o
+obj-$(CONFIG_BL_SWITCHER_DUMMY_IF) += bL_switcher_dummy_if.o
diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
new file mode 100644
index 000000000000..1506bf536d1f
--- /dev/null
+++ b/arch/arm/common/bL_switcher.c
@@ -0,0 +1,732 @@
+/*
+ * arch/arm/common/bL_switcher.c -- big.LITTLE cluster switcher core driver
+ *
+ * Created by:	Nicolas Pitre, March 2012
+ * Copyright:	(C) 2012  Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/cpu_pm.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/kthread.h>
+#include <linux/wait.h>
+#include <linux/time.h>
+#include <linux/clockchips.h>
+#include <linux/hrtimer.h>
+#include <linux/tick.h>
+#include <linux/notifier.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/smp.h>
+#include <linux/string.h>
+#include <linux/sysfs.h>
+#include <linux/irqchip/arm-gic.h>
+#include <linux/moduleparam.h>
+
+#include <asm/smp_plat.h>
+#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <asm/suspend.h>
+#include <asm/mcpm.h>
+#include <asm/bL_switcher.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/power_cpu_migrate.h>
+
+
+/*
+ * Use our own MPIDR accessors as the generic ones in asm/cputype.h have
+ * __attribute_const__ and we don't want the compiler to assume any
+ * constness here.
+ */
+
+static int read_mpidr(void)
+{
+	unsigned int id;
+	asm volatile ("mrc\tp15, 0, %0, c0, c0, 5" : "=r" (id));
+	return id;
+}
+
+/*
+ * Get a global nanosecond time stamp for tracing.
+ */
+static s64 get_ns(void)
+{
+	struct timespec ts;
+	getnstimeofday(&ts);
+	return timespec_to_ns(&ts);
+}
+
+/*
+ * bL switcher core code.
+ */
+
+static void bL_do_switch(void *_arg)
+{
+	unsigned mpidr, cpuid, clusterid, ob_cluster, ib_cluster;
+	long volatile handshake, **handshake_ptr = _arg;
+
+	pr_debug("%s\n", __func__);
+
+	mpidr = read_mpidr();
+	cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+	ob_cluster = clusterid;
+	ib_cluster = clusterid ^ 1;
+
+	/* Advertise our handshake location */
+	if (handshake_ptr) {
+		handshake = 0;
+		*handshake_ptr = &handshake;
+	} else
+		handshake = -1;
+
+	/*
+	 * Our state has been saved at this point.  Let's release our
+	 * inbound CPU.
+	 */
+	mcpm_set_entry_vector(cpuid, ib_cluster, cpu_resume);
+	sev();
+
+	/*
+	 * From this point, we must assume that our counterpart CPU might
+	 * have taken over in its parallel world already, as if execution
+	 * just returned from cpu_suspend().  It is therefore important to
+	 * be very careful not to make any change the other guy is not
+	 * expecting.  This is why we need stack isolation.
+	 *
+	 * Fancy under cover tasks could be performed here.  For now
+	 * we have none.
+	 */
+
+	/*
+	 * Let's wait until our inbound is alive.
+	 */
+	while (!handshake) {
+		wfe();
+		smp_mb();
+	}
+
+	/* Let's put ourself down. */
+	mcpm_cpu_power_down();
+
+	/* should never get here */
+	BUG();
+}
+
+/*
+ * Stack isolation.  To ensure 'current' remains valid, we just use another
+ * piece of our thread's stack space which should be fairly lightly used.
+ * The selected area starts just above the thread_info structure located
+ * at the very bottom of the stack, aligned to a cache line, and indexed
+ * with the cluster number.
+ */
+#define STACK_SIZE 512
+extern void call_with_stack(void (*fn)(void *), void *arg, void *sp);
+static int bL_switchpoint(unsigned long _arg)
+{
+	unsigned int mpidr = read_mpidr();
+	unsigned int clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+	void *stack = current_thread_info() + 1;
+	stack = PTR_ALIGN(stack, L1_CACHE_BYTES);
+	stack += clusterid * STACK_SIZE + STACK_SIZE;
+	call_with_stack(bL_do_switch, (void *)_arg, stack);
+	BUG();
+}
+
+/*
+ * Generic switcher interface
+ */
+
+static unsigned int bL_gic_id[MAX_CPUS_PER_CLUSTER][MAX_NR_CLUSTERS];
+
+/*
+ * bL_switch_to - Switch to a specific cluster for the current CPU
+ * @new_cluster_id: the ID of the cluster to switch to.
+ *
+ * This function must be called on the CPU to be switched.
+ * Returns 0 on success, else a negative status code.
+ */
+static int bL_switch_to(unsigned int new_cluster_id)
+{
+	unsigned int mpidr, cpuid, clusterid, ob_cluster, ib_cluster, this_cpu;
+	struct completion inbound_alive;
+	struct tick_device *tdev;
+	enum clock_event_mode tdev_mode;
+	long volatile *handshake_ptr;
+	int ipi_nr, ret;
+
+	mpidr = read_mpidr();
+	cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+	ob_cluster = clusterid;
+	ib_cluster = clusterid ^ 1;
+
+	if (new_cluster_id == clusterid)
+		return 0;
+
+	pr_debug("before switch: CPU %d in cluster %d\n", cpuid, clusterid);
+
+	this_cpu = smp_processor_id();
+
+	/* Close the gate for our entry vectors */
+	mcpm_set_entry_vector(cpuid, ob_cluster, NULL);
+	mcpm_set_entry_vector(cpuid, ib_cluster, NULL);
+
+	/* Install our "inbound alive" notifier. */
+	init_completion(&inbound_alive);
+	ipi_nr = register_ipi_completion(&inbound_alive, this_cpu);
+	ipi_nr |= ((1 << 16) << bL_gic_id[cpuid][ob_cluster]);
+	mcpm_set_early_poke(cpuid, ib_cluster, gic_get_sgir_physaddr(), ipi_nr);
+
+	/*
+	 * Let's wake up the inbound CPU now in case it requires some delay
+	 * to come online, but leave it gated in our entry vector code.
+	 */
+	ret = mcpm_cpu_power_up(cpuid, ib_cluster);
+	if (ret) {
+		pr_err("%s: mcpm_cpu_power_up() returned %d\n", __func__, ret);
+		return ret;
+	}
+
+	/*
+	 * Raise a SGI on the inbound CPU to make sure it doesn't stall
+	 * in a possible WFI, such as in bL_power_down().
+	 */
+	gic_send_sgi(bL_gic_id[cpuid][ib_cluster], 0);
+
+	/*
+	 * Wait for the inbound to come up.  This allows for other
+	 * tasks to be scheduled in the mean time.
+	 */
+	wait_for_completion(&inbound_alive);
+	mcpm_set_early_poke(cpuid, ib_cluster, 0, 0);
+
+	/*
+	 * From this point we are entering the switch critical zone
+	 * and can't sleep/schedule anymore.
+	 */
+	local_irq_disable();
+	local_fiq_disable();
+	trace_cpu_migrate_begin(get_ns(), mpidr & MPIDR_HWID_BITMASK);
+
+	/* redirect GIC's SGIs to our counterpart */
+	gic_migrate_target(bL_gic_id[cpuid][ib_cluster]);
+
+	tdev = tick_get_device(this_cpu);
+	if (tdev && !cpumask_equal(tdev->evtdev->cpumask, cpumask_of(this_cpu)))
+		tdev = NULL;
+	if (tdev) {
+		tdev_mode = tdev->evtdev->mode;
+		clockevents_set_mode(tdev->evtdev, CLOCK_EVT_MODE_SHUTDOWN);
+	}
+
+	ret = cpu_pm_enter();
+
+	/* we can not tolerate errors at this point */
+	if (ret)
+		panic("%s: cpu_pm_enter() returned %d\n", __func__, ret);
+
+	/*
+	 * Flip the cluster in the CPU logical map for this CPU.
+	 * This must be flushed to RAM as the resume code
+	 * needs to access it while the caches are still disabled.
+	 */
+	cpu_logical_map(this_cpu) ^= (1 << 8);
+	__cpuc_flush_dcache_area(&cpu_logical_map(this_cpu),
+				 sizeof(cpu_logical_map(this_cpu)));
+
+	/* Let's do the actual CPU switch. */
+	ret = cpu_suspend((unsigned long)&handshake_ptr, bL_switchpoint);
+	if (ret > 0)
+		panic("%s: cpu_suspend() returned %d\n", __func__, ret);
+
+	/* We are executing on the inbound CPU at this point */
+	mpidr = read_mpidr();
+	cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+	pr_debug("after switch: CPU %d in cluster %d\n", cpuid, clusterid);
+	BUG_ON(clusterid != ib_cluster);
+
+	mcpm_cpu_powered_up();
+
+	ret = cpu_pm_exit();
+
+	if (tdev) {
+		clockevents_set_mode(tdev->evtdev, tdev_mode);
+		clockevents_program_event(tdev->evtdev,
+					  tdev->evtdev->next_event, 1);
+	}
+
+	trace_cpu_migrate_finish(get_ns(), mpidr & MPIDR_HWID_BITMASK);
+	local_fiq_enable();
+	local_irq_enable();
+
+	*handshake_ptr = 1;
+	dsb_sev();
+
+	if (ret)
+		pr_err("%s exiting with error %d\n", __func__, ret);
+	return ret;
+}
+
+struct bL_thread {
+	struct task_struct *task;
+	wait_queue_head_t wq;
+	int wanted_cluster;
+	struct completion started;
+};
+
+static struct bL_thread bL_threads[MAX_CPUS_PER_CLUSTER];
+
+static int bL_switcher_thread(void *arg)
+{
+	struct bL_thread *t = arg;
+	struct sched_param param = { .sched_priority = 1 };
+	int cluster;
+
+	sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
+	complete(&t->started);
+
+	do {
+		if (signal_pending(current))
+			flush_signals(current);
+		wait_event_interruptible(t->wq,
+				t->wanted_cluster != -1 ||
+				kthread_should_stop());
+		cluster = xchg(&t->wanted_cluster, -1);
+		if (cluster != -1)
+			bL_switch_to(cluster);
+	} while (!kthread_should_stop());
+
+	return 0;
+}
+
+static struct task_struct * bL_switcher_thread_create(int cpu, void *arg)
+{
+	struct task_struct *task;
+
+	task = kthread_create_on_node(bL_switcher_thread, arg,
+				      cpu_to_node(cpu), "kswitcher_%d", cpu);
+	if (!IS_ERR(task)) {
+		kthread_bind(task, cpu);
+		wake_up_process(task);
+	} else
+		pr_err("%s failed for CPU %d\n", __func__, cpu);
+	return task;
+}
+
+/*
+ * bL_switch_request - Switch to a specific cluster for the given CPU
+ *
+ * @cpu: the CPU to switch
+ * @new_cluster_id: the ID of the cluster to switch to.
+ *
+ * This function causes a cluster switch on the given CPU by waking up
+ * the appropriate switcher thread.  This function may or may not return
+ * before the switch has occurred.
+ */
+int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id)
+{
+	struct bL_thread *t;
+
+	if (cpu >= MAX_CPUS_PER_CLUSTER) {
+		pr_err("%s: cpu %d out of bounds\n", __func__, cpu);
+		return -EINVAL;
+	}
+
+	t = &bL_threads[cpu];
+	if (IS_ERR(t->task))
+		return PTR_ERR(t->task);
+	if (!t->task)
+		return -ESRCH;
+
+	t->wanted_cluster = new_cluster_id;
+	wake_up(&t->wq);
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(bL_switch_request);
+
+/*
+ * Activation and configuration code.
+ */
+
+static DEFINE_MUTEX(bL_switcher_activation_lock);
+static BLOCKING_NOTIFIER_HEAD(bL_activation_notifier);
+static unsigned int bL_switcher_active;
+static unsigned int bL_switcher_cpu_original_cluster[MAX_CPUS_PER_CLUSTER];
+static cpumask_t bL_switcher_removed_logical_cpus;
+
+int bL_switcher_register_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&bL_activation_notifier, nb);
+}
+EXPORT_SYMBOL_GPL(bL_switcher_register_notifier);
+
+int bL_switcher_unregister_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&bL_activation_notifier, nb);
+}
+EXPORT_SYMBOL_GPL(bL_switcher_unregister_notifier);
+
+static int bL_activation_notify(unsigned long val)
+{
+	int ret;
+       
+	ret = blocking_notifier_call_chain(&bL_activation_notifier, val, NULL);
+	if (ret & NOTIFY_STOP_MASK)
+		pr_err("%s: notifier chain failed with status 0x%x\n",
+			__func__, ret);
+	return notifier_to_errno(ret);
+}
+
+static void bL_switcher_restore_cpus(void)
+{
+	int i;
+
+	for_each_cpu(i, &bL_switcher_removed_logical_cpus)
+		cpu_up(i);
+}
+
+static int bL_switcher_halve_cpus(void)
+{
+	int cpu, cluster, i, ret;
+	cpumask_t cluster_mask[2], common_mask;
+
+	cpumask_clear(&bL_switcher_removed_logical_cpus);
+	cpumask_clear(&cluster_mask[0]);
+	cpumask_clear(&cluster_mask[1]);
+
+	for_each_online_cpu(i) {
+		cpu = cpu_logical_map(i) & 0xff;
+		cluster = (cpu_logical_map(i) >> 8) & 0xff;
+		if (cluster >= 2) {
+			pr_err("%s: only dual cluster systems are supported\n", __func__);
+			return -EINVAL;
+		}
+		cpumask_set_cpu(cpu, &cluster_mask[cluster]);
+	}
+
+	if (!cpumask_and(&common_mask, &cluster_mask[0], &cluster_mask[1])) {
+		pr_err("%s: no common set of CPUs\n", __func__);
+		return -EINVAL;
+	}
+
+	for_each_online_cpu(i) {
+		cpu = cpu_logical_map(i) & 0xff;
+		cluster = (cpu_logical_map(i) >> 8) & 0xff;
+
+		if (cpumask_test_cpu(cpu, &common_mask)) {
+			/* Let's take note of the GIC ID for this CPU */
+			int gic_id = gic_get_cpu_id(i);
+			if (gic_id < 0) {
+				pr_err("%s: bad GIC ID for CPU %d\n", __func__, i);
+				return -EINVAL;
+			}
+			bL_gic_id[cpu][cluster] = gic_id;
+			pr_info("GIC ID for CPU %u cluster %u is %u\n",
+				cpu, cluster, gic_id);
+
+			/*
+			 * We keep only those logical CPUs which number
+			 * is equal to their physical CPU number. This is
+			 * not perfect but good enough in most cases.
+			 */
+			if (cpu == i) {
+				bL_switcher_cpu_original_cluster[cpu] = cluster;
+				continue;
+			}
+		}
+
+		ret = cpu_down(i);
+		if (ret) {
+			bL_switcher_restore_cpus();
+			return ret;
+		}
+		cpumask_set_cpu(i, &bL_switcher_removed_logical_cpus);
+	}
+
+	return 0;
+}
+
+static void bL_switcher_trace_trigger_cpu(void *__always_unused info)
+{
+	trace_cpu_migrate_current(get_ns(), read_mpidr() & MPIDR_HWID_BITMASK);
+}
+
+int bL_switcher_trace_trigger(void)
+{
+	int ret;
+
+	preempt_disable();
+
+	bL_switcher_trace_trigger_cpu(NULL);
+	ret = smp_call_function(bL_switcher_trace_trigger_cpu, NULL, true);
+
+	preempt_enable();
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(bL_switcher_trace_trigger);
+
+static int bL_switcher_enable(void)
+{
+	int cpu, ret;
+
+	mutex_lock(&bL_switcher_activation_lock);
+	cpu_hotplug_driver_lock();
+	if (bL_switcher_active) {
+		cpu_hotplug_driver_unlock();
+		mutex_unlock(&bL_switcher_activation_lock);
+		return 0;
+	}
+
+	pr_info("big.LITTLE switcher initializing\n");
+
+	ret = bL_activation_notify(BL_NOTIFY_PRE_ENABLE);
+	if (ret)
+		goto error;
+
+	ret = bL_switcher_halve_cpus();
+	if (ret)
+		goto error;
+
+	bL_switcher_trace_trigger();
+
+	for_each_online_cpu(cpu) {
+		struct bL_thread *t = &bL_threads[cpu];
+		init_waitqueue_head(&t->wq);
+		init_completion(&t->started);
+		t->wanted_cluster = -1;
+		t->task = bL_switcher_thread_create(cpu, t);
+	}
+
+	bL_switcher_active = 1;
+	bL_activation_notify(BL_NOTIFY_POST_ENABLE);
+	pr_info("big.LITTLE switcher initialized\n");
+	goto out;
+
+error:
+	pr_warning("big.LITTLE switcher initialization failed\n");
+	bL_activation_notify(BL_NOTIFY_POST_DISABLE);
+
+out:
+	cpu_hotplug_driver_unlock();
+	mutex_unlock(&bL_switcher_activation_lock);
+	return ret;
+}
+
+#ifdef CONFIG_SYSFS
+
+static void bL_switcher_disable(void)
+{
+	unsigned int cpu, cluster, i;
+	struct bL_thread *t;
+	struct task_struct *task;
+
+	mutex_lock(&bL_switcher_activation_lock);
+	cpu_hotplug_driver_lock();
+
+	if (!bL_switcher_active)
+		goto out;
+
+	if (bL_activation_notify(BL_NOTIFY_PRE_DISABLE) != 0) {
+		bL_activation_notify(BL_NOTIFY_POST_ENABLE);
+		goto out;
+	}
+
+	bL_switcher_active = 0;
+
+	/*
+	 * To deactivate the switcher, we must shut down the switcher
+	 * threads to prevent any other requests from being accepted.
+	 * Then, if the final cluster for given logical CPU is not the
+	 * same as the original one, we'll recreate a switcher thread
+	 * just for the purpose of switching the CPU back without any
+	 * possibility for interference from external requests.
+	 */
+	for_each_online_cpu(cpu) {
+		BUG_ON(cpu != (cpu_logical_map(cpu) & 0xff));
+		t = &bL_threads[cpu];
+		task = t->task;
+		t->task = NULL;
+		if (IS_ERR_OR_NULL(task))
+			continue;
+		kthread_stop(task);
+		/* no more switch may happen on this CPU at this point */
+		cluster = (cpu_logical_map(cpu) >> 8) & 0xff;
+		if (cluster == bL_switcher_cpu_original_cluster[cpu])
+			continue;
+		init_completion(&t->started);
+		t->wanted_cluster = bL_switcher_cpu_original_cluster[cpu];
+		task = bL_switcher_thread_create(cpu, t);
+		if (!IS_ERR(task)) {
+			wait_for_completion(&t->started);
+			kthread_stop(task);
+			cluster = (cpu_logical_map(cpu) >> 8) & 0xff;
+			if (cluster == bL_switcher_cpu_original_cluster[cpu])
+				continue;
+		}
+		/* If execution gets here, we're in trouble. */
+		pr_crit("%s: unable to restore original cluster for CPU %d\n",
+			__func__, cpu);
+		for_each_cpu(i, &bL_switcher_removed_logical_cpus) {
+			if ((cpu_logical_map(i) & 0xff) != cpu)
+				continue;
+			pr_crit("%s: CPU %d can't be restored\n",
+				__func__, i);
+			cpumask_clear_cpu(i, &bL_switcher_removed_logical_cpus);
+			break;
+		}
+	}
+
+	bL_switcher_restore_cpus();
+	bL_switcher_trace_trigger();
+
+	bL_activation_notify(BL_NOTIFY_POST_DISABLE);
+
+out:
+	cpu_hotplug_driver_unlock();
+	mutex_unlock(&bL_switcher_activation_lock);
+}
+
+static ssize_t bL_switcher_active_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", bL_switcher_active);
+}
+
+static ssize_t bL_switcher_active_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+
+	switch (buf[0]) {
+	case '0':
+		bL_switcher_disable();
+		ret = 0;
+		break;
+	case '1':
+		ret = bL_switcher_enable();
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return (ret >= 0) ? count : ret;
+}
+
+static ssize_t bL_switcher_trace_trigger_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	int ret = bL_switcher_trace_trigger();
+
+	return ret ? ret : count;
+}
+
+static struct kobj_attribute bL_switcher_active_attr =
+	__ATTR(active, 0644, bL_switcher_active_show, bL_switcher_active_store);
+
+static struct kobj_attribute bL_switcher_trace_trigger_attr =
+	__ATTR(trace_trigger, 0200, NULL, bL_switcher_trace_trigger_store);
+
+static struct attribute *bL_switcher_attrs[] = {
+	&bL_switcher_active_attr.attr,
+	&bL_switcher_trace_trigger_attr.attr,
+	NULL,
+};
+
+static struct attribute_group bL_switcher_attr_group = {
+	.attrs = bL_switcher_attrs,
+};
+
+static struct kobject *bL_switcher_kobj;
+
+static int __init bL_switcher_sysfs_init(void)
+{
+	int ret;
+
+	bL_switcher_kobj = kobject_create_and_add("bL_switcher", kernel_kobj);
+	if (!bL_switcher_kobj)
+		return -ENOMEM;
+	ret = sysfs_create_group(bL_switcher_kobj, &bL_switcher_attr_group);
+	if (ret)
+		kobject_put(bL_switcher_kobj);
+	return ret;
+}
+
+#endif  /* CONFIG_SYSFS */
+
+bool bL_switcher_get_enabled(void)
+{
+	mutex_lock(&bL_switcher_activation_lock);
+
+	return bL_switcher_active;
+}
+EXPORT_SYMBOL_GPL(bL_switcher_get_enabled);
+
+void bL_switcher_put_enabled(void)
+{
+	mutex_unlock(&bL_switcher_activation_lock);
+}
+EXPORT_SYMBOL_GPL(bL_switcher_put_enabled);
+
+/*
+ * Veto any CPU hotplug operation while the switcher is active.
+ * We're just not ready to deal with that given the trickery involved.
+ */
+static int bL_switcher_hotplug_callback(struct notifier_block *nfb,
+					unsigned long action, void *hcpu)
+{
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_DOWN_PREPARE:
+		if (bL_switcher_active)
+			return NOTIFY_BAD;
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block bL_switcher_hotplug_notifier =
+        { &bL_switcher_hotplug_callback, NULL, 0 };
+
+static bool no_bL_switcher;
+core_param(no_bL_switcher, no_bL_switcher, bool, 0644);
+
+static int __init bL_switcher_init(void)
+{
+	int ret;
+
+	if (MAX_NR_CLUSTERS != 2) {
+		pr_err("%s: only dual cluster systems are supported\n", __func__);
+		return -EINVAL;
+	}
+
+	register_cpu_notifier(&bL_switcher_hotplug_notifier);
+
+	if (!no_bL_switcher) {
+		ret = bL_switcher_enable();
+		if (ret)
+			return ret;
+	}
+
+#ifdef CONFIG_SYSFS
+	ret = bL_switcher_sysfs_init();
+	if (ret)
+		pr_err("%s: unable to create sysfs entry\n", __func__);
+#endif
+
+	return 0;
+}
+
+late_initcall(bL_switcher_init);
diff --git a/arch/arm/common/bL_switcher_dummy_if.c b/arch/arm/common/bL_switcher_dummy_if.c
new file mode 100644
index 000000000000..5e2dd197e728
--- /dev/null
+++ b/arch/arm/common/bL_switcher_dummy_if.c
@@ -0,0 +1,71 @@
+/*
+ * arch/arm/common/bL_switcher_dummy_if.c -- b.L switcher dummy interface
+ *
+ * Created by:	Nicolas Pitre, November 2012
+ * Copyright:	(C) 2012  Linaro Limited
+ *
+ * Dummy interface to user space for debugging purpose only.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <asm/uaccess.h>
+#include <asm/bL_switcher.h>
+
+static ssize_t bL_switcher_write(struct file *file, const char __user *buf,
+			size_t len, loff_t *pos)
+{
+	unsigned char val[3];
+	unsigned int cpu, cluster;
+	int ret;
+
+	pr_debug("%s\n", __func__);
+
+	if (len < 3)
+		return -EINVAL;
+
+	if (copy_from_user(val, buf, 3))
+		return -EFAULT;
+
+	/* format: <cpu#>,<cluster#> */
+	if (val[0] < '0' || val[0] > '4' ||
+	    val[1] != ',' ||
+	    val[2] < '0' || val[2] > '1')
+		return -EINVAL;
+
+	cpu = val[0] - '0';
+	cluster = val[2] - '0';
+	ret = bL_switch_request(cpu, cluster);
+
+	return ret ? : len;
+}
+
+static const struct file_operations bL_switcher_fops = {
+	.write		= bL_switcher_write,
+	.owner	= THIS_MODULE,
+};
+
+static struct miscdevice bL_switcher_device = {
+        MISC_DYNAMIC_MINOR,
+        "b.L_switcher",
+        &bL_switcher_fops
+};
+
+static int __init bL_switcher_dummy_if_init(void)
+{
+	return misc_register(&bL_switcher_device);
+}
+
+static void __exit bL_switcher_dummy_if_exit(void)
+{
+	misc_deregister(&bL_switcher_device);
+}
+
+module_init(bL_switcher_dummy_if_init);
+module_exit(bL_switcher_dummy_if_exit);
diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c
index 370236dd1a03..4a2b32fd53a1 100644
--- a/arch/arm/common/mcpm_entry.c
+++ b/arch/arm/common/mcpm_entry.c
@@ -27,6 +27,18 @@ void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr)
 	sync_cache_w(&mcpm_entry_vectors[cluster][cpu]);
 }
 
+extern unsigned long mcpm_entry_early_pokes[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER][2];
+
+void mcpm_set_early_poke(unsigned cpu, unsigned cluster,
+			 unsigned long poke_phys_addr, unsigned long poke_val)
+{
+	unsigned long *poke = &mcpm_entry_early_pokes[cluster][cpu][0];
+	poke[0] = poke_phys_addr;
+	poke[1] = poke_val;
+	__cpuc_flush_dcache_area((void *)poke, 8);
+	outer_clean_range(__pa(poke), __pa(poke + 2));
+}
+
 static const struct mcpm_platform_ops *platform_ops;
 
 int __init mcpm_platform_register(const struct mcpm_platform_ops *ops)
diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S
index 8178705c4b24..057e9c5a9e1f 100644
--- a/arch/arm/common/mcpm_head.S
+++ b/arch/arm/common/mcpm_head.S
@@ -71,12 +71,19 @@ ENTRY(mcpm_entry_point)
 	 * position independent way.
 	 */
 	adr	r5, 3f
-	ldmia	r5, {r6, r7, r8, r11}
+	ldmia	r5, {r0, r6, r7, r8, r11}
+	add	r0, r5, r0			@ r0 = mcpm_entry_early_pokes
 	add	r6, r5, r6			@ r6 = mcpm_entry_vectors
 	ldr	r7, [r5, r7]			@ r7 = mcpm_power_up_setup_phys
 	add	r8, r5, r8			@ r8 = mcpm_sync
 	add	r11, r5, r11			@ r11 = first_man_locks
 
+	@ Perform an early poke, if any
+	add	r0, r0, r4, lsl #3
+	ldmia	r0, {r0, r1}
+	teq	r0, #0
+	strne	r1, [r0]
+
 	mov	r0, #MCPM_SYNC_CLUSTER_SIZE
 	mla	r8, r0, r10, r8			@ r8 = sync cluster base
 
@@ -195,7 +202,8 @@ mcpm_entry_gated:
 
 	.align	2
 
-3:	.word	mcpm_entry_vectors - .
+3:	.word	mcpm_entry_early_pokes - .
+	.word	mcpm_entry_vectors - 3b
 	.word	mcpm_power_up_setup_phys - 3b
 	.word	mcpm_sync - 3b
 	.word	first_man_locks - 3b
@@ -214,6 +222,10 @@ first_man_locks:
 ENTRY(mcpm_entry_vectors)
 	.space	4 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER
 
+	.type	mcpm_entry_early_pokes, #object
+ENTRY(mcpm_entry_early_pokes)
+	.space	8 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER
+
 	.type	mcpm_power_up_setup_phys, #object
 ENTRY(mcpm_power_up_setup_phys)
 	.space  4		@ set by mcpm_sync_init()
diff --git a/arch/arm/include/asm/bL_switcher.h b/arch/arm/include/asm/bL_switcher.h
new file mode 100644
index 000000000000..d60e77d179af
--- /dev/null
+++ b/arch/arm/include/asm/bL_switcher.h
@@ -0,0 +1,67 @@
+/*
+ * arch/arm/include/asm/bL_switcher.h
+ *
+ * Created by:  Nicolas Pitre, April 2012
+ * Copyright:   (C) 2012  Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef ASM_BL_SWITCHER_H
+#define ASM_BL_SWITCHER_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id);
+
+/*
+ * Register here to be notified about runtime enabling/disabling of
+ * the switcher.
+ *
+ * The notifier chain is called with the switcher activation lock held:
+ * the switcher will not be enabled or disabled during callbacks.
+ * Callbacks must not call bL_switcher_{get,put}_enabled().
+ */
+#define BL_NOTIFY_PRE_ENABLE	0
+#define BL_NOTIFY_POST_ENABLE	1
+#define BL_NOTIFY_PRE_DISABLE	2
+#define BL_NOTIFY_POST_DISABLE	3
+
+#ifdef CONFIG_BL_SWITCHER
+
+int bL_switcher_register_notifier(struct notifier_block *nb);
+int bL_switcher_unregister_notifier(struct notifier_block *nb);
+
+/*
+ * Use these functions to temporarily prevent enabling/disabling of
+ * the switcher.
+ * bL_switcher_get_enabled() returns true if the switcher is currently
+ * enabled.  Each call to bL_switcher_get_enabled() must be followed
+ * by a call to bL_switcher_put_enabled().  These functions are not
+ * recursive.
+ */
+bool bL_switcher_get_enabled(void);
+void bL_switcher_put_enabled(void);
+
+int bL_switcher_trace_trigger(void);
+
+#else
+static inline int bL_switcher_register_notifier(struct notifier_block *nb)
+{
+	return 0;
+}
+
+static inline int bL_switcher_unregister_notifier(struct notifier_block *nb)
+{
+	return 0;
+}
+
+static inline bool bL_switcher_get_enabled(void) { return false; }
+static inline void bL_switcher_put_enabled(void) { }
+static inline int bL_switcher_trace_trigger(void) { return 0; }
+#endif /* CONFIG_BL_SWITCHER */
+
+#endif
diff --git a/arch/arm/include/asm/cp15.h b/arch/arm/include/asm/cp15.h
index ce4d01c03e6c..cedd3721318b 100644
--- a/arch/arm/include/asm/cp15.h
+++ b/arch/arm/include/asm/cp15.h
@@ -42,6 +42,8 @@
 #define vectors_high()	(0)
 #endif
 
+#ifdef CONFIG_CPU_CP15
+
 extern unsigned long cr_no_alignment;	/* defined in entry-armv.S */
 extern unsigned long cr_alignment;	/* defined in entry-armv.S */
 
@@ -96,6 +98,18 @@ static inline void set_copro_access(unsigned int val)
 	isb();
 }
 
-#endif
+#else /* ifdef CONFIG_CPU_CP15 */
+
+/*
+ * cr_alignment and cr_no_alignment are tightly coupled to cp15 (at least in the
+ * minds of the developers). Yielding 0 for machines without a cp15 (and making
+ * it read-only) is fine for most cases and saves quite some #ifdeffery.
+ */
+#define cr_no_alignment	UL(0)
+#define cr_alignment	UL(0)
+
+#endif /* ifdef CONFIG_CPU_CP15 / else */
+
+#endif /* ifndef __ASSEMBLY__ */
 
 #endif
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index ad41ec2471e8..7652712d1d14 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -38,6 +38,24 @@
 #define MPIDR_AFFINITY_LEVEL(mpidr, level) \
 	((mpidr >> (MPIDR_LEVEL_BITS * level)) & MPIDR_LEVEL_MASK)
 
+#define ARM_CPU_IMP_ARM			0x41
+#define ARM_CPU_IMP_INTEL		0x69
+
+#define ARM_CPU_PART_ARM1136		0xB360
+#define ARM_CPU_PART_ARM1156		0xB560
+#define ARM_CPU_PART_ARM1176		0xB760
+#define ARM_CPU_PART_ARM11MPCORE	0xB020
+#define ARM_CPU_PART_CORTEX_A8		0xC080
+#define ARM_CPU_PART_CORTEX_A9		0xC090
+#define ARM_CPU_PART_CORTEX_A5		0xC050
+#define ARM_CPU_PART_CORTEX_A15		0xC0F0
+#define ARM_CPU_PART_CORTEX_A7		0xC070
+
+#define ARM_CPU_XSCALE_ARCH_MASK	0xe000
+#define ARM_CPU_XSCALE_ARCH_V1		0x2000
+#define ARM_CPU_XSCALE_ARCH_V2		0x4000
+#define ARM_CPU_XSCALE_ARCH_V3		0x6000
+
 extern unsigned int processor_id;
 
 #ifdef CONFIG_CPU_CP15
@@ -50,6 +68,7 @@ extern unsigned int processor_id;
 		    : "cc");						\
 		__val;							\
 	})
+
 #define read_cpuid_ext(ext_reg)						\
 	({								\
 		unsigned int __val;					\
@@ -59,29 +78,24 @@ extern unsigned int processor_id;
 		    : "cc");						\
 		__val;							\
 	})
-#else
-#define read_cpuid(reg) (processor_id)
-#define read_cpuid_ext(reg) 0
-#endif
 
-#define ARM_CPU_IMP_ARM			0x41
-#define ARM_CPU_IMP_INTEL		0x69
+#else /* ifdef CONFIG_CPU_CP15 */
 
-#define ARM_CPU_PART_ARM1136		0xB360
-#define ARM_CPU_PART_ARM1156		0xB560
-#define ARM_CPU_PART_ARM1176		0xB760
-#define ARM_CPU_PART_ARM11MPCORE	0xB020
-#define ARM_CPU_PART_CORTEX_A8		0xC080
-#define ARM_CPU_PART_CORTEX_A9		0xC090
-#define ARM_CPU_PART_CORTEX_A5		0xC050
-#define ARM_CPU_PART_CORTEX_A15		0xC0F0
-#define ARM_CPU_PART_CORTEX_A7		0xC070
+/*
+ * read_cpuid and read_cpuid_ext should only ever be called on machines that
+ * have cp15 so warn on other usages.
+ */
+#define read_cpuid(reg)							\
+	({								\
+		WARN_ON_ONCE(1);					\
+		0;							\
+	})
 
-#define ARM_CPU_XSCALE_ARCH_MASK	0xe000
-#define ARM_CPU_XSCALE_ARCH_V1		0x2000
-#define ARM_CPU_XSCALE_ARCH_V2		0x4000
-#define ARM_CPU_XSCALE_ARCH_V3		0x6000
+#define read_cpuid_ext(reg) read_cpuid(reg)
+
+#endif /* ifdef CONFIG_CPU_CP15 / else */
 
+#ifdef CONFIG_CPU_CP15
 /*
  * The CPU ID never changes at run time, so we might as well tell the
  * compiler that it's constant.  Use this function to read the CPU ID
@@ -92,6 +106,15 @@ static inline unsigned int __attribute_const__ read_cpuid_id(void)
 	return read_cpuid(CPUID_ID);
 }
 
+#else /* ifdef CONFIG_CPU_CP15 */
+
+static inline unsigned int __attribute_const__ read_cpuid_id(void)
+{
+	return processor_id;
+}
+
+#endif /* ifdef CONFIG_CPU_CP15 / else */
+
 static inline unsigned int __attribute_const__ read_cpuid_implementor(void)
 {
 	return (read_cpuid_id() & 0xFF000000) >> 24;
diff --git a/arch/arm/include/asm/glue-df.h b/arch/arm/include/asm/glue-df.h
index 8cacbcda76da..b6e9f2c108b5 100644
--- a/arch/arm/include/asm/glue-df.h
+++ b/arch/arm/include/asm/glue-df.h
@@ -18,12 +18,12 @@
  *	================
  *
  *	We have the following to choose from:
- *	  arm6          - ARM6 style
  *	  arm7		- ARM7 style
  *	  v4_early	- ARMv4 without Thumb early abort handler
  *	  v4t_late	- ARMv4 with Thumb late abort handler
  *	  v4t_early	- ARMv4 with Thumb early abort handler
- *	  v5tej_early	- ARMv5 with Thumb and Java early abort handler
+ *	  v5t_early	- ARMv5 with Thumb early abort handler
+ *	  v5tj_early	- ARMv5 with Thumb and Java early abort handler
  *	  xscale	- ARMv5 with Thumb with Xscale extensions
  *	  v6_early	- ARMv6 generic early abort handler
  *	  v7_early	- ARMv7 generic early abort handler
@@ -39,19 +39,19 @@
 # endif
 #endif
 
-#ifdef CONFIG_CPU_ABRT_LV4T
+#ifdef CONFIG_CPU_ABRT_EV4
 # ifdef CPU_DABORT_HANDLER
 #  define MULTI_DABORT 1
 # else
-#  define CPU_DABORT_HANDLER v4t_late_abort
+#  define CPU_DABORT_HANDLER v4_early_abort
 # endif
 #endif
 
-#ifdef CONFIG_CPU_ABRT_EV4
+#ifdef CONFIG_CPU_ABRT_LV4T
 # ifdef CPU_DABORT_HANDLER
 #  define MULTI_DABORT 1
 # else
-#  define CPU_DABORT_HANDLER v4_early_abort
+#  define CPU_DABORT_HANDLER v4t_late_abort
 # endif
 #endif
 
@@ -63,19 +63,19 @@
 # endif
 #endif
 
-#ifdef CONFIG_CPU_ABRT_EV5TJ
+#ifdef CONFIG_CPU_ABRT_EV5T
 # ifdef CPU_DABORT_HANDLER
 #  define MULTI_DABORT 1
 # else
-#  define CPU_DABORT_HANDLER v5tj_early_abort
+#  define CPU_DABORT_HANDLER v5t_early_abort
 # endif
 #endif
 
-#ifdef CONFIG_CPU_ABRT_EV5T
+#ifdef CONFIG_CPU_ABRT_EV5TJ
 # ifdef CPU_DABORT_HANDLER
 #  define MULTI_DABORT 1
 # else
-#  define CPU_DABORT_HANDLER v5t_early_abort
+#  define CPU_DABORT_HANDLER v5tj_early_abort
 # endif
 #endif
 
diff --git a/arch/arm/include/asm/hardirq.h b/arch/arm/include/asm/hardirq.h
index 2740c2a2df63..3d7351c844aa 100644
--- a/arch/arm/include/asm/hardirq.h
+++ b/arch/arm/include/asm/hardirq.h
@@ -5,7 +5,7 @@
 #include <linux/threads.h>
 #include <asm/irq.h>
 
-#define NR_IPI	6
+#define NR_IPI	7
 
 typedef struct {
 	unsigned int __softirq_pending;
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 7c3d813e15df..124623e5ef14 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -211,4 +211,8 @@
 
 #define HSR_HVC_IMM_MASK	((1UL << 16) - 1)
 
+#define HSR_DABT_S1PTW		(1U << 7)
+#define HSR_DABT_CM		(1U << 8)
+#define HSR_DABT_EA		(1U << 9)
+
 #endif /* __ARM_KVM_ARM_H__ */
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index e4956f4e23e1..18d50322a9e2 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -75,7 +75,7 @@ extern char __kvm_hyp_code_end[];
 extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
 
 extern void __kvm_flush_vm_context(void);
-extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
+extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 #endif
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index fd611996bfb5..82b4babead2c 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -22,11 +22,12 @@
 #include <linux/kvm_host.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
+#include <asm/kvm_arm.h>
 
-u32 *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num);
-u32 *vcpu_spsr(struct kvm_vcpu *vcpu);
+unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num);
+unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu);
 
-int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run);
+bool kvm_condition_valid(struct kvm_vcpu *vcpu);
 void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr);
 void kvm_inject_undefined(struct kvm_vcpu *vcpu);
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
@@ -37,14 +38,14 @@ static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
-static inline u32 *vcpu_pc(struct kvm_vcpu *vcpu)
+static inline unsigned long *vcpu_pc(struct kvm_vcpu *vcpu)
 {
-	return (u32 *)&vcpu->arch.regs.usr_regs.ARM_pc;
+	return &vcpu->arch.regs.usr_regs.ARM_pc;
 }
 
-static inline u32 *vcpu_cpsr(struct kvm_vcpu *vcpu)
+static inline unsigned long *vcpu_cpsr(struct kvm_vcpu *vcpu)
 {
-	return (u32 *)&vcpu->arch.regs.usr_regs.ARM_cpsr;
+	return &vcpu->arch.regs.usr_regs.ARM_cpsr;
 }
 
 static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
@@ -69,4 +70,96 @@ static inline bool kvm_vcpu_reg_is_pc(struct kvm_vcpu *vcpu, int reg)
 	return reg == 15;
 }
 
+static inline u32 kvm_vcpu_get_hsr(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.fault.hsr;
+}
+
+static inline unsigned long kvm_vcpu_get_hfar(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.fault.hxfar;
+}
+
+static inline phys_addr_t kvm_vcpu_get_fault_ipa(struct kvm_vcpu *vcpu)
+{
+	return ((phys_addr_t)vcpu->arch.fault.hpfar & HPFAR_MASK) << 8;
+}
+
+static inline unsigned long kvm_vcpu_get_hyp_pc(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.fault.hyp_pc;
+}
+
+static inline bool kvm_vcpu_dabt_isvalid(struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_get_hsr(vcpu) & HSR_ISV;
+}
+
+static inline bool kvm_vcpu_dabt_iswrite(struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_get_hsr(vcpu) & HSR_WNR;
+}
+
+static inline bool kvm_vcpu_dabt_issext(struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_get_hsr(vcpu) & HSR_SSE;
+}
+
+static inline int kvm_vcpu_dabt_get_rd(struct kvm_vcpu *vcpu)
+{
+	return (kvm_vcpu_get_hsr(vcpu) & HSR_SRT_MASK) >> HSR_SRT_SHIFT;
+}
+
+static inline bool kvm_vcpu_dabt_isextabt(struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_get_hsr(vcpu) & HSR_DABT_EA;
+}
+
+static inline bool kvm_vcpu_dabt_iss1tw(struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_get_hsr(vcpu) & HSR_DABT_S1PTW;
+}
+
+/* Get Access Size from a data abort */
+static inline int kvm_vcpu_dabt_get_as(struct kvm_vcpu *vcpu)
+{
+	switch ((kvm_vcpu_get_hsr(vcpu) >> 22) & 0x3) {
+	case 0:
+		return 1;
+	case 1:
+		return 2;
+	case 2:
+		return 4;
+	default:
+		kvm_err("Hardware is weird: SAS 0b11 is reserved\n");
+		return -EFAULT;
+	}
+}
+
+/* This one is not specific to Data Abort */
+static inline bool kvm_vcpu_trap_il_is32bit(struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_get_hsr(vcpu) & HSR_IL;
+}
+
+static inline u8 kvm_vcpu_trap_get_class(struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_get_hsr(vcpu) >> HSR_EC_SHIFT;
+}
+
+static inline bool kvm_vcpu_trap_is_iabt(struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_trap_get_class(vcpu) == HSR_EC_IABT;
+}
+
+static inline u8 kvm_vcpu_trap_get_fault(struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_get_hsr(vcpu) & HSR_FSC_TYPE;
+}
+
+static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK;
+}
+
 #endif /* __ARM_KVM_EMULATE_H__ */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index d1736a53b12d..0c4e643d939e 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -80,6 +80,15 @@ struct kvm_mmu_memory_cache {
 	void *objects[KVM_NR_MEM_OBJS];
 };
 
+struct kvm_vcpu_fault_info {
+	u32 hsr;		/* Hyp Syndrome Register */
+	u32 hxfar;		/* Hyp Data/Inst. Fault Address Register */
+	u32 hpfar;		/* Hyp IPA Fault Address Register */
+	u32 hyp_pc;		/* PC when exception was taken from Hyp mode */
+};
+
+typedef struct vfp_hard_struct kvm_kernel_vfp_t;
+
 struct kvm_vcpu_arch {
 	struct kvm_regs regs;
 
@@ -93,13 +102,11 @@ struct kvm_vcpu_arch {
 	u32 midr;
 
 	/* Exception Information */
-	u32 hsr;		/* Hyp Syndrome Register */
-	u32 hxfar;		/* Hyp Data/Inst Fault Address Register */
-	u32 hpfar;		/* Hyp IPA Fault Address Register */
+	struct kvm_vcpu_fault_info fault;
 
 	/* Floating point registers (VFP and Advanced SIMD/NEON) */
-	struct vfp_hard_struct vfp_guest;
-	struct vfp_hard_struct *vfp_host;
+	kvm_kernel_vfp_t vfp_guest;
+	kvm_kernel_vfp_t *vfp_host;
 
 	/* VGIC state */
 	struct vgic_cpu vgic_cpu;
@@ -122,9 +129,6 @@ struct kvm_vcpu_arch {
 	/* Interrupt related fields */
 	u32 irq_lines;		/* IRQ and FIQ levels */
 
-	/* Hyp exception information */
-	u32 hyp_pc;		/* PC when exception was taken from Hyp mode */
-
 	/* Cache some mmu pages needed inside spinlock regions */
 	struct kvm_mmu_memory_cache mmu_page_cache;
 
@@ -181,4 +185,26 @@ struct kvm_one_reg;
 int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
 int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
 
+int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
+		int exception_index);
+
+static inline void __cpu_init_hyp_mode(unsigned long long pgd_ptr,
+				       unsigned long hyp_stack_ptr,
+				       unsigned long vector_ptr)
+{
+	unsigned long pgd_low, pgd_high;
+
+	pgd_low = (pgd_ptr & ((1ULL << 32) - 1));
+	pgd_high = (pgd_ptr >> 32ULL);
+
+	/*
+	 * Call initialization code, and switch to the full blown
+	 * HYP code. The init code doesn't need to preserve these registers as
+	 * r1-r3 and r12 are already callee save according to the AAPCS.
+	 * Note that we slightly misuse the prototype by casing the pgd_low to
+	 * a void *.
+	 */
+	kvm_call_hyp((void *)pgd_low, pgd_high, hyp_stack_ptr, vector_ptr);
+}
+
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 421a20b34874..970f3b5fa109 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -19,6 +19,18 @@
 #ifndef __ARM_KVM_MMU_H__
 #define __ARM_KVM_MMU_H__
 
+#include <asm/cacheflush.h>
+#include <asm/pgalloc.h>
+#include <asm/idmap.h>
+
+/*
+ * We directly use the kernel VA for the HYP, as we can directly share
+ * the mapping (HTTBR "covers" TTBR1).
+ */
+#define HYP_PAGE_OFFSET_MASK	(~0UL)
+#define HYP_PAGE_OFFSET		PAGE_OFFSET
+#define KERN_TO_HYP(kva)	(kva)
+
 int create_hyp_mappings(void *from, void *to);
 int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
 void free_hyp_pmds(void);
@@ -36,6 +48,16 @@ phys_addr_t kvm_mmu_get_httbr(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
 
+static inline void kvm_set_pte(pte_t *pte, pte_t new_pte)
+{
+	pte_val(*pte) = new_pte;
+	/*
+	 * flush_pmd_entry just takes a void pointer and cleans the necessary
+	 * cache entries, so we can reuse the function for ptes.
+	 */
+	flush_pmd_entry(pte);
+}
+
 static inline bool kvm_is_write_fault(unsigned long hsr)
 {
 	unsigned long hsr_ec = hsr >> HSR_EC_SHIFT;
@@ -47,4 +69,49 @@ static inline bool kvm_is_write_fault(unsigned long hsr)
 		return true;
 }
 
+static inline void kvm_clean_pgd(pgd_t *pgd)
+{
+	clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t));
+}
+
+static inline void kvm_clean_pmd_entry(pmd_t *pmd)
+{
+	clean_pmd_entry(pmd);
+}
+
+static inline void kvm_clean_pte(pte_t *pte)
+{
+	clean_pte_table(pte);
+}
+
+static inline void kvm_set_s2pte_writable(pte_t *pte)
+{
+	pte_val(*pte) |= L_PTE_S2_RDWR;
+}
+
+struct kvm;
+
+static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
+{
+	/*
+	 * If we are going to insert an instruction page and the icache is
+	 * either VIPT or PIPT, there is a potential problem where the host
+	 * (or another VM) may have used the same page as this guest, and we
+	 * read incorrect data from the icache.  If we're using a PIPT cache,
+	 * we can invalidate just that page, but if we are using a VIPT cache
+	 * we need to invalidate the entire icache - damn shame - as written
+	 * in the ARM ARM (DDI 0406C.b - Page B3-1393).
+	 *
+	 * VIVT caches are tagged using both the ASID and the VMID and doesn't
+	 * need any kind of flushing (DDI 0406C.b - Page B3-1392).
+	 */
+	if (icache_is_pipt()) {
+		unsigned long hva = gfn_to_hva(kvm, gfn);
+		__cpuc_coherent_user_range(hva, hva + PAGE_SIZE);
+	} else if (!icache_is_vivt_asid_tagged()) {
+		/* any kind of VIPT cache */
+		__flush_icache_all();
+	}
+}
+
 #endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
index ab97207d9cd3..343744e4809c 100644
--- a/arch/arm/include/asm/kvm_vgic.h
+++ b/arch/arm/include/asm/kvm_vgic.h
@@ -21,7 +21,6 @@
 
 #include <linux/kernel.h>
 #include <linux/kvm.h>
-#include <linux/kvm_host.h>
 #include <linux/irqreturn.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
diff --git a/arch/arm/include/asm/mcpm.h b/arch/arm/include/asm/mcpm.h
index 0f7b7620e9a5..7626a7fd4938 100644
--- a/arch/arm/include/asm/mcpm.h
+++ b/arch/arm/include/asm/mcpm.h
@@ -42,6 +42,14 @@ extern void mcpm_entry_point(void);
 void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr);
 
 /*
+ * This sets an early poke i.e a value to be poked into some address
+ * from very early assembly code before the CPU is ungated.  The
+ * address must be physical, and if 0 then nothing will happen.
+ */
+void mcpm_set_early_poke(unsigned cpu, unsigned cluster,
+			 unsigned long poke_phys_addr, unsigned long poke_val);
+
+/*
  * CPU/cluster power operations API for higher subsystems to use.
  */
 
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index d3a22bebe6ce..610ccf33f5e7 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -81,6 +81,8 @@ extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask);
 
+extern int register_ipi_completion(struct completion *completion, int cpu);
+
 struct smp_operations {
 #ifdef CONFIG_SMP
 	/*
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 023bfeb367bf..c1ee007523d7 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -53,12 +53,12 @@
 #define KVM_ARM_FIQ_spsr	fiq_regs[7]
 
 struct kvm_regs {
-	struct pt_regs usr_regs;/* R0_usr - R14_usr, PC, CPSR */
-	__u32 svc_regs[3];	/* SP_svc, LR_svc, SPSR_svc */
-	__u32 abt_regs[3];	/* SP_abt, LR_abt, SPSR_abt */
-	__u32 und_regs[3];	/* SP_und, LR_und, SPSR_und */
-	__u32 irq_regs[3];	/* SP_irq, LR_irq, SPSR_irq */
-	__u32 fiq_regs[8];	/* R8_fiq - R14_fiq, SPSR_fiq */
+	struct pt_regs usr_regs;	/* R0_usr - R14_usr, PC, CPSR */
+	unsigned long svc_regs[3];	/* SP_svc, LR_svc, SPSR_svc */
+	unsigned long abt_regs[3];	/* SP_abt, LR_abt, SPSR_abt */
+	unsigned long und_regs[3];	/* SP_und, LR_und, SPSR_und */
+	unsigned long irq_regs[3];	/* SP_irq, LR_irq, SPSR_irq */
+	unsigned long fiq_regs[8];	/* R8_fiq - R14_fiq, SPSR_fiq */
 };
 
 /* Supported Processor Types */
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 3f088225e71c..a53efa993690 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -169,10 +169,10 @@ int main(void)
   DEFINE(VCPU_PC,		offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_pc));
   DEFINE(VCPU_CPSR,		offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_cpsr));
   DEFINE(VCPU_IRQ_LINES,	offsetof(struct kvm_vcpu, arch.irq_lines));
-  DEFINE(VCPU_HSR,		offsetof(struct kvm_vcpu, arch.hsr));
-  DEFINE(VCPU_HxFAR,		offsetof(struct kvm_vcpu, arch.hxfar));
-  DEFINE(VCPU_HPFAR,		offsetof(struct kvm_vcpu, arch.hpfar));
-  DEFINE(VCPU_HYP_PC,		offsetof(struct kvm_vcpu, arch.hyp_pc));
+  DEFINE(VCPU_HSR,		offsetof(struct kvm_vcpu, arch.fault.hsr));
+  DEFINE(VCPU_HxFAR,		offsetof(struct kvm_vcpu, arch.fault.hxfar));
+  DEFINE(VCPU_HPFAR,		offsetof(struct kvm_vcpu, arch.fault.hpfar));
+  DEFINE(VCPU_HYP_PC,		offsetof(struct kvm_vcpu, arch.fault.hyp_pc));
 #ifdef CONFIG_KVM_ARM_VGIC
   DEFINE(VCPU_VGIC_CPU,		offsetof(struct kvm_vcpu, arch.vgic_cpu));
   DEFINE(VGIC_CPU_HCR,		offsetof(struct vgic_cpu, vgic_hcr));
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index 854bd22380d3..5b391a689b47 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -98,8 +98,9 @@ __mmap_switched:
 	str	r9, [r4]			@ Save processor ID
 	str	r1, [r5]			@ Save machine type
 	str	r2, [r6]			@ Save atags pointer
-	bic	r4, r0, #CR_A			@ Clear 'A' bit
-	stmia	r7, {r0, r4}			@ Save control register values
+	cmp	r7, #0
+	bicne	r4, r0, #CR_A			@ Clear 'A' bit
+	stmneia	r7, {r0, r4}			@ Save control register values
 	b	start_kernel
 ENDPROC(__mmap_switched)
 
@@ -113,7 +114,11 @@ __mmap_switched_data:
 	.long	processor_id			@ r4
 	.long	__machine_arch_type		@ r5
 	.long	__atags_pointer			@ r6
+#ifdef CONFIG_CPU_CP15
 	.long	cr_alignment			@ r7
+#else
+	.long	0				@ r7
+#endif
 	.long	init_thread_union + THREAD_START_SP @ sp
 	.size	__mmap_switched_data, . - __mmap_switched_data
 
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index 2c228a07e58c..6a2e09c952c7 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -32,15 +32,21 @@
  * numbers for r1.
  *
  */
-	.arm
 
 	__HEAD
+
+#ifdef CONFIG_CPU_THUMBONLY
+	.thumb
+ENTRY(stext)
+#else
+	.arm
 ENTRY(stext)
 
  THUMB(	adr	r9, BSYM(1f)	)	@ Kernel is always entered in ARM.
  THUMB(	bx	r9		)	@ If this is a Thumb-2 kernel,
  THUMB(	.thumb			)	@ switch to Thumb now.
  THUMB(1:			)
+#endif
 
 	setmode	PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 @ ensure svc mode
 						@ and irqs disabled
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 3f0d5e969ef0..7c9fd36be1d7 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -303,10 +303,10 @@ static int cpu_has_aliasing_icache(unsigned int arch)
 
 static void __init cacheid_init(void)
 {
-	unsigned int cachetype = read_cpuid_cachetype();
 	unsigned int arch = cpu_architecture();
 
 	if (arch >= CPU_ARCH_ARMv6) {
+		unsigned int cachetype = read_cpuid_cachetype();
 		if ((cachetype & (7 << 29)) == 4 << 29) {
 			/* ARMv7 register format */
 			arch = CPU_ARCH_ARMv7;
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 1f2ccccaf009..f40fb0dcb3b0 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -66,6 +66,7 @@ enum ipi_msg_type {
 	IPI_CALL_FUNC,
 	IPI_CALL_FUNC_SINGLE,
 	IPI_CPU_STOP,
+	IPI_COMPLETION,
 };
 
 static DECLARE_COMPLETION(cpu_running);
@@ -429,6 +430,7 @@ static const char *ipi_types[NR_IPI] = {
 	S(IPI_CALL_FUNC, "Function call interrupts"),
 	S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"),
 	S(IPI_CPU_STOP, "CPU stop interrupts"),
+	S(IPI_COMPLETION, "completion interrupts"),
 };
 
 void show_ipi_list(struct seq_file *p, int prec)
@@ -554,6 +556,19 @@ static void ipi_cpu_stop(unsigned int cpu)
 		cpu_relax();
 }
 
+static DEFINE_PER_CPU(struct completion *, cpu_completion);
+
+int register_ipi_completion(struct completion *completion, int cpu)
+{
+	per_cpu(cpu_completion, cpu) = completion;
+	return IPI_COMPLETION;
+}
+
+static void ipi_complete(unsigned int cpu)
+{
+	complete(per_cpu(cpu_completion, cpu));
+}
+
 /*
  * Main handler for inter-processor interrupts
  */
@@ -604,6 +619,12 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
 		irq_exit();
 		break;
 
+	case IPI_COMPLETION:
+		irq_enter();
+		ipi_complete(cpu);
+		irq_exit();
+		break;
+
 	default:
 		printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n",
 		       cpu, ipinr);
diff --git a/arch/arm/kernel/smp_scu.c b/arch/arm/kernel/smp_scu.c
index 45eac87ed66a..5bc1a63284e3 100644
--- a/arch/arm/kernel/smp_scu.c
+++ b/arch/arm/kernel/smp_scu.c
@@ -41,7 +41,7 @@ void scu_enable(void __iomem *scu_base)
 
 #ifdef CONFIG_ARM_ERRATA_764369
 	/* Cortex-A9 only */
-	if ((read_cpuid(CPUID_ID) & 0xff0ffff0) == 0x410fc090) {
+	if ((read_cpuid_id() & 0xff0ffff0) == 0x410fc090) {
 		scu_ctrl = __raw_readl(scu_base + 0x30);
 		if (!(scu_ctrl & 1))
 			__raw_writel(scu_ctrl | 0x1, scu_base + 0x30);
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index fc96ce6f2357..8dc5e76cb789 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -17,7 +17,7 @@ AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
 kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
 
 obj-y += kvm-arm.o init.o interrupts.o
-obj-y += arm.o guest.o mmu.o emulate.o reset.o
+obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
 obj-y += coproc.o coproc_a15.o mmio.o psci.o
 obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o
 obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index c1fe498983ac..a0dfc2a53f91 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -30,11 +30,9 @@
 #define CREATE_TRACE_POINTS
 #include "trace.h"
 
-#include <asm/unified.h>
 #include <asm/uaccess.h>
 #include <asm/ptrace.h>
 #include <asm/mman.h>
-#include <asm/cputype.h>
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
 #include <asm/virt.h>
@@ -44,14 +42,13 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_coproc.h>
 #include <asm/kvm_psci.h>
-#include <asm/opcodes.h>
 
 #ifdef REQUIRES_VIRT
 __asm__(".arch_extension	virt");
 #endif
 
 static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
-static struct vfp_hard_struct __percpu *kvm_host_vfp_state;
+static kvm_kernel_vfp_t __percpu *kvm_host_vfp_state;
 static unsigned long hyp_default_vectors;
 
 /* Per-CPU variable containing the currently running vcpu. */
@@ -304,22 +301,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-int __attribute_const__ kvm_target_cpu(void)
-{
-	unsigned long implementor = read_cpuid_implementor();
-	unsigned long part_number = read_cpuid_part_number();
-
-	if (implementor != ARM_CPU_IMP_ARM)
-		return -EINVAL;
-
-	switch (part_number) {
-	case ARM_CPU_PART_CORTEX_A15:
-		return KVM_ARM_TARGET_CORTEX_A15;
-	default:
-		return -EINVAL;
-	}
-}
-
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
 	int ret;
@@ -482,163 +463,6 @@ static void update_vttbr(struct kvm *kvm)
 	spin_unlock(&kvm_vmid_lock);
 }
 
-static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
-	/* SVC called from Hyp mode should never get here */
-	kvm_debug("SVC called from Hyp mode shouldn't go here\n");
-	BUG();
-	return -EINVAL; /* Squash warning */
-}
-
-static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
-	trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
-		      vcpu->arch.hsr & HSR_HVC_IMM_MASK);
-
-	if (kvm_psci_call(vcpu))
-		return 1;
-
-	kvm_inject_undefined(vcpu);
-	return 1;
-}
-
-static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
-	if (kvm_psci_call(vcpu))
-		return 1;
-
-	kvm_inject_undefined(vcpu);
-	return 1;
-}
-
-static int handle_pabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
-	/* The hypervisor should never cause aborts */
-	kvm_err("Prefetch Abort taken from Hyp mode at %#08x (HSR: %#08x)\n",
-		vcpu->arch.hxfar, vcpu->arch.hsr);
-	return -EFAULT;
-}
-
-static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
-	/* This is either an error in the ws. code or an external abort */
-	kvm_err("Data Abort taken from Hyp mode at %#08x (HSR: %#08x)\n",
-		vcpu->arch.hxfar, vcpu->arch.hsr);
-	return -EFAULT;
-}
-
-typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
-static exit_handle_fn arm_exit_handlers[] = {
-	[HSR_EC_WFI]		= kvm_handle_wfi,
-	[HSR_EC_CP15_32]	= kvm_handle_cp15_32,
-	[HSR_EC_CP15_64]	= kvm_handle_cp15_64,
-	[HSR_EC_CP14_MR]	= kvm_handle_cp14_access,
-	[HSR_EC_CP14_LS]	= kvm_handle_cp14_load_store,
-	[HSR_EC_CP14_64]	= kvm_handle_cp14_access,
-	[HSR_EC_CP_0_13]	= kvm_handle_cp_0_13_access,
-	[HSR_EC_CP10_ID]	= kvm_handle_cp10_id,
-	[HSR_EC_SVC_HYP]	= handle_svc_hyp,
-	[HSR_EC_HVC]		= handle_hvc,
-	[HSR_EC_SMC]		= handle_smc,
-	[HSR_EC_IABT]		= kvm_handle_guest_abort,
-	[HSR_EC_IABT_HYP]	= handle_pabt_hyp,
-	[HSR_EC_DABT]		= kvm_handle_guest_abort,
-	[HSR_EC_DABT_HYP]	= handle_dabt_hyp,
-};
-
-/*
- * A conditional instruction is allowed to trap, even though it
- * wouldn't be executed.  So let's re-implement the hardware, in
- * software!
- */
-static bool kvm_condition_valid(struct kvm_vcpu *vcpu)
-{
-	unsigned long cpsr, cond, insn;
-
-	/*
-	 * Exception Code 0 can only happen if we set HCR.TGE to 1, to
-	 * catch undefined instructions, and then we won't get past
-	 * the arm_exit_handlers test anyway.
-	 */
-	BUG_ON(((vcpu->arch.hsr & HSR_EC) >> HSR_EC_SHIFT) == 0);
-
-	/* Top two bits non-zero?  Unconditional. */
-	if (vcpu->arch.hsr >> 30)
-		return true;
-
-	cpsr = *vcpu_cpsr(vcpu);
-
-	/* Is condition field valid? */
-	if ((vcpu->arch.hsr & HSR_CV) >> HSR_CV_SHIFT)
-		cond = (vcpu->arch.hsr & HSR_COND) >> HSR_COND_SHIFT;
-	else {
-		/* This can happen in Thumb mode: examine IT state. */
-		unsigned long it;
-
-		it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3);
-
-		/* it == 0 => unconditional. */
-		if (it == 0)
-			return true;
-
-		/* The cond for this insn works out as the top 4 bits. */
-		cond = (it >> 4);
-	}
-
-	/* Shift makes it look like an ARM-mode instruction */
-	insn = cond << 28;
-	return arm_check_condition(insn, cpsr) != ARM_OPCODE_CONDTEST_FAIL;
-}
-
-/*
- * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
- * proper exit to QEMU.
- */
-static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
-		       int exception_index)
-{
-	unsigned long hsr_ec;
-
-	switch (exception_index) {
-	case ARM_EXCEPTION_IRQ:
-		return 1;
-	case ARM_EXCEPTION_UNDEFINED:
-		kvm_err("Undefined exception in Hyp mode at: %#08x\n",
-			vcpu->arch.hyp_pc);
-		BUG();
-		panic("KVM: Hypervisor undefined exception!\n");
-	case ARM_EXCEPTION_DATA_ABORT:
-	case ARM_EXCEPTION_PREF_ABORT:
-	case ARM_EXCEPTION_HVC:
-		hsr_ec = (vcpu->arch.hsr & HSR_EC) >> HSR_EC_SHIFT;
-
-		if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers)
-		    || !arm_exit_handlers[hsr_ec]) {
-			kvm_err("Unkown exception class: %#08lx, "
-				"hsr: %#08x\n", hsr_ec,
-				(unsigned int)vcpu->arch.hsr);
-			BUG();
-		}
-
-		/*
-		 * See ARM ARM B1.14.1: "Hyp traps on instructions
-		 * that fail their condition code check"
-		 */
-		if (!kvm_condition_valid(vcpu)) {
-			bool is_wide = vcpu->arch.hsr & HSR_IL;
-			kvm_skip_instr(vcpu, is_wide);
-			return 1;
-		}
-
-		return arm_exit_handlers[hsr_ec](vcpu, run);
-	default:
-		kvm_pr_unimpl("Unsupported exception type: %d",
-			      exception_index);
-		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-		return 0;
-	}
-}
-
 static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 {
 	if (likely(vcpu->arch.has_run_once))
@@ -973,7 +797,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 static void cpu_init_hyp_mode(void *vector)
 {
 	unsigned long long pgd_ptr;
-	unsigned long pgd_low, pgd_high;
 	unsigned long hyp_stack_ptr;
 	unsigned long stack_page;
 	unsigned long vector_ptr;
@@ -982,20 +805,11 @@ static void cpu_init_hyp_mode(void *vector)
 	__hyp_set_vectors((unsigned long)vector);
 
 	pgd_ptr = (unsigned long long)kvm_mmu_get_httbr();
-	pgd_low = (pgd_ptr & ((1ULL << 32) - 1));
-	pgd_high = (pgd_ptr >> 32ULL);
 	stack_page = __get_cpu_var(kvm_arm_hyp_stack_page);
 	hyp_stack_ptr = stack_page + PAGE_SIZE;
 	vector_ptr = (unsigned long)__kvm_hyp_vector;
 
-	/*
-	 * Call initialization code, and switch to the full blown
-	 * HYP code. The init code doesn't need to preserve these registers as
-	 * r1-r3 and r12 are already callee save according to the AAPCS.
-	 * Note that we slightly misuse the prototype by casing the pgd_low to
-	 * a void *.
-	 */
-	kvm_call_hyp((void *)pgd_low, pgd_high, hyp_stack_ptr, vector_ptr);
+	__cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
 }
 
 /**
@@ -1078,7 +892,7 @@ static int init_hyp_mode(void)
 	/*
 	 * Map the host VFP structures
 	 */
-	kvm_host_vfp_state = alloc_percpu(struct vfp_hard_struct);
+	kvm_host_vfp_state = alloc_percpu(kvm_kernel_vfp_t);
 	if (!kvm_host_vfp_state) {
 		err = -ENOMEM;
 		kvm_err("Cannot allocate host VFP state\n");
@@ -1086,7 +900,7 @@ static int init_hyp_mode(void)
 	}
 
 	for_each_possible_cpu(cpu) {
-		struct vfp_hard_struct *vfp;
+		kvm_kernel_vfp_t *vfp;
 
 		vfp = per_cpu_ptr(kvm_host_vfp_state, cpu);
 		err = create_hyp_mappings(vfp, vfp + 1);
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 7bed7556077a..8eea97be1ed5 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -76,7 +76,7 @@ static bool access_dcsw(struct kvm_vcpu *vcpu,
 			const struct coproc_params *p,
 			const struct coproc_reg *r)
 {
-	u32 val;
+	unsigned long val;
 	int cpu;
 
 	if (!p->is_write)
@@ -293,12 +293,12 @@ static int emulate_cp15(struct kvm_vcpu *vcpu,
 
 		if (likely(r->access(vcpu, params, r))) {
 			/* Skip instruction, since it was emulated */
-			kvm_skip_instr(vcpu, (vcpu->arch.hsr >> 25) & 1);
+			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
 			return 1;
 		}
 		/* If access function fails, it should complain. */
 	} else {
-		kvm_err("Unsupported guest CP15 access at: %08x\n",
+		kvm_err("Unsupported guest CP15 access at: %08lx\n",
 			*vcpu_pc(vcpu));
 		print_cp_instr(params);
 	}
@@ -315,14 +315,14 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	struct coproc_params params;
 
-	params.CRm = (vcpu->arch.hsr >> 1) & 0xf;
-	params.Rt1 = (vcpu->arch.hsr >> 5) & 0xf;
-	params.is_write = ((vcpu->arch.hsr & 1) == 0);
+	params.CRm = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf;
+	params.Rt1 = (kvm_vcpu_get_hsr(vcpu) >> 5) & 0xf;
+	params.is_write = ((kvm_vcpu_get_hsr(vcpu) & 1) == 0);
 	params.is_64bit = true;
 
-	params.Op1 = (vcpu->arch.hsr >> 16) & 0xf;
+	params.Op1 = (kvm_vcpu_get_hsr(vcpu) >> 16) & 0xf;
 	params.Op2 = 0;
-	params.Rt2 = (vcpu->arch.hsr >> 10) & 0xf;
+	params.Rt2 = (kvm_vcpu_get_hsr(vcpu) >> 10) & 0xf;
 	params.CRn = 0;
 
 	return emulate_cp15(vcpu, &params);
@@ -347,14 +347,14 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	struct coproc_params params;
 
-	params.CRm = (vcpu->arch.hsr >> 1) & 0xf;
-	params.Rt1 = (vcpu->arch.hsr >> 5) & 0xf;
-	params.is_write = ((vcpu->arch.hsr & 1) == 0);
+	params.CRm = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf;
+	params.Rt1 = (kvm_vcpu_get_hsr(vcpu) >> 5) & 0xf;
+	params.is_write = ((kvm_vcpu_get_hsr(vcpu) & 1) == 0);
 	params.is_64bit = false;
 
-	params.CRn = (vcpu->arch.hsr >> 10) & 0xf;
-	params.Op1 = (vcpu->arch.hsr >> 14) & 0x7;
-	params.Op2 = (vcpu->arch.hsr >> 17) & 0x7;
+	params.CRn = (kvm_vcpu_get_hsr(vcpu) >> 10) & 0xf;
+	params.Op1 = (kvm_vcpu_get_hsr(vcpu) >> 14) & 0x7;
+	params.Op2 = (kvm_vcpu_get_hsr(vcpu) >> 17) & 0x7;
 	params.Rt2 = 0;
 
 	return emulate_cp15(vcpu, &params);
diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h
index 992adfafa2ff..b7301d3e4799 100644
--- a/arch/arm/kvm/coproc.h
+++ b/arch/arm/kvm/coproc.h
@@ -84,7 +84,7 @@ static inline bool read_zero(struct kvm_vcpu *vcpu,
 static inline bool write_to_read_only(struct kvm_vcpu *vcpu,
 				      const struct coproc_params *params)
 {
-	kvm_debug("CP15 write to read-only register at: %08x\n",
+	kvm_debug("CP15 write to read-only register at: %08lx\n",
 		  *vcpu_pc(vcpu));
 	print_cp_instr(params);
 	return false;
@@ -93,7 +93,7 @@ static inline bool write_to_read_only(struct kvm_vcpu *vcpu,
 static inline bool read_from_write_only(struct kvm_vcpu *vcpu,
 					const struct coproc_params *params)
 {
-	kvm_debug("CP15 read to write-only register at: %08x\n",
+	kvm_debug("CP15 read to write-only register at: %08lx\n",
 		  *vcpu_pc(vcpu));
 	print_cp_instr(params);
 	return false;
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index d61450ac6665..bdede9e7da51 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -20,6 +20,7 @@
 #include <linux/kvm_host.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_emulate.h>
+#include <asm/opcodes.h>
 #include <trace/events/kvm.h>
 
 #include "trace.h"
@@ -109,10 +110,10 @@ static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][15] = {
  * Return a pointer to the register number valid in the current mode of
  * the virtual CPU.
  */
-u32 *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num)
+unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num)
 {
-	u32 *reg_array = (u32 *)&vcpu->arch.regs;
-	u32 mode = *vcpu_cpsr(vcpu) & MODE_MASK;
+	unsigned long *reg_array = (unsigned long *)&vcpu->arch.regs;
+	unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK;
 
 	switch (mode) {
 	case USR_MODE...SVC_MODE:
@@ -141,9 +142,9 @@ u32 *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num)
 /*
  * Return the SPSR for the current mode of the virtual CPU.
  */
-u32 *vcpu_spsr(struct kvm_vcpu *vcpu)
+unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu)
 {
-	u32 mode = *vcpu_cpsr(vcpu) & MODE_MASK;
+	unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK;
 	switch (mode) {
 	case SVC_MODE:
 		return &vcpu->arch.regs.KVM_ARM_SVC_spsr;
@@ -160,20 +161,48 @@ u32 *vcpu_spsr(struct kvm_vcpu *vcpu)
 	}
 }
 
-/**
- * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest
- * @vcpu:	the vcpu pointer
- * @run:	the kvm_run structure pointer
- *
- * Simply sets the wait_for_interrupts flag on the vcpu structure, which will
- * halt execution of world-switches and schedule other host processes until
- * there is an incoming IRQ or FIQ to the VM.
+/*
+ * A conditional instruction is allowed to trap, even though it
+ * wouldn't be executed.  So let's re-implement the hardware, in
+ * software!
  */
-int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
+bool kvm_condition_valid(struct kvm_vcpu *vcpu)
 {
-	trace_kvm_wfi(*vcpu_pc(vcpu));
-	kvm_vcpu_block(vcpu);
-	return 1;
+	unsigned long cpsr, cond, insn;
+
+	/*
+	 * Exception Code 0 can only happen if we set HCR.TGE to 1, to
+	 * catch undefined instructions, and then we won't get past
+	 * the arm_exit_handlers test anyway.
+	 */
+	BUG_ON(!kvm_vcpu_trap_get_class(vcpu));
+
+	/* Top two bits non-zero?  Unconditional. */
+	if (kvm_vcpu_get_hsr(vcpu) >> 30)
+		return true;
+
+	cpsr = *vcpu_cpsr(vcpu);
+
+	/* Is condition field valid? */
+	if ((kvm_vcpu_get_hsr(vcpu) & HSR_CV) >> HSR_CV_SHIFT)
+		cond = (kvm_vcpu_get_hsr(vcpu) & HSR_COND) >> HSR_COND_SHIFT;
+	else {
+		/* This can happen in Thumb mode: examine IT state. */
+		unsigned long it;
+
+		it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3);
+
+		/* it == 0 => unconditional. */
+		if (it == 0)
+			return true;
+
+		/* The cond for this insn works out as the top 4 bits. */
+		cond = (it >> 4);
+	}
+
+	/* Shift makes it look like an ARM-mode instruction */
+	insn = cond << 28;
+	return arm_check_condition(insn, cpsr) != ARM_OPCODE_CONDTEST_FAIL;
 }
 
 /**
@@ -257,9 +286,9 @@ static u32 exc_vector_base(struct kvm_vcpu *vcpu)
  */
 void kvm_inject_undefined(struct kvm_vcpu *vcpu)
 {
-	u32 new_lr_value;
-	u32 new_spsr_value;
-	u32 cpsr = *vcpu_cpsr(vcpu);
+	unsigned long new_lr_value;
+	unsigned long new_spsr_value;
+	unsigned long cpsr = *vcpu_cpsr(vcpu);
 	u32 sctlr = vcpu->arch.cp15[c1_SCTLR];
 	bool is_thumb = (cpsr & PSR_T_BIT);
 	u32 vect_offset = 4;
@@ -291,9 +320,9 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu)
  */
 static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr)
 {
-	u32 new_lr_value;
-	u32 new_spsr_value;
-	u32 cpsr = *vcpu_cpsr(vcpu);
+	unsigned long new_lr_value;
+	unsigned long new_spsr_value;
+	unsigned long cpsr = *vcpu_cpsr(vcpu);
 	u32 sctlr = vcpu->arch.cp15[c1_SCTLR];
 	bool is_thumb = (cpsr & PSR_T_BIT);
 	u32 vect_offset;
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 2339d9609d36..152d03612181 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -22,6 +22,7 @@
 #include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
+#include <asm/cputype.h>
 #include <asm/uaccess.h>
 #include <asm/kvm.h>
 #include <asm/kvm_asm.h>
@@ -180,6 +181,22 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	return -EINVAL;
 }
 
+int __attribute_const__ kvm_target_cpu(void)
+{
+	unsigned long implementor = read_cpuid_implementor();
+	unsigned long part_number = read_cpuid_part_number();
+
+	if (implementor != ARM_CPU_IMP_ARM)
+		return -EINVAL;
+
+	switch (part_number) {
+	case ARM_CPU_PART_CORTEX_A15:
+		return KVM_ARM_TARGET_CORTEX_A15;
+	default:
+		return -EINVAL;
+	}
+}
+
 int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 			const struct kvm_vcpu_init *init)
 {
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
new file mode 100644
index 000000000000..26ad17310a1e
--- /dev/null
+++ b/arch/arm/kvm/handle_exit.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+#include <asm/kvm_mmu.h>
+#include <asm/kvm_psci.h>
+#include <trace/events/kvm.h>
+
+#include "trace.h"
+
+#include "trace.h"
+
+typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
+
+static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	/* SVC called from Hyp mode should never get here */
+	kvm_debug("SVC called from Hyp mode shouldn't go here\n");
+	BUG();
+	return -EINVAL; /* Squash warning */
+}
+
+static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
+		      kvm_vcpu_hvc_get_imm(vcpu));
+
+	if (kvm_psci_call(vcpu))
+		return 1;
+
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
+static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	if (kvm_psci_call(vcpu))
+		return 1;
+
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
+static int handle_pabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	/* The hypervisor should never cause aborts */
+	kvm_err("Prefetch Abort taken from Hyp mode at %#08lx (HSR: %#08x)\n",
+		kvm_vcpu_get_hfar(vcpu), kvm_vcpu_get_hsr(vcpu));
+	return -EFAULT;
+}
+
+static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	/* This is either an error in the ws. code or an external abort */
+	kvm_err("Data Abort taken from Hyp mode at %#08lx (HSR: %#08x)\n",
+		kvm_vcpu_get_hfar(vcpu), kvm_vcpu_get_hsr(vcpu));
+	return -EFAULT;
+}
+
+/**
+ * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest
+ * @vcpu:	the vcpu pointer
+ * @run:	the kvm_run structure pointer
+ *
+ * Simply sets the wait_for_interrupts flag on the vcpu structure, which will
+ * halt execution of world-switches and schedule other host processes until
+ * there is an incoming IRQ or FIQ to the VM.
+ */
+static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	trace_kvm_wfi(*vcpu_pc(vcpu));
+	kvm_vcpu_block(vcpu);
+	return 1;
+}
+
+static exit_handle_fn arm_exit_handlers[] = {
+	[HSR_EC_WFI]		= kvm_handle_wfi,
+	[HSR_EC_CP15_32]	= kvm_handle_cp15_32,
+	[HSR_EC_CP15_64]	= kvm_handle_cp15_64,
+	[HSR_EC_CP14_MR]	= kvm_handle_cp14_access,
+	[HSR_EC_CP14_LS]	= kvm_handle_cp14_load_store,
+	[HSR_EC_CP14_64]	= kvm_handle_cp14_access,
+	[HSR_EC_CP_0_13]	= kvm_handle_cp_0_13_access,
+	[HSR_EC_CP10_ID]	= kvm_handle_cp10_id,
+	[HSR_EC_SVC_HYP]	= handle_svc_hyp,
+	[HSR_EC_HVC]		= handle_hvc,
+	[HSR_EC_SMC]		= handle_smc,
+	[HSR_EC_IABT]		= kvm_handle_guest_abort,
+	[HSR_EC_IABT_HYP]	= handle_pabt_hyp,
+	[HSR_EC_DABT]		= kvm_handle_guest_abort,
+	[HSR_EC_DABT_HYP]	= handle_dabt_hyp,
+};
+
+static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
+{
+	u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
+
+	if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
+	    !arm_exit_handlers[hsr_ec]) {
+		kvm_err("Unkown exception class: hsr: %#08x\n",
+			(unsigned int)kvm_vcpu_get_hsr(vcpu));
+		BUG();
+	}
+
+	return arm_exit_handlers[hsr_ec];
+}
+
+/*
+ * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
+ * proper exit to userspace.
+ */
+int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
+		       int exception_index)
+{
+	exit_handle_fn exit_handler;
+
+	switch (exception_index) {
+	case ARM_EXCEPTION_IRQ:
+		return 1;
+	case ARM_EXCEPTION_UNDEFINED:
+		kvm_err("Undefined exception in Hyp mode at: %#08lx\n",
+			kvm_vcpu_get_hyp_pc(vcpu));
+		BUG();
+		panic("KVM: Hypervisor undefined exception!\n");
+	case ARM_EXCEPTION_DATA_ABORT:
+	case ARM_EXCEPTION_PREF_ABORT:
+	case ARM_EXCEPTION_HVC:
+		/*
+		 * See ARM ARM B1.14.1: "Hyp traps on instructions
+		 * that fail their condition code check"
+		 */
+		if (!kvm_condition_valid(vcpu)) {
+			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+			return 1;
+		}
+
+		exit_handler = kvm_get_exit_handler(vcpu);
+
+		return exit_handler(vcpu, run);
+	default:
+		kvm_pr_unimpl("Unsupported exception type: %d",
+			      exception_index);
+		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		return 0;
+	}
+}
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 8ca87ab0919d..f7793df62f58 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -35,15 +35,18 @@ __kvm_hyp_code_start:
 /********************************************************************
  * Flush per-VMID TLBs
  *
- * void __kvm_tlb_flush_vmid(struct kvm *kvm);
+ * void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
  *
  * We rely on the hardware to broadcast the TLB invalidation to all CPUs
  * inside the inner-shareable domain (which is the case for all v7
  * implementations).  If we come across a non-IS SMP implementation, we'll
  * have to use an IPI based mechanism. Until then, we stick to the simple
  * hardware assisted version.
+ *
+ * As v7 does not support flushing per IPA, just nuke the whole TLB
+ * instead, ignoring the ipa value.
  */
-ENTRY(__kvm_tlb_flush_vmid)
+ENTRY(__kvm_tlb_flush_vmid_ipa)
 	push	{r2, r3}
 
 	add	r0, r0, #KVM_VTTBR
@@ -60,7 +63,7 @@ ENTRY(__kvm_tlb_flush_vmid)
 
 	pop	{r2, r3}
 	bx	lr
-ENDPROC(__kvm_tlb_flush_vmid)
+ENDPROC(__kvm_tlb_flush_vmid_ipa)
 
 /********************************************************************
  * Flush TLBs and instruction caches of all CPUs inside the inner-shareable
@@ -235,9 +238,9 @@ ENTRY(kvm_call_hyp)
  * instruction is issued since all traps are disabled when running the host
  * kernel as per the Hyp-mode initialization at boot time.
  *
- * HVC instructions cause a trap to the vector page + offset 0x18 (see hyp_hvc
+ * HVC instructions cause a trap to the vector page + offset 0x14 (see hyp_hvc
  * below) when the HVC instruction is called from SVC mode (i.e. a guest or the
- * host kernel) and they cause a trap to the vector page + offset 0xc when HVC
+ * host kernel) and they cause a trap to the vector page + offset 0x8 when HVC
  * instructions are called from within Hyp-mode.
  *
  * Hyp-ABI: Calling HYP-mode functions from host (in SVC mode):
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 98a870ff1a5c..72a12f2171b2 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -33,16 +33,16 @@
  */
 int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	__u32 *dest;
+	unsigned long *dest;
 	unsigned int len;
 	int mask;
 
 	if (!run->mmio.is_write) {
 		dest = vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt);
-		memset(dest, 0, sizeof(int));
+		*dest = 0;
 
 		len = run->mmio.len;
-		if (len > 4)
+		if (len > sizeof(unsigned long))
 			return -EINVAL;
 
 		memcpy(dest, run->mmio.data, len);
@@ -50,7 +50,8 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
 				*((u64 *)run->mmio.data));
 
-		if (vcpu->arch.mmio_decode.sign_extend && len < 4) {
+		if (vcpu->arch.mmio_decode.sign_extend &&
+		    len < sizeof(unsigned long)) {
 			mask = 1U << ((len * 8) - 1);
 			*dest = (*dest ^ mask) - mask;
 		}
@@ -65,40 +66,29 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	unsigned long rt, len;
 	bool is_write, sign_extend;
 
-	if ((vcpu->arch.hsr >> 8) & 1) {
+	if (kvm_vcpu_dabt_isextabt(vcpu)) {
 		/* cache operation on I/O addr, tell guest unsupported */
-		kvm_inject_dabt(vcpu, vcpu->arch.hxfar);
+		kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
 		return 1;
 	}
 
-	if ((vcpu->arch.hsr >> 7) & 1) {
+	if (kvm_vcpu_dabt_iss1tw(vcpu)) {
 		/* page table accesses IO mem: tell guest to fix its TTBR */
-		kvm_inject_dabt(vcpu, vcpu->arch.hxfar);
+		kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
 		return 1;
 	}
 
-	switch ((vcpu->arch.hsr >> 22) & 0x3) {
-	case 0:
-		len = 1;
-		break;
-	case 1:
-		len = 2;
-		break;
-	case 2:
-		len = 4;
-		break;
-	default:
-		kvm_err("Hardware is weird: SAS 0b11 is reserved\n");
-		return -EFAULT;
-	}
+	len = kvm_vcpu_dabt_get_as(vcpu);
+	if (unlikely(len < 0))
+		return len;
 
-	is_write = vcpu->arch.hsr & HSR_WNR;
-	sign_extend = vcpu->arch.hsr & HSR_SSE;
-	rt = (vcpu->arch.hsr & HSR_SRT_MASK) >> HSR_SRT_SHIFT;
+	is_write = kvm_vcpu_dabt_iswrite(vcpu);
+	sign_extend = kvm_vcpu_dabt_issext(vcpu);
+	rt = kvm_vcpu_dabt_get_rd(vcpu);
 
 	if (kvm_vcpu_reg_is_pc(vcpu, rt)) {
 		/* IO memory trying to read/write pc */
-		kvm_inject_pabt(vcpu, vcpu->arch.hxfar);
+		kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
 		return 1;
 	}
 
@@ -112,7 +102,7 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	 * The MMIO instruction is emulated and should not be re-executed
 	 * in the guest.
 	 */
-	kvm_skip_instr(vcpu, (vcpu->arch.hsr >> 25) & 1);
+	kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
 	return 0;
 }
 
@@ -130,7 +120,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 	 * space do its magic.
 	 */
 
-	if (vcpu->arch.hsr & HSR_ISV) {
+	if (kvm_vcpu_dabt_isvalid(vcpu)) {
 		ret = decode_hsr(vcpu, fault_ipa, &mmio);
 		if (ret)
 			return ret;
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 99e07c7dd745..2f12e4056408 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -20,7 +20,6 @@
 #include <linux/kvm_host.h>
 #include <linux/io.h>
 #include <trace/events/kvm.h>
-#include <asm/idmap.h>
 #include <asm/pgalloc.h>
 #include <asm/cacheflush.h>
 #include <asm/kvm_arm.h>
@@ -28,8 +27,6 @@
 #include <asm/kvm_mmio.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_emulate.h>
-#include <asm/mach/map.h>
-#include <trace/events/kvm.h>
 
 #include "trace.h"
 
@@ -37,19 +34,9 @@ extern char  __hyp_idmap_text_start[], __hyp_idmap_text_end[];
 
 static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
 
-static void kvm_tlb_flush_vmid(struct kvm *kvm)
+static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 {
-	kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
-}
-
-static void kvm_set_pte(pte_t *pte, pte_t new_pte)
-{
-	pte_val(*pte) = new_pte;
-	/*
-	 * flush_pmd_entry just takes a void pointer and cleans the necessary
-	 * cache entries, so we can reuse the function for ptes.
-	 */
-	flush_pmd_entry(pte);
+	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
 }
 
 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
@@ -98,33 +85,42 @@ static void free_ptes(pmd_t *pmd, unsigned long addr)
 	}
 }
 
+static void free_hyp_pgd_entry(unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	unsigned long hyp_addr = KERN_TO_HYP(addr);
+
+	pgd = hyp_pgd + pgd_index(hyp_addr);
+	pud = pud_offset(pgd, hyp_addr);
+
+	if (pud_none(*pud))
+		return;
+	BUG_ON(pud_bad(*pud));
+
+	pmd = pmd_offset(pud, hyp_addr);
+	free_ptes(pmd, addr);
+	pmd_free(NULL, pmd);
+	pud_clear(pud);
+}
+
 /**
  * free_hyp_pmds - free a Hyp-mode level-2 tables and child level-3 tables
  *
  * Assumes this is a page table used strictly in Hyp-mode and therefore contains
- * only mappings in the kernel memory area, which is above PAGE_OFFSET.
+ * either mappings in the kernel memory area (above PAGE_OFFSET), or
+ * device mappings in the vmalloc range (from VMALLOC_START to VMALLOC_END).
  */
 void free_hyp_pmds(void)
 {
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
 	unsigned long addr;
 
 	mutex_lock(&kvm_hyp_pgd_mutex);
-	for (addr = PAGE_OFFSET; addr != 0; addr += PGDIR_SIZE) {
-		pgd = hyp_pgd + pgd_index(addr);
-		pud = pud_offset(pgd, addr);
-
-		if (pud_none(*pud))
-			continue;
-		BUG_ON(pud_bad(*pud));
-
-		pmd = pmd_offset(pud, addr);
-		free_ptes(pmd, addr);
-		pmd_free(NULL, pmd);
-		pud_clear(pud);
-	}
+	for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
+		free_hyp_pgd_entry(addr);
+	for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
+		free_hyp_pgd_entry(addr);
 	mutex_unlock(&kvm_hyp_pgd_mutex);
 }
 
@@ -136,7 +132,9 @@ static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
 	struct page *page;
 
 	for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
-		pte = pte_offset_kernel(pmd, addr);
+		unsigned long hyp_addr = KERN_TO_HYP(addr);
+
+		pte = pte_offset_kernel(pmd, hyp_addr);
 		BUG_ON(!virt_addr_valid(addr));
 		page = virt_to_page(addr);
 		kvm_set_pte(pte, mk_pte(page, PAGE_HYP));
@@ -151,7 +149,9 @@ static void create_hyp_io_pte_mappings(pmd_t *pmd, unsigned long start,
 	unsigned long addr;
 
 	for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
-		pte = pte_offset_kernel(pmd, addr);
+		unsigned long hyp_addr = KERN_TO_HYP(addr);
+
+		pte = pte_offset_kernel(pmd, hyp_addr);
 		BUG_ON(pfn_valid(*pfn_base));
 		kvm_set_pte(pte, pfn_pte(*pfn_base, PAGE_HYP_DEVICE));
 		(*pfn_base)++;
@@ -166,12 +166,13 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
 	unsigned long addr, next;
 
 	for (addr = start; addr < end; addr = next) {
-		pmd = pmd_offset(pud, addr);
+		unsigned long hyp_addr = KERN_TO_HYP(addr);
+		pmd = pmd_offset(pud, hyp_addr);
 
 		BUG_ON(pmd_sect(*pmd));
 
 		if (pmd_none(*pmd)) {
-			pte = pte_alloc_one_kernel(NULL, addr);
+			pte = pte_alloc_one_kernel(NULL, hyp_addr);
 			if (!pte) {
 				kvm_err("Cannot allocate Hyp pte\n");
 				return -ENOMEM;
@@ -206,17 +207,23 @@ static int __create_hyp_mappings(void *from, void *to, unsigned long *pfn_base)
 	unsigned long addr, next;
 	int err = 0;
 
-	BUG_ON(start > end);
-	if (start < PAGE_OFFSET)
+	if (start >= end)
+		return -EINVAL;
+	/* Check for a valid kernel memory mapping */
+	if (!pfn_base && (!virt_addr_valid(from) || !virt_addr_valid(to - 1)))
+		return -EINVAL;
+	/* Check for a valid kernel IO mapping */
+	if (pfn_base && (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1)))
 		return -EINVAL;
 
 	mutex_lock(&kvm_hyp_pgd_mutex);
 	for (addr = start; addr < end; addr = next) {
-		pgd = hyp_pgd + pgd_index(addr);
-		pud = pud_offset(pgd, addr);
+		unsigned long hyp_addr = KERN_TO_HYP(addr);
+		pgd = hyp_pgd + pgd_index(hyp_addr);
+		pud = pud_offset(pgd, hyp_addr);
 
 		if (pud_none_or_clear_bad(pud)) {
-			pmd = pmd_alloc_one(NULL, addr);
+			pmd = pmd_alloc_one(NULL, hyp_addr);
 			if (!pmd) {
 				kvm_err("Cannot allocate Hyp pmd\n");
 				err = -ENOMEM;
@@ -236,12 +243,13 @@ out:
 }
 
 /**
- * create_hyp_mappings - map a kernel virtual address range in Hyp mode
+ * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
  * @from:	The virtual kernel start address of the range
  * @to:		The virtual kernel end address of the range (exclusive)
  *
- * The same virtual address as the kernel virtual address is also used in
- * Hyp-mode mapping to the same underlying physical pages.
+ * The same virtual address as the kernel virtual address is also used
+ * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying
+ * physical pages.
  *
  * Note: Wrapping around zero in the "to" address is not supported.
  */
@@ -251,10 +259,13 @@ int create_hyp_mappings(void *from, void *to)
 }
 
 /**
- * create_hyp_io_mappings - map a physical IO range in Hyp mode
- * @from:	The virtual HYP start address of the range
- * @to:		The virtual HYP end address of the range (exclusive)
+ * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode
+ * @from:	The kernel start VA of the range
+ * @to:		The kernel end VA of the range (exclusive)
  * @addr:	The physical start address which gets mapped
+ *
+ * The resulting HYP VA is the same as the kernel VA, modulo
+ * HYP_PAGE_OFFSET.
  */
 int create_hyp_io_mappings(void *from, void *to, phys_addr_t addr)
 {
@@ -290,7 +301,7 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
 	VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1));
 
 	memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
-	clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t));
+	kvm_clean_pgd(pgd);
 	kvm->arch.pgd = pgd;
 
 	return 0;
@@ -422,22 +433,22 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 			return 0; /* ignore calls from kvm_set_spte_hva */
 		pmd = mmu_memory_cache_alloc(cache);
 		pud_populate(NULL, pud, pmd);
-		pmd += pmd_index(addr);
 		get_page(virt_to_page(pud));
-	} else
-		pmd = pmd_offset(pud, addr);
+	}
+
+	pmd = pmd_offset(pud, addr);
 
 	/* Create 2nd stage page table mapping - Level 2 */
 	if (pmd_none(*pmd)) {
 		if (!cache)
 			return 0; /* ignore calls from kvm_set_spte_hva */
 		pte = mmu_memory_cache_alloc(cache);
-		clean_pte_table(pte);
+		kvm_clean_pte(pte);
 		pmd_populate_kernel(NULL, pmd, pte);
-		pte += pte_index(addr);
 		get_page(virt_to_page(pmd));
-	} else
-		pte = pte_offset_kernel(pmd, addr);
+	}
+
+	pte = pte_offset_kernel(pmd, addr);
 
 	if (iomap && pte_present(*pte))
 		return -EFAULT;
@@ -446,7 +457,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 	old_pte = *pte;
 	kvm_set_pte(pte, *new_pte);
 	if (pte_present(old_pte))
-		kvm_tlb_flush_vmid(kvm);
+		kvm_tlb_flush_vmid_ipa(kvm, addr);
 	else
 		get_page(virt_to_page(pte));
 
@@ -473,7 +484,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 	pfn = __phys_to_pfn(pa);
 
 	for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
-		pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE | L_PTE_S2_RDWR);
+		pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
+		kvm_set_s2pte_writable(&pte);
 
 		ret = mmu_topup_memory_cache(&cache, 2, 2);
 		if (ret)
@@ -492,29 +504,6 @@ out:
 	return ret;
 }
 
-static void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
-{
-	/*
-	 * If we are going to insert an instruction page and the icache is
-	 * either VIPT or PIPT, there is a potential problem where the host
-	 * (or another VM) may have used the same page as this guest, and we
-	 * read incorrect data from the icache.  If we're using a PIPT cache,
-	 * we can invalidate just that page, but if we are using a VIPT cache
-	 * we need to invalidate the entire icache - damn shame - as written
-	 * in the ARM ARM (DDI 0406C.b - Page B3-1393).
-	 *
-	 * VIVT caches are tagged using both the ASID and the VMID and doesn't
-	 * need any kind of flushing (DDI 0406C.b - Page B3-1392).
-	 */
-	if (icache_is_pipt()) {
-		unsigned long hva = gfn_to_hva(kvm, gfn);
-		__cpuc_coherent_user_range(hva, hva + PAGE_SIZE);
-	} else if (!icache_is_vivt_asid_tagged()) {
-		/* any kind of VIPT cache */
-		__flush_icache_all();
-	}
-}
-
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  gfn_t gfn, struct kvm_memory_slot *memslot,
 			  unsigned long fault_status)
@@ -526,7 +515,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	unsigned long mmu_seq;
 	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
 
-	write_fault = kvm_is_write_fault(vcpu->arch.hsr);
+	write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
 	if (fault_status == FSC_PERM && !write_fault) {
 		kvm_err("Unexpected L2 read permission error\n");
 		return -EFAULT;
@@ -560,7 +549,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
 		goto out_unlock;
 	if (writable) {
-		pte_val(new_pte) |= L_PTE_S2_RDWR;
+		kvm_set_s2pte_writable(&new_pte);
 		kvm_set_pfn_dirty(pfn);
 	}
 	stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false);
@@ -585,7 +574,6 @@ out_unlock:
  */
 int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	unsigned long hsr_ec;
 	unsigned long fault_status;
 	phys_addr_t fault_ipa;
 	struct kvm_memory_slot *memslot;
@@ -593,18 +581,17 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	gfn_t gfn;
 	int ret, idx;
 
-	hsr_ec = vcpu->arch.hsr >> HSR_EC_SHIFT;
-	is_iabt = (hsr_ec == HSR_EC_IABT);
-	fault_ipa = ((phys_addr_t)vcpu->arch.hpfar & HPFAR_MASK) << 8;
+	is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
+	fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
 
-	trace_kvm_guest_fault(*vcpu_pc(vcpu), vcpu->arch.hsr,
-			      vcpu->arch.hxfar, fault_ipa);
+	trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
+			      kvm_vcpu_get_hfar(vcpu), fault_ipa);
 
 	/* Check the stage-2 fault is trans. fault or write fault */
-	fault_status = (vcpu->arch.hsr & HSR_FSC_TYPE);
+	fault_status = kvm_vcpu_trap_get_fault(vcpu);
 	if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
-		kvm_err("Unsupported fault status: EC=%#lx DFCS=%#lx\n",
-			hsr_ec, fault_status);
+		kvm_err("Unsupported fault status: EC=%#x DFCS=%#lx\n",
+			kvm_vcpu_trap_get_class(vcpu), fault_status);
 		return -EFAULT;
 	}
 
@@ -614,7 +601,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
 		if (is_iabt) {
 			/* Prefetch Abort on I/O address */
-			kvm_inject_pabt(vcpu, vcpu->arch.hxfar);
+			kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
 			ret = 1;
 			goto out_unlock;
 		}
@@ -626,8 +613,13 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 			goto out_unlock;
 		}
 
-		/* Adjust page offset */
-		fault_ipa |= vcpu->arch.hxfar & ~PAGE_MASK;
+		/*
+		 * The IPA is reported as [MAX:12], so we need to
+		 * complement it with the bottom 12 bits from the
+		 * faulting VA. This is always 12 bits, irrespective
+		 * of the page size.
+		 */
+		fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
 		ret = io_mem_abort(vcpu, run, fault_ipa);
 		goto out_unlock;
 	}
@@ -682,7 +674,7 @@ static void handle_hva_to_gpa(struct kvm *kvm,
 static void kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
 {
 	unmap_stage2_range(kvm, gpa, PAGE_SIZE);
-	kvm_tlb_flush_vmid(kvm);
+	kvm_tlb_flush_vmid_ipa(kvm, gpa);
 }
 
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
@@ -776,7 +768,7 @@ void kvm_clear_hyp_idmap(void)
 		pmd = pmd_offset(pud, addr);
 
 		pud_clear(pud);
-		clean_pmd_entry(pmd);
+		kvm_clean_pmd_entry(pmd);
 		pmd_free(NULL, (pmd_t *)((unsigned long)pmd & PAGE_MASK));
 	} while (pgd++, addr = next, addr < end);
 }
diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
index 0e4cfe123b38..17c5ac7d10ed 100644
--- a/arch/arm/kvm/vgic.c
+++ b/arch/arm/kvm/vgic.c
@@ -1477,7 +1477,7 @@ int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr)
 	if (addr & ~KVM_PHYS_MASK)
 		return -E2BIG;
 
-	if (addr & ~PAGE_MASK)
+	if (addr & (SZ_4K - 1))
 		return -EINVAL;
 
 	mutex_lock(&kvm->lock);
diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c
index 8a68f1ec66b9..577298ed5a44 100644
--- a/arch/arm/mach-omap2/id.c
+++ b/arch/arm/mach-omap2/id.c
@@ -300,7 +300,7 @@ void __init omap3xxx_check_revision(void)
 	 * If the processor type is Cortex-A8 and the revision is 0x0
 	 * it means its Cortex r0p0 which is 3430 ES1.0.
 	 */
-	cpuid = read_cpuid(CPUID_ID);
+	cpuid = read_cpuid_id();
 	if ((((cpuid >> 4) & 0xfff) == 0xc08) && ((cpuid & 0xf) == 0x0)) {
 		omap_revision = OMAP3430_REV_ES1_0;
 		cpu_rev = "1.0";
@@ -460,7 +460,7 @@ void __init omap4xxx_check_revision(void)
 	 * Use ARM register to detect the correct ES version
 	 */
 	if (!rev && (hawkeye != 0xb94e) && (hawkeye != 0xb975)) {
-		idcode = read_cpuid(CPUID_ID);
+		idcode = read_cpuid_id();
 		rev = (idcode & 0xf) - 1;
 	}
 
diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c
index e7a449758ab5..f0897732a962 100644
--- a/arch/arm/mach-omap2/omap-smp.c
+++ b/arch/arm/mach-omap2/omap-smp.c
@@ -202,7 +202,7 @@ static void __init omap4_smp_init_cpus(void)
 	unsigned int i = 0, ncores = 1, cpu_id;
 
 	/* Use ARM cpuid check here, as SoC detection will not work so early */
-	cpu_id = read_cpuid(CPUID_ID) & CPU_MASK;
+	cpu_id = read_cpuid_id() & CPU_MASK;
 	if (cpu_id == CPU_CORTEX_A9) {
 		/*
 		 * Currently we can't call ioremap here because
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 4045c4931a30..35955b54944c 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -397,6 +397,13 @@ config CPU_V7
 	select CPU_PABRT_V7
 	select CPU_TLB_V7 if MMU
 
+config CPU_THUMBONLY
+	bool
+	# There are no CPUs available with MMU that don't implement an ARM ISA:
+	depends on !MMU
+	help
+	  Select this if your CPU doesn't support the 32 bit ARM instructions.
+
 # Figure out what processor architecture version we should be using.
 # This defines the compiler instruction set which depends on the machine type.
 config CPU_32v3
@@ -605,7 +612,7 @@ config ARCH_DMA_ADDR_T_64BIT
 	bool
 
 config ARM_THUMB
-	bool "Support Thumb user binaries"
+	bool "Support Thumb user binaries" if !CPU_THUMBONLY
 	depends on CPU_ARM720T || CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_V6 || CPU_V6K || CPU_V7 || CPU_FEROCEON
 	default y
 	help
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index db26e2e543f4..6f4585b89078 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -961,12 +961,14 @@ static int __init alignment_init(void)
 		return -ENOMEM;
 #endif
 
+#ifdef CONFIG_CPU_CP15
 	if (cpu_is_v6_unaligned()) {
 		cr_alignment &= ~CR_A;
 		cr_no_alignment &= ~CR_A;
 		set_cr(cr_alignment);
 		ai_usermode = safe_usermode(ai_usermode, false);
 	}
+#endif
 
 	hook_fault_code(FAULT_CODE_ALIGNMENT, do_alignment, SIGBUS, BUS_ADRALN,
 			"alignment exception");
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index a84ff763ac39..e0d8565671a6 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -113,6 +113,7 @@ static struct cachepolicy cache_policies[] __initdata = {
 	}
 };
 
+#ifdef CONFIG_CPU_CP15
 /*
  * These are useful for identifying cache coherency
  * problems by allowing the cache or the cache and
@@ -211,6 +212,22 @@ void adjust_cr(unsigned long mask, unsigned long set)
 }
 #endif
 
+#else /* ifdef CONFIG_CPU_CP15 */
+
+static int __init early_cachepolicy(char *p)
+{
+	pr_warning("cachepolicy kernel parameter not supported without cp15\n");
+}
+early_param("cachepolicy", early_cachepolicy);
+
+static int __init noalign_setup(char *__unused)
+{
+	pr_warning("noalign kernel parameter not supported without cp15\n");
+}
+__setup("noalign", noalign_setup);
+
+#endif /* ifdef CONFIG_CPU_CP15 / else */
+
 #define PROT_PTE_DEVICE		L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_XN
 #define PROT_SECT_DEVICE	PMD_TYPE_SECT|PMD_SECT_AP_WRITE
 
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index e2931de32e64..5533a88c96aa 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -253,10 +253,9 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
 	if (cpu >= NR_GIC_CPU_IF || cpu >= nr_cpu_ids)
 		return -EINVAL;
 
+	raw_spin_lock(&irq_controller_lock);
 	mask = 0xff << shift;
 	bit = gic_cpu_map[cpu] << shift;
-
-	raw_spin_lock(&irq_controller_lock);
 	val = readl_relaxed(reg) & ~mask;
 	writel_relaxed(val | bit, reg);
 	raw_spin_unlock(&irq_controller_lock);
@@ -652,7 +651,9 @@ static void __init gic_pm_init(struct gic_chip_data *gic)
 void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
 {
 	int cpu;
-	unsigned long map = 0;
+	unsigned long flags, map = 0;
+
+	raw_spin_lock_irqsave(&irq_controller_lock, flags);
 
 	/* Convert our logical CPU mask into a physical one. */
 	for_each_cpu(cpu, mask)
@@ -666,9 +667,145 @@ void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
 
 	/* this always happens on GIC0 */
 	writel_relaxed(map << 16 | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT);
+
+	raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
 }
 #endif
 
+#ifdef CONFIG_BL_SWITCHER
+/*
+ * gic_send_sgi - send a SGI directly to given CPU interface number
+ *
+ * cpu_id: the ID for the destination CPU interface
+ * irq: the IPI number to send a SGI for
+ */
+void gic_send_sgi(unsigned int cpu_id, unsigned int irq)
+{
+	BUG_ON(cpu_id >= NR_GIC_CPU_IF);
+	cpu_id = 1 << cpu_id;
+	/* this always happens on GIC0 */
+	writel_relaxed((cpu_id << 16) | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT);
+}
+
+/*
+ * gic_get_cpu_id - get the CPU interface ID for the specified CPU
+ *
+ * @cpu: the logical CPU number to get the GIC ID for.
+ *
+ * Return the CPU interface ID for the given logical CPU number,
+ * or -1 if the CPU number is too large or the interface ID is
+ * unknown (more than one bit set).
+ */
+int gic_get_cpu_id(unsigned int cpu)
+{
+	unsigned int cpu_bit;
+
+	if (cpu >= NR_GIC_CPU_IF)
+		return -1;
+	cpu_bit = gic_cpu_map[cpu];
+	if (cpu_bit & (cpu_bit - 1))
+	       return -1;
+	return __ffs(cpu_bit);
+}
+
+/*
+ * gic_migrate_target - migrate IRQs to another PU interface
+ *
+ * @new_cpu_id: the CPU target ID to migrate IRQs to
+ *
+ * Migrate all peripheral interrupts with a target matching the current CPU
+ * to the interface corresponding to @new_cpu_id.  The CPU interface mapping
+ * is also updated.  Targets to other CPU interfaces are unchanged.
+ * This must be called with IRQs locally disabled.
+ */
+void gic_migrate_target(unsigned int new_cpu_id)
+{
+	unsigned int old_cpu_id, gic_irqs, gic_nr = 0;
+	void __iomem *dist_base;
+	int i, ror_val, cpu = smp_processor_id();
+	u32 val, old_mask, active_mask;
+
+	if (gic_nr >= MAX_GIC_NR)
+		BUG();
+
+	dist_base = gic_data_dist_base(&gic_data[gic_nr]);
+	if (!dist_base)
+		return;
+	gic_irqs = gic_data[gic_nr].gic_irqs;
+
+	old_cpu_id = __ffs(gic_cpu_map[cpu]);
+	old_mask = 0x01010101 << old_cpu_id;
+	ror_val = (old_cpu_id - new_cpu_id) & 31;
+
+	raw_spin_lock(&irq_controller_lock);
+
+	gic_cpu_map[cpu] = 1 << new_cpu_id;
+
+	for (i = 8; i < DIV_ROUND_UP(gic_irqs, 4); i++) {
+		val = readl_relaxed(dist_base + GIC_DIST_TARGET + i * 4);
+		active_mask = val & old_mask;
+		if (active_mask) {
+			val &= ~active_mask;
+			val |= ror32(active_mask, ror_val);
+			writel_relaxed(val, dist_base + GIC_DIST_TARGET + i * 4);
+		}
+	}
+
+	raw_spin_unlock(&irq_controller_lock);
+
+	/*
+	 * Now let's migrate and clear any potential SGIs that might be
+	 * pending for us (old_cpu_id).  Since GIC_DIST_SGI_PENDING_SET
+	 * is a banked register, we can only forward the SGI using
+	 * GIC_DIST_SOFTINT.  The original SGI source is lost but Linux
+	 * doesn't use that information anyway.
+	 *
+	 * For the same reason we do not adjust SGI source information
+	 * for previously sent SGIs by us to other CPUs either.
+	 */
+	for (i = 0; i < 16; i += 4) {
+		int j;
+		val = readl_relaxed(dist_base + GIC_DIST_SGI_PENDING_SET + i);
+		if (!val)
+			continue;
+		writel_relaxed(val, dist_base + GIC_DIST_SGI_PENDING_CLEAR + i);
+		for (j = i; j < i + 4; j++) {
+			if (val & 0xff)
+				writel_relaxed((1 << (new_cpu_id + 16)) | j,
+						dist_base + GIC_DIST_SOFTINT);
+			val >>= 8;
+		}
+	}
+}
+
+/*
+ * gic_get_sgir_physaddr - get the physical address for the SGI register
+ *
+ * REturn the physical address of the SGI register to be used
+ * by some early assembly code when the kernel is not yet available.
+ */
+static unsigned long gic_dist_physaddr;
+
+unsigned long gic_get_sgir_physaddr(void)
+{
+	if (!gic_dist_physaddr)
+		return 0;
+	return gic_dist_physaddr + GIC_DIST_SOFTINT;
+}
+
+void __init gic_init_physaddr(struct device_node *node)
+{
+	struct resource res;
+	if (of_address_to_resource(node, 0, &res) == 0) {
+		gic_dist_physaddr = res.start;
+		pr_info("GIC physical location is %#lx\n", gic_dist_physaddr);
+	}
+}
+
+#else
+#define gic_init_physaddr(node)  do { } while(0)
+#endif
+
 static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
 				irq_hw_number_t hw)
 {
@@ -850,6 +987,8 @@ int __init gic_of_init(struct device_node *node, struct device_node *parent)
 		percpu_offset = 0;
 
 	gic_init_bases(gic_cnt, -1, dist_base, cpu_base, percpu_offset, node);
+	if (!gic_cnt)
+		gic_init_physaddr(node);
 
 	if (parent) {
 		irq = irq_of_parse_and_map(node, 0);
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index b5696108c06e..40643ca79cd9 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -31,6 +31,8 @@
 #define GIC_DIST_TARGET			0x800
 #define GIC_DIST_CONFIG			0xc00
 #define GIC_DIST_SOFTINT		0xf00
+#define GIC_DIST_SGI_PENDING_CLEAR	0xf10
+#define GIC_DIST_SGI_PENDING_SET	0xf20
 
 #define GICH_HCR			0x0
 #define GICH_VTR			0x4
@@ -75,6 +77,11 @@ static inline void gic_init(unsigned int nr, int start,
 	gic_init_bases(nr, start, dist, cpu, 0, NULL);
 }
 
+void gic_send_sgi(unsigned int cpu_id, unsigned int irq);
+int gic_get_cpu_id(unsigned int cpu);
+void gic_migrate_target(unsigned int new_cpu_id);
+unsigned long gic_get_sgir_physaddr(void);
+
 #endif /* __ASSEMBLY */
 
 #endif
diff --git a/include/trace/events/power_cpu_migrate.h b/include/trace/events/power_cpu_migrate.h
new file mode 100644
index 000000000000..f76dd4de625e
--- /dev/null
+++ b/include/trace/events/power_cpu_migrate.h
@@ -0,0 +1,67 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM power
+
+#if !defined(_TRACE_POWER_CPU_MIGRATE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_POWER_CPU_MIGRATE_H
+
+#include <linux/tracepoint.h>
+
+#define __cpu_migrate_proto			\
+	TP_PROTO(u64 timestamp,			\
+		 u32 cpu_hwid)
+#define __cpu_migrate_args			\
+	TP_ARGS(timestamp,			\
+		cpu_hwid)
+
+DECLARE_EVENT_CLASS(cpu_migrate,
+
+	__cpu_migrate_proto,
+	__cpu_migrate_args,
+
+	TP_STRUCT__entry(
+		__field(u64,	timestamp		)
+		__field(u32,	cpu_hwid		)
+	),
+
+	TP_fast_assign(
+		__entry->timestamp = timestamp;
+		__entry->cpu_hwid = cpu_hwid;
+	),
+
+	TP_printk("timestamp=%llu cpu_hwid=0x%08lX",
+		(unsigned long long)__entry->timestamp,
+		(unsigned long)__entry->cpu_hwid
+	)
+);
+
+#define __define_cpu_migrate_event(name)		\
+	DEFINE_EVENT(cpu_migrate, cpu_migrate_##name,	\
+		__cpu_migrate_proto,			\
+		__cpu_migrate_args			\
+	)
+
+__define_cpu_migrate_event(begin);
+__define_cpu_migrate_event(finish);
+__define_cpu_migrate_event(current);
+
+#undef __define_cpu_migrate
+#undef __cpu_migrate_proto
+#undef __cpu_migrate_args
+
+/* This file can get included multiple times, TRACE_HEADER_MULTI_READ at top */
+#ifndef _PWR_CPU_MIGRATE_EVENT_AVOID_DOUBLE_DEFINING
+#define _PWR_CPU_MIGRATE_EVENT_AVOID_DOUBLE_DEFINING
+
+/*
+ * Set from_phys_cpu and to_phys_cpu to CPU_MIGRATE_ALL_CPUS to indicate
+ * a whole-cluster migration:
+ */
+#define CPU_MIGRATE_ALL_CPUS 0x80000000U
+#endif
+
+#endif /* _TRACE_POWER_CPU_MIGRATE_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE power_cpu_migrate
+#include <trace/define_trace.h>
author	Mark Brown <broonie@sirena.org.uk>	2013-06-14 12:08:31 +0100
committer	Mark Brown <broonie@sirena.org.uk>	2013-06-14 12:08:31 +0100
commit	d3c2ad71e5f3fcffa55ba9eeb8121b3f179ae249 (patch)
tree	27229267b3816cb00660fa5a7c57604de8c2a3e5
parent	bdc04bac744054924dd5017e17fbcfdd87fda292 (diff)
parent	2be12549c900303c418fcbb7044979e190fd4e7e (diff)