author     Jon Medhurst <tixy@linaro.org>   2013-06-05 18:35:30 +0100
committer  Jon Medhurst <tixy@linaro.org>   2013-06-05 18:35:30 +0100
commit     d3a081b36adda538a205c9a5c7b5a0e5fb675f1a (patch)
tree       e74f2110baf000b95665c5afb4fcb5867ffab0ab
parent     c9e0e6097b3c9ddb10f9dc02169b872f369e9100 (diff)
parent     bc3ea4a1aa2e2e15ce54597359d2f055e49be411 (diff)

Merge branch 'IKS-rebase' into integration-linaro-vexpress (tracking-integration-linaro-vexpress-ll-20130605.0)
-rw-r--r--  arch/arm/Kconfig | 18
-rw-r--r--  arch/arm/common/Makefile | 2
-rw-r--r--  arch/arm/common/bL_switcher.c | 826
-rw-r--r--  arch/arm/common/bL_switcher_dummy_if.c | 71
-rw-r--r--  arch/arm/common/mcpm_entry.c | 12
-rw-r--r--  arch/arm/common/mcpm_head.S | 16
-rw-r--r--  arch/arm/include/asm/bL_switcher.h | 77
-rw-r--r--  arch/arm/include/asm/hardirq.h | 2
-rw-r--r--  arch/arm/include/asm/mcpm.h | 8
-rw-r--r--  arch/arm/include/asm/pmu.h | 27
-rw-r--r--  arch/arm/include/asm/smp.h | 2
-rw-r--r--  arch/arm/kernel/perf_event.c | 18
-rw-r--r--  arch/arm/kernel/perf_event_cpu.c | 335
-rw-r--r--  arch/arm/kernel/perf_event_v6.c | 12
-rw-r--r--  arch/arm/kernel/perf_event_v7.c | 466
-rw-r--r--  arch/arm/kernel/perf_event_xscale.c | 20
-rw-r--r--  arch/arm/kernel/smp.c | 21
-rw-r--r--  drivers/bus/arm-cci.c | 13
-rw-r--r--  drivers/clk/versatile/clk-vexpress-spc.c | 2
-rw-r--r--  drivers/cpufreq/arm_big_little.c | 411
-rw-r--r--  drivers/cpufreq/arm_big_little.h | 17
-rw-r--r--  drivers/cpufreq/cpufreq_stats.c | 49
-rw-r--r--  drivers/irqchip/irq-gic.c | 145
-rw-r--r--  include/linux/irqchip/arm-gic.h | 7
-rw-r--r--  include/trace/events/power_cpu_migrate.h | 67
-rw-r--r--  linaro/configs/big-LITTLE-IKS.conf | 5
26 files changed, 2377 insertions, 272 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index e08f4811aa3b..06bc7a222955 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1630,6 +1630,24 @@ config BIG_LITTLE
help
This option enables support for the big.LITTLE architecture.
+config BL_SWITCHER
+ bool "big.LITTLE switcher support"
+ depends on BIG_LITTLE && MCPM && HOTPLUG_CPU
+ select CPU_PM
+ select ARM_CPU_SUSPEND
+ help
+ The big.LITTLE "switcher" provides the core functionality to
+ transparently handle transitions between a cluster of A15s
+ and a cluster of A7s in a big.LITTLE system.
+
+config BL_SWITCHER_DUMMY_IF
+ tristate "Simple big.LITTLE switcher user interface"
+ depends on BL_SWITCHER && DEBUG_KERNEL
+ help
+ This is a simple, dummy char dev interface for controlling
+ the big.LITTLE switcher core code. It is meant for
+ debugging purposes only.
+
choice
prompt "Memory split"
default VMSPLIT_3G
diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
index 53dc0c820ff2..cfb69286952e 100644
--- a/arch/arm/common/Makefile
+++ b/arch/arm/common/Makefile
@@ -19,3 +19,5 @@ obj-$(CONFIG_MCPM) += mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o
AFLAGS_mcpm_head.o := -march=armv7-a
AFLAGS_vlock.o := -march=armv7-a
CFLAGS_REMOVE_mcpm_entry.o = -pg
+obj-$(CONFIG_BL_SWITCHER) += bL_switcher.o
+obj-$(CONFIG_BL_SWITCHER_DUMMY_IF) += bL_switcher_dummy_if.o
diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
new file mode 100644
index 000000000000..be46c594f957
--- /dev/null
+++ b/arch/arm/common/bL_switcher.c
@@ -0,0 +1,826 @@
+/*
+ * arch/arm/common/bL_switcher.c -- big.LITTLE cluster switcher core driver
+ *
+ * Created by: Nicolas Pitre, March 2012
+ * Copyright: (C) 2012 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/atomic.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/cpu_pm.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/kthread.h>
+#include <linux/wait.h>
+#include <linux/time.h>
+#include <linux/clockchips.h>
+#include <linux/hrtimer.h>
+#include <linux/tick.h>
+#include <linux/notifier.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/sysfs.h>
+#include <linux/irqchip/arm-gic.h>
+#include <linux/moduleparam.h>
+
+#include <asm/smp_plat.h>
+#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <asm/suspend.h>
+#include <asm/mcpm.h>
+#include <asm/bL_switcher.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/power_cpu_migrate.h>
+
+
+/*
+ * Use our own MPIDR accessors as the generic ones in asm/cputype.h have
+ * __attribute_const__ and we don't want the compiler to assume any
+ * constness here.
+ */
+
+static int read_mpidr(void)
+{
+ unsigned int id;
+ asm volatile ("mrc\tp15, 0, %0, c0, c0, 5" : "=r" (id));
+ return id & MPIDR_HWID_BITMASK;
+}
+
+/*
+ * Get a global nanosecond time stamp for tracing.
+ */
+static s64 get_ns(void)
+{
+ struct timespec ts;
+ getnstimeofday(&ts);
+ return timespec_to_ns(&ts);
+}
+
+/*
+ * bL switcher core code.
+ */
+
+static void bL_do_switch(void *_arg)
+{
+ unsigned ib_mpidr, ib_cpu, ib_cluster;
+ long volatile handshake, **handshake_ptr = _arg;
+
+ pr_debug("%s\n", __func__);
+
+ ib_mpidr = cpu_logical_map(smp_processor_id());
+ ib_cpu = MPIDR_AFFINITY_LEVEL(ib_mpidr, 0);
+ ib_cluster = MPIDR_AFFINITY_LEVEL(ib_mpidr, 1);
+
+ /* Advertise our handshake location */
+ if (handshake_ptr) {
+ handshake = 0;
+ *handshake_ptr = &handshake;
+ } else
+ handshake = -1;
+
+ /*
+ * Our state has been saved at this point. Let's release our
+ * inbound CPU.
+ */
+ mcpm_set_entry_vector(ib_cpu, ib_cluster, cpu_resume);
+ sev();
+
+ /*
+ * From this point, we must assume that our counterpart CPU might
+ * have taken over in its parallel world already, as if execution
+ * just returned from cpu_suspend(). It is therefore important to
+ * be very careful not to make any change the other guy is not
+ * expecting. This is why we need stack isolation.
+ *
+ * Fancy undercover tasks could be performed here. For now
+ * we have none.
+ */
+
+ /*
+ * Let's wait until our inbound is alive.
+ */
+ while (!handshake) {
+ wfe();
+ smp_mb();
+ }
+
+ /* Let's put ourself down. */
+ mcpm_cpu_power_down();
+
+ /* should never get here */
+ BUG();
+}
+
+/*
+ * Stack isolation. To ensure 'current' remains valid, we just use another
+ * piece of our thread's stack space which should be fairly lightly used.
+ * The selected area starts just above the thread_info structure located
+ * at the very bottom of the stack, aligned to a cache line, and indexed
+ * with the cluster number.
+ */
+#define STACK_SIZE 512
+extern void call_with_stack(void (*fn)(void *), void *arg, void *sp);
+static int bL_switchpoint(unsigned long _arg)
+{
+ unsigned int mpidr = read_mpidr();
+ unsigned int clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+ void *stack = current_thread_info() + 1;
+ stack = PTR_ALIGN(stack, L1_CACHE_BYTES);
+ stack += clusterid * STACK_SIZE + STACK_SIZE;
+ call_with_stack(bL_do_switch, (void *)_arg, stack);
+ BUG();
+}
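
To make the stack arithmetic above concrete, here is a minimal sketch written as if it sat in this file next to bL_switchpoint(); the helper name is illustrative only:

/*
 * Illustrative sketch: call_with_stack() is handed the *top* of a
 * per-cluster area, and the ARM stack grows downwards, so cluster 0 uses
 * [base, base + 512) and cluster 1 uses [base + 512, base + 1024), both
 * sitting just above the thread_info at the bottom of the thread stack.
 */
static void __maybe_unused bL_switcher_stack_layout_sketch(void)
{
	void *base = PTR_ALIGN((void *)(current_thread_info() + 1),
			       L1_CACHE_BYTES);
	void *sp_cluster0 = base + 0 * STACK_SIZE + STACK_SIZE;	/* base + 512  */
	void *sp_cluster1 = base + 1 * STACK_SIZE + STACK_SIZE;	/* base + 1024 */

	(void)sp_cluster0;
	(void)sp_cluster1;
}
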
+
+/*
+ * Generic switcher interface
+ */
+
+static unsigned int bL_gic_id[MAX_CPUS_PER_CLUSTER][MAX_NR_CLUSTERS];
+static int bL_switcher_cpu_pairing[NR_CPUS];
+
+/*
+ * bL_switch_to - Switch to a specific cluster for the current CPU
+ * @new_cluster_id: the ID of the cluster to switch to.
+ *
+ * This function must be called on the CPU to be switched.
+ * Returns 0 on success, else a negative status code.
+ */
+static int bL_switch_to(unsigned int new_cluster_id)
+{
+ unsigned int mpidr, this_cpu, that_cpu;
+ unsigned int ob_mpidr, ob_cpu, ob_cluster, ib_mpidr, ib_cpu, ib_cluster;
+ struct completion inbound_alive;
+ struct tick_device *tdev;
+ enum clock_event_mode tdev_mode;
+ long volatile *handshake_ptr;
+ int ipi_nr, ret;
+
+ this_cpu = smp_processor_id();
+ ob_mpidr = read_mpidr();
+ ob_cpu = MPIDR_AFFINITY_LEVEL(ob_mpidr, 0);
+ ob_cluster = MPIDR_AFFINITY_LEVEL(ob_mpidr, 1);
+ BUG_ON(cpu_logical_map(this_cpu) != ob_mpidr);
+
+ if (new_cluster_id == ob_cluster)
+ return 0;
+
+ that_cpu = bL_switcher_cpu_pairing[this_cpu];
+ ib_mpidr = cpu_logical_map(that_cpu);
+ ib_cpu = MPIDR_AFFINITY_LEVEL(ib_mpidr, 0);
+ ib_cluster = MPIDR_AFFINITY_LEVEL(ib_mpidr, 1);
+
+ pr_debug("before switch: CPU %d MPIDR %#x -> %#x\n",
+ this_cpu, ob_mpidr, ib_mpidr);
+
+ /* Close the gate for our entry vectors */
+ mcpm_set_entry_vector(ob_cpu, ob_cluster, NULL);
+ mcpm_set_entry_vector(ib_cpu, ib_cluster, NULL);
+
+ /* Install our "inbound alive" notifier. */
+ init_completion(&inbound_alive);
+ ipi_nr = register_ipi_completion(&inbound_alive, this_cpu);
+ ipi_nr |= ((1 << 16) << bL_gic_id[ob_cpu][ob_cluster]);
+ mcpm_set_early_poke(ib_cpu, ib_cluster, gic_get_sgir_physaddr(), ipi_nr);
+
+ /*
+ * Let's wake up the inbound CPU now in case it requires some delay
+ * to come online, but leave it gated in our entry vector code.
+ */
+ ret = mcpm_cpu_power_up(ib_cpu, ib_cluster);
+ if (ret) {
+ pr_err("%s: mcpm_cpu_power_up() returned %d\n", __func__, ret);
+ return ret;
+ }
+
+ /*
+ * Raise a SGI on the inbound CPU to make sure it doesn't stall
+ * in a possible WFI, such as in bL_power_down().
+ */
+ gic_send_sgi(bL_gic_id[ib_cpu][ib_cluster], 0);
+
+ /*
+ * Wait for the inbound to come up. This allows for other
+ * tasks to be scheduled in the mean time.
+ */
+ wait_for_completion(&inbound_alive);
+ mcpm_set_early_poke(ib_cpu, ib_cluster, 0, 0);
+
+ /*
+ * From this point we are entering the switch critical zone
+ * and can't sleep/schedule anymore.
+ */
+ local_irq_disable();
+ local_fiq_disable();
+ trace_cpu_migrate_begin(get_ns(), ob_mpidr);
+
+ /* redirect GIC's SGIs to our counterpart */
+ gic_migrate_target(bL_gic_id[ib_cpu][ib_cluster]);
+
+ tdev = tick_get_device(this_cpu);
+ if (tdev && !cpumask_equal(tdev->evtdev->cpumask, cpumask_of(this_cpu)))
+ tdev = NULL;
+ if (tdev) {
+ tdev_mode = tdev->evtdev->mode;
+ clockevents_set_mode(tdev->evtdev, CLOCK_EVT_MODE_SHUTDOWN);
+ }
+
+ ret = cpu_pm_enter();
+
+ /* we can not tolerate errors at this point */
+ if (ret)
+ panic("%s: cpu_pm_enter() returned %d\n", __func__, ret);
+
+ /*
+ * Swap the physical CPUs in the logical map for this logical CPU.
+ * This must be flushed to RAM as the resume code
+ * needs to access it while the caches are still disabled.
+ */
+ cpu_logical_map(this_cpu) = ib_mpidr;
+ cpu_logical_map(that_cpu) = ob_mpidr;
+ sync_cache_w(&cpu_logical_map(this_cpu));
+
+ /* Let's do the actual CPU switch. */
+ ret = cpu_suspend((unsigned long)&handshake_ptr, bL_switchpoint);
+ if (ret > 0)
+ panic("%s: cpu_suspend() returned %d\n", __func__, ret);
+
+ /* We are executing on the inbound CPU at this point */
+ mpidr = read_mpidr();
+ pr_debug("after switch: CPU %d MPIDR %#x\n", this_cpu, mpidr);
+ BUG_ON(mpidr != ib_mpidr);
+
+ mcpm_cpu_powered_up();
+
+ ret = cpu_pm_exit();
+
+ if (tdev) {
+ clockevents_set_mode(tdev->evtdev, tdev_mode);
+ clockevents_program_event(tdev->evtdev,
+ tdev->evtdev->next_event, 1);
+ }
+
+ trace_cpu_migrate_finish(get_ns(), ib_mpidr);
+ local_fiq_enable();
+ local_irq_enable();
+
+ *handshake_ptr = 1;
+ dsb_sev();
+
+ if (ret)
+ pr_err("%s exiting with error %d\n", __func__, ret);
+ return ret;
+}
+
+struct bL_thread {
+ spinlock_t lock;
+ struct task_struct *task;
+ wait_queue_head_t wq;
+ int wanted_cluster;
+ struct completion started;
+ bL_switch_completion_handler completer;
+ void *completer_cookie;
+};
+
+static struct bL_thread bL_threads[NR_CPUS];
+
+static int bL_switcher_thread(void *arg)
+{
+ struct bL_thread *t = arg;
+ struct sched_param param = { .sched_priority = 1 };
+ int cluster;
+ bL_switch_completion_handler completer;
+ void *completer_cookie;
+
+ sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
+ complete(&t->started);
+
+ do {
+ if (signal_pending(current))
+ flush_signals(current);
+ wait_event_interruptible(t->wq,
+ t->wanted_cluster != -1 ||
+ kthread_should_stop());
+
+ spin_lock(&t->lock);
+ cluster = t->wanted_cluster;
+ completer = t->completer;
+ completer_cookie = t->completer_cookie;
+ t->wanted_cluster = -1;
+ t->completer = NULL;
+ spin_unlock(&t->lock);
+
+ if (cluster != -1) {
+ bL_switch_to(cluster);
+
+ if (completer)
+ completer(completer_cookie);
+ }
+ } while (!kthread_should_stop());
+
+ return 0;
+}
+
+static struct task_struct * bL_switcher_thread_create(int cpu, void *arg)
+{
+ struct task_struct *task;
+
+ task = kthread_create_on_node(bL_switcher_thread, arg,
+ cpu_to_node(cpu), "kswitcher_%d", cpu);
+ if (!IS_ERR(task)) {
+ kthread_bind(task, cpu);
+ wake_up_process(task);
+ } else
+ pr_err("%s failed for CPU %d\n", __func__, cpu);
+ return task;
+}
+
+/*
+ * bL_switch_request_cb - Switch to a specific cluster for the given CPU,
+ * with completion notification via a callback
+ *
+ * @cpu: the CPU to switch
+ * @new_cluster_id: the ID of the cluster to switch to.
+ * @completer: switch completion callback. if non-NULL,
+ * @completer(@completer_cookie) will be called on completion of
+ * the switch, in non-atomic context.
+ * @completer_cookie: opaque context argument for @completer.
+ *
+ * This function causes a cluster switch on the given CPU by waking up
+ * the appropriate switcher thread. This function may or may not return
+ * before the switch has occurred.
+ *
+ * If a @completer callback function is supplied, it will be called when
+ * the switch is complete. This can be used to determine asynchronously
+ * when the switch is complete, regardless of when bL_switch_request()
+ * returns. When @completer is supplied, no new switch request is permitted
+ * for the affected CPU until after the switch is complete, and @completer
+ * has returned.
+ */
+int bL_switch_request_cb(unsigned int cpu, unsigned int new_cluster_id,
+ bL_switch_completion_handler completer,
+ void *completer_cookie)
+{
+ struct bL_thread *t;
+
+ if (cpu >= ARRAY_SIZE(bL_threads)) {
+ pr_err("%s: cpu %d out of bounds\n", __func__, cpu);
+ return -EINVAL;
+ }
+
+ t = &bL_threads[cpu];
+
+ if (IS_ERR(t->task))
+ return PTR_ERR(t->task);
+ if (!t->task)
+ return -ESRCH;
+
+ spin_lock(&t->lock);
+ if (t->completer) {
+ spin_unlock(&t->lock);
+ return -EBUSY;
+ }
+ t->completer = completer;
+ t->completer_cookie = completer_cookie;
+ t->wanted_cluster = new_cluster_id;
+ spin_unlock(&t->lock);
+ wake_up(&t->wq);
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(bL_switch_request_cb);
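
A usage sketch for the callback form: the function and variable names below (my_switch_done(), my_request_switch()) are hypothetical, standing in for a client such as a cpufreq driver that needs to know when the switch has actually completed.

#include <linux/completion.h>
#include <asm/bL_switcher.h>

/* Hypothetical caller: request a switch and block until the completer runs. */
static void my_switch_done(void *cookie)
{
	complete((struct completion *)cookie);	/* called in non-atomic context */
}

static int my_request_switch(unsigned int cpu, unsigned int new_cluster)
{
	struct completion done;
	int ret;

	init_completion(&done);
	ret = bL_switch_request_cb(cpu, new_cluster, my_switch_done, &done);
	if (ret)		/* e.g. -EBUSY while a previous request is pending */
		return ret;

	wait_for_completion(&done);
	return 0;
}
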
+
+/*
+ * Activation and configuration code.
+ */
+
+static DEFINE_MUTEX(bL_switcher_activation_lock);
+static BLOCKING_NOTIFIER_HEAD(bL_activation_notifier);
+static unsigned int bL_switcher_active;
+static unsigned int bL_switcher_cpu_original_cluster[NR_CPUS];
+static cpumask_t bL_switcher_removed_logical_cpus;
+
+int bL_switcher_register_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_register(&bL_activation_notifier, nb);
+}
+EXPORT_SYMBOL_GPL(bL_switcher_register_notifier);
+
+int bL_switcher_unregister_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_unregister(&bL_activation_notifier, nb);
+}
+EXPORT_SYMBOL_GPL(bL_switcher_unregister_notifier);
+
+static int bL_activation_notify(unsigned long val)
+{
+ int ret;
+
+ ret = blocking_notifier_call_chain(&bL_activation_notifier, val, NULL);
+ if (ret & NOTIFY_STOP_MASK)
+ pr_err("%s: notifier chain failed with status 0x%x\n",
+ __func__, ret);
+ return notifier_to_errno(ret);
+}
+
+static void bL_switcher_restore_cpus(void)
+{
+ int i;
+
+ for_each_cpu(i, &bL_switcher_removed_logical_cpus)
+ cpu_up(i);
+}
+
+static int bL_switcher_halve_cpus(void)
+{
+ int i, j, cluster_0, gic_id, ret;
+ unsigned int cpu, cluster, mask;
+ cpumask_t available_cpus;
+
+ /* First pass to validate what we have */
+ mask = 0;
+ for_each_online_cpu(i) {
+ cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0);
+ cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1);
+ if (cluster >= 2) {
+ pr_err("%s: only dual cluster systems are supported\n", __func__);
+ return -EINVAL;
+ }
+ if (WARN_ON(cpu >= MAX_CPUS_PER_CLUSTER))
+ return -EINVAL;
+ mask |= (1 << cluster);
+ }
+ if (mask != 3) {
+ pr_err("%s: no CPU pairing possible\n", __func__);
+ return -EINVAL;
+ }
+
+ /*
+ * Now let's do the pairing. We match each CPU with another CPU
+ * from a different cluster. To get a uniform scheduling behavior
+ * without fiddling with CPU topology and compute capacity data,
+ * we'll use logical CPUs initially belonging to the same cluster.
+ */
+ memset(bL_switcher_cpu_pairing, -1, sizeof(bL_switcher_cpu_pairing));
+ cpumask_copy(&available_cpus, cpu_online_mask);
+ cluster_0 = -1;
+ for_each_cpu(i, &available_cpus) {
+ int match = -1;
+ cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1);
+ if (cluster_0 == -1)
+ cluster_0 = cluster;
+ if (cluster != cluster_0)
+ continue;
+ cpumask_clear_cpu(i, &available_cpus);
+ for_each_cpu(j, &available_cpus) {
+ cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(j), 1);
+ /*
+ * Let's remember the last match to create "odd"
+ * pairing on purpose in order for other code not
+ * to assume any relation between physical and
+ * logical CPU numbers.
+ */
+ if (cluster != cluster_0)
+ match = j;
+ }
+ if (match != -1) {
+ bL_switcher_cpu_pairing[i] = match;
+ cpumask_clear_cpu(match, &available_cpus);
+ pr_info("CPU%d paired with CPU%d\n", i, match);
+ }
+ }
+
+ /*
+ * Now we disable the unwanted CPUs i.e. everything that has no
+ * pairing information (that includes the pairing counterparts).
+ */
+ cpumask_clear(&bL_switcher_removed_logical_cpus);
+ for_each_online_cpu(i) {
+ cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0);
+ cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1);
+
+ /* Let's take note of the GIC ID for this CPU */
+ gic_id = gic_get_cpu_id(i);
+ if (gic_id < 0) {
+ pr_err("%s: bad GIC ID for CPU %d\n", __func__, i);
+ bL_switcher_restore_cpus();
+ return -EINVAL;
+ }
+ bL_gic_id[cpu][cluster] = gic_id;
+ pr_info("GIC ID for CPU %u cluster %u is %u\n",
+ cpu, cluster, gic_id);
+
+ if (bL_switcher_cpu_pairing[i] != -1) {
+ bL_switcher_cpu_original_cluster[i] = cluster;
+ continue;
+ }
+
+ ret = cpu_down(i);
+ if (ret) {
+ bL_switcher_restore_cpus();
+ return ret;
+ }
+ cpumask_set_cpu(i, &bL_switcher_removed_logical_cpus);
+ }
+
+ return 0;
+}
+
+/* Determine the logical CPU a given physical CPU is grouped on. */
+int bL_switcher_get_logical_index(u32 mpidr)
+{
+ int cpu;
+
+ if (!bL_switcher_active)
+ return -EUNATCH;
+
+ mpidr &= MPIDR_HWID_BITMASK;
+ for_each_online_cpu(cpu) {
+ int pairing = bL_switcher_cpu_pairing[cpu];
+ if (pairing == -1)
+ continue;
+ if ((mpidr == cpu_logical_map(cpu)) ||
+ (mpidr == cpu_logical_map(pairing)))
+ return cpu;
+ }
+ return -EINVAL;
+}
+
+static void bL_switcher_trace_trigger_cpu(void *__always_unused info)
+{
+ trace_cpu_migrate_current(get_ns(), read_mpidr());
+}
+
+int bL_switcher_trace_trigger(void)
+{
+ int ret;
+
+ preempt_disable();
+
+ bL_switcher_trace_trigger_cpu(NULL);
+ ret = smp_call_function(bL_switcher_trace_trigger_cpu, NULL, true);
+
+ preempt_enable();
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(bL_switcher_trace_trigger);
+
+static int bL_switcher_enable(void)
+{
+ int cpu, ret;
+
+ mutex_lock(&bL_switcher_activation_lock);
+ cpu_hotplug_driver_lock();
+ if (bL_switcher_active) {
+ cpu_hotplug_driver_unlock();
+ mutex_unlock(&bL_switcher_activation_lock);
+ return 0;
+ }
+
+ pr_info("big.LITTLE switcher initializing\n");
+
+ ret = bL_activation_notify(BL_NOTIFY_PRE_ENABLE);
+ if (ret)
+ goto error;
+
+ ret = bL_switcher_halve_cpus();
+ if (ret)
+ goto error;
+
+ bL_switcher_trace_trigger();
+
+ for_each_online_cpu(cpu) {
+ struct bL_thread *t = &bL_threads[cpu];
+ spin_lock_init(&t->lock);
+ init_waitqueue_head(&t->wq);
+ init_completion(&t->started);
+ t->wanted_cluster = -1;
+ t->task = bL_switcher_thread_create(cpu, t);
+ }
+
+ bL_switcher_active = 1;
+ bL_activation_notify(BL_NOTIFY_POST_ENABLE);
+ pr_info("big.LITTLE switcher initialized\n");
+ goto out;
+
+error:
+ pr_warning("big.LITTLE switcher initialization failed\n");
+ bL_activation_notify(BL_NOTIFY_POST_DISABLE);
+
+out:
+ cpu_hotplug_driver_unlock();
+ mutex_unlock(&bL_switcher_activation_lock);
+ return ret;
+}
+
+#ifdef CONFIG_SYSFS
+
+static void bL_switcher_disable(void)
+{
+ unsigned int cpu, cluster;
+ struct bL_thread *t;
+ struct task_struct *task;
+
+ mutex_lock(&bL_switcher_activation_lock);
+ cpu_hotplug_driver_lock();
+
+ if (!bL_switcher_active)
+ goto out;
+
+ if (bL_activation_notify(BL_NOTIFY_PRE_DISABLE) != 0) {
+ bL_activation_notify(BL_NOTIFY_POST_ENABLE);
+ goto out;
+ }
+
+ bL_switcher_active = 0;
+
+ /*
+ * To deactivate the switcher, we must shut down the switcher
+ * threads to prevent any other requests from being accepted.
+ * Then, if the final cluster for given logical CPU is not the
+ * same as the original one, we'll recreate a switcher thread
+ * just for the purpose of switching the CPU back without any
+ * possibility for interference from external requests.
+ */
+ for_each_online_cpu(cpu) {
+ t = &bL_threads[cpu];
+ task = t->task;
+ t->task = NULL;
+ if (!task || IS_ERR(task))
+ continue;
+ kthread_stop(task);
+ /* no more switch may happen on this CPU at this point */
+ cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1);
+ if (cluster == bL_switcher_cpu_original_cluster[cpu])
+ continue;
+ init_completion(&t->started);
+ t->wanted_cluster = bL_switcher_cpu_original_cluster[cpu];
+ task = bL_switcher_thread_create(cpu, t);
+ if (!IS_ERR(task)) {
+ wait_for_completion(&t->started);
+ kthread_stop(task);
+ cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1);
+ if (cluster == bL_switcher_cpu_original_cluster[cpu])
+ continue;
+ }
+ /* If execution gets here, we're in trouble. */
+ pr_crit("%s: unable to restore original cluster for CPU %d\n",
+ __func__, cpu);
+ pr_crit("%s: CPU %d can't be restored\n",
+ __func__, bL_switcher_cpu_pairing[cpu]);
+ cpumask_clear_cpu(bL_switcher_cpu_pairing[cpu],
+ &bL_switcher_removed_logical_cpus);
+ }
+
+ bL_switcher_restore_cpus();
+ bL_switcher_trace_trigger();
+
+ bL_activation_notify(BL_NOTIFY_POST_DISABLE);
+
+out:
+ cpu_hotplug_driver_unlock();
+ mutex_unlock(&bL_switcher_activation_lock);
+}
+
+static ssize_t bL_switcher_active_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", bL_switcher_active);
+}
+
+static ssize_t bL_switcher_active_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ int ret;
+
+ switch (buf[0]) {
+ case '0':
+ bL_switcher_disable();
+ ret = 0;
+ break;
+ case '1':
+ ret = bL_switcher_enable();
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ return (ret >= 0) ? count : ret;
+}
+
+static ssize_t bL_switcher_trace_trigger_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ int ret = bL_switcher_trace_trigger();
+
+ return ret ? ret : count;
+}
+
+static struct kobj_attribute bL_switcher_active_attr =
+ __ATTR(active, 0644, bL_switcher_active_show, bL_switcher_active_store);
+
+static struct kobj_attribute bL_switcher_trace_trigger_attr =
+ __ATTR(trace_trigger, 0200, NULL, bL_switcher_trace_trigger_store);
+
+static struct attribute *bL_switcher_attrs[] = {
+ &bL_switcher_active_attr.attr,
+ &bL_switcher_trace_trigger_attr.attr,
+ NULL,
+};
+
+static struct attribute_group bL_switcher_attr_group = {
+ .attrs = bL_switcher_attrs,
+};
+
+static struct kobject *bL_switcher_kobj;
+
+static int __init bL_switcher_sysfs_init(void)
+{
+ int ret;
+
+ bL_switcher_kobj = kobject_create_and_add("bL_switcher", kernel_kobj);
+ if (!bL_switcher_kobj)
+ return -ENOMEM;
+ ret = sysfs_create_group(bL_switcher_kobj, &bL_switcher_attr_group);
+ if (ret)
+ kobject_put(bL_switcher_kobj);
+ return ret;
+}
+
+#endif /* CONFIG_SYSFS */
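
With CONFIG_SYSFS, the attribute group above is exposed as /sys/kernel/bL_switcher/: 'active' (write '0' to disable, '1' to enable; reads back the current state) and 'trace_trigger' (write-only; emits a cpu_migrate_current trace event on every CPU). A minimal user-space sketch, assuming sysfs is mounted in the usual place:

/* Hypothetical user-space helper: disable the switcher, then request a
 * trace snapshot of every CPU's current MPIDR via trace_trigger.
 */
#include <fcntl.h>
#include <unistd.h>

static int write_str(const char *path, const char *s, size_t len)
{
	int fd = open(path, O_WRONLY);
	ssize_t n;

	if (fd < 0)
		return -1;
	n = write(fd, s, len);
	close(fd);
	return n == (ssize_t)len ? 0 : -1;
}

int main(void)
{
	if (write_str("/sys/kernel/bL_switcher/active", "0", 1))
		return 1;
	return write_str("/sys/kernel/bL_switcher/trace_trigger", "1", 1) ? 1 : 0;
}
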
+
+bool bL_switcher_get_enabled(void)
+{
+ mutex_lock(&bL_switcher_activation_lock);
+
+ return bL_switcher_active;
+}
+EXPORT_SYMBOL_GPL(bL_switcher_get_enabled);
+
+void bL_switcher_put_enabled(void)
+{
+ mutex_unlock(&bL_switcher_activation_lock);
+}
+EXPORT_SYMBOL_GPL(bL_switcher_put_enabled);
+
+/*
+ * Veto any CPU hotplug operation while the switcher is active.
+ * We're just not ready to deal with that given the trickery involved.
+ */
+static int bL_switcher_hotplug_callback(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ switch (action) {
+ case CPU_UP_PREPARE:
+ case CPU_DOWN_PREPARE:
+ if (bL_switcher_active)
+ return NOTIFY_BAD;
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block bL_switcher_hotplug_notifier =
+ { &bL_switcher_hotplug_callback, NULL, 0 };
+
+static bool no_bL_switcher = true;
+core_param(no_bL_switcher, no_bL_switcher, bool, 0644);
+
+static int __init bL_switcher_init(void)
+{
+ int ret;
+
+ if (MAX_NR_CLUSTERS != 2) {
+ pr_err("%s: only dual cluster systems are supported\n", __func__);
+ return -EINVAL;
+ }
+
+ register_cpu_notifier(&bL_switcher_hotplug_notifier);
+
+ if (!no_bL_switcher) {
+ ret = bL_switcher_enable();
+ if (ret)
+ return ret;
+ }
+
+#ifdef CONFIG_SYSFS
+ ret = bL_switcher_sysfs_init();
+ if (ret)
+ pr_err("%s: unable to create sysfs entry\n", __func__);
+#endif
+
+ return 0;
+}
+
+late_initcall(bL_switcher_init);
diff --git a/arch/arm/common/bL_switcher_dummy_if.c b/arch/arm/common/bL_switcher_dummy_if.c
new file mode 100644
index 000000000000..5e2dd197e728
--- /dev/null
+++ b/arch/arm/common/bL_switcher_dummy_if.c
@@ -0,0 +1,71 @@
+/*
+ * arch/arm/common/bL_switcher_dummy_if.c -- b.L switcher dummy interface
+ *
+ * Created by: Nicolas Pitre, November 2012
+ * Copyright: (C) 2012 Linaro Limited
+ *
+ * Dummy interface to user space for debugging purposes only.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <asm/uaccess.h>
+#include <asm/bL_switcher.h>
+
+static ssize_t bL_switcher_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *pos)
+{
+ unsigned char val[3];
+ unsigned int cpu, cluster;
+ int ret;
+
+ pr_debug("%s\n", __func__);
+
+ if (len < 3)
+ return -EINVAL;
+
+ if (copy_from_user(val, buf, 3))
+ return -EFAULT;
+
+ /* format: <cpu#>,<cluster#> */
+ if (val[0] < '0' || val[0] > '4' ||
+ val[1] != ',' ||
+ val[2] < '0' || val[2] > '1')
+ return -EINVAL;
+
+ cpu = val[0] - '0';
+ cluster = val[2] - '0';
+ ret = bL_switch_request(cpu, cluster);
+
+ return ret ? : len;
+}
+
+static const struct file_operations bL_switcher_fops = {
+ .write = bL_switcher_write,
+ .owner = THIS_MODULE,
+};
+
+static struct miscdevice bL_switcher_device = {
+ MISC_DYNAMIC_MINOR,
+ "b.L_switcher",
+ &bL_switcher_fops
+};
+
+static int __init bL_switcher_dummy_if_init(void)
+{
+ return misc_register(&bL_switcher_device);
+}
+
+static void __exit bL_switcher_dummy_if_exit(void)
+{
+ misc_deregister(&bL_switcher_device);
+}
+
+module_init(bL_switcher_dummy_if_init);
+module_exit(bL_switcher_dummy_if_exit);
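
The write handler accepts exactly the "<cpu#>,<cluster#>" format checked above. A minimal user-space sketch; /dev/b.L_switcher is an assumption for where the misc device node ends up (the actual path depends on udev):

/* Hypothetical user-space test: ask logical CPU 0 to switch to cluster 1. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/b.L_switcher", O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "0,1", 3) != 3) {	/* "<cpu#>,<cluster#>" */
		perror("write");
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}
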
diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c
index 370236dd1a03..4a2b32fd53a1 100644
--- a/arch/arm/common/mcpm_entry.c
+++ b/arch/arm/common/mcpm_entry.c
@@ -27,6 +27,18 @@ void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr)
sync_cache_w(&mcpm_entry_vectors[cluster][cpu]);
}
+extern unsigned long mcpm_entry_early_pokes[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER][2];
+
+void mcpm_set_early_poke(unsigned cpu, unsigned cluster,
+ unsigned long poke_phys_addr, unsigned long poke_val)
+{
+ unsigned long *poke = &mcpm_entry_early_pokes[cluster][cpu][0];
+ poke[0] = poke_phys_addr;
+ poke[1] = poke_val;
+ __cpuc_flush_dcache_area((void *)poke, 8);
+ outer_clean_range(__pa(poke), __pa(poke + 2));
+}
+
static const struct mcpm_platform_ops *platform_ops;
int __init mcpm_platform_register(const struct mcpm_platform_ops *ops)
diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S
index 8178705c4b24..057e9c5a9e1f 100644
--- a/arch/arm/common/mcpm_head.S
+++ b/arch/arm/common/mcpm_head.S
@@ -71,12 +71,19 @@ ENTRY(mcpm_entry_point)
* position independent way.
*/
adr r5, 3f
- ldmia r5, {r6, r7, r8, r11}
+ ldmia r5, {r0, r6, r7, r8, r11}
+ add r0, r5, r0 @ r0 = mcpm_entry_early_pokes
add r6, r5, r6 @ r6 = mcpm_entry_vectors
ldr r7, [r5, r7] @ r7 = mcpm_power_up_setup_phys
add r8, r5, r8 @ r8 = mcpm_sync
add r11, r5, r11 @ r11 = first_man_locks
+ @ Perform an early poke, if any
+ add r0, r0, r4, lsl #3
+ ldmia r0, {r0, r1}
+ teq r0, #0
+ strne r1, [r0]
+
mov r0, #MCPM_SYNC_CLUSTER_SIZE
mla r8, r0, r10, r8 @ r8 = sync cluster base
@@ -195,7 +202,8 @@ mcpm_entry_gated:
.align 2
-3: .word mcpm_entry_vectors - .
+3: .word mcpm_entry_early_pokes - .
+ .word mcpm_entry_vectors - 3b
.word mcpm_power_up_setup_phys - 3b
.word mcpm_sync - 3b
.word first_man_locks - 3b
@@ -214,6 +222,10 @@ first_man_locks:
ENTRY(mcpm_entry_vectors)
.space 4 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER
+ .type mcpm_entry_early_pokes, #object
+ENTRY(mcpm_entry_early_pokes)
+ .space 8 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER
+
.type mcpm_power_up_setup_phys, #object
ENTRY(mcpm_power_up_setup_phys)
.space 4 @ set by mcpm_sync_init()
diff --git a/arch/arm/include/asm/bL_switcher.h b/arch/arm/include/asm/bL_switcher.h
new file mode 100644
index 000000000000..87ebcbc8e3cb
--- /dev/null
+++ b/arch/arm/include/asm/bL_switcher.h
@@ -0,0 +1,77 @@
+/*
+ * arch/arm/include/asm/bL_switcher.h
+ *
+ * Created by: Nicolas Pitre, April 2012
+ * Copyright: (C) 2012 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef ASM_BL_SWITCHER_H
+#define ASM_BL_SWITCHER_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+typedef void (*bL_switch_completion_handler)(void *cookie);
+
+int bL_switch_request_cb(unsigned int cpu, unsigned int new_cluster_id,
+ bL_switch_completion_handler completer,
+ void *completer_cookie);
+static inline int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id)
+{
+ return bL_switch_request_cb(cpu, new_cluster_id, NULL, NULL);
+}
+
+/*
+ * Register here to be notified about runtime enabling/disabling of
+ * the switcher.
+ *
+ * The notifier chain is called with the switcher activation lock held:
+ * the switcher will not be enabled or disabled during callbacks.
+ * Callbacks must not call bL_switcher_{get,put}_enabled().
+ */
+#define BL_NOTIFY_PRE_ENABLE 0
+#define BL_NOTIFY_POST_ENABLE 1
+#define BL_NOTIFY_PRE_DISABLE 2
+#define BL_NOTIFY_POST_DISABLE 3
+
+#ifdef CONFIG_BL_SWITCHER
+
+int bL_switcher_register_notifier(struct notifier_block *nb);
+int bL_switcher_unregister_notifier(struct notifier_block *nb);
+
+/*
+ * Use these functions to temporarily prevent enabling/disabling of
+ * the switcher.
+ * bL_switcher_get_enabled() returns true if the switcher is currently
+ * enabled. Each call to bL_switcher_get_enabled() must be followed
+ * by a call to bL_switcher_put_enabled(). These functions are not
+ * recursive.
+ */
+bool bL_switcher_get_enabled(void);
+void bL_switcher_put_enabled(void);
+
+int bL_switcher_trace_trigger(void);
+int bL_switcher_get_logical_index(u32 mpidr);
+
+#else
+static inline int bL_switcher_register_notifier(struct notifier_block *nb)
+{
+ return 0;
+}
+
+static inline int bL_switcher_unregister_notifier(struct notifier_block *nb)
+{
+ return 0;
+}
+
+static inline bool bL_switcher_get_enabled(void) { return false; }
+static inline void bL_switcher_put_enabled(void) { }
+static inline int bL_switcher_trace_trigger(void) { return 0; }
+static inline int bL_switcher_get_logical_index(u32 mpidr) { return -EUNATCH; }
+#endif /* CONFIG_BL_SWITCHER */
+
+#endif
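
A sketch of a kernel-side client of this header; the notifier and init function names are hypothetical. Note that, as the comments above require, the get/put pair is used outside the notifier callback, never inside it:

#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/printk.h>
#include <asm/bL_switcher.h>

/* Hypothetical client: track runtime enable/disable of the switcher and
 * pin its state while inspecting the CPU topology.
 */
static int my_bL_notifier_call(struct notifier_block *nb,
			       unsigned long action, void *unused)
{
	switch (action) {
	case BL_NOTIFY_PRE_ENABLE:
	case BL_NOTIFY_PRE_DISABLE:
		/* quiesce anything that depends on the current topology */
		break;
	case BL_NOTIFY_POST_ENABLE:
	case BL_NOTIFY_POST_DISABLE:
		/* resume normal operation */
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block my_bL_notifier = {
	.notifier_call = my_bL_notifier_call,
};

static int __init my_client_init(void)
{
	bool active;

	bL_switcher_register_notifier(&my_bL_notifier);

	active = bL_switcher_get_enabled();
	/* ... the switcher cannot be enabled or disabled here ... */
	bL_switcher_put_enabled();

	pr_info("bL switcher is %s\n", active ? "enabled" : "disabled");
	return 0;
}
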
diff --git a/arch/arm/include/asm/hardirq.h b/arch/arm/include/asm/hardirq.h
index 3d7351c844aa..fe3ea776dc34 100644
--- a/arch/arm/include/asm/hardirq.h
+++ b/arch/arm/include/asm/hardirq.h
@@ -5,7 +5,7 @@
#include <linux/threads.h>
#include <asm/irq.h>
-#define NR_IPI 7
+#define NR_IPI 8
typedef struct {
unsigned int __softirq_pending;
diff --git a/arch/arm/include/asm/mcpm.h b/arch/arm/include/asm/mcpm.h
index 0f7b7620e9a5..7626a7fd4938 100644
--- a/arch/arm/include/asm/mcpm.h
+++ b/arch/arm/include/asm/mcpm.h
@@ -42,6 +42,14 @@ extern void mcpm_entry_point(void);
void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr);
/*
+ * This sets an early poke, i.e. a value to be poked into some address
+ * from very early assembly code before the CPU is ungated. The
+ * address must be physical, and if 0 then nothing will happen.
+ */
+void mcpm_set_early_poke(unsigned cpu, unsigned cluster,
+ unsigned long poke_phys_addr, unsigned long poke_val);
+
+/*
* CPU/cluster power operations API for higher subsystems to use.
*/
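
For reference, this is how the switcher earlier in this patch consumes the early poke: it arms a physical write of an SGI into the GIC so that it fires from the early mcpm assembly before the inbound CPU is released, then disarms it once the inbound has reported in. Condensed from bL_switch_to(); not a complete, standalone sequence:

/* Condensed from bL_switch_to(): the poke makes the freshly powered-up
 * inbound CPU raise an IPI back at the outbound from the earliest
 * possible point in mcpm_head.S.
 */
ipi_nr = register_ipi_completion(&inbound_alive, this_cpu);
ipi_nr |= ((1 << 16) << bL_gic_id[ob_cpu][ob_cluster]);
mcpm_set_early_poke(ib_cpu, ib_cluster, gic_get_sgir_physaddr(), ipi_nr);

/* ... mcpm_cpu_power_up(ib_cpu, ib_cluster), wait ... */
wait_for_completion(&inbound_alive);

/* A physical address of 0 means "no poke": disarm it again. */
mcpm_set_early_poke(ib_cpu, ib_cluster, 0, 0);
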
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index 0cd7824ca762..a7eaad37497f 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -13,7 +13,9 @@
#define __ARM_PMU_H__
#include <linux/interrupt.h>
+#include <linux/percpu.h>
#include <linux/perf_event.h>
+#include <linux/types.h>
/*
* struct arm_pmu_platdata - ARM PMU platform data
@@ -71,6 +73,21 @@ struct cpupmu_regs {
u32 pmxevtcnt[8];
};
+struct arm_cpu_pmu {
+ bool valid;
+ bool active;
+
+ u32 mpidr;
+ int irq;
+
+ struct perf_event *hw_events[ARMPMU_MAX_HWEVENTS];
+ unsigned long used_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
+ struct pmu_hw_events cpu_hw_events;
+ struct cpupmu_regs cpu_pmu_regs;
+
+ void *logical_state;
+};
+
struct arm_pmu {
struct pmu pmu;
cpumask_t active_irqs;
@@ -93,16 +110,24 @@ struct arm_pmu {
int (*map_event)(struct perf_event *event);
void (*save_regs)(struct arm_pmu *, struct cpupmu_regs *);
void (*restore_regs)(struct arm_pmu *, struct cpupmu_regs *);
+ void (*cpu_init)(struct arm_pmu *, struct arm_cpu_pmu *);
int num_events;
atomic_t active_events;
struct mutex reserve_mutex;
u64 max_period;
struct platform_device *plat_device;
- struct pmu_hw_events *(*get_hw_events)(void);
+ struct pmu_hw_events *(*get_hw_events)(struct arm_pmu *);
+
+ struct list_head class_pmus_list;
+ struct arm_cpu_pmu __percpu *cpu_pmus;
};
#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
+#define for_each_pmu(pmu, head) list_for_each_entry(pmu, head, class_pmus_list)
+
+#define to_this_cpu_pmu(arm_pmu) this_cpu_ptr((arm_pmu)->cpu_pmus)
+
extern const struct dev_pm_ops armpmu_dev_pm_ops;
int armpmu_register(struct arm_pmu *armpmu, int type);
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index c5aa088c0a8b..bfab8757b9e5 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -83,6 +83,8 @@ extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask);
extern void smp_send_all_cpu_backtrace(void);
+extern int register_ipi_completion(struct completion *completion, int cpu);
+
struct smp_operations {
#ifdef CONFIG_SMP
/*
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index d847c622a7b5..a9da343b698c 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -205,7 +205,7 @@ static void
armpmu_del(struct perf_event *event, int flags)
{
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *hw_events = armpmu->get_hw_events();
+ struct pmu_hw_events *hw_events = armpmu->get_hw_events(armpmu);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
@@ -223,7 +223,7 @@ static int
armpmu_add(struct perf_event *event, int flags)
{
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *hw_events = armpmu->get_hw_events();
+ struct pmu_hw_events *hw_events = armpmu->get_hw_events(armpmu);
struct hw_perf_event *hwc = &event->hw;
int idx;
int err = 0;
@@ -467,8 +467,14 @@ static int armpmu_event_init(struct perf_event *event)
static void armpmu_enable(struct pmu *pmu)
{
struct arm_pmu *armpmu = to_arm_pmu(pmu);
- struct pmu_hw_events *hw_events = armpmu->get_hw_events();
- int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
+ struct pmu_hw_events *hw_events = armpmu->get_hw_events(armpmu);
+ int enabled;
+
+ if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+ return;
+
+ BUG_ON(!hw_events->used_mask); /* TEMPORARY */
+ enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
if (enabled)
armpmu->start(armpmu);
@@ -477,6 +483,10 @@ static void armpmu_enable(struct pmu *pmu)
static void armpmu_disable(struct pmu *pmu)
{
struct arm_pmu *armpmu = to_arm_pmu(pmu);
+
+ if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+ return;
+
armpmu->stop(armpmu);
}
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 0b48a38e3cf4..b3ae24f6afab 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -19,26 +19,40 @@
#define pr_fmt(fmt) "CPU PMU: " fmt
#include <linux/bitmap.h>
+#include <linux/cpumask.h>
#include <linux/cpu_pm.h>
#include <linux/export.h>
#include <linux/kernel.h>
+#include <linux/list.h>
#include <linux/of.h>
+#include <linux/percpu.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <asm/bL_switcher.h>
#include <asm/cputype.h>
#include <asm/irq_regs.h>
#include <asm/pmu.h>
+#include <asm/smp_plat.h>
+#include <asm/topology.h>
-/* Set at runtime when we know what CPU type we are. */
-static DEFINE_PER_CPU(struct arm_pmu *, cpu_pmu);
+static LIST_HEAD(cpu_pmus_list);
-static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events);
-static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask);
-static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
+#define cpu_for_each_pmu(pmu, cpu_pmu, cpu) \
+ for_each_pmu(pmu, &cpu_pmus_list) \
+ if (((cpu_pmu) = per_cpu_ptr((pmu)->cpu_pmus, cpu))->valid)
-static DEFINE_PER_CPU(struct cpupmu_regs, cpu_pmu_regs);
+static struct arm_pmu *__cpu_find_any_pmu(unsigned int cpu)
+{
+ struct arm_pmu *pmu;
+ struct arm_cpu_pmu *cpu_pmu;
+
+ cpu_for_each_pmu(pmu, cpu_pmu, cpu)
+ return pmu;
+
+ return NULL;
+}
/*
* Despite the names, these two functions are CPU-specific and are used
@@ -46,7 +60,7 @@ static DEFINE_PER_CPU(struct cpupmu_regs, cpu_pmu_regs);
*/
const char *perf_pmu_name(void)
{
- struct arm_pmu *pmu = per_cpu(cpu_pmu, 0);
+ struct arm_pmu *pmu = __cpu_find_any_pmu(0);
if (!pmu)
return NULL;
@@ -56,7 +70,7 @@ EXPORT_SYMBOL_GPL(perf_pmu_name);
int perf_num_counters(void)
{
- struct arm_pmu *pmu = per_cpu(cpu_pmu, 0);
+ struct arm_pmu *pmu = __cpu_find_any_pmu(0);
if (!pmu)
return 0;
@@ -70,51 +84,73 @@ EXPORT_SYMBOL_GPL(perf_num_counters);
#include "perf_event_v6.c"
#include "perf_event_v7.c"
-static struct pmu_hw_events *cpu_pmu_get_cpu_events(void)
+static struct pmu_hw_events *cpu_pmu_get_cpu_events(struct arm_pmu *pmu)
{
- return &__get_cpu_var(cpu_hw_events);
+ return &this_cpu_ptr(pmu->cpu_pmus)->cpu_hw_events;
+}
+
+static int find_logical_cpu(u32 mpidr)
+{
+ int cpu = bL_switcher_get_logical_index(mpidr);
+
+ if (cpu != -EUNATCH)
+ return cpu;
+
+ return get_logical_index(mpidr);
}
-static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
+static void cpu_pmu_free_irq(struct arm_pmu *pmu)
{
- int i, irq, irqs;
- struct platform_device *pmu_device = cpu_pmu->plat_device;
- int cpu = -1;
+ int i;
+ int cpu;
+ struct arm_cpu_pmu *cpu_pmu;
+
+ for_each_possible_cpu(i) {
+ if (!(cpu_pmu = per_cpu_ptr(pmu->cpu_pmus, i)))
+ continue;
+
+ if (cpu_pmu->mpidr == -1)
+ continue;
- irqs = min(pmu_device->num_resources, num_possible_cpus());
+ cpu = find_logical_cpu(cpu_pmu->mpidr);
+ if (cpu < 0)
+ continue;
- for (i = 0; i < irqs; ++i) {
- cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus);
- if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
+ if (!cpumask_test_and_clear_cpu(cpu, &pmu->active_irqs))
continue;
- irq = platform_get_irq(pmu_device, i);
- if (irq >= 0)
- free_irq(irq, cpu_pmu);
+ if (cpu_pmu->irq >= 0)
+ free_irq(cpu_pmu->irq, pmu);
}
}
-static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
+static int cpu_pmu_request_irq(struct arm_pmu *pmu, irq_handler_t handler)
{
int i, err, irq, irqs;
- struct platform_device *pmu_device = cpu_pmu->plat_device;
- int cpu = -1;
+ int cpu;
+ struct arm_cpu_pmu *cpu_pmu;
- if (!pmu_device)
- return -ENODEV;
+ irqs = 0;
+ for_each_possible_cpu(i)
+ if (per_cpu_ptr(pmu->cpu_pmus, i))
+ ++irqs;
- irqs = min(pmu_device->num_resources, num_possible_cpus());
if (irqs < 1) {
pr_err("no irqs for PMUs defined\n");
return -ENODEV;
}
- for (i = 0; i < irqs; ++i) {
- err = 0;
- cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus);
- irq = platform_get_irq(pmu_device, i);
+ for_each_possible_cpu(i) {
+ if (!(cpu_pmu = per_cpu_ptr(pmu->cpu_pmus, i)))
+ continue;
+
+ irq = cpu_pmu->irq;
if (irq < 0)
continue;
+ cpu = find_logical_cpu(cpu_pmu->mpidr);
+ if (cpu < 0 || cpu != i)
+ continue;
+
/*
* If we have a single PMU interrupt that we can't shift,
* assume that we're running on a uniprocessor machine and
@@ -122,41 +158,51 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
*/
if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
- irq, i);
+ irq, cpu);
continue;
}
+ pr_debug("%s: requesting IRQ %d for CPU%d\n",
+ pmu->name, irq, cpu);
+
err = request_irq(irq, handler, IRQF_NOBALANCING, "arm-pmu",
- cpu_pmu);
+ pmu);
if (err) {
pr_err("unable to request IRQ%d for ARM PMU counters\n",
irq);
return err;
}
- cpumask_set_cpu(cpu, &cpu_pmu->active_irqs);
+ cpumask_set_cpu(cpu, &pmu->active_irqs);
}
return 0;
}
-static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
+static void cpu_pmu_init(struct arm_pmu *pmu)
{
int cpu;
- for_each_cpu_mask(cpu, cpu_pmu->valid_cpus) {
- struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
- events->events = per_cpu(hw_events, cpu);
- events->used_mask = per_cpu(used_mask, cpu);
+ for_each_cpu_mask(cpu, pmu->valid_cpus) {
+ struct arm_cpu_pmu *cpu_pmu = per_cpu_ptr(pmu->cpu_pmus, cpu);
+ struct pmu_hw_events *events = &cpu_pmu->cpu_hw_events;
+
+ events->events = cpu_pmu->hw_events;
+ events->used_mask = cpu_pmu->used_mask;
raw_spin_lock_init(&events->pmu_lock);
+
+ if (pmu->cpu_init)
+ pmu->cpu_init(pmu, cpu_pmu);
+
+ cpu_pmu->valid = true;
}
- cpu_pmu->get_hw_events = cpu_pmu_get_cpu_events;
- cpu_pmu->request_irq = cpu_pmu_request_irq;
- cpu_pmu->free_irq = cpu_pmu_free_irq;
+ pmu->get_hw_events = cpu_pmu_get_cpu_events;
+ pmu->request_irq = cpu_pmu_request_irq;
+ pmu->free_irq = cpu_pmu_free_irq;
/* Ensure the PMU has sane values out of reset. */
- if (cpu_pmu->reset)
- on_each_cpu_mask(&cpu_pmu->valid_cpus, cpu_pmu->reset, cpu_pmu, 1);
+ if (pmu->reset)
+ on_each_cpu_mask(&pmu->valid_cpus, pmu->reset, pmu, 1);
}
/*
@@ -168,36 +214,42 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
static int __cpuinit cpu_pmu_notify(struct notifier_block *b,
unsigned long action, void *hcpu)
{
- struct arm_pmu *pmu = per_cpu(cpu_pmu, (long)hcpu);
+ struct arm_pmu *pmu;
+ struct arm_cpu_pmu *cpu_pmu;
+ int ret = NOTIFY_DONE;
if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
return NOTIFY_DONE;
- if (pmu && pmu->reset)
- pmu->reset(pmu);
- else
- return NOTIFY_DONE;
+ cpu_for_each_pmu(pmu, cpu_pmu, (unsigned int)hcpu)
+ if (pmu->reset) {
+ pmu->reset(pmu);
+ ret = NOTIFY_OK;
+ }
- return NOTIFY_OK;
+ return ret;
}
static int cpu_pmu_pm_notify(struct notifier_block *b,
unsigned long action, void *hcpu)
{
int cpu = smp_processor_id();
- struct arm_pmu *pmu = per_cpu(cpu_pmu, cpu);
- struct cpupmu_regs *pmuregs = &per_cpu(cpu_pmu_regs, cpu);
+ struct arm_pmu *pmu;
+ struct arm_cpu_pmu *cpu_pmu;
+ int ret = NOTIFY_DONE;
- if (!pmu)
- return NOTIFY_DONE;
+ cpu_for_each_pmu(pmu, cpu_pmu, cpu) {
+ struct cpupmu_regs *pmuregs = &cpu_pmu->cpu_pmu_regs;
- if (action == CPU_PM_ENTER && pmu->save_regs) {
- pmu->save_regs(pmu, pmuregs);
- } else if (action == CPU_PM_EXIT && pmu->restore_regs) {
- pmu->restore_regs(pmu, pmuregs);
+ if (action == CPU_PM_ENTER && pmu->save_regs)
+ pmu->save_regs(pmu, pmuregs);
+ else if (action == CPU_PM_EXIT && pmu->restore_regs)
+ pmu->restore_regs(pmu, pmuregs);
+
+ ret = NOTIFY_OK;
}
- return NOTIFY_OK;
+ return ret;
}
static struct notifier_block __cpuinitdata cpu_pmu_hotplug_notifier = {
@@ -286,25 +338,100 @@ static int probe_current_pmu(struct arm_pmu *pmu)
return ret;
}
+static void cpu_pmu_free(struct arm_pmu *pmu)
+{
+ if (!pmu)
+ return;
+
+ free_percpu(pmu->cpu_pmus);
+ kfree(pmu);
+}
+
+/*
+ * HACK: Find a b.L switcher partner for CPU cpu on the specified cluster
+ * This information should be obtained from an interface provided by the
+ * Switcher itself, if possible.
+ */
+#ifdef CONFIG_BL_SWITCHER
+static int bL_get_partner(int cpu, int cluster)
+{
+ unsigned int i;
+
+ for_each_possible_cpu(i) {
+ if (cpu_topology[i].thread_id == cpu_topology[cpu].thread_id &&
+ cpu_topology[i].core_id == cpu_topology[cpu].core_id &&
+ cpu_topology[i].socket_id == cluster)
+ return i;
+ }
+
+ return -1; /* no partner found */
+}
+#else
+static int bL_get_partner(int __always_unused cpu, int __always_unused cluster)
+{
+ return -1;
+}
+#endif
+
+static int find_irq(struct platform_device *pdev,
+ struct device_node *pmu_node,
+ struct device_node *cluster_node,
+ u32 mpidr)
+{
+ int irq = -1;
+ u32 cluster;
+ u32 core;
+ struct device_node *cores_node;
+ struct device_node *core_node = NULL;
+
+ if (of_property_read_u32(cluster_node, "reg", &cluster) ||
+ cluster != MPIDR_AFFINITY_LEVEL(mpidr, 1))
+ goto error;
+
+ cores_node = of_get_child_by_name(cluster_node, "cores");
+ if (!cores_node)
+ goto error;
+
+ for_each_child_of_node(cores_node, core_node)
+ if (!of_property_read_u32(core_node, "reg", &core) &&
+ core == MPIDR_AFFINITY_LEVEL(mpidr, 0))
+ break;
+
+ if (!core_node)
+ goto error;
+
+ irq = platform_get_irq(pdev, core);
+
+error:
+ of_node_put(core_node);
+ of_node_put(cores_node);
+ return irq;
+}
+
static int cpu_pmu_device_probe(struct platform_device *pdev)
{
const struct of_device_id *of_id;
struct device_node *node = pdev->dev.of_node;
struct arm_pmu *pmu;
+ struct arm_cpu_pmu __percpu *cpu_pmus;
int ret = 0;
- int cpu;
pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL);
- if (!pmu) {
- pr_info("failed to allocate PMU device!");
- return -ENOMEM;
- }
+ if (!pmu)
+ goto error_nomem;
+
+ pmu->cpu_pmus = cpu_pmus = alloc_percpu(struct arm_cpu_pmu);
+ if (!cpu_pmus)
+ goto error_nomem;
if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) {
smp_call_func_t init_fn = (smp_call_func_t)of_id->data;
struct device_node *ncluster;
int cluster = -1;
cpumask_t sibling_mask;
+ cpumask_t phys_sibling_mask;
+ unsigned int i;
ncluster = of_parse_phandle(node, "cluster", 0);
if (ncluster) {
@@ -315,11 +442,59 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
cluster = be32_to_cpup(hwid);
}
/* set sibling mask to all cpu mask if socket is not specified */
- if (cluster == -1 ||
+ /*
+ * In a switcher kernel, we affine all PMUs to CPUs and
+ * abstract the runtime presence/absence of PMUs at a lower
+ * level.
+ */
+ if (cluster == -1 || IS_ENABLED(CONFIG_BL_SWITCHER) ||
cluster_to_logical_mask(cluster, &sibling_mask))
- cpumask_setall(&sibling_mask);
+ cpumask_copy(&sibling_mask, cpu_possible_mask);
- smp_call_function_any(&sibling_mask, init_fn, pmu, 1);
+ if (bL_switcher_get_enabled())
+ /*
+ * The switcher initialises late now, so it should not
+ * have initialised yet:
+ */
+ BUG();
+
+ cpumask_copy(&phys_sibling_mask, cpu_possible_mask);
+
+ /*
+ * HACK: Deduce how the switcher will modify the topology
+ * in order to fill in PMU<->CPU combinations which don't
+ * make sense when the switcher is disabled. Ideally, this
+ * knowledge should come from the switcher somehow.
+ */
+ for_each_possible_cpu(i) {
+ int cpu = i;
+
+ per_cpu_ptr(cpu_pmus, i)->mpidr = -1;
+ per_cpu_ptr(cpu_pmus, i)->irq = -1;
+
+ if (cpu_topology[i].socket_id != cluster) {
+ cpumask_clear_cpu(i, &phys_sibling_mask);
+ cpu = bL_get_partner(i, cluster);
+ }
+
+ if (cpu == -1)
+ cpumask_clear_cpu(i, &sibling_mask);
+ else {
+ int irq = find_irq(pdev, node, ncluster,
+ cpu_logical_map(cpu));
+ per_cpu_ptr(cpu_pmus, i)->mpidr =
+ cpu_logical_map(cpu);
+ per_cpu_ptr(cpu_pmus, i)->irq = irq;
+ }
+ }
+
+ /*
+ * This relies on an MP view of the system to choose the right
+ * CPU to run init_fn:
+ */
+ smp_call_function_any(&phys_sibling_mask, init_fn, pmu, 1);
+
+ bL_switcher_put_enabled();
/* now set the valid_cpus after init */
cpumask_copy(&pmu->valid_cpus, &sibling_mask);
@@ -327,24 +502,26 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
ret = probe_current_pmu(pmu);
}
- if (ret) {
- pr_info("failed to probe PMU!");
- goto out_free;
- }
-
- for_each_cpu_mask(cpu, pmu->valid_cpus)
- per_cpu(cpu_pmu, cpu) = pmu;
+ if (ret)
+ goto error;
pmu->plat_device = pdev;
cpu_pmu_init(pmu);
ret = armpmu_register(pmu, -1);
- if (!ret)
- return 0;
+ if (ret)
+ goto error;
-out_free:
- pr_info("failed to register PMU devices!");
- kfree(pmu);
+ list_add(&pmu->class_pmus_list, &cpu_pmus_list);
+ goto out;
+
+error_nomem:
+ pr_warn("out of memory\n");
+ ret = -ENOMEM;
+error:
+ pr_warn("failed to register PMU device(s)!\n");
+ cpu_pmu_free(pmu);
+out:
return ret;
}
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index 03664b0e8fa4..a191bdb9ebd6 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -439,7 +439,7 @@ static void armv6pmu_enable_event(struct perf_event *event)
unsigned long val, mask, evt, flags;
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
int idx = hwc->idx;
if (ARMV6_CYCLE_COUNTER == idx) {
@@ -477,7 +477,7 @@ armv6pmu_handle_irq(int irq_num,
unsigned long pmcr = armv6_pmcr_read();
struct perf_sample_data data;
struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
- struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(cpu_pmu);
struct pt_regs *regs;
int idx;
@@ -533,7 +533,7 @@ armv6pmu_handle_irq(int irq_num,
static void armv6pmu_start(struct arm_pmu *cpu_pmu)
{
unsigned long flags, val;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = armv6_pmcr_read();
@@ -545,7 +545,7 @@ static void armv6pmu_start(struct arm_pmu *cpu_pmu)
static void armv6pmu_stop(struct arm_pmu *cpu_pmu)
{
unsigned long flags, val;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = armv6_pmcr_read();
@@ -586,7 +586,7 @@ static void armv6pmu_disable_event(struct perf_event *event)
unsigned long val, mask, evt, flags;
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
int idx = hwc->idx;
if (ARMV6_CYCLE_COUNTER == idx) {
@@ -621,7 +621,7 @@ static void armv6mpcore_pmu_disable_event(struct perf_event *event)
unsigned long val, mask, flags, evt = 0;
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
int idx = hwc->idx;
if (ARMV6_CYCLE_COUNTER == idx) {
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 654db5030c31..25762a548f26 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -18,6 +18,175 @@
#ifdef CONFIG_CPU_V7
+struct armv7_pmu_logical_state {
+ u32 PMCR;
+ u32 PMCNTENSET;
+ u32 PMCNTENCLR;
+ u32 PMOVSR;
+ u32 PMSWINC;
+ u32 PMSELR;
+ u32 PMCEID0;
+ u32 PMCEID1;
+
+ u32 PMCCNTR;
+
+ u32 PMUSERENR;
+ u32 PMINTENSET;
+ u32 PMINTENCLR;
+ u32 PMOVSSET;
+
+ struct armv7_pmu_logical_cntr_state {
+ u32 PMXEVTYPER;
+ u32 PMXEVCNTR;
+ } cntrs[1]; /* we will grow this during allocation */
+};
+
+#define __v7_logical_state(cpupmu) \
+ ((struct armv7_pmu_logical_state *)(cpupmu)->logical_state)
+
+#define __v7_logical_state_single(cpupmu, name) \
+ __v7_logical_state(cpupmu)->name
+#define __v7_logical_state_cntr(cpupmu, name) \
+ __v7_logical_state(cpupmu)->cntrs[__v7_logical_state(cpupmu)->PMSELR].name
+
+#define __def_v7_pmu_reg_W(kind, name, op1, Cm, op2) \
+ static inline u32 __v7_pmu_write_physical_##name(u32 value) \
+ { \
+ asm volatile ( \
+ "mcr p15, " #op1 ", %0, c9, " #Cm ", " #op2 \
+ :: "r" (value) \
+ ); \
+ \
+ return value; \
+ } \
+ \
+ static inline u32 __v7_pmu_write_logical_##name( \
+ struct arm_cpu_pmu *cpupmu, u32 value) \
+ { \
+ __v7_logical_state_##kind(cpupmu, name) = value; \
+ return value; \
+ }
+
+#define __def_v7_pmu_reg_R(kind, name, op1, Cm, op2) \
+ static inline u32 __v7_pmu_read_physical_##name(void) \
+ { \
+ u32 result; \
+ \
+ asm volatile ( \
+ "mrc p15, " #op1 ", %0, c9, " #Cm ", " #op2 \
+ : "=r" (result) \
+ ); \
+ \
+ return result; \
+ } \
+ \
+ static inline u32 __v7_pmu_read_logical_##name( \
+ struct arm_cpu_pmu *cpupmu) \
+ { \
+ return __v7_logical_state_##kind(cpupmu, name); \
+ }
+
+#define __def_v7_pmu_reg_WO(name, op1, Cm, op2) \
+ __def_v7_pmu_reg_W(single, name, op1, Cm, op2)
+#define __def_v7_pmu_reg_RO(name, op1, Cm, op2) \
+ __def_v7_pmu_reg_R(single, name, op1, Cm, op2)
+
+#define __def_v7_pmu_reg_RW(name, op1, Cm, op2) \
+ __def_v7_pmu_reg_WO(name, op1, Cm, op2) \
+ __def_v7_pmu_reg_RO(name, op1, Cm, op2)
+
+#define __def_v7_pmu_cntr_WO(name, op1, Cm, op2) \
+ __def_v7_pmu_reg_W(cntr, name, op1, Cm, op2)
+#define __def_v7_pmu_cntr_RO(name, op1, Cm, op2) \
+ __def_v7_pmu_reg_R(cntr, name, op1, Cm, op2)
+
+#define __def_v7_pmu_cntr_RW(name, op1, Cm, op2) \
+ __def_v7_pmu_cntr_WO(name, op1, Cm, op2) \
+ __def_v7_pmu_cntr_RO(name, op1, Cm, op2)
+
+#define __def_v7_pmu_reg(name, prot, op1, Cm, op2) \
+ __def_v7_pmu_reg_##prot(name, op1, Cm, op2)
+#define __def_v7_pmu_cntr(name, prot, op1, Cm, op2) \
+ __def_v7_pmu_cntr_##prot(name, op1, Cm, op2)
+
+__def_v7_pmu_reg(PMCR, RW, 0, c12, 0)
+__def_v7_pmu_reg(PMCNTENSET, RW, 0, c12, 1)
+__def_v7_pmu_reg(PMCNTENCLR, RW, 0, c12, 2)
+__def_v7_pmu_reg(PMOVSR, RW, 0, c12, 3)
+__def_v7_pmu_reg(PMSWINC, WO, 0, c12, 4)
+__def_v7_pmu_reg(PMSELR, RW, 0, c12, 5)
+__def_v7_pmu_reg(PMCEID0, RO, 0, c12, 6)
+__def_v7_pmu_reg(PMCEID1, RO, 0, c12, 7)
+
+__def_v7_pmu_reg(PMCCNTR, RW, 0, c13, 0)
+__def_v7_pmu_cntr(PMXEVTYPER, RW, 0, c13, 1)
+__def_v7_pmu_cntr(PMXEVCNTR, RW, 0, c13, 2)
+
+__def_v7_pmu_reg(PMUSERENR, RW, 0, c14, 0)
+__def_v7_pmu_reg(PMINTENSET, RW, 0, c14, 1)
+__def_v7_pmu_reg(PMINTENCLR, RW, 0, c14, 2)
+__def_v7_pmu_reg(PMOVSSET, RW, 0, c14, 3)
+
+#define __v7_pmu_write_physical(name, value) \
+ __v7_pmu_write_physical_##name(value)
+#define __v7_pmu_read_physical(name) \
+ __v7_pmu_read_physical_##name()
+
+#define __v7_pmu_write_logical(cpupmu, name, value) \
+ __v7_pmu_write_logical_##name(cpupmu, value)
+#define __v7_pmu_read_logical(cpupmu, name) \
+ __v7_pmu_read_logical_##name(cpupmu)
+
+#define __v7_pmu_write_reg(cpupmu, name, value) do { \
+ if ((cpupmu)->active) \
+ __v7_pmu_write_physical(name, value); \
+ else \
+ __v7_pmu_write_logical(cpupmu, name, value); \
+} while(0)
+
+#define __v7_pmu_read_reg(cpupmu, name) ( \
+ (cpupmu)->active ? \
+ __v7_pmu_read_physical(name) : \
+ __v7_pmu_read_logical(cpupmu, name) \
+)
+
+#define __v7_pmu_reg_set(cpupmu, name, logical_name, mask) do { \
+ if ((cpupmu)->active) \
+ __v7_pmu_write_physical(name, mask); \
+ else { \
+ u32 __value; \
+ __value = __v7_pmu_read_logical(cpupmu, logical_name) | (mask); \
+ __v7_pmu_write_logical(cpupmu, logical_name, __value); \
+ } \
+} while(0)
+
+#define __v7_pmu_reg_clr(cpupmu, name, logical_name, mask) do { \
+ if ((cpupmu)->active) \
+ __v7_pmu_write_physical(name, mask); \
+ else { \
+ u32 __value; \
+ __value = __v7_pmu_read_logical(cpupmu, logical_name) & ~(mask); \
+ __v7_pmu_write_logical(cpupmu, logical_name, __value); \
+ } \
+} while(0)
+
+#define __v7_pmu_save_reg(cpupmu, name) \
+ __v7_pmu_write_logical(cpupmu, name, \
+ __v7_pmu_read_physical(name))
+#define __v7_pmu_restore_reg(cpupmu, name) \
+ __v7_pmu_write_physical(name, \
+ __v7_pmu_read_logical(cpupmu, name))
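
As a reading aid, one of the register definitions above expands roughly as follows; __v7_pmu_read_reg()/__v7_pmu_write_reg() then route each access to the physical register while the PMU instance is active, or to its saved logical copy otherwise (armv7_pmnc_read() below is written in exactly these terms):

/*
 * Expansion sketch: __def_v7_pmu_reg(PMCR, RW, 0, c12, 0) generates
 *
 *   u32 __v7_pmu_read_physical_PMCR(void);            mrc p15, 0, <Rt>, c9, c12, 0
 *   u32 __v7_pmu_write_physical_PMCR(u32 value);      mcr p15, 0, <Rt>, c9, c12, 0
 *   u32 __v7_pmu_read_logical_PMCR(struct arm_cpu_pmu *cpupmu);
 *   u32 __v7_pmu_write_logical_PMCR(struct arm_cpu_pmu *cpupmu, u32 value);
 *
 * so __v7_pmu_read_reg(cpupmu, PMCR) becomes a real MRC when
 * cpupmu->active is set and a read of logical_state->PMCR otherwise.
 */
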
+static u32 read_mpidr(void)
+{
+ u32 result;
+
+ asm volatile ("mrc p15, 0, %0, c0, c0, 5" : "=r" (result));
+
+ return result;
+}
+
+static void armv7pmu_reset(void *info);
+
/*
* Common ARMv7 event types
*
@@ -784,18 +953,16 @@ static const unsigned armv7_a7_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
#define ARMV7_EXCLUDE_USER (1 << 30)
#define ARMV7_INCLUDE_HYP (1 << 27)
-static inline u32 armv7_pmnc_read(void)
+static inline u32 armv7_pmnc_read(struct arm_cpu_pmu *cpupmu)
{
- u32 val;
- asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
- return val;
+ return __v7_pmu_read_reg(cpupmu, PMCR);
}
-static inline void armv7_pmnc_write(u32 val)
+static inline void armv7_pmnc_write(struct arm_cpu_pmu *cpupmu, u32 val)
{
val &= ARMV7_PMNC_MASK;
isb();
- asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
+ __v7_pmu_write_reg(cpupmu, PMCR, val);
}
static inline int armv7_pmnc_has_overflowed(u32 pmnc)
@@ -814,10 +981,10 @@ static inline int armv7_pmnc_counter_has_overflowed(u32 pmnc, int idx)
return pmnc & BIT(ARMV7_IDX_TO_COUNTER(idx));
}
-static inline int armv7_pmnc_select_counter(int idx)
+static inline int armv7_pmnc_select_counter(struct arm_cpu_pmu *cpupmu, int idx)
{
u32 counter = ARMV7_IDX_TO_COUNTER(idx);
- asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (counter));
+ __v7_pmu_write_reg(cpupmu, PMSELR, counter);
isb();
return idx;
@@ -825,185 +992,197 @@ static inline int armv7_pmnc_select_counter(int idx)
static inline u32 armv7pmu_read_counter(struct perf_event *event)
{
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ struct arm_pmu *pmu = to_arm_pmu(event->pmu);
+ struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
u32 value = 0;
- if (!armv7_pmnc_counter_valid(cpu_pmu, idx))
+ if (!armv7_pmnc_counter_valid(pmu, idx))
pr_err("CPU%u reading wrong counter %d\n",
smp_processor_id(), idx);
else if (idx == ARMV7_IDX_CYCLE_COUNTER)
- asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
- else if (armv7_pmnc_select_counter(idx) == idx)
- asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (value));
+ value = __v7_pmu_read_reg(cpupmu, PMCCNTR);
+ else if (armv7_pmnc_select_counter(cpupmu, idx) == idx)
+ value = __v7_pmu_read_reg(cpupmu, PMXEVCNTR);
return value;
}
static inline void armv7pmu_write_counter(struct perf_event *event, u32 value)
{
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ struct arm_pmu *pmu = to_arm_pmu(event->pmu);
+ struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
- if (!armv7_pmnc_counter_valid(cpu_pmu, idx))
+ if (!armv7_pmnc_counter_valid(pmu, idx))
pr_err("CPU%u writing wrong counter %d\n",
smp_processor_id(), idx);
else if (idx == ARMV7_IDX_CYCLE_COUNTER)
- asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
- else if (armv7_pmnc_select_counter(idx) == idx)
- asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (value));
+ __v7_pmu_write_reg(cpupmu, PMCCNTR, value);
+ else if (armv7_pmnc_select_counter(cpupmu, idx) == idx)
+ __v7_pmu_write_reg(cpupmu, PMXEVCNTR, value);
}
-static inline void armv7_pmnc_write_evtsel(int idx, u32 val)
+static inline void armv7_pmnc_write_evtsel(struct arm_cpu_pmu *cpupmu, int idx, u32 val)
{
- if (armv7_pmnc_select_counter(idx) == idx) {
+ if (armv7_pmnc_select_counter(cpupmu, idx) == idx) {
val &= ARMV7_EVTYPE_MASK;
- asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
+ __v7_pmu_write_reg(cpupmu, PMXEVTYPER, val);
}
}
-static inline int armv7_pmnc_enable_counter(int idx)
+static inline int armv7_pmnc_enable_counter(struct arm_cpu_pmu *cpupmu, int idx)
{
u32 counter = ARMV7_IDX_TO_COUNTER(idx);
- asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (BIT(counter)));
+ __v7_pmu_reg_set(cpupmu, PMCNTENSET, PMCNTENSET, BIT(counter));
return idx;
}
-static inline int armv7_pmnc_disable_counter(int idx)
+static inline int armv7_pmnc_disable_counter(struct arm_cpu_pmu *cpupmu, int idx)
{
u32 counter = ARMV7_IDX_TO_COUNTER(idx);
- asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (BIT(counter)));
+ __v7_pmu_reg_clr(cpupmu, PMCNTENCLR, PMCNTENSET, BIT(counter));
return idx;
}
-static inline int armv7_pmnc_enable_intens(int idx)
+static inline int armv7_pmnc_enable_intens(struct arm_cpu_pmu *cpupmu, int idx)
{
u32 counter = ARMV7_IDX_TO_COUNTER(idx);
- asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (BIT(counter)));
+ __v7_pmu_reg_set(cpupmu, PMINTENSET, PMCNTENSET, BIT(counter));
return idx;
}
-static inline int armv7_pmnc_disable_intens(int idx)
+static inline int armv7_pmnc_disable_intens(struct arm_cpu_pmu *cpupmu, int idx)
{
u32 counter = ARMV7_IDX_TO_COUNTER(idx);
- asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (BIT(counter)));
+ __v7_pmu_reg_clr(cpupmu, PMINTENCLR, PMINTENSET, BIT(counter));
isb();
/* Clear the overflow flag in case an interrupt is pending. */
- asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (BIT(counter)));
+ __v7_pmu_reg_clr(cpupmu, PMOVSR, PMOVSR, BIT(counter));
isb();
return idx;
}
-static inline u32 armv7_pmnc_getreset_flags(void)
+static inline u32 armv7_pmnc_getreset_flags(struct arm_cpu_pmu *cpupmu)
{
u32 val;
/* Read */
- asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
+ val = __v7_pmu_read_reg(cpupmu, PMOVSR);
/* Write to clear flags */
val &= ARMV7_FLAG_MASK;
- asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
+ __v7_pmu_reg_clr(cpupmu, PMOVSR, PMOVSR, val);
return val;
}
#ifdef DEBUG
-static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu)
+static void armv7_pmnc_dump_regs(struct arm_pmu *pmu)
{
u32 val;
unsigned int cnt;
+ struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu);
printk(KERN_INFO "PMNC registers dump:\n");
-
- asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
- printk(KERN_INFO "PMNC =0x%08x\n", val);
-
- asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val));
- printk(KERN_INFO "CNTENS=0x%08x\n", val);
-
- asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val));
- printk(KERN_INFO "INTENS=0x%08x\n", val);
-
- asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
- printk(KERN_INFO "FLAGS =0x%08x\n", val);
-
- asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val));
- printk(KERN_INFO "SELECT=0x%08x\n", val);
-
- asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
- printk(KERN_INFO "CCNT =0x%08x\n", val);
+ printk(KERN_INFO "PMNC =0x%08x\n", __v7_pmu_read_reg(PMCR));
+ printk(KERN_INFO "CNTENS=0x%08x\n", __v7_pmu_read_reg(PMCNTENSET));
+ printk(KERN_INFO "INTENS=0x%08x\n", __v7_pmu_read_reg(PMINTENSET));
+ printk(KERN_INFO "FLAGS =0x%08x\n", __v7_pmu_read_reg(PMOVSR));
+ printk(KERN_INFO "SELECT=0x%08x\n", __v7_pmu_read_reg(PMSELR));
+ printk(KERN_INFO "CCNT =0x%08x\n", __v7_pmu_read_reg(PMCCNTR));
for (cnt = ARMV7_IDX_COUNTER0;
- cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) {
- armv7_pmnc_select_counter(cnt);
- asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
+ cnt <= ARMV7_IDX_COUNTER_LAST(pmu); cnt++) {
+ armv7_pmnc_select_counter(cpupmu, cnt);
printk(KERN_INFO "CNT[%d] count =0x%08x\n",
- ARMV7_IDX_TO_COUNTER(cnt), val);
- asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
+ ARMV7_IDX_TO_COUNTER(cnt),
+ __v7_pmu_read_reg(cpupmu, PMXEVCNTR));
printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n",
- ARMV7_IDX_TO_COUNTER(cnt), val);
+ ARMV7_IDX_TO_COUNTER(cnt),
+ __v7_pmu_read_reg(cpupmu, PMXEVTYPER));
}
}
#endif
-static void armv7pmu_save_regs(struct arm_pmu *cpu_pmu,
+static void armv7pmu_save_regs(struct arm_pmu *pmu,
struct cpupmu_regs *regs)
{
unsigned int cnt;
- asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (regs->pmc));
- if (!(regs->pmc & ARMV7_PMNC_E))
+ struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu);
+
+ if (!cpupmu->active)
return;
- asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (regs->pmcntenset));
- asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r" (regs->pmuseren));
- asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (regs->pmintenset));
- asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (regs->pmxevtcnt[0]));
+ if (!*cpupmu->cpu_hw_events.used_mask)
+ return;
+
+ if (!(__v7_pmu_save_reg(cpupmu, PMCR) & ARMV7_PMNC_E))
+ return;
+
+ __v7_pmu_save_reg(cpupmu, PMCNTENSET);
+ __v7_pmu_save_reg(cpupmu, PMUSERENR);
+ __v7_pmu_save_reg(cpupmu, PMINTENSET);
+ __v7_pmu_save_reg(cpupmu, PMCCNTR);
+
for (cnt = ARMV7_IDX_COUNTER0;
- cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) {
- armv7_pmnc_select_counter(cnt);
- asm volatile("mrc p15, 0, %0, c9, c13, 1"
- : "=r"(regs->pmxevttype[cnt]));
- asm volatile("mrc p15, 0, %0, c9, c13, 2"
- : "=r"(regs->pmxevtcnt[cnt]));
+ cnt <= ARMV7_IDX_COUNTER_LAST(pmu); cnt++) {
+ armv7_pmnc_select_counter(cpupmu, cnt);
+ __v7_pmu_save_reg(cpupmu, PMSELR); /* mirror physical PMSELR */
+ __v7_pmu_save_reg(cpupmu, PMXEVTYPER);
+ __v7_pmu_save_reg(cpupmu, PMXEVCNTR);
}
return;
}
-static void armv7pmu_restore_regs(struct arm_pmu *cpu_pmu,
+/* armv7pmu_reset() must be called before calling this function */
+static void armv7pmu_restore_regs(struct arm_pmu *pmu,
struct cpupmu_regs *regs)
{
unsigned int cnt;
- if (!(regs->pmc & ARMV7_PMNC_E))
+ u32 pmcr;
+ struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu);
+
+ armv7pmu_reset(pmu);
+
+ if (!cpupmu->active)
return;
- asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (regs->pmcntenset));
- asm volatile("mcr p15, 0, %0, c9, c14, 0" : : "r" (regs->pmuseren));
- asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (regs->pmintenset));
- asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (regs->pmxevtcnt[0]));
+ if (!*cpupmu->cpu_hw_events.used_mask)
+ return;
+
+ pmcr = __v7_pmu_read_logical(cpupmu, PMCR);
+ if (!(pmcr & ARMV7_PMNC_E))
+ return;
+
+ __v7_pmu_restore_reg(cpupmu, PMCNTENSET);
+ __v7_pmu_restore_reg(cpupmu, PMUSERENR);
+ __v7_pmu_restore_reg(cpupmu, PMINTENSET);
+ __v7_pmu_restore_reg(cpupmu, PMCCNTR);
+
for (cnt = ARMV7_IDX_COUNTER0;
- cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) {
- armv7_pmnc_select_counter(cnt);
- asm volatile("mcr p15, 0, %0, c9, c13, 1"
- : : "r"(regs->pmxevttype[cnt]));
- asm volatile("mcr p15, 0, %0, c9, c13, 2"
- : : "r"(regs->pmxevtcnt[cnt]));
+ cnt <= ARMV7_IDX_COUNTER_LAST(pmu); cnt++) {
+ armv7_pmnc_select_counter(cpupmu, cnt);
+ __v7_pmu_save_reg(cpupmu, PMSELR); /* mirror physical PMSELR */
+ __v7_pmu_restore_reg(cpupmu, PMXEVTYPER);
+ __v7_pmu_restore_reg(cpupmu, PMXEVCNTR);
}
- asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (regs->pmc));
+ __v7_pmu_write_reg(cpupmu, PMCR, pmcr);
}
static void armv7pmu_enable_event(struct perf_event *event)
{
unsigned long flags;
struct hw_perf_event *hwc = &event->hw;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct arm_pmu *pmu = to_arm_pmu(event->pmu);
+ struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu);
+ struct pmu_hw_events *events = pmu->get_hw_events(pmu);
int idx = hwc->idx;
- if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
+ if (!armv7_pmnc_counter_valid(pmu, idx)) {
pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n",
smp_processor_id(), idx);
return;
@@ -1018,25 +1197,25 @@ static void armv7pmu_enable_event(struct perf_event *event)
/*
* Disable counter
*/
- armv7_pmnc_disable_counter(idx);
+ armv7_pmnc_disable_counter(cpupmu, idx);
/*
* Set event (if destined for PMNx counters)
* We only need to set the event for the cycle counter if we
* have the ability to perform event filtering.
*/
- if (cpu_pmu->set_event_filter || idx != ARMV7_IDX_CYCLE_COUNTER)
- armv7_pmnc_write_evtsel(idx, hwc->config_base);
+ if (pmu->set_event_filter || idx != ARMV7_IDX_CYCLE_COUNTER)
+ armv7_pmnc_write_evtsel(cpupmu, idx, hwc->config_base);
/*
* Enable interrupt for this counter
*/
- armv7_pmnc_enable_intens(idx);
+ armv7_pmnc_enable_intens(cpupmu, idx);
/*
* Enable counter
*/
- armv7_pmnc_enable_counter(idx);
+ armv7_pmnc_enable_counter(cpupmu, idx);
raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
@@ -1045,11 +1224,12 @@ static void armv7pmu_disable_event(struct perf_event *event)
{
unsigned long flags;
struct hw_perf_event *hwc = &event->hw;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct arm_pmu *pmu = to_arm_pmu(event->pmu);
+ struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu);
+ struct pmu_hw_events *events = pmu->get_hw_events(pmu);
int idx = hwc->idx;
- if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
+ if (!armv7_pmnc_counter_valid(pmu, idx)) {
pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n",
smp_processor_id(), idx);
return;
@@ -1063,12 +1243,12 @@ static void armv7pmu_disable_event(struct perf_event *event)
/*
* Disable counter
*/
- armv7_pmnc_disable_counter(idx);
+ armv7_pmnc_disable_counter(cpupmu, idx);
/*
* Disable interrupt for this counter
*/
- armv7_pmnc_disable_intens(idx);
+ armv7_pmnc_disable_intens(cpupmu, idx);
raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
@@ -1077,15 +1257,23 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
{
u32 pmnc;
struct perf_sample_data data;
- struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
- struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
+ struct arm_pmu *pmu = (struct arm_pmu *)dev;
+ struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu);
+ struct pmu_hw_events *cpuc = pmu->get_hw_events(pmu);
struct pt_regs *regs;
int idx;
+ if (!cpupmu->active) {
+ pr_warn_ratelimited("%s: Spurious interrupt for inactive PMU %s: event counts will be wrong.\n",
+ __func__, pmu->name);
+ pr_warn_once("This is a known interrupt affinity bug in the b.L switcher perf support.\n");
+ return IRQ_NONE;
+ }
+
/*
* Get and reset the IRQ flags
*/
- pmnc = armv7_pmnc_getreset_flags();
+ pmnc = armv7_pmnc_getreset_flags(cpupmu);
/*
* Did an overflow occur?
@@ -1098,7 +1286,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
*/
regs = get_irq_regs();
- for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
+ for (idx = 0; idx < pmu->num_events; ++idx) {
struct perf_event *event = cpuc->events[idx];
struct hw_perf_event *hwc;
@@ -1120,7 +1308,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
continue;
if (perf_event_overflow(event, &data, regs))
- cpu_pmu->disable(event);
+ pmu->disable(event);
}
/*
@@ -1135,25 +1323,27 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
return IRQ_HANDLED;
}
-static void armv7pmu_start(struct arm_pmu *cpu_pmu)
+static void armv7pmu_start(struct arm_pmu *pmu)
{
unsigned long flags;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu);
+ struct pmu_hw_events *events = pmu->get_hw_events(pmu);
raw_spin_lock_irqsave(&events->pmu_lock, flags);
/* Enable all counters */
- armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
+ armv7_pmnc_write(cpupmu, armv7_pmnc_read(cpupmu) | ARMV7_PMNC_E);
raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
-static void armv7pmu_stop(struct arm_pmu *cpu_pmu)
+static void armv7pmu_stop(struct arm_pmu *pmu)
{
unsigned long flags;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu);
+ struct pmu_hw_events *events = pmu->get_hw_events(pmu);
raw_spin_lock_irqsave(&events->pmu_lock, flags);
/* Disable all counters */
- armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
+ armv7_pmnc_write(cpupmu, armv7_pmnc_read(cpupmu) & ~ARMV7_PMNC_E);
raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
@@ -1212,19 +1402,33 @@ static int armv7pmu_set_event_filter(struct hw_perf_event *event,
return 0;
}
+static bool check_active(struct arm_cpu_pmu *cpupmu)
+{
+ u32 mpidr = read_mpidr();
+
+ BUG_ON(!(mpidr & 0x80000000)); /* this won't work on uniprocessor */
+
+ cpupmu->active = ((mpidr ^ cpupmu->mpidr) & 0xFFFFFF) == 0;
+ return cpupmu->active;
+}
+
static void armv7pmu_reset(void *info)
{
- struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
- u32 idx, nb_cnt = cpu_pmu->num_events;
+ struct arm_pmu *pmu = (struct arm_pmu *)info;
+ struct arm_cpu_pmu *cpupmu = to_this_cpu_pmu(pmu);
+ u32 idx, nb_cnt = pmu->num_events;
+
+ if (!check_active(cpupmu))
+ return;
/* The counter and interrupt enable registers are unknown at reset. */
for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
- armv7_pmnc_disable_counter(idx);
- armv7_pmnc_disable_intens(idx);
+ armv7_pmnc_disable_counter(cpupmu, idx);
+ armv7_pmnc_disable_intens(cpupmu, idx);
}
/* Initialize & Reset PMNC: C and P bits */
- armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
+ armv7_pmnc_write(cpupmu, ARMV7_PMNC_P | ARMV7_PMNC_C);
}
static int armv7_a8_map_event(struct perf_event *event)
@@ -1257,8 +1461,13 @@ static int armv7_a7_map_event(struct perf_event *event)
&armv7_a7_perf_cache_map, 0xFF);
}
+static void armv7pmu_cpu_init(struct arm_pmu *pmu,
+ struct arm_cpu_pmu *cpupmu);
+
static void armv7pmu_init(struct arm_pmu *cpu_pmu)
{
+ struct arm_cpu_pmu *cpu_pmus = cpu_pmu->cpu_pmus;
+
cpu_pmu->handle_irq = armv7pmu_handle_irq;
cpu_pmu->enable = armv7pmu_enable_event;
cpu_pmu->disable = armv7pmu_disable_event;
@@ -1270,7 +1479,10 @@ static void armv7pmu_init(struct arm_pmu *cpu_pmu)
cpu_pmu->reset = armv7pmu_reset;
cpu_pmu->save_regs = armv7pmu_save_regs;
cpu_pmu->restore_regs = armv7pmu_restore_regs;
+ cpu_pmu->cpu_init = armv7pmu_cpu_init;
cpu_pmu->max_period = (1LLU << 32) - 1;
+
+ cpu_pmu->cpu_pmus = cpu_pmus;
};
static u32 armv7_read_num_pmnc_events(void)
@@ -1278,12 +1490,38 @@ static u32 armv7_read_num_pmnc_events(void)
u32 nb_cnt;
/* Read the nb of CNTx counters supported from PMNC */
- nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
+ nb_cnt = (__v7_pmu_read_physical(PMCR) >> ARMV7_PMNC_N_SHIFT);
+ nb_cnt &= ARMV7_PMNC_N_MASK;
/* Add the CPU cycles counter and return */
return nb_cnt + 1;
}
+static void armv7pmu_cpu_init(struct arm_pmu *pmu,
+ struct arm_cpu_pmu *cpupmu)
+{
+ size_t size = offsetof(struct armv7_pmu_logical_state, cntrs) +
+ pmu->num_events * sizeof(*__v7_logical_state(cpupmu));
+
+ cpupmu->logical_state = kzalloc(size, GFP_KERNEL);
+
+ /*
+ * We need a proper error return mechanism for these init functions.
+ * Until then, panicking the kernel is acceptable, since a failure
+ * here is indicative of crippling memory constraints which will
+ * likely make the system unusable anyway:
+ */
+ BUG_ON(!cpupmu->logical_state);
+
+ /*
+ * Save the "read-only" ID registers in logical_state.
+ * Because they are read-only, there are no direct accessors,
+ * so poke them directly into the logical_state structure:
+ */
+ __v7_logical_state(cpupmu)->PMCEID0 = __v7_pmu_read_physical(PMCEID0);
+ __v7_logical_state(cpupmu)->PMCEID1 = __v7_pmu_read_physical(PMCEID1);
+}
+
static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
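The __v7_pmu_read_reg()/__v7_pmu_write_reg() helpers added above implement a shadow-register scheme: while a CPU's PMU is marked active the accessors touch the real CP15 registers, otherwise they operate on the per-CPU logical_state copy so counter configuration survives a cluster switch. The following stand-alone sketch models that pattern in user space; the structure layout and the single simulated register are illustrative assumptions, not the kernel's actual definitions.

/*
 * Minimal user-space sketch of the shadow-register idea behind
 * __v7_pmu_read_reg()/__v7_pmu_write_reg(): when "active" we touch the
 * (simulated) hardware register, otherwise only the logical copy, so the
 * state can be replayed onto the inbound CPU after a switch.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct pmu_shadow {
	uint32_t pmcr;		/* logical copy of PMCR */
};

static uint32_t hw_pmcr;	/* stand-in for the physical PMCR */

static void pmu_write_pmcr(struct pmu_shadow *s, bool active, uint32_t val)
{
	if (active)
		hw_pmcr = val;	/* would be an mcr on real hardware */
	else
		s->pmcr = val;	/* logical copy only */
}

static uint32_t pmu_read_pmcr(const struct pmu_shadow *s, bool active)
{
	return active ? hw_pmcr : s->pmcr;
}

int main(void)
{
	struct pmu_shadow shadow = { 0 };

	/* outbound CPU: save hardware state into the shadow */
	shadow.pmcr = hw_pmcr;

	/* while inactive, writes land in the shadow only */
	pmu_write_pmcr(&shadow, false, 0x1);

	/* inbound CPU: replay the shadow into hardware */
	pmu_write_pmcr(&shadow, true, pmu_read_pmcr(&shadow, false));

	printf("hw PMCR now 0x%x\n", hw_pmcr);
	return 0;
}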
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index 63990c42fac9..cd670eafbb56 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -225,7 +225,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
unsigned long pmnc;
struct perf_sample_data data;
struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
- struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(cpu_pmu);
struct pt_regs *regs;
int idx;
@@ -285,7 +285,7 @@ static void xscale1pmu_enable_event(struct perf_event *event)
unsigned long val, mask, evt, flags;
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
int idx = hwc->idx;
switch (idx) {
@@ -321,7 +321,7 @@ static void xscale1pmu_disable_event(struct perf_event *event)
unsigned long val, mask, evt, flags;
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
int idx = hwc->idx;
switch (idx) {
@@ -374,7 +374,7 @@ xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc,
static void xscale1pmu_start(struct arm_pmu *cpu_pmu)
{
unsigned long flags, val;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = xscale1pmu_read_pmnc();
@@ -386,7 +386,7 @@ static void xscale1pmu_start(struct arm_pmu *cpu_pmu)
static void xscale1pmu_stop(struct arm_pmu *cpu_pmu)
{
unsigned long flags, val;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = xscale1pmu_read_pmnc();
@@ -572,7 +572,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
unsigned long pmnc, of_flags;
struct perf_sample_data data;
struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
- struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(cpu_pmu);
struct pt_regs *regs;
int idx;
@@ -626,7 +626,7 @@ static void xscale2pmu_enable_event(struct perf_event *event)
unsigned long flags, ien, evtsel;
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
int idx = hwc->idx;
ien = xscale2pmu_read_int_enable();
@@ -672,7 +672,7 @@ static void xscale2pmu_disable_event(struct perf_event *event)
unsigned long flags, ien, evtsel, of_flags;
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
int idx = hwc->idx;
ien = xscale2pmu_read_int_enable();
@@ -738,7 +738,7 @@ out:
static void xscale2pmu_start(struct arm_pmu *cpu_pmu)
{
unsigned long flags, val;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
@@ -750,7 +750,7 @@ static void xscale2pmu_start(struct arm_pmu *cpu_pmu)
static void xscale2pmu_stop(struct arm_pmu *cpu_pmu)
{
unsigned long flags, val;
- struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events(cpu_pmu);
raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = xscale2pmu_read_pmnc();
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 15c70f0202f7..70f35a48c8ae 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -67,6 +67,7 @@ enum ipi_msg_type {
IPI_CALL_FUNC_SINGLE,
IPI_CPU_STOP,
IPI_CPU_BACKTRACE,
+ IPI_COMPLETION,
};
static DECLARE_COMPLETION(cpu_running);
@@ -465,6 +466,7 @@ static const char *ipi_types[NR_IPI] = {
S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"),
S(IPI_CPU_STOP, "CPU stop interrupts"),
S(IPI_CPU_BACKTRACE, "CPU backtrace"),
+ S(IPI_COMPLETION, "completion interrupts"),
};
void show_ipi_list(struct seq_file *p, int prec)
@@ -642,6 +644,19 @@ static void ipi_cpu_backtrace(unsigned int cpu, struct pt_regs *regs)
}
}
+static DEFINE_PER_CPU(struct completion *, cpu_completion);
+
+int register_ipi_completion(struct completion *completion, int cpu)
+{
+ per_cpu(cpu_completion, cpu) = completion;
+ return IPI_COMPLETION;
+}
+
+static void ipi_complete(unsigned int cpu)
+{
+ complete(per_cpu(cpu_completion, cpu));
+}
+
/*
* Main handler for inter-processor interrupts
*/
@@ -696,6 +711,12 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
ipi_cpu_backtrace(cpu, regs);
break;
+ case IPI_COMPLETION:
+ irq_enter();
+ ipi_complete(cpu);
+ irq_exit();
+ break;
+
default:
printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n",
cpu, ipinr);
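register_ipi_completion() and ipi_complete() above give the switcher a way to block until a remote CPU reaches a known point. A hedged sketch of the intended usage follows; the caller shown here is illustrative (the real call site lives in the switcher code, not in this hunk), and the choice of gic_send_sgi() as the delivery mechanism is an assumption.

/*
 * Illustrative caller only: register a completion for @cpu, raise
 * IPI_COMPLETION on its current GIC CPU interface, then block until
 * ipi_complete() runs on that CPU.
 */
#include <linux/completion.h>
#include <linux/irqchip/arm-gic.h>

static void wait_for_remote_cpu(int cpu, unsigned int gic_cpu_if)
{
	struct completion done;
	int ipi;

	init_completion(&done);
	ipi = register_ipi_completion(&done, cpu);
	gic_send_sgi(gic_cpu_if, ipi);
	wait_for_completion(&done);
}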
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
index ed284ab0041b..d60b7f4e1851 100644
--- a/drivers/bus/arm-cci.c
+++ b/drivers/bus/arm-cci.c
@@ -175,7 +175,8 @@ static u32 cci_pmu_get_max_counters(void)
return n_cnts + 1;
}
-static struct pmu_hw_events *cci_pmu_get_hw_events(void)
+static struct pmu_hw_events *cci_pmu_get_hw_events(
+ struct arm_pmu *__always_unused pmu)
{
return &cci_hw_events;
}
@@ -252,7 +253,7 @@ static int cci_pmu_request_irq(struct arm_pmu *cci_pmu, irq_handler_t handler)
static irqreturn_t cci_pmu_handle_irq(int irq_num, void *dev)
{
struct arm_pmu *cci_pmu = (struct arm_pmu *)dev;
- struct pmu_hw_events *events = cci_pmu->get_hw_events();
+ struct pmu_hw_events *events = cci_pmu->get_hw_events(cci_pmu);
struct perf_sample_data data;
struct pt_regs *regs;
int idx;
@@ -304,7 +305,7 @@ static void cci_pmu_enable_event(struct perf_event *event)
{
unsigned long flags;
struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *events = cci_pmu->get_hw_events();
+ struct pmu_hw_events *events = cci_pmu->get_hw_events(cci_pmu);
struct hw_perf_event *hw_counter = &event->hw;
int idx = hw_counter->idx;
@@ -328,7 +329,7 @@ static void cci_pmu_disable_event(struct perf_event *event)
{
unsigned long flags;
struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *events = cci_pmu->get_hw_events();
+ struct pmu_hw_events *events = cci_pmu->get_hw_events(cci_pmu);
struct hw_perf_event *hw_counter = &event->hw;
int idx = hw_counter->idx;
@@ -348,7 +349,7 @@ static void cci_pmu_start(struct arm_pmu *cci_pmu)
{
u32 val;
unsigned long flags;
- struct pmu_hw_events *events = cci_pmu->get_hw_events();
+ struct pmu_hw_events *events = cci_pmu->get_hw_events(cci_pmu);
raw_spin_lock_irqsave(&events->pmu_lock, flags);
@@ -363,7 +364,7 @@ static void cci_pmu_stop(struct arm_pmu *cci_pmu)
{
u32 val;
unsigned long flags;
- struct pmu_hw_events *events = cci_pmu->get_hw_events();
+ struct pmu_hw_events *events = cci_pmu->get_hw_events(cci_pmu);
raw_spin_lock_irqsave(&events->pmu_lock, flags);
diff --git a/drivers/clk/versatile/clk-vexpress-spc.c b/drivers/clk/versatile/clk-vexpress-spc.c
index 37125bf48cbd..bb566e244b0c 100644
--- a/drivers/clk/versatile/clk-vexpress-spc.c
+++ b/drivers/clk/versatile/clk-vexpress-spc.c
@@ -102,7 +102,7 @@ struct clk *vexpress_clk_register_spc(const char *name, int cluster_id)
#if defined(CONFIG_OF)
void __init vexpress_clk_of_register_spc(void)
{
- char name[14] = "cpu-cluster.";
+ char name[14] = "cpu-cluster.X";
struct device_node *node = NULL;
struct clk *clk;
const u32 *val;
diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c
index 5d7f53fcd6f5..7c2be81f7dcb 100644
--- a/drivers/cpufreq/arm_big_little.c
+++ b/drivers/cpufreq/arm_big_little.c
@@ -24,27 +24,140 @@
#include <linux/cpufreq.h>
#include <linux/cpumask.h>
#include <linux/export.h>
+#include <linux/mutex.h>
#include <linux/of_platform.h>
#include <linux/opp.h>
#include <linux/slab.h>
#include <linux/topology.h>
#include <linux/types.h>
+#include <asm/bL_switcher.h>
#include "arm_big_little.h"
-/* Currently we support only two clusters */
-#define MAX_CLUSTERS 2
+#ifdef CONFIG_BL_SWITCHER
+bool bL_switching_enabled;
+#endif
+
+#define ACTUAL_FREQ(cluster, freq) (((cluster) == A7_CLUSTER) ? (freq) << 1 : (freq))
+#define VIRT_FREQ(cluster, freq) (((cluster) == A7_CLUSTER) ? (freq) >> 1 : (freq))
static struct cpufreq_arm_bL_ops *arm_bL_ops;
static struct clk *clk[MAX_CLUSTERS];
-static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS];
-static atomic_t cluster_usage[MAX_CLUSTERS] = {ATOMIC_INIT(0), ATOMIC_INIT(0)};
+static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS + 1];
+static atomic_t cluster_usage[MAX_CLUSTERS + 1] = {ATOMIC_INIT(0),
+ ATOMIC_INIT(0)};
+
+static unsigned int clk_big_min; /* Minimum clock frequency (big) */
+static unsigned int clk_little_max; /* Maximum clock frequency (Little) */
+
+static DEFINE_PER_CPU(unsigned int, physical_cluster);
+static DEFINE_PER_CPU(unsigned int, cpu_last_req_freq);
+
+static struct mutex cluster_lock[MAX_CLUSTERS];
+
+static unsigned int find_cluster_maxfreq(int cluster)
+{
+ int j;
+ u32 max_freq = 0, cpu_freq;
+
+ for_each_online_cpu(j) {
+ cpu_freq = per_cpu(cpu_last_req_freq, j);
+
+ if ((cluster == per_cpu(physical_cluster, j)) &&
+ (max_freq < cpu_freq))
+ max_freq = cpu_freq;
+ }
+
+ pr_debug("%s: cluster: %d, max freq: %d\n", __func__, cluster,
+ max_freq);
+
+ return max_freq;
+}
+
+static unsigned int clk_get_cpu_rate(unsigned int cpu)
+{
+ u32 cur_cluster = per_cpu(physical_cluster, cpu);
+ u32 rate = clk_get_rate(clk[cur_cluster]) / 1000;
+
+ /* For switcher we use virtual A15 clock rates */
+ if (is_bL_switching_enabled())
+ rate = VIRT_FREQ(cur_cluster, rate);
+
+ pr_debug("%s: cpu: %d, cluster: %d, freq: %u\n", __func__, cpu,
+ cur_cluster, rate);
+
+ return rate;
+}
+
+static unsigned int bL_cpufreq_get_rate(unsigned int cpu)
+{
+ pr_debug("%s: freq: %d\n", __func__, per_cpu(cpu_last_req_freq, cpu));
+
+ return per_cpu(cpu_last_req_freq, cpu);
+}
-static unsigned int bL_cpufreq_get(unsigned int cpu)
+static unsigned int
+bL_cpufreq_set_rate(u32 cpu, u32 old_cluster, u32 new_cluster, u32 rate)
{
- u32 cur_cluster = cpu_to_cluster(cpu);
+ u32 new_rate, prev_rate;
+ int ret;
+
+ mutex_lock(&cluster_lock[new_cluster]);
+
+ prev_rate = per_cpu(cpu_last_req_freq, cpu);
+ per_cpu(cpu_last_req_freq, cpu) = rate;
+ per_cpu(physical_cluster, cpu) = new_cluster;
+
+ if (is_bL_switching_enabled()) {
+ new_rate = find_cluster_maxfreq(new_cluster);
+ new_rate = ACTUAL_FREQ(new_cluster, new_rate);
+ } else {
+ new_rate = rate;
+ }
+
+ pr_debug("%s: cpu: %d, old cluster: %d, new cluster: %d, freq: %d\n",
+ __func__, cpu, old_cluster, new_cluster, new_rate);
+
+ ret = clk_set_rate(clk[new_cluster], new_rate * 1000);
+ if (WARN_ON(ret)) {
+ pr_err("clk_set_rate failed: %d, new cluster: %d\n", ret,
+ new_cluster);
+ per_cpu(cpu_last_req_freq, cpu) = prev_rate;
+ per_cpu(physical_cluster, cpu) = old_cluster;
+
+ mutex_unlock(&cluster_lock[new_cluster]);
+
+ return ret;
+ }
- return clk_get_rate(clk[cur_cluster]) / 1000;
+ mutex_unlock(&cluster_lock[new_cluster]);
+
+ /* Recalc freq for old cluster when switching clusters */
+ if (old_cluster != new_cluster) {
+ pr_debug("%s: cpu: %d, old cluster: %d, new cluster: %d\n",
+ __func__, cpu, old_cluster, new_cluster);
+
+ /* Switch cluster */
+ bL_switch_request(cpu, new_cluster);
+
+ mutex_lock(&cluster_lock[old_cluster]);
+
+ /* Set freq of old cluster if there are cpus left on it */
+ new_rate = find_cluster_maxfreq(old_cluster);
+ new_rate = ACTUAL_FREQ(old_cluster, new_rate);
+
+ if (new_rate) {
+ pr_debug("%s: Updating rate of old cluster: %d, to freq: %d\n",
+ __func__, old_cluster, new_rate);
+
+ ret = clk_set_rate(clk[old_cluster], new_rate * 1000);
+ if (ret)
+ pr_err("%s: clk_set_rate failed: %d, old cluster: %d\n",
+ __func__, ret, old_cluster);
+ }
+ mutex_unlock(&cluster_lock[old_cluster]);
+ }
+
+ return 0;
}
/* Validate policy frequency range */
@@ -60,12 +173,14 @@ static int bL_cpufreq_set_target(struct cpufreq_policy *policy,
unsigned int target_freq, unsigned int relation)
{
struct cpufreq_freqs freqs;
- u32 cpu = policy->cpu, freq_tab_idx, cur_cluster;
+ u32 cpu = policy->cpu, freq_tab_idx, cur_cluster, new_cluster,
+ actual_cluster;
int ret = 0;
- cur_cluster = cpu_to_cluster(policy->cpu);
+ cur_cluster = cpu_to_cluster(cpu);
+ new_cluster = actual_cluster = per_cpu(physical_cluster, cpu);
- freqs.old = bL_cpufreq_get(policy->cpu);
+ freqs.old = bL_cpufreq_get_rate(cpu);
/* Determine valid target frequency using freq_table */
cpufreq_frequency_table_target(policy, freq_table[cur_cluster],
@@ -79,13 +194,21 @@ static int bL_cpufreq_set_target(struct cpufreq_policy *policy,
if (freqs.old == freqs.new)
return 0;
+ if (is_bL_switching_enabled()) {
+ if ((actual_cluster == A15_CLUSTER) &&
+ (freqs.new < clk_big_min)) {
+ new_cluster = A7_CLUSTER;
+ } else if ((actual_cluster == A7_CLUSTER) &&
+ (freqs.new > clk_little_max)) {
+ new_cluster = A15_CLUSTER;
+ }
+ }
+
cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
- ret = clk_set_rate(clk[cur_cluster], freqs.new * 1000);
- if (ret) {
- pr_err("clk_set_rate failed: %d\n", ret);
+ ret = bL_cpufreq_set_rate(cpu, actual_cluster, new_cluster, freqs.new);
+ if (ret)
return ret;
- }
policy->cur = freqs.new;
@@ -94,7 +217,73 @@ static int bL_cpufreq_set_target(struct cpufreq_policy *policy,
return ret;
}
-static void put_cluster_clk_and_freq_table(struct device *cpu_dev)
+static inline u32 get_table_count(struct cpufreq_frequency_table *table)
+{
+ int count;
+
+ for (count = 0; table[count].frequency != CPUFREQ_TABLE_END; count++)
+ ;
+
+ return count;
+}
+
+/* get the minimum frequency in the cpufreq_frequency_table */
+static inline u32 get_table_min(struct cpufreq_frequency_table *table)
+{
+ int i;
+ uint32_t min_freq = ~0;
+ for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++)
+ if (table[i].frequency < min_freq)
+ min_freq = table[i].frequency;
+ return min_freq;
+}
+
+/* get the maximum frequency in the cpufreq_frequency_table */
+static inline u32 get_table_max(struct cpufreq_frequency_table *table)
+{
+ int i;
+ uint32_t max_freq = 0;
+ for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++)
+ if (table[i].frequency > max_freq)
+ max_freq = table[i].frequency;
+ return max_freq;
+}
+
+static int merge_cluster_tables(void)
+{
+ int i, j, k = 0, count = 1;
+ struct cpufreq_frequency_table *table;
+
+ for (i = 0; i < MAX_CLUSTERS; i++)
+ count += get_table_count(freq_table[i]);
+
+ table = kzalloc(sizeof(*table) * count, GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
+
+ freq_table[MAX_CLUSTERS] = table;
+
+ /* Add in reverse order to get freqs in increasing order */
+ for (i = MAX_CLUSTERS - 1; i >= 0; i--) {
+ for (j = 0; freq_table[i][j].frequency != CPUFREQ_TABLE_END;
+ j++) {
+ table[k].frequency = VIRT_FREQ(i,
+ freq_table[i][j].frequency);
+ pr_debug("%s: index: %d, freq: %d\n", __func__, k,
+ table[k].frequency);
+ k++;
+ }
+ }
+
+ table[k].index = k;
+ table[k].frequency = CPUFREQ_TABLE_END;
+
+ pr_debug("%s: End, table: %p, count: %d\n", __func__, table, k);
+
+ return 0;
+}
+
+static void _put_cluster_clk_and_freq_table(struct device *cpu_dev)
{
u32 cluster = cpu_to_cluster(cpu_dev->id);
@@ -105,10 +294,35 @@ static void put_cluster_clk_and_freq_table(struct device *cpu_dev)
}
}
-static int get_cluster_clk_and_freq_table(struct device *cpu_dev)
+static void put_cluster_clk_and_freq_table(struct device *cpu_dev)
{
u32 cluster = cpu_to_cluster(cpu_dev->id);
- char name[14] = "cpu-cluster.";
+ int i;
+
+ if (cluster < MAX_CLUSTERS)
+ return _put_cluster_clk_and_freq_table(cpu_dev);
+
+ if (atomic_dec_return(&cluster_usage[MAX_CLUSTERS]))
+ return;
+
+ for (i = 0; i < MAX_CLUSTERS; i++) {
+ struct device *cdev = get_cpu_device(i);
+ if (!cdev) {
+ pr_err("%s: failed to get cpu%d device\n", __func__, i);
+ return;
+ }
+
+ _put_cluster_clk_and_freq_table(cdev);
+ }
+
+ /* free virtual table */
+ kfree(freq_table[MAX_CLUSTERS]);
+}
+
+static int _get_cluster_clk_and_freq_table(struct device *cpu_dev)
+{
+ u32 cluster = cpu_to_cluster(cpu_dev->id);
+ char name[14] = "cpu-cluster.X";
int ret;
if (atomic_inc_return(&cluster_usage[cluster]) != 1)
@@ -149,6 +363,62 @@ atomic_dec:
return ret;
}
+static int get_cluster_clk_and_freq_table(struct device *cpu_dev)
+{
+ u32 cluster = cpu_to_cluster(cpu_dev->id);
+ int i, ret;
+
+ if (cluster < MAX_CLUSTERS)
+ return _get_cluster_clk_and_freq_table(cpu_dev);
+
+ if (atomic_inc_return(&cluster_usage[MAX_CLUSTERS]) != 1)
+ return 0;
+
+ /*
+ * Get data for all clusters and fill virtual cluster with a merge of
+ * both
+ */
+ for (i = 0; i < MAX_CLUSTERS; i++) {
+ struct device *cdev = get_cpu_device(i);
+ if (!cdev) {
+ pr_err("%s: failed to get cpu%d device\n", __func__, i);
+ return -ENODEV;
+ }
+
+ ret = _get_cluster_clk_and_freq_table(cdev);
+ if (ret)
+ goto put_clusters;
+ }
+
+ ret = merge_cluster_tables();
+ if (ret)
+ goto put_clusters;
+
+ /* Assuming 2 clusters, set clk_big_min and clk_little_max */
+ clk_big_min = get_table_min(freq_table[0]);
+ clk_little_max = VIRT_FREQ(1, get_table_max(freq_table[1]));
+
+ pr_debug("%s: cluster: %d, clk_big_min: %d, clk_little_max: %d\n",
+ __func__, cluster, clk_big_min, clk_little_max);
+
+ return 0;
+
+put_clusters:
+ while (i--) {
+ struct device *cdev = get_cpu_device(i);
+ if (!cdev) {
+ pr_err("%s: failed to get cpu%d device\n", __func__, i);
+ return -ENODEV;
+ }
+
+ _put_cluster_clk_and_freq_table(cdev);
+ }
+
+ atomic_dec(&cluster_usage[MAX_CLUSTERS]);
+
+ return ret;
+}
+
/* Per-CPU initialization */
static int bL_cpufreq_init(struct cpufreq_policy *policy)
{
@@ -177,37 +447,28 @@ static int bL_cpufreq_init(struct cpufreq_policy *policy)
cpufreq_frequency_table_get_attr(freq_table[cur_cluster], policy->cpu);
+ if (cur_cluster < MAX_CLUSTERS) {
+ cpumask_copy(policy->cpus, topology_core_cpumask(policy->cpu));
+
+ per_cpu(physical_cluster, policy->cpu) = cur_cluster;
+ } else {
+ /* Assumption: during init, we are always running on A15 */
+ per_cpu(physical_cluster, policy->cpu) = A15_CLUSTER;
+ }
+
if (arm_bL_ops->get_transition_latency)
policy->cpuinfo.transition_latency =
arm_bL_ops->get_transition_latency(cpu_dev);
else
policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
- policy->cur = bL_cpufreq_get(policy->cpu);
-
- cpumask_copy(policy->cpus, topology_core_cpumask(policy->cpu));
+ policy->cur = clk_get_cpu_rate(policy->cpu);
+ per_cpu(cpu_last_req_freq, policy->cpu) = policy->cur;
dev_info(cpu_dev, "%s: CPU %d initialized\n", __func__, policy->cpu);
return 0;
}
-static int bL_cpufreq_exit(struct cpufreq_policy *policy)
-{
- struct device *cpu_dev;
-
- cpu_dev = get_cpu_device(policy->cpu);
- if (!cpu_dev) {
- pr_err("%s: failed to get cpu%d device\n", __func__,
- policy->cpu);
- return -ENODEV;
- }
-
- put_cluster_clk_and_freq_table(cpu_dev);
- dev_dbg(cpu_dev, "%s: Exited, cpu: %d\n", __func__, policy->cpu);
-
- return 0;
-}
-
/* Export freq_table to sysfs */
static struct freq_attr *bL_cpufreq_attr[] = {
&cpufreq_freq_attr_scaling_available_freqs,
@@ -219,16 +480,47 @@ static struct cpufreq_driver bL_cpufreq_driver = {
.flags = CPUFREQ_STICKY,
.verify = bL_cpufreq_verify_policy,
.target = bL_cpufreq_set_target,
- .get = bL_cpufreq_get,
+ .get = bL_cpufreq_get_rate,
.init = bL_cpufreq_init,
- .exit = bL_cpufreq_exit,
.have_governor_per_policy = true,
.attr = bL_cpufreq_attr,
};
+static int bL_cpufreq_switcher_notifier(struct notifier_block *nfb,
+ unsigned long action, void *_arg)
+{
+ pr_debug("%s: action: %ld\n", __func__, action);
+
+ switch (action) {
+ case BL_NOTIFY_PRE_ENABLE:
+ case BL_NOTIFY_PRE_DISABLE:
+ cpufreq_unregister_driver(&bL_cpufreq_driver);
+ break;
+
+ case BL_NOTIFY_POST_ENABLE:
+ set_switching_enabled(true);
+ cpufreq_register_driver(&bL_cpufreq_driver);
+ break;
+
+ case BL_NOTIFY_POST_DISABLE:
+ set_switching_enabled(false);
+ cpufreq_register_driver(&bL_cpufreq_driver);
+ break;
+
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block bL_switcher_notifier = {
+ .notifier_call = bL_cpufreq_switcher_notifier,
+};
+
int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops)
{
- int ret;
+ int ret, i;
if (arm_bL_ops) {
pr_debug("%s: Already registered: %s, exiting\n", __func__,
@@ -243,16 +535,29 @@ int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops)
arm_bL_ops = ops;
+ ret = bL_switcher_get_enabled();
+ set_switching_enabled(ret);
+
+ for (i = 0; i < MAX_CLUSTERS; i++)
+ mutex_init(&cluster_lock[i]);
+
ret = cpufreq_register_driver(&bL_cpufreq_driver);
if (ret) {
pr_info("%s: Failed registering platform driver: %s, err: %d\n",
__func__, ops->name, ret);
arm_bL_ops = NULL;
} else {
- pr_info("%s: Registered platform driver: %s\n", __func__,
- ops->name);
+ ret = bL_switcher_register_notifier(&bL_switcher_notifier);
+ if (ret) {
+ cpufreq_unregister_driver(&bL_cpufreq_driver);
+ arm_bL_ops = NULL;
+ } else {
+ pr_info("%s: Registered platform driver: %s\n",
+ __func__, ops->name);
+ }
}
+ bL_switcher_put_enabled();
return ret;
}
EXPORT_SYMBOL_GPL(bL_cpufreq_register);
@@ -265,9 +570,31 @@ void bL_cpufreq_unregister(struct cpufreq_arm_bL_ops *ops)
return;
}
+ bL_switcher_get_enabled();
+ bL_switcher_unregister_notifier(&bL_switcher_notifier);
cpufreq_unregister_driver(&bL_cpufreq_driver);
+ bL_switcher_put_enabled();
pr_info("%s: Un-registered platform driver: %s\n", __func__,
arm_bL_ops->name);
+
+ /* For saving table get/put on every cpu in/out */
+ if (is_bL_switching_enabled()) {
+ put_cluster_clk_and_freq_table(get_cpu_device(0));
+ } else {
+ int i;
+
+ for (i = 0; i < MAX_CLUSTERS; i++) {
+ struct device *cdev = get_cpu_device(i);
+ if (!cdev) {
+ pr_err("%s: failed to get cpu%d device\n",
+ __func__, i);
+ return;
+ }
+
+ put_cluster_clk_and_freq_table(cdev);
+ }
+ }
+
arm_bL_ops = NULL;
}
EXPORT_SYMBOL_GPL(bL_cpufreq_unregister);
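The ACTUAL_FREQ()/VIRT_FREQ() pair and merge_cluster_tables() above expose a single virtual frequency table when the switcher is enabled: A7 rates are presented at half their real value so the merged table stays monotonic, and are doubled again before the A7 clock is programmed. A small user-space illustration with made-up operating points:

/*
 * Worked example of the virtual-frequency scheme; the operating points
 * below are invented for illustration only.
 */
#include <stdio.h>

#define A15_CLUSTER	0
#define A7_CLUSTER	1

#define ACTUAL_FREQ(cluster, freq) \
	((cluster) == A7_CLUSTER ? (freq) << 1 : (freq))
#define VIRT_FREQ(cluster, freq) \
	((cluster) == A7_CLUSTER ? (freq) >> 1 : (freq))

int main(void)
{
	unsigned int a7[] = { 350000, 400000, 500000 };   /* kHz, made up */
	unsigned int a15[] = { 500000, 600000, 1000000 }; /* kHz, made up */
	unsigned int i;

	for (i = 0; i < 3; i++)
		printf("A7  %u kHz -> virtual %u kHz\n",
		       a7[i], VIRT_FREQ(A7_CLUSTER, a7[i]));
	for (i = 0; i < 3; i++)
		printf("A15 %u kHz -> virtual %u kHz\n",
		       a15[i], VIRT_FREQ(A15_CLUSTER, a15[i]));

	/* governor asks for a virtual 250000 kHz on an A7 CPU */
	printf("program A7 clock at %u kHz\n",
	       ACTUAL_FREQ(A7_CLUSTER, 250000u));
	return 0;
}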
diff --git a/drivers/cpufreq/arm_big_little.h b/drivers/cpufreq/arm_big_little.h
index 79b2ce17884d..4f5a03d3aef6 100644
--- a/drivers/cpufreq/arm_big_little.h
+++ b/drivers/cpufreq/arm_big_little.h
@@ -23,6 +23,20 @@
#include <linux/device.h>
#include <linux/types.h>
+/* Currently we support only two clusters */
+#define A15_CLUSTER 0
+#define A7_CLUSTER 1
+#define MAX_CLUSTERS 2
+
+#ifdef CONFIG_BL_SWITCHER
+extern bool bL_switching_enabled;
+#define is_bL_switching_enabled() bL_switching_enabled
+#define set_switching_enabled(x) (bL_switching_enabled = (x))
+#else
+#define is_bL_switching_enabled() false
+#define set_switching_enabled(x) do { } while (0)
+#endif
+
struct cpufreq_arm_bL_ops {
char name[CPUFREQ_NAME_LEN];
int (*get_transition_latency)(struct device *cpu_dev);
@@ -36,7 +50,8 @@ struct cpufreq_arm_bL_ops {
static inline int cpu_to_cluster(int cpu)
{
- return topology_physical_package_id(cpu);
+ return is_bL_switching_enabled() ? MAX_CLUSTERS :
+ topology_physical_package_id(cpu);
}
int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops);
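With the switcher enabled, cpu_to_cluster() collapses every CPU onto the single virtual cluster index MAX_CLUSTERS; otherwise it falls back to the physical package id. A quick user-space check of that rule follows; the topology values are made up for illustration.

#include <stdbool.h>
#include <stdio.h>

#define MAX_CLUSTERS 2

static bool bL_switching_enabled;

/* stand-in for topology_physical_package_id(): cpus 0-1 big, 2-4 LITTLE */
static int topology_physical_package_id(int cpu)
{
	return cpu < 2 ? 0 : 1;
}

static int cpu_to_cluster(int cpu)
{
	return bL_switching_enabled ? MAX_CLUSTERS :
		topology_physical_package_id(cpu);
}

int main(void)
{
	int cpu;

	for (cpu = 0; cpu < 5; cpu++)
		printf("cpu%d -> cluster %d\n", cpu, cpu_to_cluster(cpu));

	bL_switching_enabled = true;
	printf("switching on: cpu0 -> cluster %d (virtual)\n",
	       cpu_to_cluster(0));
	return 0;
}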
diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index 2ccfaed7d844..db16c4683fe4 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -21,6 +21,7 @@
#include <linux/spinlock.h>
#include <linux/notifier.h>
#include <asm/cputime.h>
+#include <asm/bL_switcher.h>
static spinlock_t cpufreq_stats_lock;
@@ -404,7 +405,7 @@ static struct notifier_block notifier_trans_block = {
.notifier_call = cpufreq_stat_notifier_trans
};
-static int __init cpufreq_stats_init(void)
+static int cpufreq_stats_setup(void)
{
int ret;
unsigned int cpu;
@@ -432,7 +433,8 @@ static int __init cpufreq_stats_init(void)
return 0;
}
-static void __exit cpufreq_stats_exit(void)
+
+static void cpufreq_stats_cleanup(void)
{
unsigned int cpu;
@@ -447,6 +449,49 @@ static void __exit cpufreq_stats_exit(void)
}
}
+static int cpufreq_stats_switcher_notifier(struct notifier_block *nfb,
+ unsigned long action, void *_arg)
+{
+ switch (action) {
+ case BL_NOTIFY_PRE_ENABLE:
+ case BL_NOTIFY_PRE_DISABLE:
+ cpufreq_stats_cleanup();
+ break;
+
+ case BL_NOTIFY_POST_ENABLE:
+ case BL_NOTIFY_POST_DISABLE:
+ cpufreq_stats_setup();
+ break;
+
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block switcher_notifier = {
+ .notifier_call = cpufreq_stats_switcher_notifier,
+};
+
+static int __init cpufreq_stats_init(void)
+{
+ int ret;
+ spin_lock_init(&cpufreq_stats_lock);
+
+ ret = cpufreq_stats_setup();
+ if (!ret)
+ bL_switcher_register_notifier(&switcher_notifier);
+
+ return ret;
+}
+
+static void __exit cpufreq_stats_exit(void)
+{
+ bL_switcher_unregister_notifier(&switcher_notifier);
+ cpufreq_stats_cleanup();
+}
+
MODULE_AUTHOR("Zou Nan hai <nanhai.zou@intel.com>");
MODULE_DESCRIPTION("'cpufreq_stats' - A driver to export cpufreq stats "
"through sysfs filesystem");
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index b66049fde51d..2fa2632be9fe 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -253,10 +253,9 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
if (cpu >= NR_GIC_CPU_IF || cpu >= nr_cpu_ids)
return -EINVAL;
+ raw_spin_lock(&irq_controller_lock);
mask = 0xff << shift;
bit = gic_cpu_map[cpu] << shift;
-
- raw_spin_lock(&irq_controller_lock);
val = readl_relaxed(reg) & ~mask;
writel_relaxed(val | bit, reg);
raw_spin_unlock(&irq_controller_lock);
@@ -652,7 +651,9 @@ static void __init gic_pm_init(struct gic_chip_data *gic)
void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
{
int cpu;
- unsigned long map = 0;
+ unsigned long flags, map = 0;
+
+ raw_spin_lock_irqsave(&irq_controller_lock, flags);
/* Convert our logical CPU mask into a physical one. */
for_each_cpu(cpu, mask)
@@ -666,9 +667,145 @@ void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
/* this always happens on GIC0 */
writel_relaxed(map << 16 | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT);
+
+ raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
}
#endif
+#ifdef CONFIG_BL_SWITCHER
+/*
+ * gic_send_sgi - send a SGI directly to given CPU interface number
+ *
+ * @cpu_id: the ID for the destination CPU interface
+ * @irq: the IPI number to send an SGI for
+ */
+void gic_send_sgi(unsigned int cpu_id, unsigned int irq)
+{
+ BUG_ON(cpu_id >= NR_GIC_CPU_IF);
+ cpu_id = 1 << cpu_id;
+ /* this always happens on GIC0 */
+ writel_relaxed((cpu_id << 16) | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT);
+}
+
+/*
+ * gic_get_cpu_id - get the CPU interface ID for the specified CPU
+ *
+ * @cpu: the logical CPU number to get the GIC ID for.
+ *
+ * Return the CPU interface ID for the given logical CPU number,
+ * or -1 if the CPU number is too large or the interface ID is
+ * unknown (more than one bit set).
+ */
+int gic_get_cpu_id(unsigned int cpu)
+{
+ unsigned int cpu_bit;
+
+ if (cpu >= NR_GIC_CPU_IF)
+ return -1;
+ cpu_bit = gic_cpu_map[cpu];
+ if (cpu_bit & (cpu_bit - 1))
+ return -1;
+ return __ffs(cpu_bit);
+}
+
+/*
+ * gic_migrate_target - migrate IRQs to another CPU interface
+ *
+ * @new_cpu_id: the CPU target ID to migrate IRQs to
+ *
+ * Migrate all peripheral interrupts with a target matching the current CPU
+ * to the interface corresponding to @new_cpu_id. The CPU interface mapping
+ * is also updated. Targets to other CPU interfaces are unchanged.
+ * This must be called with IRQs locally disabled.
+ */
+void gic_migrate_target(unsigned int new_cpu_id)
+{
+ unsigned int old_cpu_id, gic_irqs, gic_nr = 0;
+ void __iomem *dist_base;
+ int i, ror_val, cpu = smp_processor_id();
+ u32 val, old_mask, active_mask;
+
+ if (gic_nr >= MAX_GIC_NR)
+ BUG();
+
+ dist_base = gic_data_dist_base(&gic_data[gic_nr]);
+ if (!dist_base)
+ return;
+ gic_irqs = gic_data[gic_nr].gic_irqs;
+
+ old_cpu_id = __ffs(gic_cpu_map[cpu]);
+ old_mask = 0x01010101 << old_cpu_id;
+ ror_val = (old_cpu_id - new_cpu_id) & 31;
+
+ raw_spin_lock(&irq_controller_lock);
+
+ gic_cpu_map[cpu] = 1 << new_cpu_id;
+
+ for (i = 8; i < DIV_ROUND_UP(gic_irqs, 4); i++) {
+ val = readl_relaxed(dist_base + GIC_DIST_TARGET + i * 4);
+ active_mask = val & old_mask;
+ if (active_mask) {
+ val &= ~active_mask;
+ val |= ror32(active_mask, ror_val);
+ writel_relaxed(val, dist_base + GIC_DIST_TARGET + i * 4);
+ }
+ }
+
+ raw_spin_unlock(&irq_controller_lock);
+
+ /*
+ * Now let's migrate and clear any potential SGIs that might be
+ * pending for us (old_cpu_id). Since GIC_DIST_SGI_PENDING_SET
+ * is a banked register, we can only forward the SGI using
+ * GIC_DIST_SOFTINT. The original SGI source is lost but Linux
+ * doesn't use that information anyway.
+ *
+ * For the same reason we do not adjust SGI source information
+ * for previously sent SGIs by us to other CPUs either.
+ */
+ for (i = 0; i < 16; i += 4) {
+ int j;
+ val = readl_relaxed(dist_base + GIC_DIST_SGI_PENDING_SET + i);
+ if (!val)
+ continue;
+ writel_relaxed(val, dist_base + GIC_DIST_SGI_PENDING_CLEAR + i);
+ for (j = i; j < i + 4; j++) {
+ if (val & 0xff)
+ writel_relaxed((1 << (new_cpu_id + 16)) | j,
+ dist_base + GIC_DIST_SOFTINT);
+ val >>= 8;
+ }
+ }
+}
+
+/*
+ * gic_get_sgir_physaddr - get the physical address for the SGI register
+ *
+ * Return the physical address of the SGI register to be used
+ * by some early assembly code when the kernel is not yet available.
+ */
+static unsigned long gic_dist_physaddr;
+
+unsigned long gic_get_sgir_physaddr(void)
+{
+ if (!gic_dist_physaddr)
+ return 0;
+ return gic_dist_physaddr + GIC_DIST_SOFTINT;
+}
+
+void __init gic_init_physaddr(struct device_node *node)
+{
+ struct resource res;
+ if (of_address_to_resource(node, 0, &res) == 0) {
+ gic_dist_physaddr = res.start;
+ pr_info("GIC physical location is %#lx\n", gic_dist_physaddr);
+ }
+}
+
+#else
+#define gic_init_physaddr(node) do { } while (0)
+#endif
+
static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
irq_hw_number_t hw)
{
@@ -850,6 +987,8 @@ int __init gic_of_init(struct device_node *node, struct device_node *parent)
percpu_offset = 0;
gic_init_bases(gic_cnt, -1, dist_base, cpu_base, percpu_offset, node);
+ if (!gic_cnt)
+ gic_init_physaddr(node);
if (parent) {
irq = irq_of_parse_and_map(node, 0);
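gic_migrate_target() above retargets interrupts by rotating the per-interface bits in each GIC_DIST_TARGET word: every word holds four 8-bit target fields, so rotating the masked bits by (old_id - new_id) & 31 moves the old CPU interface's bit to the new one in all four fields at once. A user-space illustration with a made-up register value:

#include <stdint.h>
#include <stdio.h>

/* same rotation as the kernel's ror32(); shift must be in 1..31 here */
static uint32_t ror32(uint32_t word, unsigned int shift)
{
	return (word >> shift) | (word << (32 - shift));
}

int main(void)
{
	unsigned int old_id = 0, new_id = 2;
	uint32_t old_mask = 0x01010101u << old_id;
	unsigned int ror_val = (old_id - new_id) & 31;

	/* four interrupts: targets CPU0, CPU1, CPU0, CPU0|CPU3 (made up) */
	uint32_t val = 0x09010201u;
	uint32_t active = val & old_mask;

	val &= ~active;
	val |= ror32(active, ror_val);

	/* prints 0x0c040204: every CPU0 target bit moved to CPU2 */
	printf("retargeted GIC_DIST_TARGET word: 0x%08x\n", (unsigned int)val);
	return 0;
}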
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index b5696108c06e..40643ca79cd9 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -31,6 +31,8 @@
#define GIC_DIST_TARGET 0x800
#define GIC_DIST_CONFIG 0xc00
#define GIC_DIST_SOFTINT 0xf00
+#define GIC_DIST_SGI_PENDING_CLEAR 0xf10
+#define GIC_DIST_SGI_PENDING_SET 0xf20
#define GICH_HCR 0x0
#define GICH_VTR 0x4
@@ -75,6 +77,11 @@ static inline void gic_init(unsigned int nr, int start,
gic_init_bases(nr, start, dist, cpu, 0, NULL);
}
+void gic_send_sgi(unsigned int cpu_id, unsigned int irq);
+int gic_get_cpu_id(unsigned int cpu);
+void gic_migrate_target(unsigned int new_cpu_id);
+unsigned long gic_get_sgir_physaddr(void);
+
#endif /* __ASSEMBLY */
#endif
diff --git a/include/trace/events/power_cpu_migrate.h b/include/trace/events/power_cpu_migrate.h
new file mode 100644
index 000000000000..f76dd4de625e
--- /dev/null
+++ b/include/trace/events/power_cpu_migrate.h
@@ -0,0 +1,67 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM power
+
+#if !defined(_TRACE_POWER_CPU_MIGRATE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_POWER_CPU_MIGRATE_H
+
+#include <linux/tracepoint.h>
+
+#define __cpu_migrate_proto \
+ TP_PROTO(u64 timestamp, \
+ u32 cpu_hwid)
+#define __cpu_migrate_args \
+ TP_ARGS(timestamp, \
+ cpu_hwid)
+
+DECLARE_EVENT_CLASS(cpu_migrate,
+
+ __cpu_migrate_proto,
+ __cpu_migrate_args,
+
+ TP_STRUCT__entry(
+ __field(u64, timestamp )
+ __field(u32, cpu_hwid )
+ ),
+
+ TP_fast_assign(
+ __entry->timestamp = timestamp;
+ __entry->cpu_hwid = cpu_hwid;
+ ),
+
+ TP_printk("timestamp=%llu cpu_hwid=0x%08lX",
+ (unsigned long long)__entry->timestamp,
+ (unsigned long)__entry->cpu_hwid
+ )
+);
+
+#define __define_cpu_migrate_event(name) \
+ DEFINE_EVENT(cpu_migrate, cpu_migrate_##name, \
+ __cpu_migrate_proto, \
+ __cpu_migrate_args \
+ )
+
+__define_cpu_migrate_event(begin);
+__define_cpu_migrate_event(finish);
+__define_cpu_migrate_event(current);
+
+#undef __define_cpu_migrate
+#undef __cpu_migrate_proto
+#undef __cpu_migrate_args
+
+/* This file can get included multiple times, TRACE_HEADER_MULTI_READ at top */
+#ifndef _PWR_CPU_MIGRATE_EVENT_AVOID_DOUBLE_DEFINING
+#define _PWR_CPU_MIGRATE_EVENT_AVOID_DOUBLE_DEFINING
+
+/*
+ * Set from_phys_cpu and to_phys_cpu to CPU_MIGRATE_ALL_CPUS to indicate
+ * a whole-cluster migration:
+ */
+#define CPU_MIGRATE_ALL_CPUS 0x80000000U
+#endif
+
+#endif /* _TRACE_POWER_CPU_MIGRATE_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE power_cpu_migrate
+#include <trace/define_trace.h>
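DEFINE_EVENT() above generates trace_cpu_migrate_begin(), trace_cpu_migrate_finish() and trace_cpu_migrate_current(), each taking the (timestamp, cpu_hwid) pair from the event class. A hedged sketch of how the switcher is expected to emit them follows; the clock source and the surrounding function are assumptions, not taken from this patch.

#include <linux/sched.h>
#include <linux/types.h>

#define CREATE_TRACE_POINTS
#include <trace/events/power_cpu_migrate.h>

/* illustrative emitter only; the real call site is in the switcher code */
static void trace_switch(u32 old_hwid, u32 new_hwid)
{
	trace_cpu_migrate_begin(sched_clock(), old_hwid);
	/* ... perform the actual switch here ... */
	trace_cpu_migrate_finish(sched_clock(), new_hwid);
}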
diff --git a/linaro/configs/big-LITTLE-IKS.conf b/linaro/configs/big-LITTLE-IKS.conf
new file mode 100644
index 000000000000..b067fde86eaa
--- /dev/null
+++ b/linaro/configs/big-LITTLE-IKS.conf
@@ -0,0 +1,5 @@
+CONFIG_BIG_LITTLE=y
+CONFIG_BL_SWITCHER=y
+CONFIG_ARM_DT_BL_CPUFREQ=y
+CONFIG_ARM_VEXPRESS_BL_CPUFREQ=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y