diff options
author | Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com> | 2012-09-25 17:26:51 +0100 |
---|---|---|
committer | Viresh Kumar <viresh.kumar@linaro.org> | 2012-11-14 09:08:16 +0530 |
commit | 810433e74acba88da6dc3a541c95ada38430bdbc (patch) | |
tree | 4df0704e4280bcd001604b3894e1bbe3d7cc549d | |
parent | 5143555f3ec2c89d661f8798804e8e99361cfeb7 (diff) |
ARM: perf: set cpu affinity to support multiple PMUs
In a system with multiple heterogeneous CPU PMUs and each PMUs can handle
events on a subset of CPUs, probably belonging a the same cluster.
This patch introduces a cpumask to track which CPUs each PMU supports.
It also updates armpmu_event_init to reject cpu-specific events being
initialised for unsupported CPUs. Since process-specific events can be
initialised for all the CPU PMUs,armpmu_start/stop/add are modified to
prevent from being added on unsupported CPUs.
Signed-off-by: Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com>
-rw-r--r-- | Documentation/devicetree/bindings/arm/pmu.txt | 3 | ||||
-rw-r--r-- | arch/arm/include/asm/pmu.h | 1 | ||||
-rw-r--r-- | arch/arm/kernel/perf_event.c | 16 | ||||
-rw-r--r-- | arch/arm/kernel/perf_event_cpu.c | 36 |
4 files changed, 49 insertions, 7 deletions
diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt index 343781b9f24..4ce82d045a6 100644 --- a/Documentation/devicetree/bindings/arm/pmu.txt +++ b/Documentation/devicetree/bindings/arm/pmu.txt @@ -16,6 +16,9 @@ Required properties: "arm,arm1176-pmu" "arm,arm1136-pmu" - interrupts : 1 combined interrupt or 1 per core. +- cluster : a phandle to the cluster to which it belongs + If there are more than one cluster with same CPU type + then there should be separate PMU nodes per cluster. Example: diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h index f24edad26c7..3713e57a467 100644 --- a/arch/arm/include/asm/pmu.h +++ b/arch/arm/include/asm/pmu.h @@ -65,6 +65,7 @@ struct pmu_hw_events { struct arm_pmu { struct pmu pmu; cpumask_t active_irqs; + cpumask_t valid_cpus; char *name; irqreturn_t (*handle_irq)(int irq_num, void *dev); void (*enable)(struct perf_event *event); diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index ac067fe92f7..72c4fbe63d1 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -12,6 +12,7 @@ */ #define pr_fmt(fmt) "hw perfevents: " fmt +#include <linux/cpumask.h> #include <linux/kernel.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> @@ -167,6 +168,8 @@ armpmu_stop(struct perf_event *event, int flags) struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus)) + return; /* * ARM pmu always has to update the counter, so ignore * PERF_EF_UPDATE, see comments in armpmu_start(). @@ -183,6 +186,8 @@ static void armpmu_start(struct perf_event *event, int flags) struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus)) + return; /* * ARM pmu always has to reprogram the period, so ignore * PERF_EF_RELOAD, see the comment below. @@ -210,6 +215,9 @@ armpmu_del(struct perf_event *event, int flags) struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus)) + return; + WARN_ON(idx < 0); armpmu_stop(event, PERF_EF_UPDATE); @@ -228,6 +236,10 @@ armpmu_add(struct perf_event *event, int flags) int idx; int err = 0; + /* An event following a process won't be stopped earlier */ + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus)) + return 0; + perf_pmu_disable(event->pmu); /* If we don't have a space for the counter then finish early. */ @@ -426,6 +438,10 @@ static int armpmu_event_init(struct perf_event *event) int err = 0; atomic_t *active_events = &armpmu->active_events; + if (event->cpu != -1 && + !cpumask_test_cpu(event->cpu, &armpmu->valid_cpus)) + return -ENOENT; + /* does not support taken branch sampling */ if (has_branch_stack(event)) return -EOPNOTSUPP; diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 8aae245408f..61a6e767e01 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -136,7 +136,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) static void __devinit cpu_pmu_init(struct arm_pmu *cpu_pmu) { int cpu; - for_each_possible_cpu(cpu) { + for_each_cpu_mask(cpu, cpu_pmu->valid_cpus) { struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu); events->events = per_cpu(hw_events, cpu); events->used_mask = per_cpu(used_mask, cpu); @@ -149,7 +149,7 @@ static void __devinit cpu_pmu_init(struct arm_pmu *cpu_pmu) /* Ensure the PMU has sane values out of reset. */ if (cpu_pmu && cpu_pmu->reset) - on_each_cpu(cpu_pmu->reset, cpu_pmu, 1); + on_each_cpu_mask(&cpu_pmu->valid_cpus, cpu_pmu->reset, cpu_pmu, 1); } /* @@ -251,6 +251,9 @@ static int __devinit probe_current_pmu(struct arm_pmu *pmu) } } + /* assume PMU support all the CPUs in this case */ + cpumask_setall(&pmu->valid_cpus); + put_cpu(); return ret; } @@ -258,10 +261,9 @@ static int __devinit probe_current_pmu(struct arm_pmu *pmu) static int __devinit cpu_pmu_device_probe(struct platform_device *pdev) { const struct of_device_id *of_id; - int (*init_fn)(struct arm_pmu *); struct device_node *node = pdev->dev.of_node; struct arm_pmu *pmu; - int ret = -ENODEV; + int ret = 0; int cpu; pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL); @@ -271,8 +273,28 @@ static int __devinit cpu_pmu_device_probe(struct platform_device *pdev) } if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) { - init_fn = of_id->data; - ret = init_fn(pmu); + smp_call_func_t init_fn = (smp_call_func_t)of_id->data; + struct device_node *ncluster; + int cluster = -1; + cpumask_t sibling_mask; + + ncluster = of_parse_phandle(node, "cluster", 0); + if (ncluster) { + int len; + const u32 *hwid; + hwid = of_get_property(ncluster, "reg", &len); + if (hwid && len == 4) + cluster = be32_to_cpup(hwid); + } + /* set sibling mask to all cpu mask if socket is not specified */ + if (cluster == -1 || + cluster_to_logical_mask(cluster, &sibling_mask)) + cpumask_setall(&sibling_mask); + + smp_call_function_any(&sibling_mask, init_fn, pmu, 1); + + /* now set the valid_cpus after init */ + cpumask_copy(&pmu->valid_cpus, &sibling_mask); } else { ret = probe_current_pmu(pmu); } @@ -283,7 +305,7 @@ static int __devinit cpu_pmu_device_probe(struct platform_device *pdev) return ret; } - for_each_possible_cpu(cpu) + for_each_cpu_mask(cpu, pmu->valid_cpus) per_cpu(cpu_pmu, cpu) = pmu; pmu->plat_device = pdev; |