diff options
-rw-r--r-- | arch/arm/include/asm/pmu.h | 13 | ||||
-rw-r--r-- | arch/arm/kernel/perf_event.c | 9 | ||||
-rw-r--r-- | arch/arm/kernel/perf_event_cpu.c | 148 | ||||
-rw-r--r-- | arch/arm/kernel/perf_event_v7.c | 2 | ||||
-rw-r--r-- | arch/arm/mach-ux500/cpu-db8500.c | 29 |
5 files changed, 163 insertions, 38 deletions
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h index b1596bd59129..26c7d29c976d 100644 --- a/arch/arm/include/asm/pmu.h +++ b/arch/arm/include/asm/pmu.h @@ -87,6 +87,14 @@ struct pmu_hw_events { * already have to allocate this struct per cpu. */ struct arm_pmu *percpu_pmu; + +#ifdef CONFIG_SMP + /* + * This is used to schedule workaround logic on platforms where all + * the PMUs are attached to a single SPI. + */ + struct irq_work work; +#endif }; struct arm_pmu { @@ -117,6 +125,11 @@ struct arm_pmu { struct platform_device *plat_device; struct pmu_hw_events __percpu *hw_events; struct notifier_block hotplug_nb; +#ifdef CONFIG_SMP + int muxed_spi_workaround_irq; + struct work_struct muxed_spi_workaround_work; + atomic_t remaining_irq_work; +#endif }; #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index f7c65adaa428..e5c537b57f94 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -299,8 +299,6 @@ validate_group(struct perf_event *event) static irqreturn_t armpmu_dispatch_irq(int irq, void *dev) { struct arm_pmu *armpmu; - struct platform_device *plat_device; - struct arm_pmu_platdata *plat; int ret; u64 start_clock, finish_clock; @@ -311,14 +309,9 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void *dev) * dereference. 
*/ armpmu = *(void **)dev; - plat_device = armpmu->plat_device; - plat = dev_get_platdata(&plat_device->dev); start_clock = sched_clock(); - if (plat && plat->handle_irq) - ret = plat->handle_irq(irq, armpmu, armpmu->handle_irq); - else - ret = armpmu->handle_irq(irq, armpmu); + ret = armpmu->handle_irq(irq, armpmu); finish_clock = sched_clock(); perf_sample_event_took(finish_clock - start_clock); diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index dd9acc95ebc0..76227484baa9 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -59,6 +59,142 @@ int perf_num_counters(void) } EXPORT_SYMBOL_GPL(perf_num_counters); +#ifdef CONFIG_SMP +/* + * Workaround logic that is distributed to all cores if the PMU has only + * a single IRQ and the CPU receiving that IRQ cannot handle it. Its + * job is to try to service the interrupt on the current CPU. It will + * also enable the IRQ again if all the other CPUs have already tried to + * service it. + */ +static void cpu_pmu_do_percpu_work(struct irq_work *w) +{ + struct pmu_hw_events *hw_events = + container_of(w, struct pmu_hw_events, work); + struct arm_pmu *cpu_pmu = hw_events->percpu_pmu; + + /* Ignore the return code, we can do nothing useful with it */ + cpu_pmu->handle_irq(0, cpu_pmu); + + if (atomic_dec_and_test(&cpu_pmu->remaining_irq_work)) + enable_irq(cpu_pmu->muxed_spi_workaround_irq); +} + +/* + * Issue work to the other CPUs. Must be called whilst we own the + * hotplug locks. + */ +static void cpu_pmu_queue_percpu_work(struct arm_pmu *cpu_pmu) +{ + int cpu; + + atomic_add(num_online_cpus() - 1, &cpu_pmu->remaining_irq_work); + + for_each_online_cpu(cpu) { + struct pmu_hw_events *hw_events = + per_cpu_ptr(cpu_pmu->hw_events, cpu); + + if (cpu == smp_processor_id()) + continue; + + /* + * We assume that the IPI within irq_work_queue_on() + * implies a full memory barrier making the value of + * cpu_pmu->remaining_irq_work visible to the target. 
+ */ + if (!irq_work_queue_on(&hw_events->work, cpu)) + if (atomic_dec_and_test(&cpu_pmu->remaining_irq_work)) + enable_irq(cpu_pmu->muxed_spi_workaround_irq); + } +} + +void cpu_pmu_muxed_spi_workaround_worker(struct work_struct *work) +{ + struct arm_pmu *cpu_pmu = + container_of(work, struct arm_pmu, muxed_spi_workaround_work); + + get_online_cpus(); + cpu_pmu_queue_percpu_work(cpu_pmu); + put_online_cpus(); +} + +/* + * Called when the main interrupt handler cannot determine the source + * of interrupt. It will deploy a workaround if we are running on an SMP + * platform with only a single muxed SPI. + * + * The workaround disables the interrupt and distributes irqwork to all + * other processors in the system. Hopefully one of them will clear the + * interrupt... + */ +static irqreturn_t cpu_pmu_handle_irq_none(int irq_num, struct arm_pmu *cpu_pmu) +{ + + if (irq_num != cpu_pmu->muxed_spi_workaround_irq) + return IRQ_NONE; + + disable_irq_nosync(cpu_pmu->muxed_spi_workaround_irq); + + if (try_get_online_cpus()) { + cpu_pmu_queue_percpu_work(cpu_pmu); + put_online_cpus(); + } else { + /* + * There is a CPU hotplug operation in flight making it + * unsafe for us to queue the percpu work. The PMU is + * already silenced so we'll leave it like that and + * schedule some work to tidy things up. + * + * Taking this code path should be very rare which is + * good because the latencies involved here are way too + * long for good profiling. 
+ */ + schedule_work(&cpu_pmu->muxed_spi_workaround_work); + } + + return IRQ_HANDLED; +} + +static int cpu_pmu_muxed_spi_workaround_init(struct arm_pmu *cpu_pmu) +{ + struct platform_device *pmu_device = cpu_pmu->plat_device; + int cpu; + + for_each_possible_cpu(cpu) { + struct pmu_hw_events *hw_events = + per_cpu_ptr(cpu_pmu->hw_events, cpu); + + init_irq_work(&hw_events->work, cpu_pmu_do_percpu_work); + } + + INIT_WORK(&cpu_pmu->muxed_spi_workaround_work, + cpu_pmu_muxed_spi_workaround_worker); + atomic_set(&cpu_pmu->remaining_irq_work, 0); + cpu_pmu->muxed_spi_workaround_irq = platform_get_irq(pmu_device, 0); + + return 0; +} + +static void cpu_pmu_muxed_spi_workaround_term(struct arm_pmu *cpu_pmu) +{ + cpu_pmu->muxed_spi_workaround_irq = 0; +} +#else /* CONFIG_SMP */ +static int cpu_pmu_muxed_spi_workaround_init(struct arm_pmu *cpu_pmu) +{ + return 0; +} + +static void cpu_pmu_muxed_spi_workaround_term(struct arm_pmu *cpu_pmu) +{ +} + +static irqreturn_t cpu_pmu_handle_irq_none(int irq_num, struct arm_pmu *cpu_pmu) +{ + return IRQ_NONE; +} +#endif /* CONFIG_SMP */ + /* Include the PMU-specific implementations. */ #include "perf_event_xscale.c" #include "perf_event_v6.c" @@ -98,6 +234,8 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) if (irq >= 0) free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, i)); } + + cpu_pmu_muxed_spi_workaround_term(cpu_pmu); } } @@ -155,6 +293,16 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) cpumask_set_cpu(i, &cpu_pmu->active_irqs); } + + /* + * If we are running SMP and have only one interrupt source + * then get ready to share that single irq among the cores. 
+ */ + if (nr_cpu_ids > 1 && irqs == 1) { + err = cpu_pmu_muxed_spi_workaround_init(cpu_pmu); + if (err) + return err; + } } return 0; diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 8993770c47de..0dd914c10803 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -792,7 +792,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) * Did an overflow occur? */ if (!armv7_pmnc_has_overflowed(pmnc)) - return IRQ_NONE; + return cpu_pmu_handle_irq_none(irq_num, cpu_pmu); /* * Handle the counter(s) overflow(s) diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c index 6f63954c8bde..917774999c5c 100644 --- a/arch/arm/mach-ux500/cpu-db8500.c +++ b/arch/arm/mach-ux500/cpu-db8500.c @@ -12,8 +12,6 @@ #include <linux/init.h> #include <linux/device.h> #include <linux/amba/bus.h> -#include <linux/interrupt.h> -#include <linux/irq.h> #include <linux/platform_device.h> #include <linux/io.h> #include <linux/mfd/abx500/ab8500.h> @@ -23,7 +21,6 @@ #include <linux/regulator/machine.h> #include <linux/random.h> -#include <asm/pmu.h> #include <asm/mach/map.h> #include "setup.h" @@ -99,30 +96,6 @@ static void __init u8500_map_io(void) iotable_init(u8500_io_desc, ARRAY_SIZE(u8500_io_desc)); } -/* - * The PMU IRQ lines of two cores are wired together into a single interrupt. - * Bounce the interrupt to the other core if it's not ours. - */ -static irqreturn_t db8500_pmu_handler(int irq, void *dev, irq_handler_t handler) -{ - irqreturn_t ret = handler(irq, dev); - int other = !smp_processor_id(); - - if (ret == IRQ_NONE && cpu_online(other)) - irq_set_affinity(irq, cpumask_of(other)); - - /* - * We should be able to get away with the amount of IRQ_NONEs we give, - * while still having the spurious IRQ detection code kick in if the - * interrupt really starts hitting spuriously. 
- */ - return ret; -} - -static struct arm_pmu_platdata db8500_pmu_platdata = { - .handle_irq = db8500_pmu_handler, -}; - static const char *db8500_read_soc_id(void) { void __iomem *uid = __io_address(U8500_BB_UID_BASE); @@ -143,8 +116,6 @@ static struct device * __init db8500_soc_device_init(void) } static struct of_dev_auxdata u8500_auxdata_lookup[] __initdata = { - /* Requires call-back bindings. */ - OF_DEV_AUXDATA("arm,cortex-a9-pmu", 0, "arm-pmu", &db8500_pmu_platdata), /* Requires DMA bindings. */ OF_DEV_AUXDATA("stericsson,ux500-msp-i2s", 0x80123000, "ux500-msp-i2s.0", &msp0_platform_data), |