From 8ded1e1a92daa96307e4b84b707fee5993bc6047 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Tue, 7 Jul 2015 18:16:15 +0100
Subject: ARM: 8401/1: perf: Set affinity for PPI based PMUs

For PPI based PMUs, we bail out early in of_pmu_irq_cfg() without
setting the PMU's supported_cpus bitmap. This causes the
smp_call_function_any() in armv7_probe_num_events() to fail. Set
the bitmap to be all CPUs so that we properly probe PMUs that use
PPIs.

Fixes: cc88116da0d1 ("arm: perf: treat PMUs as CPU affine")
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/perf_event.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 357f57ea83f4..f3ddd0ff2d8b 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -795,8 +795,10 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu)
 
 	/* Don't bother with PPIs; they're already affine */
 	irq = platform_get_irq(pdev, 0);
-	if (irq >= 0 && irq_is_percpu(irq))
+	if (irq >= 0 && irq_is_percpu(irq)) {
+		cpumask_setall(&pmu->supported_cpus);
 		return 0;
+	}
 
 	irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
 	if (!irqs)
-- 
cgit v1.2.3


From f81309067ff2d84788316c513a415f6bb8c9171f Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Mon, 1 Jun 2015 23:44:46 +0100
Subject: ARM: move heavy barrier support out of line

The existing memory barrier macro causes a significant amount of code
to be inserted inline at every call site.  For example, in
gpio_set_irq_type(), we have this for mb():

c0344c08:       f57ff04e        dsb     st
c0344c0c:       e59f8190        ldr     r8, [pc, #400]  ; c0344da4 <gpio_set_irq_type+0x230>
c0344c10:       e3590004        cmp     r9, #4
c0344c14:       e5983014        ldr     r3, [r8, #20]
c0344c18:       0a000054        beq     c0344d70 <gpio_set_irq_type+0x1fc>
c0344c1c:       e3530000        cmp     r3, #0
c0344c20:       0a000004        beq     c0344c38 <gpio_set_irq_type+0xc4>
c0344c24:       e50b2030        str     r2, [fp, #-48]  ; 0xffffffd0
c0344c28:       e50bc034        str     ip, [fp, #-52]  ; 0xffffffcc
c0344c2c:       e12fff33        blx     r3
c0344c30:       e51bc034        ldr     ip, [fp, #-52]  ; 0xffffffcc
c0344c34:       e51b2030        ldr     r2, [fp, #-48]  ; 0xffffffd0
c0344c38:       e5963004        ldr     r3, [r6, #4]

Moving the outer_cache_sync() call out of line reduces the impact of
the barrier:

c0344968:       f57ff04e        dsb     st
c034496c:       e35a0004        cmp     sl, #4
c0344970:       e50b2030        str     r2, [fp, #-48]  ; 0xffffffd0
c0344974:       0a000044        beq     c0344a8c <gpio_set_irq_type+0x1b8>
c0344978:       ebf363dd        bl      c001d8f4 <arm_heavy_mb>
c034497c:       e5953004        ldr     r3, [r5, #4]

This should reduce the cache footprint of this code.  Overall, this
results in a reduction of around 20K in the kernel size:

    text    data      bss      dec     hex filename
10773970  667392 10369656 21811018 14ccf4a ../build/imx6/vmlinux-old
10754219  667392 10369656 21791267 14c8223 ../build/imx6/vmlinux-new

Another advantage to this approach is that we can finally resolve the
issue of SoCs which have their own memory barrier requirements within
multiplatform kernels (such as OMAP.)  Here, the bus interconnects
need additional handling to ensure that writes become visible in the
correct order (eg, between dma_map() operations, writes to DMA
coherent memory, and MMIO accesses.)

Acked-by: Tony Lindgren <tony@atomide.com>
Acked-by: Richard Woodruff <r-woodruff2@ti.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/irq.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 350f188c92d2..b96c8ed1723a 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -39,6 +39,7 @@
 #include <linux/export.h>
 
 #include <asm/hardware/cache-l2x0.h>
+#include <asm/outercache.h>
 #include <asm/exception.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/irq.h>
-- 
cgit v1.2.3


From 8ae81c25cfe962a788a28e023d9a78934d807f7d Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Mon, 29 Jun 2015 22:58:46 +0100
Subject: arm: perf: Set affinity for PPI based PMUs

For PPI based PMUs, we bail out early in of_pmu_irq_cfg() without
setting the PMU's supported_cpus bitmap. This causes the
smp_call_function_any() in armv7_probe_num_events() to fail. Set
the bitmap to be all CPUs so that we properly probe PMUs that use
PPIs.

Fixes: cc88116da0d1 ("arm: perf: treat PMUs as CPU affine")
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/kernel/perf_event.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 54272e0be713..7d5379c1c443 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -795,8 +795,10 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu)
 
 	/* Don't bother with PPIs; they're already affine */
 	irq = platform_get_irq(pdev, 0);
-	if (irq >= 0 && irq_is_percpu(irq))
+	if (irq >= 0 && irq_is_percpu(irq)) {
+		cpumask_setall(&pmu->supported_cpus);
 		return 0;
+	}
 
 	irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
 	if (!irqs)
-- 
cgit v1.2.3


From b6c084d7aa8bca21920cbbe13ad58572fa85ece6 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 29 Jun 2015 13:59:01 +0100
Subject: ARM: perf: extend interrupt-affinity property for PPIs

On systems containing multiple, heterogeneous clusters we need a way to
associate a PMU "device" with the CPU(s) on which it exists. For PMUs
that signal overflow with SPIs, this relationship is determined via the
"interrupt-affinity" property, which contains a list of phandles to CPU
nodes for the PMU. For PMUs using PPIs, the per-cpu nature of the
interrupt isn't enough to determine the set of CPUs which actually
contain the device.

This patch allows the interrupt-affinity property to be specified on a
PMU node irrespective of the interrupt type. For PPIs, it identifies
the set of CPUs signalling the PPI in question.

Tested-by: Stephen Boyd <sboyd@codeaurora.org> # Krait PMU
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/kernel/perf_event.c | 65 ++++++++++++++++++++++++++++++--------------
 1 file changed, 44 insertions(+), 21 deletions(-)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 7d5379c1c443..5a8f17bfcc60 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -790,32 +790,39 @@ static int probe_current_pmu(struct arm_pmu *pmu,
 
 static int of_pmu_irq_cfg(struct arm_pmu *pmu)
 {
-	int i, irq, *irqs;
+	int *irqs, i = 0;
+	bool using_spi = false;
 	struct platform_device *pdev = pmu->plat_device;
 
-	/* Don't bother with PPIs; they're already affine */
-	irq = platform_get_irq(pdev, 0);
-	if (irq >= 0 && irq_is_percpu(irq)) {
-		cpumask_setall(&pmu->supported_cpus);
-		return 0;
-	}
-
 	irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
 	if (!irqs)
 		return -ENOMEM;
 
-	for (i = 0; i < pdev->num_resources; ++i) {
+	do {
 		struct device_node *dn;
-		int cpu;
+		int cpu, irq;
 
-		dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity",
-				      i);
-		if (!dn) {
-			pr_warn("Failed to parse %s/interrupt-affinity[%d]\n",
-				of_node_full_name(pdev->dev.of_node), i);
+		/* See if we have an affinity entry */
+		dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity", i);
+		if (!dn)
 			break;
+
+		/* Check the IRQ type and prohibit a mix of PPIs and SPIs */
+		irq = platform_get_irq(pdev, i);
+		if (irq >= 0) {
+			bool spi = !irq_is_percpu(irq);
+
+			if (i > 0 && spi != using_spi) {
+				pr_err("PPI/SPI IRQ type mismatch for %s!\n",
+					dn->name);
+				kfree(irqs);
+				return -EINVAL;
+			}
+
+			using_spi = spi;
 		}
 
+		/* Now look up the logical CPU number */
 		for_each_possible_cpu(cpu)
 			if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL))
 				break;
@@ -824,20 +831,36 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu)
 			pr_warn("Failed to find logical CPU for %s\n",
 				dn->name);
 			of_node_put(dn);
+			cpumask_setall(&pmu->supported_cpus);
 			break;
 		}
 		of_node_put(dn);
 
-		irqs[i] = cpu;
+		/* For SPIs, we need to track the affinity per IRQ */
+		if (using_spi) {
+			if (i >= pdev->num_resources) {
+				of_node_put(dn);
+				break;
+			}
+
+			irqs[i] = cpu;
+		}
+
+		/* Keep track of the CPUs containing this PMU type */
 		cpumask_set_cpu(cpu, &pmu->supported_cpus);
-	}
+		of_node_put(dn);
+		i++;
+	} while (1);
 
-	if (i == pdev->num_resources) {
+	/* If we didn't manage to parse anything, claim to support all CPUs */
+	if (cpumask_weight(&pmu->supported_cpus) == 0)
+		cpumask_setall(&pmu->supported_cpus);
+
+	/* If we matched up the IRQ affinities, use them to route the SPIs */
+	if (using_spi && i == pdev->num_resources)
 		pmu->irq_affinity = irqs;
-	} else {
+	else
 		kfree(irqs);
-		cpumask_setall(&pmu->supported_cpus);
-	}
 
 	return 0;
 }
-- 
cgit v1.2.3


From bc1e3c4687df62a1f2ba1b6be11efbeb76145366 Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Tue, 30 Jun 2015 13:56:57 +0100
Subject: ARM: perf: replace arch_find_n_match_cpu_physical_id with
 of_cpu_device_node_get

arch_find_n_match_cpu_physical_id parses the device tree to get the
device node for a given logical cpu index. However, since ARM PMUs get
probed after the CPU device nodes are stashed while registering the
cpus, we can use of_cpu_device_node_get to avoid another DT parse.

This patch replaces arch_find_n_match_cpu_physical_id with
of_cpu_device_node_get to reuse the stashed value directly instead.

Cc: Will Deacon <will.deacon@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/kernel/perf_event.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 5a8f17bfcc60..1cb40651d783 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -15,7 +15,7 @@
 #include <linux/cpumask.h>
 #include <linux/export.h>
 #include <linux/kernel.h>
-#include <linux/of.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
@@ -824,7 +824,7 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu)
 
 		/* Now look up the logical CPU number */
 		for_each_possible_cpu(cpu)
-			if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL))
+			if (dn == of_cpu_device_node_get(cpu))
 				break;
 
 		if (cpu >= nr_cpu_ids) {
-- 
cgit v1.2.3


From fa8ad7889d83bcf0a6cdbf6d3622f3ec019cde14 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 6 Jul 2015 12:23:53 +0100
Subject: arm: perf: factor arm_pmu core out to drivers

To enable sharing of the arm_pmu code with arm64, this patch factors it
out to drivers/perf/. A new drivers/perf directory is added for
performance monitor drivers to live under.

MAINTAINERS is updated accordingly. Files added previously without a
corresponsing MAINTAINERS update (perf_regs.c, perf_callchain.c, and
perf_event.h) are also added.

Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
[will: augmented Kconfig help slightly]
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/kernel/Makefile            |   3 +-
 arch/arm/kernel/perf_event.c        | 921 ------------------------------------
 arch/arm/kernel/perf_event_v6.c     |   2 +-
 arch/arm/kernel/perf_event_v7.c     |   2 +-
 arch/arm/kernel/perf_event_xscale.c |   2 +-
 5 files changed, 4 insertions(+), 926 deletions(-)
 delete mode 100644 arch/arm/kernel/perf_event.c

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index e69f7a19735d..fcb25c1c5c21 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -71,8 +71,7 @@ obj-$(CONFIG_CPU_PJ4)		+= pj4-cp0.o
 obj-$(CONFIG_CPU_PJ4B)		+= pj4-cp0.o
 obj-$(CONFIG_IWMMXT)		+= iwmmxt.o
 obj-$(CONFIG_PERF_EVENTS)	+= perf_regs.o perf_callchain.o
-obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o \
-				   perf_event_xscale.o perf_event_v6.o \
+obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event_xscale.o perf_event_v6.o \
 				   perf_event_v7.o
 CFLAGS_pj4-cp0.o		:= -marm
 AFLAGS_iwmmxt.o			:= -Wa,-mcpu=iwmmxt
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
deleted file mode 100644
index 1cb40651d783..000000000000
--- a/arch/arm/kernel/perf_event.c
+++ /dev/null
@@ -1,921 +0,0 @@
-#undef DEBUG
-
-/*
- * ARM performance counter support.
- *
- * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
- * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
- *
- * This code is based on the sparc64 perf event code, which is in turn based
- * on the x86 code.
- */
-#define pr_fmt(fmt) "hw perfevents: " fmt
-
-#include <linux/bitmap.h>
-#include <linux/cpumask.h>
-#include <linux/export.h>
-#include <linux/kernel.h>
-#include <linux/of_device.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/irq.h>
-#include <linux/irqdesc.h>
-
-#include <asm/cputype.h>
-#include <asm/irq_regs.h>
-#include <asm/pmu.h>
-
-static int
-armpmu_map_cache_event(const unsigned (*cache_map)
-				      [PERF_COUNT_HW_CACHE_MAX]
-				      [PERF_COUNT_HW_CACHE_OP_MAX]
-				      [PERF_COUNT_HW_CACHE_RESULT_MAX],
-		       u64 config)
-{
-	unsigned int cache_type, cache_op, cache_result, ret;
-
-	cache_type = (config >>  0) & 0xff;
-	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
-		return -EINVAL;
-
-	cache_op = (config >>  8) & 0xff;
-	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
-		return -EINVAL;
-
-	cache_result = (config >> 16) & 0xff;
-	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
-		return -EINVAL;
-
-	ret = (int)(*cache_map)[cache_type][cache_op][cache_result];
-
-	if (ret == CACHE_OP_UNSUPPORTED)
-		return -ENOENT;
-
-	return ret;
-}
-
-static int
-armpmu_map_hw_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
-{
-	int mapping;
-
-	if (config >= PERF_COUNT_HW_MAX)
-		return -EINVAL;
-
-	mapping = (*event_map)[config];
-	return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
-}
-
-static int
-armpmu_map_raw_event(u32 raw_event_mask, u64 config)
-{
-	return (int)(config & raw_event_mask);
-}
-
-int
-armpmu_map_event(struct perf_event *event,
-		 const unsigned (*event_map)[PERF_COUNT_HW_MAX],
-		 const unsigned (*cache_map)
-				[PERF_COUNT_HW_CACHE_MAX]
-				[PERF_COUNT_HW_CACHE_OP_MAX]
-				[PERF_COUNT_HW_CACHE_RESULT_MAX],
-		 u32 raw_event_mask)
-{
-	u64 config = event->attr.config;
-	int type = event->attr.type;
-
-	if (type == event->pmu->type)
-		return armpmu_map_raw_event(raw_event_mask, config);
-
-	switch (type) {
-	case PERF_TYPE_HARDWARE:
-		return armpmu_map_hw_event(event_map, config);
-	case PERF_TYPE_HW_CACHE:
-		return armpmu_map_cache_event(cache_map, config);
-	case PERF_TYPE_RAW:
-		return armpmu_map_raw_event(raw_event_mask, config);
-	}
-
-	return -ENOENT;
-}
-
-int armpmu_event_set_period(struct perf_event *event)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct hw_perf_event *hwc = &event->hw;
-	s64 left = local64_read(&hwc->period_left);
-	s64 period = hwc->sample_period;
-	int ret = 0;
-
-	if (unlikely(left <= -period)) {
-		left = period;
-		local64_set(&hwc->period_left, left);
-		hwc->last_period = period;
-		ret = 1;
-	}
-
-	if (unlikely(left <= 0)) {
-		left += period;
-		local64_set(&hwc->period_left, left);
-		hwc->last_period = period;
-		ret = 1;
-	}
-
-	/*
-	 * Limit the maximum period to prevent the counter value
-	 * from overtaking the one we are about to program. In
-	 * effect we are reducing max_period to account for
-	 * interrupt latency (and we are being very conservative).
-	 */
-	if (left > (armpmu->max_period >> 1))
-		left = armpmu->max_period >> 1;
-
-	local64_set(&hwc->prev_count, (u64)-left);
-
-	armpmu->write_counter(event, (u64)(-left) & 0xffffffff);
-
-	perf_event_update_userpage(event);
-
-	return ret;
-}
-
-u64 armpmu_event_update(struct perf_event *event)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct hw_perf_event *hwc = &event->hw;
-	u64 delta, prev_raw_count, new_raw_count;
-
-again:
-	prev_raw_count = local64_read(&hwc->prev_count);
-	new_raw_count = armpmu->read_counter(event);
-
-	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-			     new_raw_count) != prev_raw_count)
-		goto again;
-
-	delta = (new_raw_count - prev_raw_count) & armpmu->max_period;
-
-	local64_add(delta, &event->count);
-	local64_sub(delta, &hwc->period_left);
-
-	return new_raw_count;
-}
-
-static void
-armpmu_read(struct perf_event *event)
-{
-	armpmu_event_update(event);
-}
-
-static void
-armpmu_stop(struct perf_event *event, int flags)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct hw_perf_event *hwc = &event->hw;
-
-	/*
-	 * ARM pmu always has to update the counter, so ignore
-	 * PERF_EF_UPDATE, see comments in armpmu_start().
-	 */
-	if (!(hwc->state & PERF_HES_STOPPED)) {
-		armpmu->disable(event);
-		armpmu_event_update(event);
-		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
-	}
-}
-
-static void armpmu_start(struct perf_event *event, int flags)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct hw_perf_event *hwc = &event->hw;
-
-	/*
-	 * ARM pmu always has to reprogram the period, so ignore
-	 * PERF_EF_RELOAD, see the comment below.
-	 */
-	if (flags & PERF_EF_RELOAD)
-		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
-
-	hwc->state = 0;
-	/*
-	 * Set the period again. Some counters can't be stopped, so when we
-	 * were stopped we simply disabled the IRQ source and the counter
-	 * may have been left counting. If we don't do this step then we may
-	 * get an interrupt too soon or *way* too late if the overflow has
-	 * happened since disabling.
-	 */
-	armpmu_event_set_period(event);
-	armpmu->enable(event);
-}
-
-static void
-armpmu_del(struct perf_event *event, int flags)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
-	struct hw_perf_event *hwc = &event->hw;
-	int idx = hwc->idx;
-
-	armpmu_stop(event, PERF_EF_UPDATE);
-	hw_events->events[idx] = NULL;
-	clear_bit(idx, hw_events->used_mask);
-	if (armpmu->clear_event_idx)
-		armpmu->clear_event_idx(hw_events, event);
-
-	perf_event_update_userpage(event);
-}
-
-static int
-armpmu_add(struct perf_event *event, int flags)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
-	struct hw_perf_event *hwc = &event->hw;
-	int idx;
-	int err = 0;
-
-	/* An event following a process won't be stopped earlier */
-	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
-		return -ENOENT;
-
-	perf_pmu_disable(event->pmu);
-
-	/* If we don't have a space for the counter then finish early. */
-	idx = armpmu->get_event_idx(hw_events, event);
-	if (idx < 0) {
-		err = idx;
-		goto out;
-	}
-
-	/*
-	 * If there is an event in the counter we are going to use then make
-	 * sure it is disabled.
-	 */
-	event->hw.idx = idx;
-	armpmu->disable(event);
-	hw_events->events[idx] = event;
-
-	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
-	if (flags & PERF_EF_START)
-		armpmu_start(event, PERF_EF_RELOAD);
-
-	/* Propagate our changes to the userspace mapping. */
-	perf_event_update_userpage(event);
-
-out:
-	perf_pmu_enable(event->pmu);
-	return err;
-}
-
-static int
-validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
-			       struct perf_event *event)
-{
-	struct arm_pmu *armpmu;
-
-	if (is_software_event(event))
-		return 1;
-
-	/*
-	 * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The
-	 * core perf code won't check that the pmu->ctx == leader->ctx
-	 * until after pmu->event_init(event).
-	 */
-	if (event->pmu != pmu)
-		return 0;
-
-	if (event->state < PERF_EVENT_STATE_OFF)
-		return 1;
-
-	if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
-		return 1;
-
-	armpmu = to_arm_pmu(event->pmu);
-	return armpmu->get_event_idx(hw_events, event) >= 0;
-}
-
-static int
-validate_group(struct perf_event *event)
-{
-	struct perf_event *sibling, *leader = event->group_leader;
-	struct pmu_hw_events fake_pmu;
-
-	/*
-	 * Initialise the fake PMU. We only need to populate the
-	 * used_mask for the purposes of validation.
-	 */
-	memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask));
-
-	if (!validate_event(event->pmu, &fake_pmu, leader))
-		return -EINVAL;
-
-	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
-		if (!validate_event(event->pmu, &fake_pmu, sibling))
-			return -EINVAL;
-	}
-
-	if (!validate_event(event->pmu, &fake_pmu, event))
-		return -EINVAL;
-
-	return 0;
-}
-
-static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
-{
-	struct arm_pmu *armpmu;
-	struct platform_device *plat_device;
-	struct arm_pmu_platdata *plat;
-	int ret;
-	u64 start_clock, finish_clock;
-
-	/*
-	 * we request the IRQ with a (possibly percpu) struct arm_pmu**, but
-	 * the handlers expect a struct arm_pmu*. The percpu_irq framework will
-	 * do any necessary shifting, we just need to perform the first
-	 * dereference.
-	 */
-	armpmu = *(void **)dev;
-	plat_device = armpmu->plat_device;
-	plat = dev_get_platdata(&plat_device->dev);
-
-	start_clock = sched_clock();
-	if (plat && plat->handle_irq)
-		ret = plat->handle_irq(irq, armpmu, armpmu->handle_irq);
-	else
-		ret = armpmu->handle_irq(irq, armpmu);
-	finish_clock = sched_clock();
-
-	perf_sample_event_took(finish_clock - start_clock);
-	return ret;
-}
-
-static void
-armpmu_release_hardware(struct arm_pmu *armpmu)
-{
-	armpmu->free_irq(armpmu);
-}
-
-static int
-armpmu_reserve_hardware(struct arm_pmu *armpmu)
-{
-	int err = armpmu->request_irq(armpmu, armpmu_dispatch_irq);
-	if (err) {
-		armpmu_release_hardware(armpmu);
-		return err;
-	}
-
-	return 0;
-}
-
-static void
-hw_perf_event_destroy(struct perf_event *event)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	atomic_t *active_events	 = &armpmu->active_events;
-	struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex;
-
-	if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) {
-		armpmu_release_hardware(armpmu);
-		mutex_unlock(pmu_reserve_mutex);
-	}
-}
-
-static int
-event_requires_mode_exclusion(struct perf_event_attr *attr)
-{
-	return attr->exclude_idle || attr->exclude_user ||
-	       attr->exclude_kernel || attr->exclude_hv;
-}
-
-static int
-__hw_perf_event_init(struct perf_event *event)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct hw_perf_event *hwc = &event->hw;
-	int mapping;
-
-	mapping = armpmu->map_event(event);
-
-	if (mapping < 0) {
-		pr_debug("event %x:%llx not supported\n", event->attr.type,
-			 event->attr.config);
-		return mapping;
-	}
-
-	/*
-	 * We don't assign an index until we actually place the event onto
-	 * hardware. Use -1 to signify that we haven't decided where to put it
-	 * yet. For SMP systems, each core has it's own PMU so we can't do any
-	 * clever allocation or constraints checking at this point.
-	 */
-	hwc->idx		= -1;
-	hwc->config_base	= 0;
-	hwc->config		= 0;
-	hwc->event_base		= 0;
-
-	/*
-	 * Check whether we need to exclude the counter from certain modes.
-	 */
-	if ((!armpmu->set_event_filter ||
-	     armpmu->set_event_filter(hwc, &event->attr)) &&
-	     event_requires_mode_exclusion(&event->attr)) {
-		pr_debug("ARM performance counters do not support "
-			 "mode exclusion\n");
-		return -EOPNOTSUPP;
-	}
-
-	/*
-	 * Store the event encoding into the config_base field.
-	 */
-	hwc->config_base	    |= (unsigned long)mapping;
-
-	if (!is_sampling_event(event)) {
-		/*
-		 * For non-sampling runs, limit the sample_period to half
-		 * of the counter width. That way, the new counter value
-		 * is far less likely to overtake the previous one unless
-		 * you have some serious IRQ latency issues.
-		 */
-		hwc->sample_period  = armpmu->max_period >> 1;
-		hwc->last_period    = hwc->sample_period;
-		local64_set(&hwc->period_left, hwc->sample_period);
-	}
-
-	if (event->group_leader != event) {
-		if (validate_group(event) != 0)
-			return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int armpmu_event_init(struct perf_event *event)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	int err = 0;
-	atomic_t *active_events = &armpmu->active_events;
-
-	/*
-	 * Reject CPU-affine events for CPUs that are of a different class to
-	 * that which this PMU handles. Process-following events (where
-	 * event->cpu == -1) can be migrated between CPUs, and thus we have to
-	 * reject them later (in armpmu_add) if they're scheduled on a
-	 * different class of CPU.
-	 */
-	if (event->cpu != -1 &&
-		!cpumask_test_cpu(event->cpu, &armpmu->supported_cpus))
-		return -ENOENT;
-
-	/* does not support taken branch sampling */
-	if (has_branch_stack(event))
-		return -EOPNOTSUPP;
-
-	if (armpmu->map_event(event) == -ENOENT)
-		return -ENOENT;
-
-	event->destroy = hw_perf_event_destroy;
-
-	if (!atomic_inc_not_zero(active_events)) {
-		mutex_lock(&armpmu->reserve_mutex);
-		if (atomic_read(active_events) == 0)
-			err = armpmu_reserve_hardware(armpmu);
-
-		if (!err)
-			atomic_inc(active_events);
-		mutex_unlock(&armpmu->reserve_mutex);
-	}
-
-	if (err)
-		return err;
-
-	err = __hw_perf_event_init(event);
-	if (err)
-		hw_perf_event_destroy(event);
-
-	return err;
-}
-
-static void armpmu_enable(struct pmu *pmu)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(pmu);
-	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
-	int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
-
-	/* For task-bound events we may be called on other CPUs */
-	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
-		return;
-
-	if (enabled)
-		armpmu->start(armpmu);
-}
-
-static void armpmu_disable(struct pmu *pmu)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(pmu);
-
-	/* For task-bound events we may be called on other CPUs */
-	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
-		return;
-
-	armpmu->stop(armpmu);
-}
-
-/*
- * In heterogeneous systems, events are specific to a particular
- * microarchitecture, and aren't suitable for another. Thus, only match CPUs of
- * the same microarchitecture.
- */
-static int armpmu_filter_match(struct perf_event *event)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	unsigned int cpu = smp_processor_id();
-	return cpumask_test_cpu(cpu, &armpmu->supported_cpus);
-}
-
-static void armpmu_init(struct arm_pmu *armpmu)
-{
-	atomic_set(&armpmu->active_events, 0);
-	mutex_init(&armpmu->reserve_mutex);
-
-	armpmu->pmu = (struct pmu) {
-		.pmu_enable	= armpmu_enable,
-		.pmu_disable	= armpmu_disable,
-		.event_init	= armpmu_event_init,
-		.add		= armpmu_add,
-		.del		= armpmu_del,
-		.start		= armpmu_start,
-		.stop		= armpmu_stop,
-		.read		= armpmu_read,
-		.filter_match	= armpmu_filter_match,
-	};
-}
-
-int armpmu_register(struct arm_pmu *armpmu, int type)
-{
-	armpmu_init(armpmu);
-	pr_info("enabled with %s PMU driver, %d counters available\n",
-			armpmu->name, armpmu->num_events);
-	return perf_pmu_register(&armpmu->pmu, armpmu->name, type);
-}
-
-/* Set at runtime when we know what CPU type we are. */
-static struct arm_pmu *__oprofile_cpu_pmu;
-
-/*
- * Despite the names, these two functions are CPU-specific and are used
- * by the OProfile/perf code.
- */
-const char *perf_pmu_name(void)
-{
-	if (!__oprofile_cpu_pmu)
-		return NULL;
-
-	return __oprofile_cpu_pmu->name;
-}
-EXPORT_SYMBOL_GPL(perf_pmu_name);
-
-int perf_num_counters(void)
-{
-	int max_events = 0;
-
-	if (__oprofile_cpu_pmu != NULL)
-		max_events = __oprofile_cpu_pmu->num_events;
-
-	return max_events;
-}
-EXPORT_SYMBOL_GPL(perf_num_counters);
-
-static void cpu_pmu_enable_percpu_irq(void *data)
-{
-	int irq = *(int *)data;
-
-	enable_percpu_irq(irq, IRQ_TYPE_NONE);
-}
-
-static void cpu_pmu_disable_percpu_irq(void *data)
-{
-	int irq = *(int *)data;
-
-	disable_percpu_irq(irq);
-}
-
-static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
-{
-	int i, irq, irqs;
-	struct platform_device *pmu_device = cpu_pmu->plat_device;
-	struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;
-
-	irqs = min(pmu_device->num_resources, num_possible_cpus());
-
-	irq = platform_get_irq(pmu_device, 0);
-	if (irq >= 0 && irq_is_percpu(irq)) {
-		on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1);
-		free_percpu_irq(irq, &hw_events->percpu_pmu);
-	} else {
-		for (i = 0; i < irqs; ++i) {
-			int cpu = i;
-
-			if (cpu_pmu->irq_affinity)
-				cpu = cpu_pmu->irq_affinity[i];
-
-			if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
-				continue;
-			irq = platform_get_irq(pmu_device, i);
-			if (irq >= 0)
-				free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu));
-		}
-	}
-}
-
-static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
-{
-	int i, err, irq, irqs;
-	struct platform_device *pmu_device = cpu_pmu->plat_device;
-	struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;
-
-	if (!pmu_device)
-		return -ENODEV;
-
-	irqs = min(pmu_device->num_resources, num_possible_cpus());
-	if (irqs < 1) {
-		pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n");
-		return 0;
-	}
-
-	irq = platform_get_irq(pmu_device, 0);
-	if (irq >= 0 && irq_is_percpu(irq)) {
-		err = request_percpu_irq(irq, handler, "arm-pmu",
-					 &hw_events->percpu_pmu);
-		if (err) {
-			pr_err("unable to request IRQ%d for ARM PMU counters\n",
-				irq);
-			return err;
-		}
-		on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1);
-	} else {
-		for (i = 0; i < irqs; ++i) {
-			int cpu = i;
-
-			err = 0;
-			irq = platform_get_irq(pmu_device, i);
-			if (irq < 0)
-				continue;
-
-			if (cpu_pmu->irq_affinity)
-				cpu = cpu_pmu->irq_affinity[i];
-
-			/*
-			 * If we have a single PMU interrupt that we can't shift,
-			 * assume that we're running on a uniprocessor machine and
-			 * continue. Otherwise, continue without this interrupt.
-			 */
-			if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
-				pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n",
-					irq, cpu);
-				continue;
-			}
-
-			err = request_irq(irq, handler,
-					  IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu",
-					  per_cpu_ptr(&hw_events->percpu_pmu, cpu));
-			if (err) {
-				pr_err("unable to request IRQ%d for ARM PMU counters\n",
-					irq);
-				return err;
-			}
-
-			cpumask_set_cpu(cpu, &cpu_pmu->active_irqs);
-		}
-	}
-
-	return 0;
-}
-
-/*
- * PMU hardware loses all context when a CPU goes offline.
- * When a CPU is hotplugged back in, since some hardware registers are
- * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
- * junk values out of them.
- */
-static int cpu_pmu_notify(struct notifier_block *b, unsigned long action,
-			  void *hcpu)
-{
-	int cpu = (unsigned long)hcpu;
-	struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb);
-
-	if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
-		return NOTIFY_DONE;
-
-	if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
-		return NOTIFY_DONE;
-
-	if (pmu->reset)
-		pmu->reset(pmu);
-	else
-		return NOTIFY_DONE;
-
-	return NOTIFY_OK;
-}
-
-static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
-{
-	int err;
-	int cpu;
-	struct pmu_hw_events __percpu *cpu_hw_events;
-
-	cpu_hw_events = alloc_percpu(struct pmu_hw_events);
-	if (!cpu_hw_events)
-		return -ENOMEM;
-
-	cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify;
-	err = register_cpu_notifier(&cpu_pmu->hotplug_nb);
-	if (err)
-		goto out_hw_events;
-
-	for_each_possible_cpu(cpu) {
-		struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu);
-		raw_spin_lock_init(&events->pmu_lock);
-		events->percpu_pmu = cpu_pmu;
-	}
-
-	cpu_pmu->hw_events	= cpu_hw_events;
-	cpu_pmu->request_irq	= cpu_pmu_request_irq;
-	cpu_pmu->free_irq	= cpu_pmu_free_irq;
-
-	/* Ensure the PMU has sane values out of reset. */
-	if (cpu_pmu->reset)
-		on_each_cpu_mask(&cpu_pmu->supported_cpus, cpu_pmu->reset,
-			 cpu_pmu, 1);
-
-	/* If no interrupts available, set the corresponding capability flag */
-	if (!platform_get_irq(cpu_pmu->plat_device, 0))
-		cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
-
-	return 0;
-
-out_hw_events:
-	free_percpu(cpu_hw_events);
-	return err;
-}
-
-static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
-{
-	unregister_cpu_notifier(&cpu_pmu->hotplug_nb);
-	free_percpu(cpu_pmu->hw_events);
-}
-
-/*
- * CPU PMU identification and probing.
- */
-static int probe_current_pmu(struct arm_pmu *pmu,
-			     const struct pmu_probe_info *info)
-{
-	int cpu = get_cpu();
-	unsigned int cpuid = read_cpuid_id();
-	int ret = -ENODEV;
-
-	pr_info("probing PMU on CPU %d\n", cpu);
-
-	for (; info->init != NULL; info++) {
-		if ((cpuid & info->mask) != info->cpuid)
-			continue;
-		ret = info->init(pmu);
-		break;
-	}
-
-	put_cpu();
-	return ret;
-}
-
-static int of_pmu_irq_cfg(struct arm_pmu *pmu)
-{
-	int *irqs, i = 0;
-	bool using_spi = false;
-	struct platform_device *pdev = pmu->plat_device;
-
-	irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
-	if (!irqs)
-		return -ENOMEM;
-
-	do {
-		struct device_node *dn;
-		int cpu, irq;
-
-		/* See if we have an affinity entry */
-		dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity", i);
-		if (!dn)
-			break;
-
-		/* Check the IRQ type and prohibit a mix of PPIs and SPIs */
-		irq = platform_get_irq(pdev, i);
-		if (irq >= 0) {
-			bool spi = !irq_is_percpu(irq);
-
-			if (i > 0 && spi != using_spi) {
-				pr_err("PPI/SPI IRQ type mismatch for %s!\n",
-					dn->name);
-				kfree(irqs);
-				return -EINVAL;
-			}
-
-			using_spi = spi;
-		}
-
-		/* Now look up the logical CPU number */
-		for_each_possible_cpu(cpu)
-			if (dn == of_cpu_device_node_get(cpu))
-				break;
-
-		if (cpu >= nr_cpu_ids) {
-			pr_warn("Failed to find logical CPU for %s\n",
-				dn->name);
-			of_node_put(dn);
-			cpumask_setall(&pmu->supported_cpus);
-			break;
-		}
-		of_node_put(dn);
-
-		/* For SPIs, we need to track the affinity per IRQ */
-		if (using_spi) {
-			if (i >= pdev->num_resources) {
-				of_node_put(dn);
-				break;
-			}
-
-			irqs[i] = cpu;
-		}
-
-		/* Keep track of the CPUs containing this PMU type */
-		cpumask_set_cpu(cpu, &pmu->supported_cpus);
-		of_node_put(dn);
-		i++;
-	} while (1);
-
-	/* If we didn't manage to parse anything, claim to support all CPUs */
-	if (cpumask_weight(&pmu->supported_cpus) == 0)
-		cpumask_setall(&pmu->supported_cpus);
-
-	/* If we matched up the IRQ affinities, use them to route the SPIs */
-	if (using_spi && i == pdev->num_resources)
-		pmu->irq_affinity = irqs;
-	else
-		kfree(irqs);
-
-	return 0;
-}
-
-int arm_pmu_device_probe(struct platform_device *pdev,
-			 const struct of_device_id *of_table,
-			 const struct pmu_probe_info *probe_table)
-{
-	const struct of_device_id *of_id;
-	const int (*init_fn)(struct arm_pmu *);
-	struct device_node *node = pdev->dev.of_node;
-	struct arm_pmu *pmu;
-	int ret = -ENODEV;
-
-	pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL);
-	if (!pmu) {
-		pr_info("failed to allocate PMU device!\n");
-		return -ENOMEM;
-	}
-
-	if (!__oprofile_cpu_pmu)
-		__oprofile_cpu_pmu = pmu;
-
-	pmu->plat_device = pdev;
-
-	if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) {
-		init_fn = of_id->data;
-
-		ret = of_pmu_irq_cfg(pmu);
-		if (!ret)
-			ret = init_fn(pmu);
-	} else {
-		ret = probe_current_pmu(pmu, probe_table);
-		cpumask_setall(&pmu->supported_cpus);
-	}
-
-	if (ret) {
-		pr_info("failed to probe PMU!\n");
-		goto out_free;
-	}
-
-	ret = cpu_pmu_init(pmu);
-	if (ret)
-		goto out_free;
-
-	ret = armpmu_register(pmu, -1);
-	if (ret)
-		goto out_destroy;
-
-	return 0;
-
-out_destroy:
-	cpu_pmu_destroy(pmu);
-out_free:
-	pr_info("failed to register PMU devices!\n");
-	kfree(pmu);
-	return ret;
-}
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index 09f83e414a72..09413e7b49aa 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -34,9 +34,9 @@
 
 #include <asm/cputype.h>
 #include <asm/irq_regs.h>
-#include <asm/pmu.h>
 
 #include <linux/of.h>
+#include <linux/perf/arm_pmu.h>
 #include <linux/platform_device.h>
 
 enum armv6_perf_types {
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index f9b37f876e20..126dc679b230 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -21,11 +21,11 @@
 #include <asm/cp15.h>
 #include <asm/cputype.h>
 #include <asm/irq_regs.h>
-#include <asm/pmu.h>
 #include <asm/vfp.h>
 #include "../vfp/vfpinstr.h"
 
 #include <linux/of.h>
+#include <linux/perf/arm_pmu.h>
 #include <linux/platform_device.h>
 
 /*
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index 304d056d5b25..aa0499e2eef7 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -16,9 +16,9 @@
 
 #include <asm/cputype.h>
 #include <asm/irq_regs.h>
-#include <asm/pmu.h>
 
 #include <linux/of.h>
+#include <linux/perf/arm_pmu.h>
 #include <linux/platform_device.h>
 
 enum xscale_perf_types {
-- 
cgit v1.2.3


From 787047eea24a2443c366679ae6b5a3873a33b64e Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Wed, 29 Jul 2015 00:34:48 +0100
Subject: ARM: 8392/3: smp: Only expose /sys/.../cpuX/online if hotpluggable

Writes to /sys/.../cpuX/online fail if we determine the platform
doesn't support hotplug for that CPU. Furthermore, if the cpu_die
op isn't specified the system hangs when we try to offline a CPU
and it comes right back online unexpectedly. Let's figure this
stuff out before we make the sysfs nodes so that the online file
doesn't even exist if it isn't (at least sometimes) possible to
hotplug the CPU.

Add a new 'cpu_can_disable' op and repoint all 'cpu_disable'
implementations at it because all implementers use the op to
indicate if a CPU can be hotplugged or not in a static fashion.
With PSCI we may need to add a 'cpu_disable' op so that the
secure OS can be migrated off the CPU we're trying to hotplug.
In this case, the 'cpu_can_disable' op will indicate that all
CPUs are hotpluggable by returning true, but the 'cpu_disable' op
will make a PSCI migration call and occasionally fail, denying
the hotplug of a CPU. This shouldn't be any worse than x86 where
we may indicate that all CPUs are hotpluggable but occasionally
we can't offline a CPU due to check_irq_vectors_for_cpu_disable()
failing to find a CPU to move vectors to.

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Nicolas Pitre <nico@linaro.org>
Cc: Dave Martin <Dave.Martin@arm.com>
Acked-by: Simon Horman <horms@verge.net.au> [shmobile portion]
Tested-by: Simon Horman <horms@verge.net.au>
Cc: Magnus Damm <magnus.damm@gmail.com>
Cc: <linux-sh@vger.kernel.org>
Tested-by: Tyler Baker <tyler.baker@linaro.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/setup.c |  2 +-
 arch/arm/kernel/smp.c   | 15 ++++++++++++++-
 2 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 36c18b73c1f4..6bbec6042052 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -1015,7 +1015,7 @@ static int __init topology_init(void)
 
 	for_each_possible_cpu(cpu) {
 		struct cpuinfo_arm *cpuinfo = &per_cpu(cpu_data, cpu);
-		cpuinfo->cpu.hotpluggable = 1;
+		cpuinfo->cpu.hotpluggable = platform_can_hotplug_cpu(cpu);
 		register_cpu(&cpuinfo->cpu, cpu);
 	}
 
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 90dfbedfbfb8..3cd846f48eaf 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -175,13 +175,26 @@ static int platform_cpu_disable(unsigned int cpu)
 	if (smp_ops.cpu_disable)
 		return smp_ops.cpu_disable(cpu);
 
+	return 0;
+}
+
+int platform_can_hotplug_cpu(unsigned int cpu)
+{
+	/* cpu_die must be specified to support hotplug */
+	if (!smp_ops.cpu_die)
+		return 0;
+
+	if (smp_ops.cpu_can_disable)
+		return smp_ops.cpu_can_disable(cpu);
+
 	/*
 	 * By default, allow disabling all CPUs except the first one,
 	 * since this is special on a lot of platforms, e.g. because
 	 * of clock tick interrupts.
 	 */
-	return cpu == 0 ? -EPERM : 0;
+	return cpu != 0;
 }
+
 /*
  * __cpu_disable runs on the processor to be shutdown.
  */
-- 
cgit v1.2.3


From 37cf524f9360f9165d67459b7bf795c01824df98 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Fri, 31 Jul 2015 15:46:18 +0100
Subject: ARM: psci: boot_secondary: replace __pa with virt_to_idmap

On some PAE systems (e.g. TI Keystone), memory is above the 32-bit
addressable limit, and the interconnect provides an aliased view of
parts of physical memory in the 32-bit addressable space. This alias
is strictly for boot time usage, and is not otherwise usable because
of coherency limitations.

In this case, virt_to_phys(secondary_startup) would return the
physical address of the secondary CPU boot entry point, but on such
systems, this would be above the 4GB limit.

A separate function, virt_to_idmap(), has been provided to return a
usable physical address for functions in the identity mapping, and
this must be used in preference to virt_to_phys() or __pa() to find
the physical entry point for functions in the identity mapping range.

For other systems, virt_to_idmap() and virt_to_phys() return identical
physical addresses.

Acked-by: Santosh Shilimkar <ssantosh@kernel.org>
Acked-by: Nicolas Pitre <nico@linaro.org>
Tested-by Vitaly Andrianov <vitalya@ti.com>
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
[Mark: apply rmk's suggested rewording]
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/kernel/psci_smp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c
index 28a1db4da704..244aaddfbfda 100644
--- a/arch/arm/kernel/psci_smp.c
+++ b/arch/arm/kernel/psci_smp.c
@@ -51,7 +51,7 @@ static int psci_boot_secondary(unsigned int cpu, struct task_struct *idle)
 {
 	if (psci_ops.cpu_on)
 		return psci_ops.cpu_on(cpu_logical_map(cpu),
-				       __pa(secondary_startup));
+					virt_to_idmap(&secondary_startup));
 	return -ENODEV;
 }
 
-- 
cgit v1.2.3


From be120397e7709d9d5ed88317a385ce864a2603bc Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 31 Jul 2015 15:46:19 +0100
Subject: ARM: migrate to common PSCI client code

Now that the common PSCI client code has been factored out to
drivers/firmware, and made safe for 32-bit use, move the 32-bit ARM code
over to it. This results in a moderate reduction of duplicated lines,
and will prevent further duplication as the PSCI client code is updated
for PSCI 1.0 and beyond.

The two legacy platform users of the PSCI invocation code are updated to
account for interface changes. In both cases the power state parameter
(which is constant) is now generated using macros, so that the
pack/unpack logic can be killed in preparation for PSCI 1.0 power state
changes.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Rob Herring <robh@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Ashwin Chaugule <ashwin.chaugule@linaro.org>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/kernel/Makefile   |   2 +-
 arch/arm/kernel/psci.c     | 299 ---------------------------------------------
 arch/arm/kernel/psci_smp.c |  29 +++--
 arch/arm/kernel/setup.c    |   3 +-
 4 files changed, 25 insertions(+), 308 deletions(-)
 delete mode 100644 arch/arm/kernel/psci.c

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index e69f7a19735d..3b995f5c524d 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -89,7 +89,7 @@ obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 
 obj-$(CONFIG_ARM_VIRT_EXT)	+= hyp-stub.o
 ifeq ($(CONFIG_ARM_PSCI),y)
-obj-y				+= psci.o psci-call.o
+obj-y				+= psci-call.o
 obj-$(CONFIG_SMP)		+= psci_smp.o
 endif
 
diff --git a/arch/arm/kernel/psci.c b/arch/arm/kernel/psci.c
deleted file mode 100644
index f90fdf4ce7c7..000000000000
--- a/arch/arm/kernel/psci.c
+++ /dev/null
@@ -1,299 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * Copyright (C) 2012 ARM Limited
- *
- * Author: Will Deacon <will.deacon@arm.com>
- */
-
-#define pr_fmt(fmt) "psci: " fmt
-
-#include <linux/init.h>
-#include <linux/of.h>
-#include <linux/reboot.h>
-#include <linux/pm.h>
-#include <uapi/linux/psci.h>
-
-#include <asm/compiler.h>
-#include <asm/errno.h>
-#include <asm/psci.h>
-#include <asm/system_misc.h>
-
-struct psci_operations psci_ops;
-
-static int (*invoke_psci_fn)(u32, u32, u32, u32);
-typedef int (*psci_initcall_t)(const struct device_node *);
-
-asmlinkage int __invoke_psci_fn_hvc(u32, u32, u32, u32);
-asmlinkage int __invoke_psci_fn_smc(u32, u32, u32, u32);
-
-enum psci_function {
-	PSCI_FN_CPU_SUSPEND,
-	PSCI_FN_CPU_ON,
-	PSCI_FN_CPU_OFF,
-	PSCI_FN_MIGRATE,
-	PSCI_FN_AFFINITY_INFO,
-	PSCI_FN_MIGRATE_INFO_TYPE,
-	PSCI_FN_MAX,
-};
-
-static u32 psci_function_id[PSCI_FN_MAX];
-
-static int psci_to_linux_errno(int errno)
-{
-	switch (errno) {
-	case PSCI_RET_SUCCESS:
-		return 0;
-	case PSCI_RET_NOT_SUPPORTED:
-		return -EOPNOTSUPP;
-	case PSCI_RET_INVALID_PARAMS:
-		return -EINVAL;
-	case PSCI_RET_DENIED:
-		return -EPERM;
-	};
-
-	return -EINVAL;
-}
-
-static u32 psci_power_state_pack(struct psci_power_state state)
-{
-	return ((state.id << PSCI_0_2_POWER_STATE_ID_SHIFT)
-			& PSCI_0_2_POWER_STATE_ID_MASK) |
-		((state.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT)
-		 & PSCI_0_2_POWER_STATE_TYPE_MASK) |
-		((state.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT)
-		 & PSCI_0_2_POWER_STATE_AFFL_MASK);
-}
-
-static int psci_get_version(void)
-{
-	int err;
-
-	err = invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0);
-	return err;
-}
-
-static int psci_cpu_suspend(struct psci_power_state state,
-			    unsigned long entry_point)
-{
-	int err;
-	u32 fn, power_state;
-
-	fn = psci_function_id[PSCI_FN_CPU_SUSPEND];
-	power_state = psci_power_state_pack(state);
-	err = invoke_psci_fn(fn, power_state, entry_point, 0);
-	return psci_to_linux_errno(err);
-}
-
-static int psci_cpu_off(struct psci_power_state state)
-{
-	int err;
-	u32 fn, power_state;
-
-	fn = psci_function_id[PSCI_FN_CPU_OFF];
-	power_state = psci_power_state_pack(state);
-	err = invoke_psci_fn(fn, power_state, 0, 0);
-	return psci_to_linux_errno(err);
-}
-
-static int psci_cpu_on(unsigned long cpuid, unsigned long entry_point)
-{
-	int err;
-	u32 fn;
-
-	fn = psci_function_id[PSCI_FN_CPU_ON];
-	err = invoke_psci_fn(fn, cpuid, entry_point, 0);
-	return psci_to_linux_errno(err);
-}
-
-static int psci_migrate(unsigned long cpuid)
-{
-	int err;
-	u32 fn;
-
-	fn = psci_function_id[PSCI_FN_MIGRATE];
-	err = invoke_psci_fn(fn, cpuid, 0, 0);
-	return psci_to_linux_errno(err);
-}
-
-static int psci_affinity_info(unsigned long target_affinity,
-		unsigned long lowest_affinity_level)
-{
-	int err;
-	u32 fn;
-
-	fn = psci_function_id[PSCI_FN_AFFINITY_INFO];
-	err = invoke_psci_fn(fn, target_affinity, lowest_affinity_level, 0);
-	return err;
-}
-
-static int psci_migrate_info_type(void)
-{
-	int err;
-	u32 fn;
-
-	fn = psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE];
-	err = invoke_psci_fn(fn, 0, 0, 0);
-	return err;
-}
-
-static int get_set_conduit_method(struct device_node *np)
-{
-	const char *method;
-
-	pr_info("probing for conduit method from DT.\n");
-
-	if (of_property_read_string(np, "method", &method)) {
-		pr_warn("missing \"method\" property\n");
-		return -ENXIO;
-	}
-
-	if (!strcmp("hvc", method)) {
-		invoke_psci_fn = __invoke_psci_fn_hvc;
-	} else if (!strcmp("smc", method)) {
-		invoke_psci_fn = __invoke_psci_fn_smc;
-	} else {
-		pr_warn("invalid \"method\" property: %s\n", method);
-		return -EINVAL;
-	}
-	return 0;
-}
-
-static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd)
-{
-	invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0);
-}
-
-static void psci_sys_poweroff(void)
-{
-	invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0);
-}
-
-/*
- * PSCI Function IDs for v0.2+ are well defined so use
- * standard values.
- */
-static int psci_0_2_init(struct device_node *np)
-{
-	int err, ver;
-
-	err = get_set_conduit_method(np);
-
-	if (err)
-		goto out_put_node;
-
-	ver = psci_get_version();
-
-	if (ver == PSCI_RET_NOT_SUPPORTED) {
-		/* PSCI v0.2 mandates implementation of PSCI_ID_VERSION. */
-		pr_err("PSCI firmware does not comply with the v0.2 spec.\n");
-		err = -EOPNOTSUPP;
-		goto out_put_node;
-	} else {
-		pr_info("PSCIv%d.%d detected in firmware.\n",
-				PSCI_VERSION_MAJOR(ver),
-				PSCI_VERSION_MINOR(ver));
-
-		if (PSCI_VERSION_MAJOR(ver) == 0 &&
-				PSCI_VERSION_MINOR(ver) < 2) {
-			err = -EINVAL;
-			pr_err("Conflicting PSCI version detected.\n");
-			goto out_put_node;
-		}
-	}
-
-	pr_info("Using standard PSCI v0.2 function IDs\n");
-	psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN_CPU_SUSPEND;
-	psci_ops.cpu_suspend = psci_cpu_suspend;
-
-	psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF;
-	psci_ops.cpu_off = psci_cpu_off;
-
-	psci_function_id[PSCI_FN_CPU_ON] = PSCI_0_2_FN_CPU_ON;
-	psci_ops.cpu_on = psci_cpu_on;
-
-	psci_function_id[PSCI_FN_MIGRATE] = PSCI_0_2_FN_MIGRATE;
-	psci_ops.migrate = psci_migrate;
-
-	psci_function_id[PSCI_FN_AFFINITY_INFO] = PSCI_0_2_FN_AFFINITY_INFO;
-	psci_ops.affinity_info = psci_affinity_info;
-
-	psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE] =
-		PSCI_0_2_FN_MIGRATE_INFO_TYPE;
-	psci_ops.migrate_info_type = psci_migrate_info_type;
-
-	arm_pm_restart = psci_sys_reset;
-
-	pm_power_off = psci_sys_poweroff;
-
-out_put_node:
-	of_node_put(np);
-	return err;
-}
-
-/*
- * PSCI < v0.2 get PSCI Function IDs via DT.
- */
-static int psci_0_1_init(struct device_node *np)
-{
-	u32 id;
-	int err;
-
-	err = get_set_conduit_method(np);
-
-	if (err)
-		goto out_put_node;
-
-	pr_info("Using PSCI v0.1 Function IDs from DT\n");
-
-	if (!of_property_read_u32(np, "cpu_suspend", &id)) {
-		psci_function_id[PSCI_FN_CPU_SUSPEND] = id;
-		psci_ops.cpu_suspend = psci_cpu_suspend;
-	}
-
-	if (!of_property_read_u32(np, "cpu_off", &id)) {
-		psci_function_id[PSCI_FN_CPU_OFF] = id;
-		psci_ops.cpu_off = psci_cpu_off;
-	}
-
-	if (!of_property_read_u32(np, "cpu_on", &id)) {
-		psci_function_id[PSCI_FN_CPU_ON] = id;
-		psci_ops.cpu_on = psci_cpu_on;
-	}
-
-	if (!of_property_read_u32(np, "migrate", &id)) {
-		psci_function_id[PSCI_FN_MIGRATE] = id;
-		psci_ops.migrate = psci_migrate;
-	}
-
-out_put_node:
-	of_node_put(np);
-	return err;
-}
-
-static const struct of_device_id psci_of_match[] __initconst = {
-	{ .compatible = "arm,psci", .data = psci_0_1_init},
-	{ .compatible = "arm,psci-0.2", .data = psci_0_2_init},
-	{},
-};
-
-int __init psci_init(void)
-{
-	struct device_node *np;
-	const struct of_device_id *matched_np;
-	psci_initcall_t init_fn;
-
-	np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np);
-	if (!np)
-		return -ENODEV;
-
-	init_fn = (psci_initcall_t)matched_np->data;
-	return init_fn(np);
-}
diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c
index 244aaddfbfda..61c04b02faeb 100644
--- a/arch/arm/kernel/psci_smp.c
+++ b/arch/arm/kernel/psci_smp.c
@@ -17,6 +17,8 @@
 #include <linux/smp.h>
 #include <linux/of.h>
 #include <linux/delay.h>
+#include <linux/psci.h>
+
 #include <uapi/linux/psci.h>
 
 #include <asm/psci.h>
@@ -56,17 +58,29 @@ static int psci_boot_secondary(unsigned int cpu, struct task_struct *idle)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
+int psci_cpu_disable(unsigned int cpu)
+{
+	/* Fail early if we don't have CPU_OFF support */
+	if (!psci_ops.cpu_off)
+		return -EOPNOTSUPP;
+
+	/* Trusted OS will deny CPU_OFF */
+	if (psci_tos_resident_on(cpu))
+		return -EPERM;
+
+	return 0;
+}
+
 void __ref psci_cpu_die(unsigned int cpu)
 {
-       const struct psci_power_state ps = {
-               .type = PSCI_POWER_STATE_TYPE_POWER_DOWN,
-       };
+	u32 state = PSCI_POWER_STATE_TYPE_POWER_DOWN <<
+		    PSCI_0_2_POWER_STATE_TYPE_SHIFT;
 
-       if (psci_ops.cpu_off)
-               psci_ops.cpu_off(ps);
+	if (psci_ops.cpu_off)
+		psci_ops.cpu_off(state);
 
-       /* We should never return */
-       panic("psci: cpu %d failed to shutdown\n", cpu);
+	/* We should never return */
+	panic("psci: cpu %d failed to shutdown\n", cpu);
 }
 
 int __ref psci_cpu_kill(unsigned int cpu)
@@ -109,6 +123,7 @@ bool __init psci_smp_available(void)
 struct smp_operations __initdata psci_smp_ops = {
 	.smp_boot_secondary	= psci_boot_secondary,
 #ifdef CONFIG_HOTPLUG_CPU
+	.cpu_disable		= psci_cpu_disable,
 	.cpu_die		= psci_cpu_die,
 	.cpu_kill		= psci_cpu_kill,
 #endif
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 36c18b73c1f4..9c38bd42f04b 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -31,6 +31,7 @@
 #include <linux/bug.h>
 #include <linux/compiler.h>
 #include <linux/sort.h>
+#include <linux/psci.h>
 
 #include <asm/unified.h>
 #include <asm/cp15.h>
@@ -972,7 +973,7 @@ void __init setup_arch(char **cmdline_p)
 	unflatten_device_tree();
 
 	arm_dt_init_cpu_maps();
-	psci_init();
+	psci_dt_init();
 	xen_early_init();
 #ifdef CONFIG_SMP
 	if (is_smp()) {
-- 
cgit v1.2.3


From a5f4c561b3b19a9bc43a81da6382b0098ebbc1fb Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Thu, 13 Aug 2015 00:01:52 +0100
Subject: ARM: 8415/1: early fixmap support for earlycon

Add early fixmap support, initially to support permanent, fixed
mapping support for early console. A temporary, early pte is
created which is migrated to a permanent mapping in paging_init.
This is also needed since the attributes may change as the memory
types are initialized. The 3MiB range of fixmap spans two pte
tables, but currently only one pte is created for early fixmap
support.

Re-add FIX_KMAP_BEGIN to the index calculation in highmem.c since
the index for kmap does not start at zero anymore. This reverts
4221e2e6b316 ("ARM: 8031/1: fixmap: remove FIX_KMAP_BEGIN and
FIX_KMAP_END") to some extent.

Cc: Mark Salter <msalter@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Laura Abbott <lauraa@codeaurora.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/setup.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 6bbec6042052..e2ecee6b70ca 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -37,6 +37,7 @@
 #include <asm/cpu.h>
 #include <asm/cputype.h>
 #include <asm/elf.h>
+#include <asm/fixmap.h>
 #include <asm/procinfo.h>
 #include <asm/psci.h>
 #include <asm/sections.h>
@@ -954,6 +955,9 @@ void __init setup_arch(char **cmdline_p)
 	strlcpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE);
 	*cmdline_p = cmd_line;
 
+	if (IS_ENABLED(CONFIG_FIX_EARLYCON_MEM))
+		early_fixmap_init();
+
 	parse_early_param();
 
 #ifdef CONFIG_MMU
-- 
cgit v1.2.3


From 1eef5d2f1b461c120bcd82077edee5ec706ac53b Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Wed, 19 Aug 2015 21:23:48 +0100
Subject: ARM: domains: switch to keeping domain value in register

Rather than modifying both the domain access control register and our
per-thread copy, modify only the domain access control register, and
use the per-thread copy to save and restore the register over context
switches.  We can also avoid the explicit initialisation of the
init thread_info structure.

This allows us to avoid needing to gain access to the thread information
at the uaccess control sites.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/entry-armv.S |  2 ++
 arch/arm/kernel/process.c    | 13 ++++++++++---
 2 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 7dac3086e361..d19adcf6c580 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -770,6 +770,8 @@ ENTRY(__switch_to)
 	ldr	r4, [r2, #TI_TP_VALUE]
 	ldr	r5, [r2, #TI_TP_VALUE + 4]
 #ifdef CONFIG_CPU_USE_DOMAINS
+	mrc	p15, 0, r6, c3, c0, 0		@ Get domain register
+	str	r6, [r1, #TI_CPU_DOMAIN]	@ Save old domain register
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
 	switch_tls r1, r4, r5, r3, r7
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index f192a2a41719..e722f9b3c9b1 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -146,10 +146,9 @@ void __show_regs(struct pt_regs *regs)
 		buf[0] = '\0';
 #ifdef CONFIG_CPU_CP15_MMU
 		{
-			unsigned int transbase, dac;
+			unsigned int transbase, dac = get_domain();
 			asm("mrc p15, 0, %0, c2, c0\n\t"
-			    "mrc p15, 0, %1, c3, c0\n"
-			    : "=r" (transbase), "=r" (dac));
+			    : "=r" (transbase));
 			snprintf(buf, sizeof(buf), "  Table: %08x  DAC: %08x",
 			  	transbase, dac);
 		}
@@ -210,6 +209,14 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
 
 	memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save));
 
+	/*
+	 * Copy the initial value of the domain access control register
+	 * from the current thread: thread->addr_limit will have been
+	 * copied from the current thread via setup_thread_stack() in
+	 * kernel/fork.c
+	 */
+	thread->cpu_domain = get_domain();
+
 	if (likely(!(p->flags & PF_KTHREAD))) {
 		*childregs = *current_pt_regs();
 		childregs->ARM_r0 = 0;
-- 
cgit v1.2.3


From 0171356a7708af01ad3224702b7f0aaa5b7a1399 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 21 Aug 2015 09:23:26 +0100
Subject: ARM: domains: move initial domain setting value to asm/domains.h

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/head.S | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index bd755d97e459..d56e5e9a9e1e 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -461,10 +461,7 @@ __enable_mmu:
 #ifdef CONFIG_ARM_LPAE
 	mcrr	p15, 0, r4, r5, c2		@ load TTBR0
 #else
-	mov	r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \
-		      domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
-		      domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \
-		      domain_val(DOMAIN_IO, DOMAIN_CLIENT))
+	mov	r5, #DACR_INIT
 	mcr	p15, 0, r5, c3, c0, 0		@ load domain access register
 	mcr	p15, 0, r4, c2, c0, 0		@ load page table pointer
 #endif
-- 
cgit v1.2.3


From 3c2aed5b28819564e1a07b4686bd89802bcc4d6b Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 21 Aug 2015 09:30:16 +0100
Subject: ARM: domains: get rid of manager mode for user domain

Since we switched to early trap initialisation in 94e5a85b3be0
("ARM: earlier initialization of vectors page") we haven't been writing
directly to the vectors page, and so there's no need for this domain
to be in manager mode.  Switch it to client mode.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/traps.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index d358226236f2..969f9d9e665f 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -870,7 +870,6 @@ void __init early_trap_init(void *vectors_base)
 	kuser_init(vectors_base);
 
 	flush_icache_range(vectors, vectors + PAGE_SIZE * 2);
-	modify_domain(DOMAIN_USER, DOMAIN_CLIENT);
 #else /* ifndef CONFIG_CPU_V7M */
 	/*
 	 * on V7-M there is no need to copy the vector table to a dedicated
-- 
cgit v1.2.3


From 3302caddf10ad50710dbb7a94ccbdb3ad5bf1412 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Thu, 20 Aug 2015 16:13:37 +0100
Subject: ARM: entry: efficiency cleanups

Make the "fast" syscall return path fast again.  The addition of IRQ
tracing and context tracking has made this path grossly inefficient.
We can do much better if these options are enabled if we save the
syscall return code on the stack - we then don't need to save a bunch
of registers around every single callout to C code.

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/entry-common.S | 61 +++++++++++++++++++++++++++++++-----------
 arch/arm/kernel/signal.c       |  6 +++++
 2 files changed, 51 insertions(+), 16 deletions(-)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 92828a1dec80..dd3721d1185e 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -24,35 +24,55 @@
 
 
 	.align	5
+#if !(IS_ENABLED(CONFIG_TRACE_IRQFLAGS) || IS_ENABLED(CONFIG_CONTEXT_TRACKING))
 /*
- * This is the fast syscall return path.  We do as little as
- * possible here, and this includes saving r0 back into the SVC
- * stack.
+ * This is the fast syscall return path.  We do as little as possible here,
+ * such as avoiding writing r0 to the stack.  We only use this path if we
+ * have tracing and context tracking disabled - the overheads from those
+ * features make this path too inefficient.
  */
 ret_fast_syscall:
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
-	disable_irq				@ disable interrupts
+	disable_irq_notrace			@ disable interrupts
 	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
-	tst	r1, #_TIF_SYSCALL_WORK
-	bne	__sys_trace_return
-	tst	r1, #_TIF_WORK_MASK
+	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
 	bne	fast_work_pending
-	asm_trace_hardirqs_on
 
 	/* perform architecture specific actions before user return */
 	arch_ret_to_user r1, lr
-	ct_user_enter
 
 	restore_user_regs fast = 1, offset = S_OFF
  UNWIND(.fnend		)
+ENDPROC(ret_fast_syscall)
 
-/*
- * Ok, we need to do extra processing, enter the slow path.
- */
+	/* Ok, we need to do extra processing, enter the slow path. */
 fast_work_pending:
 	str	r0, [sp, #S_R0+S_OFF]!		@ returned r0
-work_pending:
+	/* fall through to work_pending */
+#else
+/*
+ * The "replacement" ret_fast_syscall for when tracing or context tracking
+ * is enabled.  As we will need to call out to some C functions, we save
+ * r0 first to avoid needing to save registers around each C function call.
+ */
+ret_fast_syscall:
+ UNWIND(.fnstart	)
+ UNWIND(.cantunwind	)
+	str	r0, [sp, #S_R0 + S_OFF]!	@ save returned r0
+	disable_irq_notrace			@ disable interrupts
+	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
+	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
+	beq	no_work_pending
+ UNWIND(.fnend		)
+ENDPROC(ret_fast_syscall)
+
+	/* Slower path - fall through to work_pending */
+#endif
+
+	tst	r1, #_TIF_SYSCALL_WORK
+	bne	__sys_trace_return_nosave
+slow_work_pending:
 	mov	r0, sp				@ 'regs'
 	mov	r2, why				@ 'syscall'
 	bl	do_work_pending
@@ -64,16 +84,19 @@ work_pending:
 
 /*
  * "slow" syscall return path.  "why" tells us if this was a real syscall.
+ * IRQs may be enabled here, so always disable them.  Note that we use the
+ * "notrace" version to avoid calling into the tracing code unnecessarily.
+ * do_work_pending() will update this state if necessary.
  */
 ENTRY(ret_to_user)
 ret_slow_syscall:
-	disable_irq				@ disable interrupts
+	disable_irq_notrace			@ disable interrupts
 ENTRY(ret_to_user_from_irq)
 	ldr	r1, [tsk, #TI_FLAGS]
 	tst	r1, #_TIF_WORK_MASK
-	bne	work_pending
+	bne	slow_work_pending
 no_work_pending:
-	asm_trace_hardirqs_on
+	asm_trace_hardirqs_on save = 0
 
 	/* perform architecture specific actions before user return */
 	arch_ret_to_user r1, lr
@@ -251,6 +274,12 @@ __sys_trace_return:
 	bl	syscall_trace_exit
 	b	ret_slow_syscall
 
+__sys_trace_return_nosave:
+	asm_trace_hardirqs_off save=0
+	mov	r0, sp
+	bl	syscall_trace_exit
+	b	ret_slow_syscall
+
 	.align	5
 #ifdef CONFIG_ALIGNMENT_TRAP
 	.type	__cr_alignment, #object
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 423663e23791..b6cda06b455f 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -562,6 +562,12 @@ static int do_signal(struct pt_regs *regs, int syscall)
 asmlinkage int
 do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
 {
+	/*
+	 * The assembly code enters us with IRQs off, but it hasn't
+	 * informed the tracing code of that for efficiency reasons.
+	 * Update the trace code with the current status.
+	 */
+	trace_hardirqs_off();
 	do {
 		if (likely(thread_flags & _TIF_NEED_RESCHED)) {
 			schedule();
-- 
cgit v1.2.3


From e0aa3a665782e29cec752ae667c51ed4ee75d11f Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Thu, 20 Aug 2015 17:39:32 +0100
Subject: ARM: entry: ensure that IRQs are enabled when calling
 syscall_trace_exit()

The audit code looks like it's been written to cope with being called
with IRQs enabled.  However, it's unclear whether IRQs should be
enabled or disabled when calling the syscall tracing infrastructure.

Right now, sometimes we call this with IRQs enabled, and other times
with IRQs disabled.  Opt for IRQs being enabled for consistency.

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/entry-common.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index dd3721d1185e..d83a40d8e055 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -275,7 +275,7 @@ __sys_trace_return:
 	b	ret_slow_syscall
 
 __sys_trace_return_nosave:
-	asm_trace_hardirqs_off save=0
+	enable_irq_notrace
 	mov	r0, sp
 	bl	syscall_trace_exit
 	b	ret_slow_syscall
-- 
cgit v1.2.3


From 3fba7e23f754a9a6e639b640fa2a393712ffe1b8 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Wed, 19 Aug 2015 11:02:28 +0100
Subject: ARM: uaccess: provide uaccess_save_and_enable() and uaccess_restore()

Provide uaccess_save_and_enable() and uaccess_restore() to permit
control of userspace visibility to the kernel, and hook these into
the appropriate places in the kernel where we need to access
userspace.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/armksyms.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index a88671cfe1ff..a35d72d30b56 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -91,9 +91,9 @@ EXPORT_SYMBOL(__memzero);
 #ifdef CONFIG_MMU
 EXPORT_SYMBOL(copy_page);
 
-EXPORT_SYMBOL(__copy_from_user);
-EXPORT_SYMBOL(__copy_to_user);
-EXPORT_SYMBOL(__clear_user);
+EXPORT_SYMBOL(arm_copy_from_user);
+EXPORT_SYMBOL(arm_copy_to_user);
+EXPORT_SYMBOL(arm_clear_user);
 
 EXPORT_SYMBOL(__get_user_1);
 EXPORT_SYMBOL(__get_user_2);
-- 
cgit v1.2.3


From 9205b797dbe519a629267ec8c5766cd973d35063 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Mon, 24 Aug 2015 21:49:30 +0100
Subject: ARM: 8421/1: smp: Collapse arch_cpu_idle_dead() into cpu_die()

The only caller of cpu_die() on ARM is arch_cpu_idle_dead(), so
let's simplify the code by renaming cpu_die() to
arch_cpu_idle_dead(). While were here, drop the __ref annotation
because __cpuinit is gone nowadays.

Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/process.c | 7 -------
 arch/arm/kernel/smp.c     | 2 +-
 2 files changed, 1 insertion(+), 8 deletions(-)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index f192a2a41719..358984b7f249 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -91,13 +91,6 @@ void arch_cpu_idle_exit(void)
 	ledtrig_cpu(CPU_LED_IDLE_END);
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
-void arch_cpu_idle_dead(void)
-{
-	cpu_die();
-}
-#endif
-
 void __show_regs(struct pt_regs *regs)
 {
 	unsigned long flags;
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 3cd846f48eaf..0aad7cdf2e58 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -266,7 +266,7 @@ void __cpu_die(unsigned int cpu)
  * of the other hotplug-cpu capable cores, so presumably coming
  * out of idle fixes this.
  */
-void __ref cpu_die(void)
+void arch_cpu_idle_dead(void)
 {
 	unsigned int cpu = smp_processor_id();
 
-- 
cgit v1.2.3


From aa06e5c1f9c2b466712be904cc5b56a813e24cfd Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Wed, 26 Aug 2015 20:07:25 +0100
Subject: ARM: entry: get rid of multiple macro definitions

The following structure is just asking for trouble:

 #ifdef CONFIG_symbol
	.macro foo
	...
	.endm
	.macro bar
	...
	.endm
	.macro baz
	...
	.endm
 #else
	.macro foo
	...
	.endm
	.macro bar
	...
	.endm
 #ifdef CONFIG_symbol2
	.macro baz
	...
	.endm
 #else
	.macro baz
	...
	.endm
 #endif
 #endif

such as one defintion being updated, but the other definitions miss out.
Where the contents of a macro needs to be conditional, the hint is in
the first clause of this very sentence.  "contents" "conditional".  Not
multiple separate definitions, especially not when much of the macro
is the same between different configs.

This patch fixes this bad style, which had caused the Thumb2 code to
miss-out on the uaccess updates.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/entry-header.S | 109 +++++++++++++++++------------------------
 1 file changed, 44 insertions(+), 65 deletions(-)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index 1a0045abead7..d47b5161b029 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -196,7 +196,7 @@
 	msr	cpsr_c, \rtemp			@ switch back to the SVC mode
 	.endm
 
-#ifndef CONFIG_THUMB2_KERNEL
+
 	.macro	svc_exit, rpsr, irq = 0
 	.if	\irq != 0
 	@ IRQs already off
@@ -215,6 +215,9 @@
 	blne	trace_hardirqs_off
 #endif
 	.endif
+
+#ifndef CONFIG_THUMB2_KERNEL
+	@ ARM mode SVC restore
 	msr	spsr_cxsf, \rpsr
 #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K)
 	@ We must avoid clrex due to Cortex-A15 erratum #830321
@@ -222,6 +225,20 @@
 	strex	r1, r2, [r0]			@ clear the exclusive monitor
 #endif
 	ldmia	sp, {r0 - pc}^			@ load r0 - pc, cpsr
+#else
+	@ Thumb mode SVC restore
+	ldr	lr, [sp, #S_SP]			@ top of the stack
+	ldrd	r0, r1, [sp, #S_LR]		@ calling lr and pc
+
+	@ We must avoid clrex due to Cortex-A15 erratum #830321
+	strex	r2, r1, [sp, #S_LR]		@ clear the exclusive monitor
+
+	stmdb	lr!, {r0, r1, \rpsr}		@ calling lr and rfe context
+	ldmia	sp, {r0 - r12}
+	mov	sp, lr
+	ldr	lr, [sp], #4
+	rfeia	sp!
+#endif
 	.endm
 
 	@
@@ -241,6 +258,8 @@
 	@ on the stack remains correct).
 	@
 	.macro  svc_exit_via_fiq
+#ifndef CONFIG_THUMB2_KERNEL
+	@ ARM mode restore
 	mov	r0, sp
 	ldmib	r0, {r1 - r14}	@ abort is deadly from here onward (it will
 				@ clobber state restored below)
@@ -250,9 +269,26 @@
 	msr	spsr_cxsf, r9
 	ldr	r0, [r0, #S_R0]
 	ldmia	r8, {pc}^
+#else
+	@ Thumb mode restore
+	add	r0, sp, #S_R2
+	ldr	lr, [sp, #S_LR]
+	ldr	sp, [sp, #S_SP] @ abort is deadly from here onward (it will
+			        @ clobber state restored below)
+	ldmia	r0, {r2 - r12}
+	mov	r1, #FIQ_MODE | PSR_I_BIT | PSR_F_BIT
+	msr	cpsr_c, r1
+	sub	r0, #S_R2
+	add	r8, r0, #S_PC
+	ldmia	r0, {r0 - r1}
+	rfeia	r8
+#endif
 	.endm
 
+
 	.macro	restore_user_regs, fast = 0, offset = 0
+#ifndef CONFIG_THUMB2_KERNEL
+	@ ARM mode restore
 	mov	r2, sp
 	ldr	r1, [r2, #\offset + S_PSR]	@ get calling cpsr
 	ldr	lr, [r2, #\offset + S_PC]!	@ get pc
@@ -270,72 +306,16 @@
 						@ after ldm {}^
 	add	sp, sp, #\offset + S_FRAME_SIZE
 	movs	pc, lr				@ return & move spsr_svc into cpsr
-	.endm
-
-#else	/* CONFIG_THUMB2_KERNEL */
-	.macro	svc_exit, rpsr, irq = 0
-	.if	\irq != 0
-	@ IRQs already off
-#ifdef CONFIG_TRACE_IRQFLAGS
-	@ The parent context IRQs must have been enabled to get here in
-	@ the first place, so there's no point checking the PSR I bit.
-	bl	trace_hardirqs_on
-#endif
-	.else
-	@ IRQs off again before pulling preserved data off the stack
-	disable_irq_notrace
-#ifdef CONFIG_TRACE_IRQFLAGS
-	tst	\rpsr, #PSR_I_BIT
-	bleq	trace_hardirqs_on
-	tst	\rpsr, #PSR_I_BIT
-	blne	trace_hardirqs_off
-#endif
-	.endif
-	ldr	lr, [sp, #S_SP]			@ top of the stack
-	ldrd	r0, r1, [sp, #S_LR]		@ calling lr and pc
-
-	@ We must avoid clrex due to Cortex-A15 erratum #830321
-	strex	r2, r1, [sp, #S_LR]		@ clear the exclusive monitor
-
-	stmdb	lr!, {r0, r1, \rpsr}		@ calling lr and rfe context
-	ldmia	sp, {r0 - r12}
-	mov	sp, lr
-	ldr	lr, [sp], #4
-	rfeia	sp!
-	.endm
-
-	@
-	@ svc_exit_via_fiq - like svc_exit but switches to FIQ mode before exit
-	@
-	@ For full details see non-Thumb implementation above.
-	@
-	.macro  svc_exit_via_fiq
-	add	r0, sp, #S_R2
-	ldr	lr, [sp, #S_LR]
-	ldr	sp, [sp, #S_SP] @ abort is deadly from here onward (it will
-			        @ clobber state restored below)
-	ldmia	r0, {r2 - r12}
-	mov	r1, #FIQ_MODE | PSR_I_BIT | PSR_F_BIT
-	msr	cpsr_c, r1
-	sub	r0, #S_R2
-	add	r8, r0, #S_PC
-	ldmia	r0, {r0 - r1}
-	rfeia	r8
-	.endm
-
-#ifdef CONFIG_CPU_V7M
-	/*
-	 * Note we don't need to do clrex here as clearing the local monitor is
-	 * part of each exception entry and exit sequence.
-	 */
-	.macro	restore_user_regs, fast = 0, offset = 0
+#elif defined(CONFIG_CPU_V7M)
+	@ V7M restore.
+	@ Note that we don't need to do clrex here as clearing the local
+	@ monitor is part of the exception entry and exit sequence.
 	.if	\offset
 	add	sp, #\offset
 	.endif
 	v7m_exception_slow_exit ret_r0 = \fast
-	.endm
-#else	/* ifdef CONFIG_CPU_V7M */
-	.macro	restore_user_regs, fast = 0, offset = 0
+#else
+	@ Thumb mode restore
 	mov	r2, sp
 	load_user_sp_lr r2, r3, \offset + S_SP	@ calling sp, lr
 	ldr	r1, [sp, #\offset + S_PSR]	@ get calling cpsr
@@ -353,9 +333,8 @@
 	.endif
 	add	sp, sp, #S_FRAME_SIZE - S_SP
 	movs	pc, lr				@ return & move spsr_svc into cpsr
-	.endm
-#endif	/* ifdef CONFIG_CPU_V7M / else */
 #endif	/* !CONFIG_THUMB2_KERNEL */
+	.endm
 
 /*
  * Context tracking subsystem.  Used to instrument transitions
-- 
cgit v1.2.3


From 2190fed67ba6f3e8129513929f2395843645e928 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Thu, 20 Aug 2015 10:32:02 +0100
Subject: ARM: entry: provide uaccess assembly macro hooks

Provide hooks into the kernel entry and exit paths to permit control
of userspace visibility to the kernel.  The intended use is:

- on entry to kernel from user, uaccess_disable will be called to
  disable userspace visibility
- on exit from kernel to user, uaccess_enable will be called to
  enable userspace visibility
- on entry from a kernel exception, uaccess_save_and_disable will be
  called to save the current userspace visibility setting, and disable
  access
- on exit from a kernel exception, uaccess_restore will be called to
  restore the userspace visibility as it was before the exception
  occurred.

These hooks allows us to keep userspace visibility disabled for the
vast majority of the kernel, except for localised regions where we
want to explicitly access userspace.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/entry-armv.S   | 30 ++++++++++++++++++++++--------
 arch/arm/kernel/entry-common.S |  2 ++
 arch/arm/kernel/entry-header.S |  3 +++
 3 files changed, 27 insertions(+), 8 deletions(-)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index d19adcf6c580..61f00a3f3047 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -149,10 +149,10 @@ ENDPROC(__und_invalid)
 #define SPFIX(code...)
 #endif
 
-	.macro	svc_entry, stack_hole=0, trace=1
+	.macro	svc_entry, stack_hole=0, trace=1, uaccess=1
  UNWIND(.fnstart		)
  UNWIND(.save {r0 - pc}		)
-	sub	sp, sp, #(S_FRAME_SIZE + \stack_hole - 4)
+	sub	sp, sp, #(S_FRAME_SIZE + 8 + \stack_hole - 4)
 #ifdef CONFIG_THUMB2_KERNEL
  SPFIX(	str	r0, [sp]	)	@ temporarily saved
  SPFIX(	mov	r0, sp		)
@@ -167,7 +167,7 @@ ENDPROC(__und_invalid)
 	ldmia	r0, {r3 - r5}
 	add	r7, sp, #S_SP - 4	@ here for interlock avoidance
 	mov	r6, #-1			@  ""  ""      ""       ""
-	add	r2, sp, #(S_FRAME_SIZE + \stack_hole - 4)
+	add	r2, sp, #(S_FRAME_SIZE + 8 + \stack_hole - 4)
  SPFIX(	addeq	r2, r2, #4	)
 	str	r3, [sp, #-4]!		@ save the "real" r0 copied
 					@ from the exception stack
@@ -185,6 +185,11 @@ ENDPROC(__und_invalid)
 	@
 	stmia	r7, {r2 - r6}
 
+	uaccess_save r0
+	.if \uaccess
+	uaccess_disable r0
+	.endif
+
 	.if \trace
 #ifdef CONFIG_TRACE_IRQFLAGS
 	bl	trace_hardirqs_off
@@ -194,7 +199,7 @@ ENDPROC(__und_invalid)
 
 	.align	5
 __dabt_svc:
-	svc_entry
+	svc_entry uaccess=0
 	mov	r2, sp
 	dabt_helper
  THUMB(	ldr	r5, [sp, #S_PSR]	)	@ potentially updated CPSR
@@ -368,7 +373,7 @@ ENDPROC(__fiq_abt)
 #error "sizeof(struct pt_regs) must be a multiple of 8"
 #endif
 
-	.macro	usr_entry, trace=1
+	.macro	usr_entry, trace=1, uaccess=1
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)	@ don't unwind the user space
 	sub	sp, sp, #S_FRAME_SIZE
@@ -400,6 +405,10 @@ ENDPROC(__fiq_abt)
  ARM(	stmdb	r0, {sp, lr}^			)
  THUMB(	store_user_sp_lr r0, r1, S_SP - S_PC	)
 
+	.if \uaccess
+	uaccess_disable ip
+	.endif
+
 	@ Enable the alignment trap while in kernel mode
  ATRAP(	teq	r8, r7)
  ATRAP( mcrne	p15, 0, r8, c1, c0, 0)
@@ -435,7 +444,7 @@ ENDPROC(__fiq_abt)
 
 	.align	5
 __dabt_usr:
-	usr_entry
+	usr_entry uaccess=0
 	kuser_cmpxchg_check
 	mov	r2, sp
 	dabt_helper
@@ -458,7 +467,7 @@ ENDPROC(__irq_usr)
 
 	.align	5
 __und_usr:
-	usr_entry
+	usr_entry uaccess=0
 
 	mov	r2, r4
 	mov	r3, r5
@@ -484,6 +493,8 @@ __und_usr:
 1:	ldrt	r0, [r4]
  ARM_BE8(rev	r0, r0)				@ little endian instruction
 
+	uaccess_disable ip
+
 	@ r0 = 32-bit ARM instruction which caused the exception
 	@ r2 = PC value for the following instruction (:= regs->ARM_pc)
 	@ r4 = PC value for the faulting instruction
@@ -518,9 +529,10 @@ __und_usr_thumb:
 2:	ldrht	r5, [r4]
 ARM_BE8(rev16	r5, r5)				@ little endian instruction
 	cmp	r5, #0xe800			@ 32bit instruction if xx != 0
-	blo	__und_usr_fault_16		@ 16bit undefined instruction
+	blo	__und_usr_fault_16_pan		@ 16bit undefined instruction
 3:	ldrht	r0, [r2]
 ARM_BE8(rev16	r0, r0)				@ little endian instruction
+	uaccess_disable ip
 	add	r2, r2, #2			@ r2 is PC + 2, make it PC + 4
 	str	r2, [sp, #S_PC]			@ it's a 2x16bit instr, update
 	orr	r0, r0, r5, lsl #16
@@ -715,6 +727,8 @@ ENDPROC(no_fp)
 __und_usr_fault_32:
 	mov	r1, #4
 	b	1f
+__und_usr_fault_16_pan:
+	uaccess_disable ip
 __und_usr_fault_16:
 	mov	r1, #2
 1:	mov	r0, sp
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 92828a1dec80..189154980703 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -173,6 +173,8 @@ ENTRY(vector_swi)
  USER(	ldr	scno, [lr, #-4]		)	@ get SWI instruction
 #endif
 
+	uaccess_disable tbl
+
 	adr	tbl, sys_call_table		@ load syscall table pointer
 
 #if defined(CONFIG_OABI_COMPAT)
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index d47b5161b029..0d22ad206d52 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -215,6 +215,7 @@
 	blne	trace_hardirqs_off
 #endif
 	.endif
+	uaccess_restore
 
 #ifndef CONFIG_THUMB2_KERNEL
 	@ ARM mode SVC restore
@@ -258,6 +259,7 @@
 	@ on the stack remains correct).
 	@
 	.macro  svc_exit_via_fiq
+	uaccess_restore
 #ifndef CONFIG_THUMB2_KERNEL
 	@ ARM mode restore
 	mov	r0, sp
@@ -287,6 +289,7 @@
 
 
 	.macro	restore_user_regs, fast = 0, offset = 0
+	uaccess_enable r1, isb=0
 #ifndef CONFIG_THUMB2_KERNEL
 	@ ARM mode restore
 	mov	r2, sp
-- 
cgit v1.2.3


From a5e090acbf545c0a3b04080f8a488b17ec41fe02 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Wed, 19 Aug 2015 20:40:41 +0100
Subject: ARM: software-based priviledged-no-access support

Provide a software-based implementation of the priviledged no access
support found in ARMv8.1.

Userspace pages are mapped using a different domain number from the
kernel and IO mappings.  If we switch the user domain to "no access"
when we enter the kernel, we can prevent the kernel from touching
userspace.

However, the kernel needs to be able to access userspace via the
various user accessor functions.  With the wrapping in the previous
patch, we can temporarily enable access when the kernel needs user
access, and re-disable it afterwards.

This allows us to trap non-intended accesses to userspace, eg, caused
by an inadvertent dereference of the LIST_POISON* values, which, with
appropriate user mappings setup, can be made to succeed.  This in turn
can allow use-after-free bugs to be further exploited than would
otherwise be possible.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/process.c     | 36 ++++++++++++++++++++++++++++++------
 arch/arm/kernel/swp_emulate.c |  3 +++
 2 files changed, 33 insertions(+), 6 deletions(-)

(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index e722f9b3c9b1..3f18098dfd08 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -129,12 +129,36 @@ void __show_regs(struct pt_regs *regs)
 	buf[4] = '\0';
 
 #ifndef CONFIG_CPU_V7M
-	printk("Flags: %s  IRQs o%s  FIQs o%s  Mode %s  ISA %s  Segment %s\n",
-		buf, interrupts_enabled(regs) ? "n" : "ff",
-		fast_interrupts_enabled(regs) ? "n" : "ff",
-		processor_modes[processor_mode(regs)],
-		isa_modes[isa_mode(regs)],
-		get_fs() == get_ds() ? "kernel" : "user");
+	{
+		unsigned int domain = get_domain();
+		const char *segment;
+
+#ifdef CONFIG_CPU_SW_DOMAIN_PAN
+		/*
+		 * Get the domain register for the parent context. In user
+		 * mode, we don't save the DACR, so lets use what it should
+		 * be. For other modes, we place it after the pt_regs struct.
+		 */
+		if (user_mode(regs))
+			domain = DACR_UACCESS_ENABLE;
+		else
+			domain = *(unsigned int *)(regs + 1);
+#endif
+
+		if ((domain & domain_mask(DOMAIN_USER)) ==
+		    domain_val(DOMAIN_USER, DOMAIN_NOACCESS))
+			segment = "none";
+		else if (get_fs() == get_ds())
+			segment = "kernel";
+		else
+			segment = "user";
+
+		printk("Flags: %s  IRQs o%s  FIQs o%s  Mode %s  ISA %s  Segment %s\n",
+			buf, interrupts_enabled(regs) ? "n" : "ff",
+			fast_interrupts_enabled(regs) ? "n" : "ff",
+			processor_modes[processor_mode(regs)],
+			isa_modes[isa_mode(regs)], segment);
+	}
 #else
 	printk("xPSR: %08lx\n", regs->ARM_cpsr);
 #endif
diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c
index 1361756782c7..5b26e7efa9ea 100644
--- a/arch/arm/kernel/swp_emulate.c
+++ b/arch/arm/kernel/swp_emulate.c
@@ -141,11 +141,14 @@ static int emulate_swpX(unsigned int address, unsigned int *data,
 
 	while (1) {
 		unsigned long temp;
+		unsigned int __ua_flags;
 
+		__ua_flags = uaccess_save_and_enable();
 		if (type == TYPE_SWPB)
 			__user_swpb_asm(*data, address, res, temp);
 		else
 			__user_swp_asm(*data, address, res, temp);
+		uaccess_restore(__ua_flags);
 
 		if (likely(res != -EAGAIN) || signal_pending(current))
 			break;
-- 
cgit v1.2.3