author	Mark Brown <broonie@linaro.org>	2013-12-11 22:36:52 +0000
committer	Mark Brown <broonie@linaro.org>	2013-12-11 22:36:52 +0000
commit	48a904c88d3768312b638ae2cffce6fa51231acc (patch)
tree	3fc0a4e90a15bb9bf61d97a6431daaa6b0513596 /arch
parent	09905258e4846b60a6b1e2432c0f1653a2f49641 (diff)
parent	e101473817ddc5e10919e4e22c3b421a350ad742 (diff)
Merge remote-tracking branch 'lsk/v3.10/topic/arm64-topology' into linux-linaro-lsk
Diffstat (limited to 'arch')
-rw-r--r--  arch/arm64/Kconfig                 |   8
-rw-r--r--  arch/arm64/include/asm/topology.h  |  42
-rw-r--r--  arch/arm64/kernel/Makefile         |   1
-rw-r--r--  arch/arm64/kernel/smp.c            |  12
-rw-r--r--  arch/arm64/kernel/topology.c       | 418
-rw-r--r--  arch/powerpc/include/asm/prom.h    |   3
-rw-r--r--  arch/powerpc/kernel/prom.c         |  43
7 files changed, 483 insertions(+), 44 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index c2b4a765ff4..37cbbdd7248 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -144,6 +144,14 @@ config SMP
If you don't know what to do here, say N.
+config ARM_CPU_TOPOLOGY
+ bool "Support CPU topology definition"
+ depends on SMP
+ default y
+ help
+ Support CPU topology definition, based on configuration
+ provided by the firmware.
+
config NR_CPUS
int "Maximum number of CPUs (2-32)"
range 2 32
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
new file mode 100644
index 00000000000..611edefaeaf
--- /dev/null
+++ b/arch/arm64/include/asm/topology.h
@@ -0,0 +1,42 @@
+#ifndef _ASM_ARM_TOPOLOGY_H
+#define _ASM_ARM_TOPOLOGY_H
+
+#ifdef CONFIG_ARM_CPU_TOPOLOGY
+
+#include <linux/cpumask.h>
+
+struct cputopo_arm {
+ int thread_id;
+ int core_id;
+ int socket_id;
+ cpumask_t thread_sibling;
+ cpumask_t core_sibling;
+};
+
+extern struct cputopo_arm cpu_topology[NR_CPUS];
+
+#define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id)
+#define topology_core_id(cpu) (cpu_topology[cpu].core_id)
+#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling)
+#define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_sibling)
+
+#define mc_capable() (cpu_topology[0].socket_id != -1)
+#define smt_capable() (cpu_topology[0].thread_id != -1)
+
+void init_cpu_topology(void);
+void store_cpu_topology(unsigned int cpuid);
+const struct cpumask *cpu_coregroup_mask(int cpu);
+int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask);
+
+#else
+
+static inline void init_cpu_topology(void) { }
+static inline void store_cpu_topology(unsigned int cpuid) { }
+static inline int cluster_to_logical_mask(unsigned int socket_id,
+ cpumask_t *cluster_mask) { return -EINVAL; }
+
+#endif
+
+#include <asm-generic/topology.h>
+
+#endif /* _ASM_ARM_TOPOLOGY_H */
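
For context, a consumer of these accessors (a hypothetical sketch, not part of this patch; the helper name is invented for illustration) might look like:

	#include <linux/cpumask.h>
	#include <linux/printk.h>
	#include <linux/smp.h>
	#include <asm/topology.h>

	/* Sketch: report every CPU sharing a socket with the current CPU. */
	static void report_core_siblings(void)
	{
		int cpu = smp_processor_id();
		int sibling;

		for_each_cpu(sibling, topology_core_cpumask(cpu))
			pr_info("CPU%d: socket %d sibling CPU%d\n",
				cpu, topology_physical_package_id(cpu), sibling);
	}

The masks are only meaningful once store_cpu_topology() has run on each CPU (see the smp.c changes below).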
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 7b4b564961d..4e4c80d1150 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -18,6 +18,7 @@ arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o smp_psci.o
arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o
arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o
arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+arm64-obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o
obj-y += $(arm64-obj-y) vdso/
obj-m += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index a28d2859932..8d8a1f2af29 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -161,6 +161,11 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
return ret;
}
+static void __cpuinit smp_store_cpu_info(unsigned int cpuid)
+{
+ store_cpu_topology(cpuid);
+}
+
/*
* This is the secondary CPU boot entry. We're using this CPU's
* idle thread stack, but a set of temporary page tables.
@@ -209,6 +214,8 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
local_irq_enable();
local_fiq_enable();
+ smp_store_cpu_info(cpu);
+
/*
* OK, now it's safe to let the boot CPU continue. Wait for
* the CPU migration code to notice that the CPU is online
@@ -389,6 +396,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
int cpu, err;
unsigned int ncores = num_possible_cpus();
+ init_cpu_topology();
+
+ smp_store_cpu_info(smp_processor_id());
+
/*
* are we trying to boot more cores than exist?
*/
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
new file mode 100644
index 00000000000..83d6919d4e0
--- /dev/null
+++ b/arch/arm64/kernel/topology.c
@@ -0,0 +1,418 @@
+/*
+ * arch/arm64/kernel/topology.c
+ *
+ * Copyright (C) 2011,2013 Linaro Limited.
+ * Written by: Vincent Guittot
+ *
+ * based on arch/sh/kernel/topology.c
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/node.h>
+#include <linux/nodemask.h>
+#include <linux/of.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include <asm/cputype.h>
+#include <asm/smp_plat.h>
+#include <asm/topology.h>
+
+/*
+ * cpu power scale management
+ */
+
+/*
+ * cpu power table
+ * This per cpu data structure describes the relative capacity of each core.
+ * On a heterogeneous system, cores don't have the same computation capacity,
+ * and we reflect that difference in the cpu_power field so the scheduler
+ * can take it into account during load balancing. A per cpu structure is
+ * preferred because each CPU updates its own cpu_power field during load
+ * balancing, except for idle cores: one idle core is selected to run
+ * rebalance_domains for all idle cores, and their cpu_power can be updated
+ * during that sequence.
+ */
+static DEFINE_PER_CPU(unsigned long, cpu_scale);
+
+unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu)
+{
+ return per_cpu(cpu_scale, cpu);
+}
+
+static void set_power_scale(unsigned int cpu, unsigned long power)
+{
+ per_cpu(cpu_scale, cpu) = power;
+}
+
+#ifdef CONFIG_OF
+struct cpu_efficiency {
+ const char *compatible;
+ unsigned long efficiency;
+};
+
+/*
+ * Table of the relative efficiency of each processor type.
+ * The efficiency value must fit in 20 bits and the final
+ * cpu_scale value must be in the range
+ * 0 < cpu_scale < 3*SCHED_POWER_SCALE/2
+ * in order to return at most 1 when DIV_ROUND_CLOSEST
+ * is used to compute the capacity of a CPU.
+ * Processors that are not listed in the table use the
+ * default SCHED_POWER_SCALE value for cpu_scale.
+ */
+static const struct cpu_efficiency table_efficiency[] = {
+ { "arm,cortex-a57", 3891 },
+ { "arm,cortex-a53", 2048 },
+ { NULL, },
+};
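
The capacity computed further down is (clock-frequency >> 20) * efficiency, i.e. roughly MHz times relative efficiency; with the efficiency limited to 20 bits the product also stays comfortably within 32 bits (this code mirrors the 32-bit arch/arm implementation). For example (illustrative numbers only), a Cortex-A57 at 1.8 GHz yields (1800000000 >> 20) * 3891 = 1716 * 3891 = 6676956.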
+
+static unsigned long *__cpu_capacity;
+#define cpu_capacity(cpu) __cpu_capacity[cpu]
+
+static unsigned long middle_capacity = 1;
+static int cluster_id;
+
+static int __init get_cpu_for_node(struct device_node *node)
+{
+ struct device_node *cpu_node;
+ int cpu;
+
+ cpu_node = of_parse_phandle(node, "cpu", 0);
+ if (!cpu_node) {
+ pr_crit("%s: Unable to parse CPU phandle\n", node->full_name);
+ return -1;
+ }
+
+ for_each_possible_cpu(cpu) {
+ if (of_get_cpu_node(cpu, NULL) == cpu_node)
+ return cpu;
+ }
+
+ pr_crit("Unable to find CPU node for %s\n", cpu_node->full_name);
+ return -1;
+}
+
+static void __init parse_core(struct device_node *core, int core_id)
+{
+ char name[10];
+ bool leaf = true;
+ int i, cpu;
+ struct device_node *t;
+
+ i = 0;
+ do {
+ snprintf(name, sizeof(name), "thread%d", i);
+ t = of_get_child_by_name(core, name);
+ if (t) {
+ leaf = false;
+ cpu = get_cpu_for_node(t);
+ if (cpu >= 0) {
+ pr_info("CPU%d: socket %d core %d thread %d\n",
+ cpu, cluster_id, core_id, i);
+ cpu_topology[cpu].socket_id = cluster_id;
+ cpu_topology[cpu].core_id = core_id;
+ cpu_topology[cpu].thread_id = i;
+ } else {
+ pr_err("%s: Can't get CPU for thread\n",
+ t->full_name);
+ }
+ }
+ i++;
+ } while (t);
+
+ cpu = get_cpu_for_node(core);
+ if (cpu >= 0) {
+ if (!leaf) {
+ pr_err("%s: Core has both threads and CPU\n",
+ core->full_name);
+ return;
+ }
+
+ pr_info("CPU%d: socket %d core %d\n",
+ cpu, cluster_id, core_id);
+ cpu_topology[cpu].socket_id = cluster_id;
+ cpu_topology[cpu].core_id = core_id;
+ } else if (leaf) {
+ pr_err("%s: Can't get CPU for leaf core\n", core->full_name);
+ }
+}
+
+static void __init parse_cluster(struct device_node *cluster)
+{
+ char name[10];
+ bool leaf = true;
+ bool has_cores = false;
+ struct device_node *c;
+ int core_id = 0;
+ int i;
+
+ /*
+ * First check for child clusters; we currently ignore any
+ * information about the nesting of clusters and present the
+ * scheduler with a flat list of them.
+ */
+ i = 0;
+ do {
+ snprintf(name, sizeof(name), "cluster%d", i);
+ c = of_get_child_by_name(cluster, name);
+ if (c) {
+ parse_cluster(c);
+ leaf = false;
+ }
+ i++;
+ } while (c);
+
+ /* Now check for cores */
+ i = 0;
+ do {
+ snprintf(name, sizeof(name), "core%d", i);
+ c = of_get_child_by_name(cluster, name);
+ if (c) {
+ has_cores = true;
+
+ if (leaf)
+ parse_core(c, core_id++);
+ else
+ pr_err("%s: Non-leaf cluster with core %s\n",
+ cluster->full_name, name);
+ }
+ i++;
+ } while (c);
+
+ if (leaf && !has_cores)
+ pr_warn("%s: empty cluster\n", cluster->full_name);
+
+ if (leaf)
+ cluster_id++;
+}
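
For illustration, parse_cluster() and parse_core() above walk a /cpus/cpu-map of the following shape (a hypothetical fragment; labels such as &CPU0 are examples only):

	cpus {
		cpu-map {
			cluster0 {
				core0 { cpu = <&CPU0>; };
				core1 { cpu = <&CPU1>; };
			};
			cluster1 {
				core0 {
					thread0 { cpu = <&CPU2>; };
					thread1 { cpu = <&CPU3>; };
				};
			};
		};
	};

With this fragment, CPU0/CPU1 become socket 0 cores 0/1, and CPU2/CPU3 become socket 1 core 0 threads 0/1.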
+
+/*
+ * Iterate over all CPU descriptors in the DT and compute each CPU's
+ * efficiency (as per table_efficiency). Also calculate a middle
+ * efficiency, as close as possible to (max{eff_i} + min{eff_i}) / 2.
+ * This is later used to scale the cpu_power field such that an
+ * 'average' CPU is of middle power. Also see the comments near
+ * table_efficiency[] and update_cpu_power().
+ */
+static void __init parse_dt_topology(void)
+{
+ const struct cpu_efficiency *cpu_eff;
+ struct device_node *cn = NULL;
+ unsigned long min_capacity = (unsigned long)(-1);
+ unsigned long max_capacity = 0;
+ unsigned long capacity = 0;
+ int alloc_size, cpu;
+
+ alloc_size = nr_cpu_ids * sizeof(*__cpu_capacity);
+ __cpu_capacity = kzalloc(alloc_size, GFP_NOWAIT);
+
+ cn = of_find_node_by_path("/cpus");
+ if (!cn) {
+ pr_err("No CPU information found in DT\n");
+ return;
+ }
+
+ /*
+ * If topology is provided as a cpu-map it is essentially a
+ * root cluster.
+ */
+ cn = of_find_node_by_name(cn, "cpu-map");
+ if (!cn)
+ return;
+ parse_cluster(cn);
+
+ for_each_possible_cpu(cpu) {
+ const u32 *rate;
+ int len;
+
+ /* Too early to use cpu->of_node */
+ cn = of_get_cpu_node(cpu, NULL);
+ if (!cn) {
+ pr_err("Missing device node for CPU %d\n", cpu);
+ continue;
+ }
+
+ /* check if the cpu is marked as "disabled", if so ignore */
+ if (!of_device_is_available(cn))
+ continue;
+
+ for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++)
+ if (of_device_is_compatible(cn, cpu_eff->compatible))
+ break;
+
+ if (cpu_eff->compatible == NULL) {
+ pr_warn("%s: Unknown CPU type\n", cn->full_name);
+ continue;
+ }
+
+ rate = of_get_property(cn, "clock-frequency", &len);
+ if (!rate || len != 4) {
+ pr_err("%s: Missing clock-frequency property\n",
+ cn->full_name);
+ continue;
+ }
+
+ capacity = ((be32_to_cpup(rate)) >> 20) * cpu_eff->efficiency;
+
+ /* Save min capacity of the system */
+ if (capacity < min_capacity)
+ min_capacity = capacity;
+
+ /* Save max capacity of the system */
+ if (capacity > max_capacity)
+ max_capacity = capacity;
+
+ cpu_capacity(cpu) = capacity;
+ }
+
+ /* If min and max capacities are equal we bypass the update of the
+ * cpu_scale because all CPUs have the same capacity. Otherwise, we
+ * compute a middle_capacity factor that will ensure that the capacity
+ * of an 'average' CPU of the system will be as close as possible to
+ * SCHED_POWER_SCALE, which is the default value, but with the
+ * constraint explained near table_efficiency[].
+ */
+ if (min_capacity == max_capacity)
+ return;
+ else if (4 * max_capacity < (3 * (max_capacity + min_capacity)))
+ middle_capacity = (min_capacity + max_capacity)
+ >> (SCHED_POWER_SHIFT+1);
+ else
+ middle_capacity = ((max_capacity / 3)
+ >> (SCHED_POWER_SHIFT-1)) + 1;
+
+}
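
To make the scaling concrete, take a hypothetical big.LITTLE system (example numbers only): a Cortex-A57 at 1.8 GHz and a Cortex-A53 at 1.3 GHz.

	A57: (1800000000 >> 20) * 3891 = 1716 * 3891 = 6676956  (max)
	A53: (1300000000 >> 20) * 2048 = 1239 * 2048 = 2537472  (min)

Since 4 * 6676956 = 26707824 < 3 * (6676956 + 2537472) = 27643284, the first branch is taken and middle_capacity = 9214428 >> 11 = 4499 (SCHED_POWER_SHIFT is 10 in this kernel). update_cpu_power() below then gives the A57 a cpu_power of 6676956 / 4499 = 1484 and the A53 2537472 / 4499 = 564, straddling the default SCHED_POWER_SCALE of 1024 and staying under the 3*SCHED_POWER_SCALE/2 = 1536 bound discussed near table_efficiency[].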
+
+/*
+ * Look up the custom capacity of a CPU in the __cpu_capacity table during
+ * boot. Updating all CPUs is O(n^2) on a heterogeneous system, but the
+ * function returns immediately on a homogeneous (SMP) system.
+ */
+static void update_cpu_power(unsigned int cpu)
+{
+ if (!cpu_capacity(cpu))
+ return;
+
+ set_power_scale(cpu, cpu_capacity(cpu) / middle_capacity);
+
+ pr_info("CPU%u: update cpu_power %lu\n",
+ cpu, arch_scale_freq_power(NULL, cpu));
+}
+
+#else
+static inline void parse_dt_topology(void) {}
+static inline void update_cpu_power(unsigned int cpuid) {}
+#endif
+
+/*
+ * cpu topology table
+ */
+struct cputopo_arm cpu_topology[NR_CPUS];
+EXPORT_SYMBOL_GPL(cpu_topology);
+
+const struct cpumask *cpu_coregroup_mask(int cpu)
+{
+ return &cpu_topology[cpu].core_sibling;
+}
+
+static void update_siblings_masks(unsigned int cpuid)
+{
+ struct cputopo_arm *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
+ int cpu;
+
+ /* update core and thread sibling masks */
+ for_each_possible_cpu(cpu) {
+ cpu_topo = &cpu_topology[cpu];
+
+ if (cpuid_topo->socket_id != cpu_topo->socket_id)
+ continue;
+
+ cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
+ if (cpu != cpuid)
+ cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
+
+ if (cpuid_topo->core_id != cpu_topo->core_id)
+ continue;
+
+ cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
+ if (cpu != cpuid)
+ cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
+ }
+ smp_wmb();
+}
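
Continuing the hypothetical four-CPU layout from the cpu-map example above, once every CPU has called store_cpu_topology() the masks settle to:

	CPU0: core_sibling = {0,1}, thread_sibling = {0}
	CPU1: core_sibling = {0,1}, thread_sibling = {1}
	CPU2: core_sibling = {2,3}, thread_sibling = {2,3}
	CPU3: core_sibling = {2,3}, thread_sibling = {2,3}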
+
+void store_cpu_topology(unsigned int cpuid)
+{
+ struct cputopo_arm *cpuid_topo = &cpu_topology[cpuid];
+
+ /* DT should have been parsed by the time we get here */
+ if (cpuid_topo->core_id == -1)
+ pr_info("CPU%u: No topology information configured\n", cpuid);
+ else
+ update_siblings_masks(cpuid);
+
+ update_cpu_power(cpuid);
+}
+
+/*
+ * cluster_to_logical_mask - return cpu logical mask of CPUs in a cluster
+ * @socket_id: cluster HW identifier
+ * @cluster_mask: the cpumask location to be initialized, modified by the
+ * function only if return value == 0
+ *
+ * Return:
+ *
+ * 0 on success
+ * -EINVAL if cluster_mask is NULL or there is no record matching socket_id
+ */
+int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask)
+{
+ int cpu;
+
+ if (!cluster_mask)
+ return -EINVAL;
+
+ for_each_online_cpu(cpu) {
+ if (socket_id == topology_physical_package_id(cpu)) {
+ cpumask_copy(cluster_mask, topology_core_cpumask(cpu));
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
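
A hypothetical caller (sketch only; the function name is invented for illustration) would use it like this:

	#include <linux/cpumask.h>
	#include <asm/topology.h>

	/* Sketch: count the online CPUs that sit in cluster 0. */
	static unsigned int cpus_in_cluster0(void)
	{
		cpumask_t mask;

		if (cluster_to_logical_mask(0, &mask))
			return 0;	/* no online CPU reported socket 0 */

		return cpumask_weight(&mask);
	}

Note that the mask returned is the core_sibling mask of the first online CPU found in that socket, so it reflects whatever update_siblings_masks() has recorded so far.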
+
+/*
+ * init_cpu_topology is called at boot, when only one CPU is running,
+ * which prevents simultaneous write access to the cpu_topology array
+ */
+void __init init_cpu_topology(void)
+{
+ unsigned int cpu;
+
+ /* init core mask and power */
+ for_each_possible_cpu(cpu) {
+ struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]);
+
+ cpu_topo->thread_id = -1;
+ cpu_topo->core_id = -1;
+ cpu_topo->socket_id = -1;
+ cpumask_clear(&cpu_topo->core_sibling);
+ cpumask_clear(&cpu_topo->thread_sibling);
+
+ set_power_scale(cpu, SCHED_POWER_SCALE);
+ }
+ smp_wmb();
+
+ parse_dt_topology();
+}
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index bc2da154f68..ac204e02292 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -43,9 +43,6 @@ void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,
extern void kdump_move_device_tree(void);
-/* CPU OF node matching */
-struct device_node *of_get_cpu_node(int cpu, unsigned int *thread);
-
/* cache lookup */
struct device_node *of_find_next_cache_node(struct device_node *np);
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 8b6f7a99cce..00610a34fb9 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -827,49 +827,10 @@ static int __init prom_reconfig_setup(void)
__initcall(prom_reconfig_setup);
#endif
-/* Find the device node for a given logical cpu number, also returns the cpu
- * local thread number (index in ibm,interrupt-server#s) if relevant and
- * asked for (non NULL)
- */
-struct device_node *of_get_cpu_node(int cpu, unsigned int *thread)
+bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
{
- int hardid;
- struct device_node *np;
-
- hardid = get_hard_smp_processor_id(cpu);
-
- for_each_node_by_type(np, "cpu") {
- const u32 *intserv;
- unsigned int plen, t;
-
- /* Check for ibm,ppc-interrupt-server#s. If it doesn't exist
- * fallback to "reg" property and assume no threads
- */
- intserv = of_get_property(np, "ibm,ppc-interrupt-server#s",
- &plen);
- if (intserv == NULL) {
- const u32 *reg = of_get_property(np, "reg", NULL);
- if (reg == NULL)
- continue;
- if (*reg == hardid) {
- if (thread)
- *thread = 0;
- return np;
- }
- } else {
- plen /= sizeof(u32);
- for (t = 0; t < plen; t++) {
- if (hardid == intserv[t]) {
- if (thread)
- *thread = t;
- return np;
- }
- }
- }
- }
- return NULL;
+ return (int)phys_id == get_hard_smp_processor_id(cpu);
}
-EXPORT_SYMBOL(of_get_cpu_node);
#if defined(CONFIG_DEBUG_FS) && defined(DEBUG)
static struct debugfs_blob_wrapper flat_dt_blob;
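
The powerpc-specific of_get_cpu_node() can be removed because the generic DT code in this series provides of_get_cpu_node() itself and delegates the hardware-ID comparison to the arch_match_cpu_phys_id() hook overridden above. The generic weak default is, as far as we can tell, a plain logical-ID comparison along these lines (sketch of the expected shape, not quoted from this patch):

	bool __weak arch_match_cpu_phys_id(int cpu, u64 phys_id)
	{
		return (u32)phys_id == cpu;
	}

powerpc overrides it so the comparison goes through get_hard_smp_processor_id(), preserving the old hard-ID matching, while the thread-index bookkeeping that of_get_cpu_node() used to do now lives in the generic implementation.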