about summary refs log tree commit diff
diff options
context:
space:
mode:
author Mark Brown <broonie@linaro.org> 2014-05-09 22:09:24 +0100
committer Mark Brown <broonie@linaro.org> 2014-05-09 22:27:03 +0100
commit ee22524dbe4fcbaa8186296d197e73982a935565 (patch)
tree 3a30ec09b61048bdea28845126e897446c82f258
parent 8ecd48091c61213762407f5ce02d3fa4ede66402 (diff)
parent a84034fddb11f30849dd7ce050689d615995c0d2 (diff)
download linux-linaro-stable-ee22524dbe4fcbaa8186296d197e73982a935565.tar.gz
Merge remote-tracking branch 'lsk/v3.10/topic/arm64-topology' into lsk-v3.10-arm64-hmp
Conflicts:
	arch/arm64/Kconfig
	arch/arm64/include/asm/topology.h
	arch/arm64/kernel/smp.c
	arch/arm64/kernel/topology.c
-rw-r--r-- arch/arm64/Kconfig 12
-rw-r--r-- arch/arm64/include/asm/topology.h 16
-rw-r--r-- arch/arm64/kernel/Makefile 1
-rw-r--r-- arch/arm64/kernel/smp.c 1
-rw-r--r-- arch/arm64/kernel/topology.c 277
5 files changed, 175 insertions, 132 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 195806152794..d8e51d5c57bd 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -144,17 +144,9 @@ config SMP
If you don't know what to do here, say N.
-config ARM_CPU_TOPOLOGY
- bool "Support CPU topology definition"
- depends on SMP
- default y
- help
- Support CPU topology definition, based on configuration
- provided by the firmware.
-
config SCHED_MC
bool "Multi-core scheduler support"
- depends on ARM_CPU_TOPOLOGY
+ depends on SMP
help
Multi-core scheduler support improves the CPU scheduler's decision
making when dealing with multi-core CPU chips at a cost of slightly
@@ -162,7 +154,7 @@ config SCHED_MC
config SCHED_SMT
bool "SMT scheduler support"
- depends on ARM_CPU_TOPOLOGY
+ depends on SMP
help
Improves the CPU scheduler's decision making when dealing with
MultiThreading at a cost of slightly increased overhead in some
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index 983fa7c153a2..98e0aa36aebb 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -1,26 +1,26 @@
-#ifndef _ASM_ARM_TOPOLOGY_H
-#define _ASM_ARM_TOPOLOGY_H
+#ifndef __ASM_TOPOLOGY_H
+#define __ASM_TOPOLOGY_H
-#ifdef CONFIG_ARM_CPU_TOPOLOGY
+#ifdef CONFIG_SMP
#include <linux/cpumask.h>
-struct cputopo_arm {
+struct cpu_topology {
int thread_id;
int core_id;
- int socket_id;
+ int cluster_id;
cpumask_t thread_sibling;
cpumask_t core_sibling;
};
-extern struct cputopo_arm cpu_topology[NR_CPUS];
+extern struct cpu_topology cpu_topology[NR_CPUS];
-#define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id)
+#define topology_physical_package_id(cpu) (cpu_topology[cpu].cluster_id)
#define topology_core_id(cpu) (cpu_topology[cpu].core_id)
#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling)
#define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_sibling)
-#define mc_capable() (cpu_topology[0].socket_id != -1)
+#define mc_capable() (cpu_topology[0].cluster_id != -1)
#define smt_capable() (cpu_topology[0].thread_id != -1)
void init_cpu_topology(void);
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 4e4c80d11509..5a9ed500704f 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -15,6 +15,7 @@ arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \
sys_compat.o
arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o
arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o smp_psci.o
+arm64-obj-$(CONFIG_SMP) += topology.o
arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o
arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o
arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 84c00cf02ae2..0f018680c3a8 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -397,7 +397,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
smp_store_cpu_info(smp_processor_id());
-
/*
* are we trying to boot more cores than exist?
*/
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 971064a0c6b4..49c94ff29479 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -1,10 +1,10 @@
/*
* arch/arm64/kernel/topology.c
*
- * Copyright (C) 2011,2013 Linaro Limited.
- * Written by: Vincent Guittot
+ * Copyright (C) 2011,2013,2014 Linaro Limited.
*
- * based on arch/sh/kernel/topology.c
+ * Based on the arm32 version written by Vincent Guittot in turn based on
+ * arch/sh/kernel/topology.c
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
@@ -22,15 +22,10 @@
#include <linux/sched.h>
#include <linux/slab.h>
-#include <asm/cputype.h>
#include <asm/smp_plat.h>
#include <asm/topology.h>
/*
- * cpu power scale management
- */
-
-/*
* cpu power table
* This per cpu data structure describes the relative capacity of each core.
* On a heteregenous system, cores don't have the same computation capacity
@@ -53,78 +48,54 @@ static void set_power_scale(unsigned int cpu, unsigned long power)
per_cpu(cpu_scale, cpu) = power;
}
-#ifdef CONFIG_OF
-struct cpu_efficiency {
- const char *compatible;
- unsigned long efficiency;
-};
-
-/*
- * Table of relative efficiency of each processors
- * The efficiency value must fit in 20bit and the final
- * cpu_scale value must be in the range
- * 0 < cpu_scale < 3*SCHED_POWER_SCALE/2
- * in order to return at most 1 when DIV_ROUND_CLOSEST
- * is used to compute the capacity of a CPU.
- * Processors that are not defined in the table,
- * use the default SCHED_POWER_SCALE value for cpu_scale.
- */
-static const struct cpu_efficiency table_efficiency[] = {
- { "arm,cortex-a57", 3891 },
- { "arm,cortex-a53", 2048 },
- { NULL, },
-};
-
-static unsigned long *__cpu_capacity;
-#define cpu_capacity(cpu) __cpu_capacity[cpu]
-
-static unsigned long middle_capacity = 1;
-static int cluster_id;
-
static int __init get_cpu_for_node(struct device_node *node)
{
struct device_node *cpu_node;
int cpu;
cpu_node = of_parse_phandle(node, "cpu", 0);
- if (!cpu_node) {
- pr_crit("%s: Unable to parse CPU phandle\n", node->full_name);
+ if (!cpu_node)
return -1;
- }
for_each_possible_cpu(cpu) {
- if (of_get_cpu_node(cpu, NULL) == cpu_node)
+ if (of_get_cpu_node(cpu, NULL) == cpu_node) {
+ of_node_put(cpu_node);
return cpu;
+ }
}
pr_crit("Unable to find CPU node for %s\n", cpu_node->full_name);
+
+ of_node_put(cpu_node);
return -1;
}
-static void __init parse_core(struct device_node *core, int core_id)
+static int __init parse_core(struct device_node *core, int cluster_id,
+ int core_id)
{
char name[10];
bool leaf = true;
- int i, cpu;
+ int i = 0;
+ int cpu;
struct device_node *t;
- i = 0;
do {
snprintf(name, sizeof(name), "thread%d", i);
t = of_get_child_by_name(core, name);
if (t) {
leaf = false;
cpu = get_cpu_for_node(t);
- if (cpu) {
- pr_info("CPU%d: socket %d core %d thread %d\n",
- cpu, cluster_id, core_id, i);
- cpu_topology[cpu].socket_id = cluster_id;
+ if (cpu >= 0) {
+ cpu_topology[cpu].cluster_id = cluster_id;
cpu_topology[cpu].core_id = core_id;
cpu_topology[cpu].thread_id = i;
} else {
pr_err("%s: Can't get CPU for thread\n",
t->full_name);
+ of_node_put(t);
+ return -EINVAL;
}
+ of_node_put(t);
}
i++;
} while (t);
@@ -134,26 +105,28 @@ static void __init parse_core(struct device_node *core, int core_id)
if (!leaf) {
pr_err("%s: Core has both threads and CPU\n",
core->full_name);
- return;
+ return -EINVAL;
}
- pr_info("CPU%d: socket %d core %d\n",
- cpu, cluster_id, core_id);
- cpu_topology[cpu].socket_id = cluster_id;
+ cpu_topology[cpu].cluster_id = cluster_id;
cpu_topology[cpu].core_id = core_id;
} else if (leaf) {
pr_err("%s: Can't get CPU for leaf core\n", core->full_name);
+ return -EINVAL;
}
+
+ return 0;
}
-static void __init parse_cluster(struct device_node *cluster)
+static int __init parse_cluster(struct device_node *cluster, int depth)
{
char name[10];
bool leaf = true;
bool has_cores = false;
struct device_node *c;
+ static int cluster_id __initdata;
int core_id = 0;
- int i;
+ int i, ret;
/*
* First check for child clusters; we currently ignore any
@@ -165,8 +138,11 @@ static void __init parse_cluster(struct device_node *cluster)
snprintf(name, sizeof(name), "cluster%d", i);
c = of_get_child_by_name(cluster, name);
if (c) {
- parse_cluster(c);
leaf = false;
+ ret = parse_cluster(c, depth + 1);
+ of_node_put(c);
+ if (ret != 0)
+ return ret;
}
i++;
} while (c);
@@ -179,11 +155,24 @@ static void __init parse_cluster(struct device_node *cluster)
if (c) {
has_cores = true;
- if (leaf)
- parse_core(c, core_id++);
- else
+ if (depth == 0) {
+ pr_err("%s: cpu-map children should be clusters\n",
+ c->full_name);
+ of_node_put(c);
+ return -EINVAL;
+ }
+
+ if (leaf) {
+ ret = parse_core(c, cluster_id, core_id++);
+ } else {
pr_err("%s: Non-leaf cluster with core %s\n",
cluster->full_name, name);
+ ret = -EINVAL;
+ }
+
+ of_node_put(c);
+ if (ret != 0)
+ return ret;
}
i++;
} while (c);
@@ -193,8 +182,36 @@ static void __init parse_cluster(struct device_node *cluster)
if (leaf)
cluster_id++;
+
+ return 0;
}
+struct cpu_efficiency {
+ const char *compatible;
+ unsigned long efficiency;
+};
+
+/*
+ * Table of relative efficiency of each processors
+ * The efficiency value must fit in 20bit and the final
+ * cpu_scale value must be in the range
+ * 0 < cpu_scale < 3*SCHED_POWER_SCALE/2
+ * in order to return at most 1 when DIV_ROUND_CLOSEST
+ * is used to compute the capacity of a CPU.
+ * Processors that are not defined in the table,
+ * use the default SCHED_POWER_SCALE value for cpu_scale.
+ */
+static const struct cpu_efficiency table_efficiency[] = {
+ { "arm,cortex-a57", 3891 },
+ { "arm,cortex-a53", 2048 },
+ { NULL, },
+};
+
+static unsigned long *__cpu_capacity;
+#define cpu_capacity(cpu) __cpu_capacity[cpu]
+
+static unsigned long middle_capacity = 1;
+
/*
* Iterate all CPUs' descriptor in DT and compute the efficiency
* (as per table_efficiency). Also calculate a middle efficiency
@@ -203,32 +220,60 @@ static void __init parse_cluster(struct device_node *cluster)
* 'average' CPU is of middle power. Also see the comments near
* table_efficiency[] and update_cpu_power().
*/
-static void __init parse_dt_topology(void)
+static int __init parse_dt_topology(void)
{
- const struct cpu_efficiency *cpu_eff;
- struct device_node *cn = NULL;
- unsigned long min_capacity = (unsigned long)(-1);
- unsigned long max_capacity = 0;
- unsigned long capacity = 0;
- int alloc_size, cpu;
-
- alloc_size = nr_cpu_ids * sizeof(*__cpu_capacity);
- __cpu_capacity = kzalloc(alloc_size, GFP_NOWAIT);
+ struct device_node *cn, *map;
+ int ret = 0;
+ int cpu;
cn = of_find_node_by_path("/cpus");
if (!cn) {
pr_err("No CPU information found in DT\n");
- return;
+ return 0;
}
/*
- * If topology is provided as a cpu-map it is essentially a
- * root cluster.
+ * When topology is provided cpu-map is essentially a root
+ * cluster with restricted subnodes.
*/
- cn = of_find_node_by_name(cn, "cpu-map");
- if (!cn)
- return;
- parse_cluster(cn);
+ map = of_get_child_by_name(cn, "cpu-map");
+ if (!map)
+ goto out;
+
+ ret = parse_cluster(map, 0);
+ if (ret != 0)
+ goto out_map;
+
+ /*
+ * Check that all cores are in the topology; the SMP code will
+ * only mark cores described in the DT as possible.
+ */
+ for_each_possible_cpu(cpu) {
+ if (cpu_topology[cpu].cluster_id == -1) {
+ pr_err("CPU%d: No topology information specified\n",
+ cpu);
+ ret = -EINVAL;
+ }
+ }
+
+out_map:
+ of_node_put(map);
+out:
+ of_node_put(cn);
+ return ret;
+}
+
+static void __init parse_dt_cpu_power(void)
+{
+ const struct cpu_efficiency *cpu_eff;
+ struct device_node *cn;
+ unsigned long min_capacity = ULONG_MAX;
+ unsigned long max_capacity = 0;
+ unsigned long capacity = 0;
+ int cpu;
+
+ __cpu_capacity = kcalloc(nr_cpu_ids, sizeof(*__cpu_capacity),
+ GFP_NOWAIT);
for_each_possible_cpu(cpu) {
const u32 *rate;
@@ -241,10 +286,6 @@ static void __init parse_dt_topology(void)
continue;
}
- /* check if the cpu is marked as "disabled", if so ignore */
- if (!of_device_is_available(cn))
- continue;
-
for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++)
if (of_device_is_compatible(cn, cpu_eff->compatible))
break;
@@ -289,7 +330,6 @@ static void __init parse_dt_topology(void)
else
middle_capacity = ((max_capacity / 3)
>> (SCHED_POWER_SHIFT-1)) + 1;
-
}
/*
@@ -308,15 +348,10 @@ static void update_cpu_power(unsigned int cpu)
cpu, arch_scale_freq_power(NULL, cpu));
}
-#else
-static inline void parse_dt_topology(void) {}
-static inline void update_cpu_power(unsigned int cpuid) {}
-#endif
-
/*
* cpu topology table
*/
-struct cputopo_arm cpu_topology[NR_CPUS];
+struct cpu_topology cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);
const struct cpumask *cpu_coregroup_mask(int cpu)
@@ -326,14 +361,22 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
static void update_siblings_masks(unsigned int cpuid)
{
- struct cputopo_arm *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
+ struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
int cpu;
+ if (cpuid_topo->cluster_id == -1) {
+ /*
+ * DT does not contain topology information for this cpu.
+ */
+ pr_debug("CPU%u: No topology information configured\n", cpuid);
+ return;
+ }
+
/* update core and thread sibling masks */
for_each_possible_cpu(cpu) {
cpu_topo = &cpu_topology[cpu];
- if (cpuid_topo->socket_id != cpu_topo->socket_id)
+ if (cpuid_topo->cluster_id != cpu_topo->cluster_id)
continue;
cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
@@ -347,20 +390,6 @@ static void update_siblings_masks(unsigned int cpuid)
if (cpu != cpuid)
cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
}
- smp_wmb();
-}
-
-void store_cpu_topology(unsigned int cpuid)
-{
- struct cputopo_arm *cpuid_topo = &cpu_topology[cpuid];
-
- /* DT should have been parsed by the time we get here */
- if (cpuid_topo->core_id == -1)
- pr_info("CPU%u: No topology information configured\n", cpuid);
- else
- update_siblings_masks(cpuid);
-
- update_cpu_power(cpuid);
}
#ifdef CONFIG_SCHED_HMP
@@ -511,27 +540,49 @@ int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask)
return -EINVAL;
}
-/*
- * init_cpu_topology is called at boot when only one cpu is running
- * which prevent simultaneous write access to cpu_topology array
- */
-void __init init_cpu_topology(void)
+void store_cpu_topology(unsigned int cpuid)
+{
+ update_siblings_masks(cpuid);
+ update_cpu_power(cpuid);
+}
+
+static void __init reset_cpu_topology(void)
{
unsigned int cpu;
- /* init core mask and power*/
for_each_possible_cpu(cpu) {
- struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]);
+ struct cpu_topology *cpu_topo = &cpu_topology[cpu];
cpu_topo->thread_id = -1;
- cpu_topo->core_id = -1;
- cpu_topo->socket_id = -1;
+ cpu_topo->core_id = 0;
+ cpu_topo->cluster_id = -1;
+
cpumask_clear(&cpu_topo->core_sibling);
+ cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
cpumask_clear(&cpu_topo->thread_sibling);
+ cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
+ }
+}
+
+static void __init reset_cpu_power(void)
+{
+ unsigned int cpu;
+ for_each_possible_cpu(cpu)
set_power_scale(cpu, SCHED_POWER_SCALE);
- }
- smp_wmb();
+}
+
+void __init init_cpu_topology(void)
+{
+ reset_cpu_topology();
+
+ /*
+ * Discard anything that was parsed if we hit an error so we
+ * don't use partial information.
+ */
+ if (parse_dt_topology())
+ reset_cpu_topology();
- parse_dt_topology();
+ reset_cpu_power();
+ parse_dt_cpu_power();
}