Diffstat (limited to 'arch/arm64/kernel')
-rw-r--r--   arch/arm64/kernel/Makefile         |    5
-rw-r--r--   arch/arm64/kernel/cpu_ops.c        |   99
-rw-r--r--   arch/arm64/kernel/cputable.c       |    2
-rw-r--r--   arch/arm64/kernel/head.S           |   12
-rw-r--r--   arch/arm64/kernel/irq.c            |   61
-rw-r--r--   arch/arm64/kernel/process.c        |    7
-rw-r--r--   arch/arm64/kernel/psci.c           |   87
-rw-r--r--   arch/arm64/kernel/setup.c          |    7
-rw-r--r--   arch/arm64/kernel/smp.c            |  233
-rw-r--r--   arch/arm64/kernel/smp_psci.c       |   53
-rw-r--r--   arch/arm64/kernel/smp_spin_table.c |   86
-rw-r--r--   arch/arm64/kernel/topology.c       |  537
-rw-r--r--   arch/arm64/kernel/vmlinux.lds.S    |    1
13 files changed, 1018 insertions, 172 deletions
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 7b4b564961d..2d145e38ad4 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -9,15 +9,16 @@ AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
 arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \
 		entry-fpsimd.o process.o ptrace.o setup.o signal.o \
 		sys.o stacktrace.o time.o traps.o io.o vdso.o \
-		hyp-stub.o psci.o
+		hyp-stub.o psci.o cpu_ops.o
 
 arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \
 		sys_compat.o
 arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o
-arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o smp_psci.o
+arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o
 arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o
 arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o
 arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+arm64-obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o
 
 obj-y += $(arm64-obj-y) vdso/
 obj-m += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c
new file mode 100644
index 00000000000..04efea8fe4b
--- /dev/null
+++ b/arch/arm64/kernel/cpu_ops.c
@@ -0,0 +1,99 @@
+/*
+ * CPU kernel entry/exit control
+ *
+ * Copyright (C) 2013 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <asm/cpu_ops.h>
+#include <asm/smp_plat.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/string.h>
+
+extern const struct cpu_operations smp_spin_table_ops;
+extern const struct cpu_operations cpu_psci_ops;
+
+const struct cpu_operations *cpu_ops[NR_CPUS];
+
+static const struct cpu_operations *supported_cpu_ops[] __initconst = {
+#ifdef CONFIG_SMP
+	&smp_spin_table_ops,
+	&cpu_psci_ops,
+#endif
+	NULL,
+};
+
+static const struct cpu_operations * __init cpu_get_ops(const char *name)
+{
+	const struct cpu_operations **ops = supported_cpu_ops;
+
+	while (*ops) {
+		if (!strcmp(name, (*ops)->name))
+			return *ops;
+
+		ops++;
+	}
+
+	return NULL;
+}
+
+/*
+ * Read a cpu's enable method from the device tree and record it in cpu_ops.
+ */
+int __init cpu_read_ops(struct device_node *dn, int cpu)
+{
+	const char *enable_method = of_get_property(dn, "enable-method", NULL);
+	if (!enable_method) {
+		/*
+		 * The boot CPU may not have an enable method (e.g. when
+		 * spin-table is used for secondaries). Don't warn spuriously.
+		 */
+		if (cpu != 0)
+			pr_err("%s: missing enable-method property\n",
+				dn->full_name);
+		return -ENOENT;
+	}
+
+	cpu_ops[cpu] = cpu_get_ops(enable_method);
+	if (!cpu_ops[cpu]) {
+		pr_warn("%s: unsupported enable-method property: %s\n",
+			dn->full_name, enable_method);
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+void __init cpu_read_bootcpu_ops(void)
+{
+	struct device_node *dn = NULL;
+	u64 mpidr = cpu_logical_map(0);
+
+	while ((dn = of_find_node_by_type(dn, "cpu"))) {
+		u64 hwid;
+		const __be32 *prop;
+
+		prop = of_get_property(dn, "reg", NULL);
+		if (!prop)
+			continue;
+
+		hwid = of_read_number(prop, of_n_addr_cells(dn));
+		if (hwid == mpidr) {
+			cpu_read_ops(dn, 0);
+			of_node_put(dn);
+			return;
+		}
+	}
+}
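The cpu_get_ops() lookup above is a NULL-terminated dispatch table keyed by the device tree's enable-method string. A standalone C sketch of the same pattern follows; all names in it (fake_cpu_ops, spin_table_boot, and so on) are invented for illustration, not kernel API:

#include <stdio.h>
#include <string.h>

/* Illustrative stand-in for struct cpu_operations. */
struct fake_cpu_ops {
	const char *name;
	int (*boot)(unsigned int cpu);
};

static int spin_table_boot(unsigned int cpu)
{
	printf("spin-table boot of CPU%u\n", cpu);
	return 0;
}

static int psci_method_boot(unsigned int cpu)
{
	printf("psci boot of CPU%u\n", cpu);
	return 0;
}

static const struct fake_cpu_ops spin_table = { "spin-table", spin_table_boot };
static const struct fake_cpu_ops psci_method = { "psci", psci_method_boot };

/* NULL-terminated table, mirroring supported_cpu_ops[]. */
static const struct fake_cpu_ops *supported[] = { &spin_table, &psci_method, NULL };

static const struct fake_cpu_ops *get_ops(const char *name)
{
	const struct fake_cpu_ops **ops;

	for (ops = supported; *ops; ops++)
		if (!strcmp(name, (*ops)->name))
			return *ops;
	return NULL;
}

int main(void)
{
	const struct fake_cpu_ops *ops = get_ops("psci");

	if (ops)
		ops->boot(1);	/* prints: psci boot of CPU1 */
	return 0;
}

Keeping the table NULL-terminated means a new enable method is added by appending one entry, exactly as the patch does for spin-table and PSCI.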
diff --git a/arch/arm64/kernel/cputable.c b/arch/arm64/kernel/cputable.c
index 63cfc4a43f4..fd3993cb060 100644
--- a/arch/arm64/kernel/cputable.c
+++ b/arch/arm64/kernel/cputable.c
@@ -22,7 +22,7 @@
 
 extern unsigned long __cpu_setup(void);
 
-struct cpu_info __initdata cpu_table[] = {
+struct cpu_info cpu_table[] = {
 	{
 		.cpu_id_val	= 0x000f0000,
 		.cpu_id_mask	= 0x000f0000,
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 53dcae49e72..3532ca61371 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -217,7 +217,6 @@ ENTRY(__boot_cpu_mode)
 	.quad	PAGE_OFFSET
 
 #ifdef CONFIG_SMP
-	.pushsection	.smp.pen.text, "ax"
 	.align	3
 1:	.quad	.
 	.quad	secondary_holding_pen_release
@@ -242,7 +241,16 @@ pen:	ldr	x4, [x3]
 	wfe
 	b	pen
 ENDPROC(secondary_holding_pen)
-	.popsection
+
+	/*
+	 * Secondary entry point that jumps straight into the kernel. Only to
+	 * be used where CPUs are brought online dynamically by the kernel.
+	 */
+ENTRY(secondary_entry)
+	bl	__calc_phys_offset		// x2=phys offset
+	bl	el2_setup			// Drop to EL1
+	b	secondary_startup
+ENDPROC(secondary_entry)
 
 ENTRY(secondary_startup)
 	/*
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index ecb3354292e..473e5dbf8f3 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -81,3 +81,64 @@ void __init init_IRQ(void)
 	if (!handle_arch_irq)
 		panic("No interrupt controller found.");
 }
+
+#ifdef CONFIG_HOTPLUG_CPU
+static bool migrate_one_irq(struct irq_desc *desc)
+{
+	struct irq_data *d = irq_desc_get_irq_data(desc);
+	const struct cpumask *affinity = d->affinity;
+	struct irq_chip *c;
+	bool ret = false;
+
+	/*
+	 * If this is a per-CPU interrupt, or the affinity does not
+	 * include this CPU, then we have nothing to do.
+	 */
+	if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity))
+		return false;
+
+	if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
+		affinity = cpu_online_mask;
+		ret = true;
+	}
+
+	c = irq_data_get_irq_chip(d);
+	if (!c->irq_set_affinity)
+		pr_debug("IRQ%u: unable to set affinity\n", d->irq);
+	else if (c->irq_set_affinity(d, affinity, true) == IRQ_SET_MASK_OK && ret)
+		cpumask_copy(d->affinity, affinity);
+
+	return ret;
+}
+
+/*
+ * The current CPU has been marked offline.  Migrate IRQs off this CPU.
+ * If the affinity settings do not allow other CPUs, force them onto any
+ * available CPU.
+ *
+ * Note: we must iterate over all IRQs, whether they have an attached
+ * action structure or not, as we need to get chained interrupts too.
+ */
+void migrate_irqs(void)
+{
+	unsigned int i;
+	struct irq_desc *desc;
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	for_each_irq_desc(i, desc) {
+		bool affinity_broken;
+
+		raw_spin_lock(&desc->lock);
+		affinity_broken = migrate_one_irq(desc);
+		raw_spin_unlock(&desc->lock);
+
+		if (affinity_broken)
+			pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n",
+					    i, smp_processor_id());
+	}
+
+	local_irq_restore(flags);
+}
+#endif /* CONFIG_HOTPLUG_CPU */
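The core of migrate_one_irq() is set arithmetic: if an interrupt's affinity mask no longer intersects the online CPUs, it is forced onto the whole online mask and the caller is told the affinity was broken. The same logic in a freestanding sketch, with plain word-sized bitmasks standing in for struct cpumask (nothing here is kernel API):

#include <stdio.h>

/* One bit per CPU; a plain word stands in for struct cpumask. */
static unsigned long online_mask = 0x5;		/* CPUs 0 and 2 online */

/* Returns 1 if the affinity had to be broken (forced to all online CPUs). */
static int migrate_one(unsigned int irq, unsigned long *affinity)
{
	if (*affinity & online_mask)
		return 0;			/* still has an online target */

	*affinity = online_mask;		/* force onto any available CPU */
	printf("IRQ%u no longer affine to its original CPUs\n", irq);
	return 1;
}

int main(void)
{
	unsigned long aff = 0x2;	/* bound to CPU1 only, now offline */

	migrate_one(42, &aff);		/* breaks affinity; aff becomes 0x5 */
	return 0;
}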
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 46f02c3b501..4caf198ca44 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -102,6 +102,13 @@ void arch_cpu_idle(void)
 	local_irq_enable();
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+void arch_cpu_idle_dead(void)
+{
+	cpu_die();
+}
+#endif
+
 void machine_shutdown(void)
 {
 #ifdef CONFIG_SMP
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index 14f73c445ff..4f97db3d736 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -17,12 +17,32 @@
 
 #include <linux/init.h>
 #include <linux/of.h>
+#include <linux/smp.h>
 
 #include <asm/compiler.h>
+#include <asm/cpu_ops.h>
 #include <asm/errno.h>
 #include <asm/psci.h>
+#include <asm/smp_plat.h>
 
-struct psci_operations psci_ops;
+#define PSCI_POWER_STATE_TYPE_STANDBY		0
+#define PSCI_POWER_STATE_TYPE_POWER_DOWN	1
+
+struct psci_power_state {
+	u16	id;
+	u8	type;
+	u8	affinity_level;
+};
+
+struct psci_operations {
+	int (*cpu_suspend)(struct psci_power_state state,
+			   unsigned long entry_point);
+	int (*cpu_off)(struct psci_power_state state);
+	int (*cpu_on)(unsigned long cpuid, unsigned long entry_point);
+	int (*migrate)(unsigned long cpuid);
+};
+
+static struct psci_operations psci_ops;
 
 static int (*invoke_psci_fn)(u64, u64, u64, u64);
@@ -209,3 +229,68 @@ out_put_node:
 	of_node_put(np);
 	return err;
 }
+
+#ifdef CONFIG_SMP
+
+static int __init cpu_psci_cpu_init(struct device_node *dn, unsigned int cpu)
+{
+	return 0;
+}
+
+static int __init cpu_psci_cpu_prepare(unsigned int cpu)
+{
+	if (!psci_ops.cpu_on) {
+		pr_err("no cpu_on method, not booting CPU%d\n", cpu);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static int cpu_psci_cpu_boot(unsigned int cpu)
+{
+	int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_entry));
+	if (err)
+		pr_err("psci: failed to boot CPU%d (%d)\n", cpu, err);
+
+	return err;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int cpu_psci_cpu_disable(unsigned int cpu)
+{
+	/* Fail early if we don't have CPU_OFF support */
+	if (!psci_ops.cpu_off)
+		return -EOPNOTSUPP;
+	return 0;
+}
+
+static void cpu_psci_cpu_die(unsigned int cpu)
+{
+	int ret;
+	/*
+	 * There are no known implementations of PSCI actually using the
+	 * power state field, pass a sensible default for now.
+	 */
+	struct psci_power_state state = {
+		.type = PSCI_POWER_STATE_TYPE_POWER_DOWN,
+	};
+
+	ret = psci_ops.cpu_off(state);
+
+	pr_crit("psci: unable to power off CPU%u (%d)\n", cpu, ret);
+}
+#endif
+
+const struct cpu_operations cpu_psci_ops = {
+	.name		= "psci",
+	.cpu_init	= cpu_psci_cpu_init,
+	.cpu_prepare	= cpu_psci_cpu_prepare,
+	.cpu_boot	= cpu_psci_cpu_boot,
+#ifdef CONFIG_HOTPLUG_CPU
+	.cpu_disable	= cpu_psci_cpu_disable,
+	.cpu_die	= cpu_psci_cpu_die,
+#endif
+};
+
+#endif
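Note how cpu_psci_cpu_prepare() and cpu_psci_cpu_disable() both refuse to proceed when the firmware never registered the relevant PSCI call: booting is vetoed without CPU_ON, unplug is vetoed without CPU_OFF, in each case before anything irreversible happens. A minimal sketch of that fail-early pattern over optional function pointers (struct and names are invented for illustration):

#include <errno.h>
#include <stdio.h>

struct fw_ops {
	int (*cpu_on)(unsigned long target);	/* may be NULL */
	int (*cpu_off)(void);			/* may be NULL */
};

static int prepare(const struct fw_ops *ops, unsigned int cpu)
{
	if (!ops->cpu_on) {
		fprintf(stderr, "no cpu_on method, not booting CPU%u\n", cpu);
		return -ENODEV;
	}
	return 0;
}

static int can_unplug(const struct fw_ops *ops)
{
	/* Fail early, before the CPU passes the point of no return. */
	return ops->cpu_off ? 0 : -EOPNOTSUPP;
}

int main(void)
{
	struct fw_ops ops = { 0 };	/* firmware advertises nothing */

	printf("prepare: %d, unplug: %d\n", prepare(&ops, 1), can_unplug(&ops));
	return 0;
}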
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index add6ea61684..85afdae9cc0 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -45,6 +45,7 @@
 #include <asm/cputype.h>
 #include <asm/elf.h>
 #include <asm/cputable.h>
+#include <asm/cpu_ops.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
 #include <asm/smp_plat.h>
@@ -97,6 +98,11 @@ void __init early_print(const char *str, ...)
 	printk("%s", buf);
 }
 
+bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
+{
+	return phys_id == cpu_logical_map(cpu);
+}
+
 static void __init setup_processor(void)
 {
 	struct cpu_info *cpu_info;
@@ -269,6 +275,7 @@ void __init setup_arch(char **cmdline_p)
 	psci_init();
 
 	cpu_logical_map(0) = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
+	cpu_read_bootcpu_ops();
 #ifdef CONFIG_SMP
 	smp_init_cpus();
 #endif
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 9c93e126328..35085d66459 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -39,6 +39,7 @@
 #include <asm/atomic.h>
 #include <asm/cacheflush.h>
 #include <asm/cputype.h>
+#include <asm/cpu_ops.h>
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -48,13 +49,15 @@
 #include <asm/tlbflush.h>
 #include <asm/ptrace.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/arm-ipi.h>
+
 /*
  * as from 2.5, kernels no longer have an init_tasks structure
  * so we need some other way of telling a new secondary core
  * where to place its SVC stack
 */
 struct secondary_data secondary_data;
-volatile unsigned long secondary_holding_pen_release = INVALID_HWID;
 
 enum ipi_msg_type {
 	IPI_RESCHEDULE,
@@ -63,61 +66,16 @@ enum ipi_msg_type {
 	IPI_CPU_STOP,
 };
 
-static DEFINE_RAW_SPINLOCK(boot_lock);
-
-/*
- * Write secondary_holding_pen_release in a way that is guaranteed to be
- * visible to all observers, irrespective of whether they're taking part
- * in coherency or not. This is necessary for the hotplug code to work
- * reliably.
- */
-static void __cpuinit write_pen_release(u64 val)
-{
-	void *start = (void *)&secondary_holding_pen_release;
-	unsigned long size = sizeof(secondary_holding_pen_release);
-
-	secondary_holding_pen_release = val;
-	__flush_dcache_area(start, size);
-}
-
 /*
  * Boot a secondary CPU, and assign it the specified idle task.
  * This also gives us the initial stack to use for this CPU.
 */
 static int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
 {
-	unsigned long timeout;
+	if (cpu_ops[cpu]->cpu_boot)
+		return cpu_ops[cpu]->cpu_boot(cpu);
 
-	/*
-	 * Set synchronisation state between this boot processor
-	 * and the secondary one
-	 */
-	raw_spin_lock(&boot_lock);
-
-	/*
-	 * Update the pen release flag.
-	 */
-	write_pen_release(cpu_logical_map(cpu));
-
-	/*
-	 * Send an event, causing the secondaries to read pen_release.
-	 */
-	sev();
-
-	timeout = jiffies + (1 * HZ);
-	while (time_before(jiffies, timeout)) {
-		if (secondary_holding_pen_release == INVALID_HWID)
-			break;
-		udelay(10);
-	}
-
-	/*
-	 * Now the secondary core is starting up let it run its
-	 * calibrations, then wait for it to finish
-	 */
-	raw_spin_unlock(&boot_lock);
-
-	return secondary_holding_pen_release != INVALID_HWID ? -ENOSYS : 0;
+	return -EOPNOTSUPP;
 }
 
 static DECLARE_COMPLETION(cpu_running);
@@ -158,6 +116,11 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
 	return ret;
 }
 
+static void __cpuinit smp_store_cpu_info(unsigned int cpuid)
+{
+	store_cpu_topology(cpuid);
+}
+
 /*
  * This is the secondary CPU boot entry. We're using this CPUs
  * idle thread stack, but a set of temporary page tables.
@@ -187,17 +150,15 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
 	preempt_disable();
 	trace_hardirqs_off();
 
-	/*
-	 * Let the primary processor know we're out of the
-	 * pen, then head off into the C entry point
-	 */
-	write_pen_release(INVALID_HWID);
+	if (cpu_ops[cpu]->cpu_postboot)
+		cpu_ops[cpu]->cpu_postboot();
+
+	smp_store_cpu_info(cpu);
 
 	/*
-	 * Synchronise with the boot thread.
+	 * Enable GIC and timers.
 	 */
-	raw_spin_lock(&boot_lock);
-	raw_spin_unlock(&boot_lock);
+	notify_cpu_starting(cpu);
 
 	/*
 	 * OK, now it's safe to let the boot CPU continue.  Wait for
@@ -207,11 +168,6 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
 	set_cpu_online(cpu, true);
 	complete(&cpu_running);
 
-	/*
-	 * Enable GIC and timers.
-	 */
-	notify_cpu_starting(cpu);
-
 	local_irq_enable();
 	local_fiq_enable();
 
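After this change boot_secondary() simply delegates to cpu_ops[cpu]->cpu_boot(), while __cpu_up() (unchanged context above) blocks on the cpu_running completion until the secondary calls complete(); the hot-unplug path below repeats the same shape with cpu_died. A rough userspace analogy of that bounded handshake, using a pthread condition variable in place of the kernel's completion API (compile with -lpthread; purely illustrative):

#include <pthread.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int done;

static void *secondary(void *arg)
{
	pthread_mutex_lock(&lock);
	done = 1;			/* "CPU is up" */
	pthread_cond_signal(&cond);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;
	struct timespec deadline;
	int err = 0;

	clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_sec += 1;		/* ~ wait_for_completion_timeout() */

	pthread_create(&t, NULL, secondary, NULL);

	pthread_mutex_lock(&lock);
	while (!done && !err)
		err = pthread_cond_timedwait(&cond, &lock, &deadline);
	pthread_mutex_unlock(&lock);

	printf(err ? "CPU failed to come online\n" : "CPU online\n");
	pthread_join(t, NULL);
	return 0;
}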
@@ -221,43 +177,117 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
 	cpu_startup_entry(CPUHP_ONLINE);
 }
 
-void __init smp_cpus_done(unsigned int max_cpus)
+#ifdef CONFIG_HOTPLUG_CPU
+static int op_cpu_disable(unsigned int cpu)
 {
-	unsigned long bogosum = loops_per_jiffy * num_online_cpus();
+	/*
+	 * If we don't have a cpu_die method, abort before we reach the point
+	 * of no return. CPU0 may not have a cpu_ops, so test for it.
+	 */
+	if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_die)
+		return -EOPNOTSUPP;
 
-	pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
-		num_online_cpus(), bogosum / (500000/HZ),
-		(bogosum / (5000/HZ)) % 100);
+	/*
+	 * We may need to abort a hot unplug for some other mechanism-specific
+	 * reason.
+	 */
+	if (cpu_ops[cpu]->cpu_disable)
+		return cpu_ops[cpu]->cpu_disable(cpu);
+
+	return 0;
 }
 
-void __init smp_prepare_boot_cpu(void)
+/*
+ * __cpu_disable runs on the processor to be shutdown.
+ */
+int __cpu_disable(void)
 {
-}
+	unsigned int cpu = smp_processor_id();
+	int ret;
 
-static void (*smp_cross_call)(const struct cpumask *, unsigned int);
+	ret = op_cpu_disable(cpu);
+	if (ret)
+		return ret;
 
-static const struct smp_enable_ops *enable_ops[] __initconst = {
-	&smp_spin_table_ops,
-	&smp_psci_ops,
-	NULL,
-};
+	/*
+	 * Take this CPU offline.  Once we clear this, we can't return,
+	 * and we must not schedule until we're ready to give up the cpu.
+	 */
+	set_cpu_online(cpu, false);
 
-static const struct smp_enable_ops *smp_enable_ops[NR_CPUS];
+	/*
+	 * OK - migrate IRQs away from this CPU
+	 */
+	migrate_irqs();
 
-static const struct smp_enable_ops * __init smp_get_enable_ops(const char *name)
-{
-	const struct smp_enable_ops **ops = enable_ops;
+	/*
+	 * Remove this CPU from the vm mask set of all processes.
+	 */
+	clear_tasks_mm_cpumask(cpu);
+
+	return 0;
+}
 
-	while (*ops) {
-		if (!strcmp(name, (*ops)->name))
-			return *ops;
+static DECLARE_COMPLETION(cpu_died);
 
-		ops++;
+/*
+ * called on the thread which is asking for a CPU to be shutdown -
+ * waits until shutdown has completed, or it is timed out.
+ */
+void __cpu_die(unsigned int cpu)
+{
+	if (!wait_for_completion_timeout(&cpu_died, msecs_to_jiffies(5000))) {
+		pr_crit("CPU%u: cpu didn't die\n", cpu);
+		return;
 	}
+	pr_notice("CPU%u: shutdown\n", cpu);
+}
+
+/*
+ * Called from the idle thread for the CPU which has been shutdown.
+ *
+ * Note that we disable IRQs here, but do not re-enable them
+ * before returning to the caller. This is also the behaviour
+ * of the other hotplug-cpu capable cores, so presumably coming
+ * out of idle fixes this.
+ */
+void cpu_die(void)
+{
+	unsigned int cpu = smp_processor_id();
+
+	idle_task_exit();
+
+	local_irq_disable();
+
+	/* Tell __cpu_die() that this CPU is now safe to dispose of */
+	complete(&cpu_died);
+
+	/*
+	 * Actually shutdown the CPU. This must never fail. The specific hotplug
+	 * mechanism must perform all required cache maintenance to ensure that
+	 * no dirty lines are lost in the process of shutting down the CPU.
+	 */
+	cpu_ops[cpu]->cpu_die(cpu);
+
+	BUG();
+}
+#endif
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+	unsigned long bogosum = loops_per_jiffy * num_online_cpus();
 
-	return NULL;
+	pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+		num_online_cpus(), bogosum / (500000/HZ),
+		(bogosum / (5000/HZ)) % 100);
 }
 
+void __init smp_prepare_boot_cpu(void)
+{
+}
+
+static void (*smp_cross_call)(const struct cpumask *, unsigned int);
+
 /*
  * Enumerate the possible CPU set from the device tree and build the
  * cpu logical map array containing MPIDR values related to logical
@@ -265,9 +295,8 @@ static const struct smp_enable_ops * __init smp_get_enable_ops(const char *name
  */
 void __init smp_init_cpus(void)
 {
-	const char *enable_method;
 	struct device_node *dn = NULL;
-	int i, cpu = 1;
+	unsigned int i, cpu = 1;
 	bool bootcpu_valid = false;
 
 	while ((dn = of_find_node_by_type(dn, "cpu"))) {
@@ -336,25 +365,10 @@ void __init smp_init_cpus(void)
 		if (cpu >= NR_CPUS)
 			goto next;
 
-		/*
-		 * We currently support only the "spin-table" enable-method.
-		 */
-		enable_method = of_get_property(dn, "enable-method", NULL);
-		if (!enable_method) {
-			pr_err("%s: missing enable-method property\n",
-				dn->full_name);
+		if (cpu_read_ops(dn, cpu) != 0)
 			goto next;
-		}
-
-		smp_enable_ops[cpu] = smp_get_enable_ops(enable_method);
-		if (!smp_enable_ops[cpu]) {
-			pr_err("%s: invalid enable-method property: %s\n",
-				dn->full_name, enable_method);
-			goto next;
-		}
-
-		if (smp_enable_ops[cpu]->init_cpu(dn, cpu))
+		if (cpu_ops[cpu]->cpu_init(dn, cpu))
 			goto next;
 
 		pr_debug("cpu logical map 0x%llx\n", hwid);
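op_cpu_disable() layers a mandatory capability check (a cpu_die method must exist before the CPU passes the point of no return) over an optional mechanism-specific veto (cpu_disable). A small sketch of that two-level gate, with invented names and data:

#include <errno.h>
#include <stdio.h>

struct hp_ops {
	void (*die)(void);		/* mandatory for unplug */
	int (*disable)(void);		/* optional extra veto */
};

/* Mirrors op_cpu_disable(): no die hook means no unplug at all,
 * and the mechanism may still veto via its optional disable hook. */
static int op_disable(const struct hp_ops *ops)
{
	if (!ops || !ops->die)
		return -EOPNOTSUPP;
	if (ops->disable)
		return ops->disable();
	return 0;
}

static void fake_die(void) { }
static int veto(void) { return -EBUSY; }

int main(void)
{
	struct hp_ops a = { fake_die, NULL };	/* unpluggable */
	struct hp_ops b = { fake_die, veto };	/* vetoed by its mechanism */

	printf("a: %d, b: %d\n", op_disable(&a), op_disable(&b));
	return 0;
}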
@@ -384,8 +398,13 @@ next:
 
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
-	int cpu, err;
-	unsigned int ncores = num_possible_cpus();
+	int err;
+	unsigned int cpu, ncores = num_possible_cpus();
+
+	init_cpu_topology();
+
+	smp_store_cpu_info(smp_processor_id());
+
 	/*
 	 * are we trying to boot more cores than exist?
@@ -412,10 +431,10 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 		if (cpu == smp_processor_id())
 			continue;
 
-		if (!smp_enable_ops[cpu])
+		if (!cpu_ops[cpu])
 			continue;
 
-		err = smp_enable_ops[cpu]->prepare_cpu(cpu);
+		err = cpu_ops[cpu]->cpu_prepare(cpu);
 		if (err)
 			continue;
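smp_prepare_cpus() walks the possible CPUs, skipping any without registered ops, tolerating individual cpu_prepare() failures, and stopping at the max_cpus cap. A toy version of that loop (all data invented; prepare hooks return 0 on success, as in the kernel):

#include <stdio.h>

#define NR_FAKE_CPUS 4

/* Per-CPU prepare hook; returns 0 on success, like cpu_prepare(). */
static int prepare_ok(unsigned int cpu)
{
	return cpu == 2 ? -1 : 0;	/* pretend CPU2 cannot be prepared */
}

int main(void)
{
	int (*prepare[NR_FAKE_CPUS])(unsigned int) =
		{ NULL, prepare_ok, prepare_ok, prepare_ok };
	unsigned int cpu, max_cpus = 3, ready = 1;	/* boot CPU counts */

	for (cpu = 1; cpu < NR_FAKE_CPUS && ready < max_cpus; cpu++) {
		if (!prepare[cpu])
			continue;		/* no enable method known */
		if (prepare[cpu](cpu))
			continue;		/* failed: leave it offline */
		ready++;
		printf("CPU%u ready to be booted\n", cpu);
	}
	return 0;
}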
diff --git a/arch/arm64/kernel/smp_psci.c b/arch/arm64/kernel/smp_psci.c
deleted file mode 100644
index 0c533301be7..00000000000
--- a/arch/arm64/kernel/smp_psci.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * PSCI SMP initialisation
- *
- * Copyright (C) 2013 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/init.h>
-#include <linux/of.h>
-#include <linux/smp.h>
-
-#include <asm/psci.h>
-#include <asm/smp_plat.h>
-
-static int __init smp_psci_init_cpu(struct device_node *dn, int cpu)
-{
-	return 0;
-}
-
-static int __init smp_psci_prepare_cpu(int cpu)
-{
-	int err;
-
-	if (!psci_ops.cpu_on) {
-		pr_err("psci: no cpu_on method, not booting CPU%d\n", cpu);
-		return -ENODEV;
-	}
-
-	err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_holding_pen));
-	if (err) {
-		pr_err("psci: failed to boot CPU%d (%d)\n", cpu, err);
-		return err;
-	}
-
-	return 0;
-}
-
-const struct smp_enable_ops smp_psci_ops __initconst = {
-	.name		= "psci",
-	.init_cpu	= smp_psci_init_cpu,
-	.prepare_cpu	= smp_psci_prepare_cpu,
-};
diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
index 7c35fa682f7..27f08367a6e 100644
--- a/arch/arm64/kernel/smp_spin_table.c
+++ b/arch/arm64/kernel/smp_spin_table.c
@@ -16,15 +16,39 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/of.h>
 #include <linux/smp.h>
 
 #include <asm/cacheflush.h>
+#include <asm/cpu_ops.h>
+#include <asm/cputype.h>
+#include <asm/smp_plat.h>
+
+extern void secondary_holding_pen(void);
+volatile unsigned long secondary_holding_pen_release = INVALID_HWID;
 
 static phys_addr_t cpu_release_addr[NR_CPUS];
+static DEFINE_RAW_SPINLOCK(boot_lock);
 
-static int __init smp_spin_table_init_cpu(struct device_node *dn, int cpu)
+/*
+ * Write secondary_holding_pen_release in a way that is guaranteed to be
+ * visible to all observers, irrespective of whether they're taking part
+ * in coherency or not. This is necessary for the hotplug code to work
+ * reliably.
+ */
+static void write_pen_release(u64 val)
+{
+	void *start = (void *)&secondary_holding_pen_release;
+	unsigned long size = sizeof(secondary_holding_pen_release);
+
+	secondary_holding_pen_release = val;
+	__flush_dcache_area(start, size);
+}
+
+
+static int smp_spin_table_cpu_init(struct device_node *dn, unsigned int cpu)
 {
 	/*
 	 * Determine the address from which the CPU is polling.
@@ -40,7 +64,7 @@ static int __init smp_spin_table_init_cpu(struct device_node *dn, int cpu)
 	return 0;
 }
 
-static int __init smp_spin_table_prepare_cpu(int cpu)
+static int smp_spin_table_cpu_prepare(unsigned int cpu)
 {
 	void **release_addr;
 
@@ -59,8 +83,60 @@ static int __init smp_spin_table_prepare_cpu(int cpu)
 	return 0;
 }
 
-const struct smp_enable_ops smp_spin_table_ops __initconst = {
+static int smp_spin_table_cpu_boot(unsigned int cpu)
+{
+	unsigned long timeout;
+
+	/*
+	 * Set synchronisation state between this boot processor
+	 * and the secondary one
+	 */
+	raw_spin_lock(&boot_lock);
+
+	/*
+	 * Update the pen release flag.
+	 */
+	write_pen_release(cpu_logical_map(cpu));
+
+	/*
+	 * Send an event, causing the secondaries to read pen_release.
+	 */
+	sev();
+
+	timeout = jiffies + (1 * HZ);
+	while (time_before(jiffies, timeout)) {
+		if (secondary_holding_pen_release == INVALID_HWID)
+			break;
+		udelay(10);
+	}
+
+	/*
+	 * Now the secondary core is starting up let it run its
+	 * calibrations, then wait for it to finish
+	 */
+	raw_spin_unlock(&boot_lock);
+
+	return secondary_holding_pen_release != INVALID_HWID ? -ENOSYS : 0;
+}
+
+void smp_spin_table_cpu_postboot(void)
+{
+	/*
+	 * Let the primary processor know we're out of the pen.
+	 */
+	write_pen_release(INVALID_HWID);
+
+	/*
+	 * Synchronise with the boot thread.
+	 */
+	raw_spin_lock(&boot_lock);
+	raw_spin_unlock(&boot_lock);
+}
+
+const struct cpu_operations smp_spin_table_ops = {
 	.name		= "spin-table",
-	.init_cpu	= smp_spin_table_init_cpu,
-	.prepare_cpu	= smp_spin_table_prepare_cpu,
+	.cpu_init	= smp_spin_table_cpu_init,
+	.cpu_prepare	= smp_spin_table_cpu_prepare,
+	.cpu_boot	= smp_spin_table_cpu_boot,
+	.cpu_postboot	= smp_spin_table_cpu_postboot,
};
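The spin-table handshake above is a mailbox protocol: cpu_boot publishes the target's hwid in secondary_holding_pen_release (with an explicit dcache flush plus sev()), the secondary spins in wfe until it sees its own id, and then clears the mailbox from cpu_postboot. A compressed userspace analogy where C11 atomics stand in for the flush/sev()/wfe machinery (compile with -lpthread; names and values are illustrative only):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define INVALID_HWID (~0UL)

/* Stands in for secondary_holding_pen_release. */
static atomic_ulong pen_release = INVALID_HWID;

static void *pen(void *arg)
{
	unsigned long my_hwid = (unsigned long)arg;

	/* secondary_holding_pen: spin until our hwid is published */
	while (atomic_load(&pen_release) != my_hwid)
		;
	/* cpu_postboot: tell the boot CPU we are out of the pen */
	atomic_store(&pen_release, INVALID_HWID);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, pen, (void *)0x100UL);
	atomic_store(&pen_release, 0x100UL);	/* write_pen_release() */

	while (atomic_load(&pen_release) != INVALID_HWID)
		;	/* the kernel bounds this wait with a 1 s timeout */
	puts("secondary left the pen");
	pthread_join(t, NULL);
	return 0;
}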
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
new file mode 100644
index 00000000000..971064a0c6b
--- /dev/null
+++ b/arch/arm64/kernel/topology.c
@@ -0,0 +1,537 @@
+/*
+ * arch/arm64/kernel/topology.c
+ *
+ * Copyright (C) 2011,2013 Linaro Limited.
+ * Written by: Vincent Guittot
+ *
+ * based on arch/sh/kernel/topology.c
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/node.h>
+#include <linux/nodemask.h>
+#include <linux/of.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include <asm/cputype.h>
+#include <asm/smp_plat.h>
+#include <asm/topology.h>
+
+/*
+ * cpu power scale management
+ */
+
+/*
+ * cpu power table
+ * This per cpu data structure describes the relative capacity of each core.
+ * On a heterogeneous system, cores don't have the same computation capacity
+ * and we reflect that difference in the cpu_power field so the scheduler can
+ * take this difference into account during load balance. A per cpu structure
+ * is preferred because each CPU updates its own cpu_power field during the
+ * load balance except for idle cores. One idle core is selected to run the
+ * rebalance_domains for all idle cores and the cpu_power can be updated
+ * during this sequence.
+ */
+static DEFINE_PER_CPU(unsigned long, cpu_scale);
+
+unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu)
+{
+	return per_cpu(cpu_scale, cpu);
+}
+
+static void set_power_scale(unsigned int cpu, unsigned long power)
+{
+	per_cpu(cpu_scale, cpu) = power;
+}
+
+#ifdef CONFIG_OF
+struct cpu_efficiency {
+	const char *compatible;
+	unsigned long efficiency;
+};
+
+/*
+ * Table of relative efficiency of each processor
+ * The efficiency value must fit in 20bit and the final
+ * cpu_scale value must be in the range
+ *   0 < cpu_scale < 3*SCHED_POWER_SCALE/2
+ * in order to return at most 1 when DIV_ROUND_CLOSEST
+ * is used to compute the capacity of a CPU.
+ * Processors that are not defined in the table
+ * use the default SCHED_POWER_SCALE value for cpu_scale.
+ */
+static const struct cpu_efficiency table_efficiency[] = {
+	{ "arm,cortex-a57", 3891 },
+	{ "arm,cortex-a53", 2048 },
+	{ NULL, },
+};
+
+static unsigned long *__cpu_capacity;
+#define cpu_capacity(cpu)	__cpu_capacity[cpu]
+
+static unsigned long middle_capacity = 1;
+static int cluster_id;
+
+static int __init get_cpu_for_node(struct device_node *node)
+{
+	struct device_node *cpu_node;
+	int cpu;
+
+	cpu_node = of_parse_phandle(node, "cpu", 0);
+	if (!cpu_node) {
+		pr_crit("%s: Unable to parse CPU phandle\n", node->full_name);
+		return -1;
+	}
+
+	for_each_possible_cpu(cpu) {
+		if (of_get_cpu_node(cpu, NULL) == cpu_node)
+			return cpu;
+	}
+
+	pr_crit("Unable to find CPU node for %s\n", cpu_node->full_name);
+	return -1;
+}
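Per the table above, parse_dt_topology() later computes each CPU's raw capacity as (clock-frequency >> 20) * efficiency. Working that arithmetic through with assumed, made-up clock rates:

#include <stdio.h>

/* Reproduces the capacity arithmetic from parse_dt_topology():
 * capacity = (clock-frequency >> 20) * efficiency.
 * The 1.8 GHz / 1.1 GHz frequencies are example values only. */
int main(void)
{
	unsigned long a57 = (1800000000UL >> 20) * 3891;	/* big */
	unsigned long a53 = (1100000000UL >> 20) * 2048;	/* little */

	printf("A57 capacity: %lu\n", a57);	/* 1716 * 3891 = 6676956 */
	printf("A53 capacity: %lu\n", a53);	/* 1049 * 2048 = 2148352 */
	return 0;
}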
+
+static void __init parse_core(struct device_node *core, int core_id)
+{
+	char name[10];
+	bool leaf = true;
+	int i, cpu;
+	struct device_node *t;
+
+	i = 0;
+	do {
+		snprintf(name, sizeof(name), "thread%d", i);
+		t = of_get_child_by_name(core, name);
+		if (t) {
+			leaf = false;
+			cpu = get_cpu_for_node(t);
+			if (cpu >= 0) {
+				pr_info("CPU%d: socket %d core %d thread %d\n",
+					cpu, cluster_id, core_id, i);
+				cpu_topology[cpu].socket_id = cluster_id;
+				cpu_topology[cpu].core_id = core_id;
+				cpu_topology[cpu].thread_id = i;
+			} else {
+				pr_err("%s: Can't get CPU for thread\n",
+					t->full_name);
+			}
+		}
+		i++;
+	} while (t);
+
+	cpu = get_cpu_for_node(core);
+	if (cpu >= 0) {
+		if (!leaf) {
+			pr_err("%s: Core has both threads and CPU\n",
+				core->full_name);
+			return;
+		}
+
+		pr_info("CPU%d: socket %d core %d\n",
+			cpu, cluster_id, core_id);
+		cpu_topology[cpu].socket_id = cluster_id;
+		cpu_topology[cpu].core_id = core_id;
+	} else if (leaf) {
+		pr_err("%s: Can't get CPU for leaf core\n", core->full_name);
+	}
+}
+
+static void __init parse_cluster(struct device_node *cluster)
+{
+	char name[10];
+	bool leaf = true;
+	bool has_cores = false;
+	struct device_node *c;
+	int core_id = 0;
+	int i;
+
+	/*
+	 * First check for child clusters; we currently ignore any
+	 * information about the nesting of clusters and present the
+	 * scheduler with a flat list of them.
+	 */
+	i = 0;
+	do {
+		snprintf(name, sizeof(name), "cluster%d", i);
+		c = of_get_child_by_name(cluster, name);
+		if (c) {
+			parse_cluster(c);
+			leaf = false;
+		}
+		i++;
+	} while (c);
+
+	/* Now check for cores */
+	i = 0;
+	do {
+		snprintf(name, sizeof(name), "core%d", i);
+		c = of_get_child_by_name(cluster, name);
+		if (c) {
+			has_cores = true;
+
+			if (leaf)
+				parse_core(c, core_id++);
+			else
+				pr_err("%s: Non-leaf cluster with core %s\n",
+					cluster->full_name, name);
+		}
+		i++;
+	} while (c);
+
+	if (leaf && !has_cores)
+		pr_warn("%s: empty cluster\n", cluster->full_name);
+
+	if (leaf)
+		cluster_id++;
+}
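(The thread branch of parse_core() must test cpu >= 0, not plain cpu, since get_cpu_for_node() returns -1 on failure and logical CPU 0 is a valid result; the listing above reflects the corrected check.) parse_cluster()/parse_core() discover the topology by probing child node names cluster%d, core%d and thread%d until the first gap. A toy reimplementation of the probing loop, where an array of path strings stands in for the device tree:

#include <stdio.h>
#include <string.h>

/* Toy stand-in for of_get_child_by_name(): the "device tree" is
 * just a flat list of node paths. */
static const char *nodes[] = {
	"cluster0/core0", "cluster0/core1",
	"cluster1/core0", "cluster1/core1", "cluster1/core2", NULL,
};

static int node_exists(const char *path)
{
	int i;
	for (i = 0; nodes[i]; i++)
		if (!strcmp(nodes[i], path))
			return 1;
	return 0;
}

/* Mirrors the cluster%d/core%d probing: stop at the first gap. */
int main(void)
{
	char path[32];
	int cl, core;

	for (cl = 0; ; cl++) {
		snprintf(path, sizeof(path), "cluster%d/core0", cl);
		if (!node_exists(path))
			break;
		for (core = 0; ; core++) {
			snprintf(path, sizeof(path), "cluster%d/core%d",
				 cl, core);
			if (!node_exists(path))
				break;
			printf("socket %d core %d\n", cl, core);
		}
	}
	return 0;
}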
+
+/*
+ * Iterate all CPUs' descriptor in DT and compute the efficiency
+ * (as per table_efficiency). Also calculate a middle efficiency
+ * as close as possible to (max{eff_i} - min{eff_i}) / 2
+ * This is later used to scale the cpu_power field such that an
+ * 'average' CPU is of middle power. Also see the comments near
+ * table_efficiency[] and update_cpu_power().
+ */
+static void __init parse_dt_topology(void)
+{
+	const struct cpu_efficiency *cpu_eff;
+	struct device_node *cn = NULL;
+	unsigned long min_capacity = (unsigned long)(-1);
+	unsigned long max_capacity = 0;
+	unsigned long capacity = 0;
+	int alloc_size, cpu;
+
+	alloc_size = nr_cpu_ids * sizeof(*__cpu_capacity);
+	__cpu_capacity = kzalloc(alloc_size, GFP_NOWAIT);
+
+	cn = of_find_node_by_path("/cpus");
+	if (!cn) {
+		pr_err("No CPU information found in DT\n");
+		return;
+	}
+
+	/*
+	 * If topology is provided as a cpu-map it is essentially a
+	 * root cluster.
+	 */
+	cn = of_find_node_by_name(cn, "cpu-map");
+	if (!cn)
+		return;
+	parse_cluster(cn);
+
+	for_each_possible_cpu(cpu) {
+		const u32 *rate;
+		int len;
+
+		/* Too early to use cpu->of_node */
+		cn = of_get_cpu_node(cpu, NULL);
+		if (!cn) {
+			pr_err("Missing device node for CPU %d\n", cpu);
+			continue;
+		}
+
+		/* check if the cpu is marked as "disabled", if so ignore */
+		if (!of_device_is_available(cn))
+			continue;
+
+		for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++)
+			if (of_device_is_compatible(cn, cpu_eff->compatible))
+				break;
+
+		if (cpu_eff->compatible == NULL) {
+			pr_warn("%s: Unknown CPU type\n", cn->full_name);
+			continue;
+		}
+
+		rate = of_get_property(cn, "clock-frequency", &len);
+		if (!rate || len != 4) {
+			pr_err("%s: Missing clock-frequency property\n",
+				cn->full_name);
+			continue;
+		}
+
+		capacity = ((be32_to_cpup(rate)) >> 20) * cpu_eff->efficiency;
+
+		/* Save min capacity of the system */
+		if (capacity < min_capacity)
+			min_capacity = capacity;
+
+		/* Save max capacity of the system */
+		if (capacity > max_capacity)
+			max_capacity = capacity;
+
+		cpu_capacity(cpu) = capacity;
+	}
+
+	/* If min and max capacities are equal we bypass the update of the
+	 * cpu_scale because all CPUs have the same capacity. Otherwise, we
+	 * compute a middle_capacity factor that will ensure that the capacity
+	 * of an 'average' CPU of the system will be as close as possible to
+	 * SCHED_POWER_SCALE, which is the default value, but with the
+	 * constraint explained near table_efficiency[].
+	 */
+	if (min_capacity == max_capacity)
+		return;
+	else if (4 * max_capacity < (3 * (max_capacity + min_capacity)))
+		middle_capacity = (min_capacity + max_capacity)
+				>> (SCHED_POWER_SHIFT+1);
+	else
+		middle_capacity = ((max_capacity / 3)
+				>> (SCHED_POWER_SHIFT-1)) + 1;
+}
+
+/*
+ * Look for a custom capacity of a CPU in the cpu_topo_data table during the
+ * boot. The update of all CPUs is in O(n^2) for a heterogeneous system but
+ * the function returns directly for SMP systems.
+ */
+static void update_cpu_power(unsigned int cpu)
+{
+	if (!cpu_capacity(cpu))
+		return;
+
+	set_power_scale(cpu, cpu_capacity(cpu) / middle_capacity);
+
+	pr_info("CPU%u: update cpu_power %lu\n",
+		cpu, arch_scale_freq_power(NULL, cpu));
+}
+
+#else
+static inline void parse_dt_topology(void) {}
+static inline void update_cpu_power(unsigned int cpuid) {}
+#endif
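Plugging the example capacities computed earlier into the middle_capacity selection shows which branch fires and why:

#include <stdio.h>

#define SCHED_POWER_SHIFT 10	/* SCHED_POWER_SCALE == 1024 in this era */

/* Reproduces the middle_capacity selection at the end of
 * parse_dt_topology(), using the assumed example capacities. */
int main(void)
{
	unsigned long min_capacity = 2148352;	/* A53 example */
	unsigned long max_capacity = 6676956;	/* A57 example */
	unsigned long middle_capacity;

	if (4 * max_capacity < 3 * (max_capacity + min_capacity))
		middle_capacity = (min_capacity + max_capacity)
				>> (SCHED_POWER_SHIFT + 1);
	else
		middle_capacity = ((max_capacity / 3)
				>> (SCHED_POWER_SHIFT - 1)) + 1;

	/* 4*max (26707824) >= 3*(max+min) (26475924), so the second
	 * branch fires and middle_capacity comes out as 4347. */
	printf("middle_capacity = %lu\n", middle_capacity);
	return 0;
}

With these numbers update_cpu_power() gives the A57 a cpu_scale of about 1535 and the A53 about 494, keeping the big core just under the 3*SCHED_POWER_SCALE/2 bound noted near table_efficiency[].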
+
+/*
+ * cpu topology table
+ */
+struct cputopo_arm cpu_topology[NR_CPUS];
+EXPORT_SYMBOL_GPL(cpu_topology);
+
+const struct cpumask *cpu_coregroup_mask(int cpu)
+{
+	return &cpu_topology[cpu].core_sibling;
+}
+
+static void update_siblings_masks(unsigned int cpuid)
+{
+	struct cputopo_arm *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
+	int cpu;
+
+	/* update core and thread sibling masks */
+	for_each_possible_cpu(cpu) {
+		cpu_topo = &cpu_topology[cpu];
+
+		if (cpuid_topo->socket_id != cpu_topo->socket_id)
+			continue;
+
+		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
+		if (cpu != cpuid)
+			cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
+
+		if (cpuid_topo->core_id != cpu_topo->core_id)
+			continue;
+
+		cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
+		if (cpu != cpuid)
+			cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
+	}
+	smp_wmb();
+}
+
+void store_cpu_topology(unsigned int cpuid)
+{
+	struct cputopo_arm *cpuid_topo = &cpu_topology[cpuid];
+
+	/* DT should have been parsed by the time we get here */
+	if (cpuid_topo->core_id == -1)
+		pr_info("CPU%u: No topology information configured\n", cpuid);
+	else
+		update_siblings_masks(cpuid);
+
+	update_cpu_power(cpuid);
+}
+
+#ifdef CONFIG_SCHED_HMP
+
+/*
+ * Retrieve logical cpu index corresponding to a given MPIDR[23:0]
+ *  - mpidr: MPIDR[23:0] to be used for the look-up
+ *
+ * Returns the cpu logical index or -EINVAL on look-up error
+ */
+static inline int get_logical_index(u32 mpidr)
+{
+	int cpu;
+	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
+		if (cpu_logical_map(cpu) == mpidr)
+			return cpu;
+	return -EINVAL;
+}
+
+static const char * const little_cores[] = {
+	"arm,cortex-a53",
+	NULL,
+};
+
+static bool is_little_cpu(struct device_node *cn)
+{
+	const char * const *lc;
+	for (lc = little_cores; *lc; lc++)
+		if (of_device_is_compatible(cn, *lc))
+			return true;
+	return false;
+}
+
+void __init arch_get_fast_and_slow_cpus(struct cpumask *fast,
+					struct cpumask *slow)
+{
+	struct device_node *cn = NULL;
+	int cpu;
+
+	cpumask_clear(fast);
+	cpumask_clear(slow);
+
+	/*
+	 * Use the config options if they are given. This helps testing
+	 * HMP scheduling on systems without a big.LITTLE architecture.
+	 */
+	if (strlen(CONFIG_HMP_FAST_CPU_MASK) && strlen(CONFIG_HMP_SLOW_CPU_MASK)) {
+		if (cpulist_parse(CONFIG_HMP_FAST_CPU_MASK, fast))
+			WARN(1, "Failed to parse HMP fast cpu mask!\n");
+		if (cpulist_parse(CONFIG_HMP_SLOW_CPU_MASK, slow))
+			WARN(1, "Failed to parse HMP slow cpu mask!\n");
+		return;
+	}
+
+	/*
+	 * Else, parse device tree for little cores.
+	 */
+	while ((cn = of_find_node_by_type(cn, "cpu"))) {
+
+		const u32 *mpidr;
+		int len;
+
+		mpidr = of_get_property(cn, "reg", &len);
+		if (!mpidr || len != 8) {
+			pr_err("%s missing reg property\n", cn->full_name);
+			continue;
+		}
+
+		cpu = get_logical_index(be32_to_cpup(mpidr+1));
+		if (cpu == -EINVAL) {
+			pr_err("couldn't get logical index for mpidr %x\n",
+				be32_to_cpup(mpidr+1));
+			break;
+		}
+
+		if (is_little_cpu(cn))
+			cpumask_set_cpu(cpu, slow);
+		else
+			cpumask_set_cpu(cpu, fast);
+	}
+
+	if (!cpumask_empty(fast) && !cpumask_empty(slow))
+		return;
+
+	/*
+	 * We didn't find both big and little cores so let's call all cores
+	 * fast as this will keep the system running, with all cores being
+	 * treated equal.
+	 */
+	cpumask_setall(fast);
+	cpumask_clear(slow);
+}
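arch_get_fast_and_slow_cpus() classifies each DT cpu node by its compatible string, falling back to "all fast" when only one kind of core is found. The same classification over plain bitmasks (CPU list and compatible strings invented for illustration):

#include <stdio.h>
#include <string.h>

/* Mirrors is_little_cpu(): classify a CPU by compatible string. */
static const char *const little_compat[] = { "arm,cortex-a53", NULL };

static int is_little(const char *compatible)
{
	const char *const *lc;
	for (lc = little_compat; *lc; lc++)
		if (!strcmp(compatible, *lc))
			return 1;
	return 0;
}

int main(void)
{
	const char *cpus[] = { "arm,cortex-a57", "arm,cortex-a53" };
	unsigned long fast = 0, slow = 0;	/* one bit per CPU */
	unsigned int cpu;

	for (cpu = 0; cpu < 2; cpu++) {
		if (is_little(cpus[cpu]))
			slow |= 1UL << cpu;
		else
			fast |= 1UL << cpu;
	}
	printf("fast=%#lx slow=%#lx\n", fast, slow);	/* fast=0x1 slow=0x2 */
	return 0;
}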
+
+struct cpumask hmp_slow_cpu_mask;
+
+void __init arch_get_hmp_domains(struct list_head *hmp_domains_list)
+{
+	struct cpumask hmp_fast_cpu_mask;
+	struct hmp_domain *domain;
+
+	arch_get_fast_and_slow_cpus(&hmp_fast_cpu_mask, &hmp_slow_cpu_mask);
+
+	/*
+	 * Initialize hmp_domains
+	 * Must be ordered with respect to compute capacity.
+	 * Fastest domain at head of list.
+	 */
+	if (!cpumask_empty(&hmp_slow_cpu_mask)) {
+		domain = (struct hmp_domain *)
+			kmalloc(sizeof(struct hmp_domain), GFP_KERNEL);
+		cpumask_copy(&domain->possible_cpus, &hmp_slow_cpu_mask);
+		cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus);
+		list_add(&domain->hmp_domains, hmp_domains_list);
+	}
+	domain = (struct hmp_domain *)
+		kmalloc(sizeof(struct hmp_domain), GFP_KERNEL);
+	cpumask_copy(&domain->possible_cpus, &hmp_fast_cpu_mask);
+	cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus);
+	list_add(&domain->hmp_domains, hmp_domains_list);
+}
+#endif /* CONFIG_SCHED_HMP */
+
+/*
+ * cluster_to_logical_mask - return cpu logical mask of CPUs in a cluster
+ * @socket_id:		cluster HW identifier
+ * @cluster_mask:	the cpumask location to be initialized, modified by the
+ *			function only if return value == 0
+ *
+ * Return:
+ *
+ * 0 on success
+ * -EINVAL if cluster_mask is NULL or there is no record matching socket_id
+ */
+int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask)
+{
+	int cpu;
+
+	if (!cluster_mask)
+		return -EINVAL;
+
+	for_each_online_cpu(cpu) {
+		if (socket_id == topology_physical_package_id(cpu)) {
+			cpumask_copy(cluster_mask, topology_core_cpumask(cpu));
+			return 0;
+		}
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * init_cpu_topology is called at boot when only one cpu is running,
+ * which prevents simultaneous write access to the cpu_topology array
+ */
+void __init init_cpu_topology(void)
+{
+	unsigned int cpu;
+
+	/* init core mask and power */
+	for_each_possible_cpu(cpu) {
+		struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]);
+
+		cpu_topo->thread_id = -1;
+		cpu_topo->core_id = -1;
+		cpu_topo->socket_id = -1;
+		cpumask_clear(&cpu_topo->core_sibling);
+		cpumask_clear(&cpu_topo->thread_sibling);
+
+		set_power_scale(cpu, SCHED_POWER_SCALE);
+	}
+	smp_wmb();
+
+	parse_dt_topology();
+}
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 3fae2be8b01..2c8a95b539c 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -41,7 +41,6 @@ SECTIONS
 	}
 	.text : {			/* Real text segment	*/
 		_stext = .;		/* Text and read-only data	*/
-			*(.smp.pen.text)
 		__exception_text_start = .;
 		*(.exception.text)
 		__exception_text_end = .;
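cluster_to_logical_mask() resolves a hardware cluster id to the logical-CPU mask of any online CPU in that cluster. A freestanding sketch of the same lookup with a fixed socket table (all data illustrative):

#include <errno.h>
#include <stdio.h>

#define NR 4

/* Which socket/cluster each logical CPU belongs to. */
static const int socket_of[NR] = { 0, 0, 1, 1 };

static int cluster_mask(int socket_id, unsigned long *mask)
{
	int cpu;
	unsigned long m = 0;

	if (!mask)
		return -EINVAL;
	for (cpu = 0; cpu < NR; cpu++)
		if (socket_of[cpu] == socket_id)
			m |= 1UL << cpu;
	if (!m)
		return -EINVAL;	/* no record matching socket_id */
	*mask = m;
	return 0;
}

int main(void)
{
	unsigned long m;

	if (!cluster_mask(1, &m))
		printf("cluster 1 -> cpus %#lx\n", m);	/* prints 0xc */
	return 0;
}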