-rw-r--r--  kernel/sched/core.c  |  11
-rw-r--r--  kernel/sched/fair.c  | 142
-rw-r--r--  kernel/sched/sched.h |   1
3 files changed, 142 insertions(+), 12 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3e326f9208f..5f242330ef8 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1407,7 +1407,11 @@ void scheduler_ipi(void)
 {
 	if (llist_empty(&this_rq()->wake_list)
 			&& !tick_nohz_full_cpu(smp_processor_id())
-			&& !got_nohz_idle_kick())
+			&& !got_nohz_idle_kick()
+#ifdef CONFIG_SCHED_HMP
+			&& !this_rq()->wake_for_idle_pull
+#endif
+			)
 		return;
 
 	/*
@@ -1434,6 +1438,11 @@ void scheduler_ipi(void)
 		this_rq()->idle_balance = 1;
 		raise_softirq_irqoff(SCHED_SOFTIRQ);
 	}
+#ifdef CONFIG_SCHED_HMP
+	else if (unlikely(this_rq()->wake_for_idle_pull))
+		raise_softirq_irqoff(SCHED_SOFTIRQ);
+#endif
+
 	irq_exit();
 }
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 345865ec285..494a2534c37 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -39,6 +39,9 @@
  */
 #include <linux/cpufreq.h>
 #endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */
+#ifdef CONFIG_SCHED_HMP
+#include <linux/cpuidle.h>
+#endif
 
 #include "sched.h"
 
@@ -3514,6 +3517,110 @@ static const int hmp_max_tasks = 5;
 
 extern void __init arch_get_hmp_domains(struct list_head *hmp_domains_list);
 
+#ifdef CONFIG_CPU_IDLE
+/*
+ * hmp_idle_pull:
+ *
+ * In this version we have stopped using forced up migrations when we
+ * detect that a task running on a little CPU should be moved to a bigger
+ * CPU. In most cases, the bigger CPU is in a deep sleep state and a forced
+ * migration means we stop the task immediately but need to wait for the
+ * target CPU to wake up before we can restart the task which is being
+ * moved. Instead, we now wake a big CPU with an IPI and ask it to pull
+ * a task when ready. This allows the task to continue executing on its
+ * current CPU, reducing the amount of time that the task is stalled for.
+ *
+ * keepalive timers:
+ *
+ * The keepalive timer is used as a way to keep a CPU engaged in an
+ * idle pull operation out of idle while waiting for the source
+ * CPU to stop and move the task. Ideally this would not be necessary
+ * and we could impose a temporary zero-latency requirement on the
+ * current CPU, but in the current QoS framework this will result in
+ * all CPUs in the system being unable to enter idle states which is
+ * not desirable. The timer does not perform any work when it expires.
+ */
+struct hmp_keepalive {
+	bool init;
+	ktime_t delay;	/* if zero, no need for timer */
+	struct hrtimer timer;
+};
+DEFINE_PER_CPU(struct hmp_keepalive, hmp_cpu_keepalive);
+
+/* setup per-cpu keepalive timers */
+static enum hrtimer_restart hmp_cpu_keepalive_notify(struct hrtimer *hrtimer)
+{
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * Work out if any of the idle states have an exit latency too high for us.
+ * ns_delay is passed in containing the max we are willing to tolerate.
+ * If there are none, set ns_delay to zero.
+ * If there are any, set ns_delay to
+ * ('target_residency of state with shortest too-big latency' - 1) * 1000.
+ */
+static void hmp_keepalive_delay(unsigned int *ns_delay)
+{
+	struct cpuidle_driver *drv;
+	drv = cpuidle_driver_ref();
+	if (drv) {
+		unsigned int us_delay = UINT_MAX;
+		unsigned int us_max_delay = *ns_delay / 1000;
+		int idx;
+		/* if cpuidle states are guaranteed to be sorted we
+		 * could stop at the first match.
+		 */
+		for (idx = 0; idx < drv->state_count; idx++) {
+			if (drv->states[idx].exit_latency > us_max_delay &&
+				drv->states[idx].target_residency < us_delay) {
+				us_delay = drv->states[idx].target_residency;
+			}
+		}
+		if (us_delay == UINT_MAX)
+			*ns_delay = 0; /* no timer required */
+		else
+			*ns_delay = 1000 * (us_delay - 1);
+	}
+	cpuidle_driver_unref();
+}
+
+static void hmp_cpu_keepalive_trigger(void)
+{
+	int cpu = smp_processor_id();
+	struct hmp_keepalive *keepalive = &per_cpu(hmp_cpu_keepalive, cpu);
+	if (!keepalive->init) {
+		unsigned int ns_delay = 100000; /* tolerate 100usec delay */
+
+		hrtimer_init(&keepalive->timer,
+				CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
+		keepalive->timer.function = hmp_cpu_keepalive_notify;
+
+		hmp_keepalive_delay(&ns_delay);
+		keepalive->delay = ns_to_ktime(ns_delay);
+		keepalive->init = true;
+	}
+	if (ktime_to_ns(keepalive->delay))
+		hrtimer_start(&keepalive->timer,
+			keepalive->delay, HRTIMER_MODE_REL_PINNED);
+}
+
+static void hmp_cpu_keepalive_cancel(int cpu)
+{
+	struct hmp_keepalive *keepalive = &per_cpu(hmp_cpu_keepalive, cpu);
+	if (keepalive->init)
+		hrtimer_cancel(&keepalive->timer);
+}
+#else /* !CONFIG_CPU_IDLE */
+static void hmp_cpu_keepalive_trigger(void)
+{
+}
+
+static void hmp_cpu_keepalive_cancel(int cpu)
+{
+}
+#endif
+
 /* Setup hmp_domains */
 static int __init hmp_cpu_mask_setup(void)
 {
@@ -3574,6 +3681,8 @@ static void hmp_offline_cpu(int cpu)
 
 	if(domain)
 		cpumask_clear_cpu(cpu, &domain->cpus);
+
+	hmp_cpu_keepalive_cancel(cpu);
 }
 /*
  * Needed to determine heaviest tasks etc.
@@ -7017,7 +7126,7 @@ static void hmp_force_up_migration(int this_cpu)
 		target = cpu_rq(cpu);
 		raw_spin_lock_irqsave(&target->lock, flags);
 		curr = target->cfs.curr;
-		if (!curr) {
+		if (!curr || target->active_balance) {
 			raw_spin_unlock_irqrestore(&target->lock, flags);
 			continue;
 		}
@@ -7034,16 +7143,13 @@ static void hmp_force_up_migration(int this_cpu)
 		curr = hmp_get_heaviest_task(curr, 1);
 		p = task_of(curr);
 		if (hmp_up_migration(cpu, &target_cpu, curr)) {
-			if (!target->active_balance) {
-				get_task_struct(p);
-				target->push_cpu = target_cpu;
-				target->migrate_task = p;
-				got_target = 1;
-				trace_sched_hmp_migrate(p, target->push_cpu, HMP_MIGRATE_FORCE);
-				hmp_next_up_delay(&p->se, target->push_cpu);
-			}
+			cpu_rq(target_cpu)->wake_for_idle_pull = 1;
+			raw_spin_unlock_irqrestore(&target->lock, flags);
+			spin_unlock(&hmp_force_migration);
+			smp_send_reschedule(target_cpu);
+			return;
 		}
-		if (!got_target && !target->active_balance) {
+		if (!got_target) {
 			/*
 			 * For now we just check the currently running task.
 			 * Selecting the lightest task for offloading will
@@ -7065,7 +7171,7 @@ static void hmp_force_up_migration(int this_cpu)
 	 * is not currently running move it, otherwise let the
 	 * CPU stopper take care of it.
 	 */
-	if (got_target && !target->active_balance) {
+	if (got_target) {
 		if (!task_running(target, p)) {
 			trace_sched_hmp_migrate_force_running(p, 0);
 			hmp_migrate_runnable_task(target);
@@ -7171,6 +7277,8 @@ static unsigned int hmp_idle_pull(int this_cpu)
 	raw_spin_unlock_irqrestore(&target->lock, flags);
 
 	if (force) {
+		/* start timer to keep us awake */
+		hmp_cpu_keepalive_trigger();
 		stop_one_cpu_nowait(cpu_of(target),
 			hmp_active_task_migration_cpu_stop,
 			target, &target->active_balance_work);
@@ -7194,6 +7302,18 @@ static void run_rebalance_domains(struct softirq_action *h)
 	enum cpu_idle_type idle = this_rq->idle_balance ?
 						CPU_IDLE : CPU_NOT_IDLE;
 
+#ifdef CONFIG_SCHED_HMP
+	/* shortcut for hmp idle pull wakeups */
+	if (unlikely(this_rq->wake_for_idle_pull)) {
+		this_rq->wake_for_idle_pull = 0;
+		if (hmp_idle_pull(this_cpu)) {
+			/* break out unless running nohz idle as well */
+			if (idle != CPU_IDLE)
+				return;
+		}
+	}
+#endif
+
 	hmp_force_up_migration(this_cpu);
 
 	rebalance_domains(this_cpu, idle);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 27f51ac8670..fbcda9339fc 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -466,6 +466,7 @@ struct rq {
 	struct cpu_stop_work active_balance_work;
 #ifdef CONFIG_SCHED_HMP
 	struct task_struct *migrate_task;
+	int wake_for_idle_pull;
 #endif
 	/* cpu of this runqueue: */
 	int cpu;
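
A note on the delay computation above: hmp_keepalive_delay() scans the cpuidle state table for states whose exit_latency exceeds the tolerated wakeup delay, takes the smallest target_residency among those, and sizes the keepalive period to expire one microsecond short of that residency, so the idle governor never predicts enough idle time to make a too-slow state worthwhile. The following is a minimal userspace sketch of the same selection, runnable outside the kernel; the struct idle_state type, the keepalive_delay() helper, and the three-state table are hypothetical stand-ins, not part of the patch.

#include <stdio.h>
#include <limits.h>

/* hypothetical stand-in for struct cpuidle_state (usec units) */
struct idle_state {
	const char *name;
	unsigned int exit_latency;
	unsigned int target_residency;
};

/* mirrors the selection logic of hmp_keepalive_delay() */
static void keepalive_delay(const struct idle_state *states, int count,
			    unsigned int *ns_delay)
{
	unsigned int us_delay = UINT_MAX;
	unsigned int us_max_delay = *ns_delay / 1000;
	int idx;

	/* shortest residency among states whose exit latency is too high */
	for (idx = 0; idx < count; idx++) {
		if (states[idx].exit_latency > us_max_delay &&
		    states[idx].target_residency < us_delay)
			us_delay = states[idx].target_residency;
	}
	if (us_delay == UINT_MAX)
		*ns_delay = 0;	/* every state wakes fast enough; no timer */
	else
		*ns_delay = 1000 * (us_delay - 1);
}

int main(void)
{
	/* invented big.LITTLE-style state table, values illustrative only */
	const struct idle_state states[] = {
		{ "WFI", 1,    1    },
		{ "C1",  300,  2000 },
		{ "C2",  1500, 5000 },
	};
	unsigned int ns_delay = 100000;	/* 100 usec tolerance, as in the patch */

	keepalive_delay(states, 3, &ns_delay);
	printf("keepalive period: %u ns\n", ns_delay);	/* prints 1999000 */
	return 0;
}

With the patch's 100 usec tolerance, C1 (exit latency 300 usec) is the shallowest state that is still too slow, so the computed period is (2000 - 1) * 1000 = 1999000 ns, just under C1's target residency.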
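
The comment block added to fair.c describes the handshake end to end: hmp_force_up_migration() sets wake_for_idle_pull on the big CPU's runqueue and sends a reschedule IPI; scheduler_ipi() sees the flag and raises SCHED_SOFTIRQ; run_rebalance_domains() then clears the flag and calls hmp_idle_pull() on the now-awake big CPU, while the task keeps running on the little CPU throughout. As a rough userspace analogy of that flag-and-wake pattern (pthreads; purely illustrative, nothing here is scheduler code):

#include <pthread.h>
#include <stdio.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wake = PTHREAD_COND_INITIALIZER;
static bool wake_for_idle_pull;	/* plays the role of rq->wake_for_idle_pull */

/* the "big CPU": sleeps until kicked, then pulls the work itself */
static void *big_cpu(void *arg)
{
	pthread_mutex_lock(&lock);
	while (!wake_for_idle_pull)
		pthread_cond_wait(&wake, &lock);	/* "idle" */
	wake_for_idle_pull = false;
	pthread_mutex_unlock(&lock);
	puts("big: awake, pulling the task");	/* ~hmp_idle_pull() */
	return NULL;
}

int main(void)
{
	pthread_t big;

	pthread_create(&big, NULL, big_cpu, NULL);

	/* the "little CPU": set the flag, send the "IPI", keep running */
	pthread_mutex_lock(&lock);
	wake_for_idle_pull = true;
	pthread_cond_signal(&wake);	/* ~smp_send_reschedule(target_cpu) */
	pthread_mutex_unlock(&lock);
	puts("little: task keeps running until the pull happens");

	pthread_join(big, NULL);
	return 0;
}

The point of the design, visible in both the patch and the analogy, is that the source side never blocks: the old forced-migration path stopped the task and then waited for the sleeping big CPU to wake, whereas here the task loses no time until the big CPU is actually ready to take it.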