| author | Mark Brown <broonie@linaro.org> | 2013-10-11 19:26:24 +0100 |
|---|---|---|
| committer | Mark Brown <broonie@linaro.org> | 2013-10-11 19:26:24 +0100 |
| commit | fa4b900fcaa6092a43fd60fe7dd72f8df654bcde (patch) | |
| tree | a093c2bf5bdeda9b4b18dd00c029904998a9fa0c /kernel/sched/fair.c | |
| parent | a3dfd8c06351968f2ec42feb1ae8dfab8b481225 (diff) | |
| parent | b574d25f35fb523c471535299ccab2bc7f6ed3ea (diff) | |
Merge remote-tracking branch 'lsk/v3.10/topic/big.LITTLE' into linux-linaro-lsk
Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r-- | kernel/sched/fair.c | 302 |
1 file changed, 220 insertions, 82 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1d171effac2..643da90f3a7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1226,11 +1226,7 @@ struct hmp_global_attr {
 	int (*from_sysfs)(int);
 };
 
-#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
-#define HMP_DATA_SYSFS_MAX 4
-#else
-#define HMP_DATA_SYSFS_MAX 3
-#endif
+#define HMP_DATA_SYSFS_MAX 8
 
 struct hmp_data_struct {
 #ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
@@ -1688,6 +1684,7 @@ static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
 	__update_tg_runnable_avg(&rq->avg, &rq->cfs);
 	trace_sched_rq_runnable_ratio(cpu_of(rq), rq->avg.load_avg_ratio);
 	trace_sched_rq_runnable_load(cpu_of(rq), rq->cfs.runnable_load_avg);
+	trace_sched_rq_nr_running(cpu_of(rq), rq->nr_running, rq->nr_iowait.counter);
 }
 
 /* Add the load generated by se into cfs_rq's child load-average */
@@ -3664,25 +3661,46 @@ static struct sched_entity *hmp_get_lightest_task(
  * Migration thresholds should be in the range [0..1023]
  * hmp_up_threshold: min. load required for migrating tasks to a faster cpu
  * hmp_down_threshold: max. load allowed for tasks migrating to a slower cpu
- * The default values (512, 256) offer good responsiveness, but may need
- * tweaking suit particular needs.
  *
  * hmp_up_prio: Only up migrate task with high priority (<hmp_up_prio)
  * hmp_next_up_threshold: Delay before next up migration (1024 ~= 1 ms)
  * hmp_next_down_threshold: Delay before next down migration (1024 ~= 1 ms)
+ *
+ * Small Task Packing:
+ * We can choose to fill the littlest CPUs in an HMP system rather than
+ * the typical spreading mechanic. This behavior is controllable using
+ * two variables.
+ * hmp_packing_enabled: runtime control over pack/spread
+ * hmp_full_threshold: Consider a CPU with this much unweighted load full
  */
-unsigned int hmp_up_threshold = 512;
-unsigned int hmp_down_threshold = 256;
+unsigned int hmp_up_threshold = 700;
+unsigned int hmp_down_threshold = 512;
 #ifdef CONFIG_SCHED_HMP_PRIO_FILTER
 unsigned int hmp_up_prio = NICE_TO_PRIO(CONFIG_SCHED_HMP_PRIO_FILTER_VAL);
 #endif
 unsigned int hmp_next_up_threshold = 4096;
 unsigned int hmp_next_down_threshold = 4096;
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+unsigned int hmp_packing_enabled = 1;
+#ifndef CONFIG_ARCH_VEXPRESS_TC2
+unsigned int hmp_full_threshold = (NICE_0_LOAD * 9) / 8;
+#else
+/* TC2 has a sharp consumption curve @ around 800Mhz, so
+ we aim to spread the load around that frequency.
+ */
+unsigned int hmp_full_threshold = 650;	/* 80% of the 800Mhz freq * NICE_0_LOAD */
+#endif
+#endif
+
 static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_entity *se);
 static unsigned int hmp_down_migration(int cpu, struct sched_entity *se);
 static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd,
-						int *min_cpu);
+						int *min_cpu, struct cpumask *affinity);
+
+static inline struct hmp_domain *hmp_smallest_domain(void)
+{
+	return list_entry(hmp_domains.prev, struct hmp_domain, hmp_domains);
+}
 
 /* Check if cpu is in fastest hmp_domain */
 static inline unsigned int hmp_cpu_is_fastest(int cpu)
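The thresholds above sit on the same [0..1023] scale as a task's tracked load_avg_ratio, and the new defaults (700 up, 512 down) leave a deliberate gap. A minimal standalone sketch of how that hysteresis band behaves, assuming strict inequalities as in the HMP migration checks (illustrative only, not part of the patch):

```c
/*
 * Illustrative sketch: how the new default thresholds partition the
 * 0..1023 load_avg_ratio scale. The gap between 512 and 700 acts as
 * a hysteresis band so a task near the boundary does not ping-pong
 * between clusters.
 */
#include <stdio.h>

#define HMP_UP_THRESHOLD   700	/* new default, was 512 */
#define HMP_DOWN_THRESHOLD 512	/* new default, was 256 */

static const char *hmp_classify(unsigned int load_avg_ratio)
{
	if (load_avg_ratio > HMP_UP_THRESHOLD)
		return "eligible for up-migration to a fast CPU";
	if (load_avg_ratio < HMP_DOWN_THRESHOLD)
		return "allowed to down-migrate to a slow CPU";
	return "hysteresis band: stays where it is";
}

int main(void)
{
	unsigned int samples[] = { 100, 512, 600, 700, 900 };

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("ratio %4u -> %s\n", samples[i], hmp_classify(samples[i]));
	return 0;
}
```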
@@ -3722,22 +3740,23 @@ static inline struct hmp_domain *hmp_faster_domain(int cpu)
 /*
  * Selects a cpu in previous (faster) hmp_domain
- * Note that cpumask_any_and() returns the first cpu in the cpumask
  */
 static inline unsigned int hmp_select_faster_cpu(struct task_struct *tsk,
 							int cpu)
 {
 	int lowest_cpu=NR_CPUS;
-	__always_unused int lowest_ratio = hmp_domain_min_load(hmp_faster_domain(cpu), &lowest_cpu);
-	/*
-	 * If the lowest-loaded CPU in the domain is allowed by the task affinity
-	 * select that one, otherwise select one which is allowed
-	 */
-	if(lowest_cpu != NR_CPUS && cpumask_test_cpu(lowest_cpu,tsk_cpus_allowed(tsk)))
-		return lowest_cpu;
+	__always_unused int lowest_ratio;
+	struct hmp_domain *hmp;
+
+	if (hmp_cpu_is_fastest(cpu))
+		hmp = hmp_cpu_domain(cpu);
 	else
-		return cpumask_any_and(&hmp_faster_domain(cpu)->cpus,
-				tsk_cpus_allowed(tsk));
+		hmp = hmp_faster_domain(cpu);
+
+	lowest_ratio = hmp_domain_min_load(hmp, &lowest_cpu,
+			tsk_cpus_allowed(tsk));
+
+	return lowest_cpu;
 }
 
 /*
@@ -3756,18 +3775,54 @@ static inline unsigned int hmp_select_slower_cpu(struct task_struct *tsk,
 	else
 		hmp = hmp_slower_domain(cpu);
 
-	lowest_ratio = hmp_domain_min_load(hmp, &lowest_cpu);
-	/*
-	 * If the lowest-loaded CPU in the domain is allowed by the task affinity
-	 * select that one, otherwise select one which is allowed
-	 */
-	if(lowest_cpu != NR_CPUS && cpumask_test_cpu(lowest_cpu,tsk_cpus_allowed(tsk)))
-		return lowest_cpu;
-	else
-		return cpumask_any_and(&hmp_slower_domain(cpu)->cpus,
-				tsk_cpus_allowed(tsk));
+	lowest_ratio = hmp_domain_min_load(hmp, &lowest_cpu,
+			tsk_cpus_allowed(tsk));
+
+	return lowest_cpu;
 }
 
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+/*
+ * Select the 'best' candidate little CPU to wake up on.
+ * Implements a packing strategy which examines CPU in
+ * logical CPU order, and selects the first which will
+ * have at least 10% capacity available, according to
+ * both tracked load of the runqueue and the task.
+ */
+static inline unsigned int hmp_best_little_cpu(struct task_struct *tsk,
+		int cpu) {
+	int tmp_cpu;
+	unsigned long estimated_load;
+	struct hmp_domain *hmp;
+	struct sched_avg *avg;
+	struct cpumask allowed_hmp_cpus;
+
+	if(!hmp_packing_enabled ||
+			tsk->se.avg.load_avg_ratio > ((NICE_0_LOAD * 90)/100))
+		return hmp_select_slower_cpu(tsk, cpu);
+
+	if (hmp_cpu_is_slowest(cpu))
+		hmp = hmp_cpu_domain(cpu);
+	else
+		hmp = hmp_slower_domain(cpu);
+	/* respect affinity */
+	cpumask_and(&allowed_hmp_cpus, &hmp->cpus,
+		tsk_cpus_allowed(tsk));
+
+	for_each_cpu_mask(tmp_cpu, allowed_hmp_cpus) {
+		avg = &cpu_rq(tmp_cpu)->avg;
+		/* estimate new rq load if we add this task */
+		estimated_load = avg->load_avg_ratio +
+				tsk->se.avg.load_avg_ratio;
+		if (estimated_load <= hmp_full_threshold) {
+			cpu = tmp_cpu;
+			break;
+		}
+	}
+	/* if no match was found, the task uses the initial value */
+	return cpu;
+}
+#endif
 
 static inline void hmp_next_up_delay(struct sched_entity *se, int cpu)
 {
 	/* hack - always use clock from first online CPU */
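hmp_best_little_cpu() above is a first-fit walk: little CPUs are examined in logical order and the first one whose runqueue would stay at or under hmp_full_threshold with the task added is chosen. A simplified user-space sketch of the same walk, with plain arrays standing in for cpumasks and per-rq tracked load (the types here are hypothetical):

```c
/*
 * Standalone sketch of the first-fit packing walk performed by
 * hmp_best_little_cpu(). In the kernel, big tasks fall back to
 * hmp_select_slower_cpu() instead of a fixed fallback CPU.
 */
#include <stdio.h>
#include <stddef.h>

#define NICE_0_LOAD 1024u

struct little_cpu {
	int id;
	unsigned long rq_load_avg_ratio;	/* tracked load of this runqueue */
};

/* Returns the chosen CPU id, or fallback_cpu if every candidate is "full". */
static int pick_packed_cpu(const struct little_cpu *cpus, size_t n,
			   unsigned long task_load_avg_ratio,
			   unsigned long full_threshold, int fallback_cpu)
{
	/* big tasks (>90% of NICE_0_LOAD) are not packed at all */
	if (task_load_avg_ratio > (NICE_0_LOAD * 90) / 100)
		return fallback_cpu;

	for (size_t i = 0; i < n; i++) {
		/* estimate the rq load if the task were added here */
		unsigned long estimated =
			cpus[i].rq_load_avg_ratio + task_load_avg_ratio;
		if (estimated <= full_threshold)
			return cpus[i].id;	/* first fit wins */
	}
	return fallback_cpu;
}

int main(void)
{
	struct little_cpu littles[] = { {0, 900}, {1, 400}, {2, 100} };
	int cpu = pick_packed_cpu(littles, 3, 200, 650, /* fallback */ 0);

	printf("packed onto cpu %d\n", cpu);	/* cpu 1: 400 + 200 <= 650 */
	return 0;
}
```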
@@ -3891,6 +3946,15 @@ static int hmp_freqinvar_from_sysfs(int value)
 	return value;
 }
 #endif
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+/* packing value must be non-negative */
+static int hmp_packing_from_sysfs(int value)
+{
+	if (value < 0)
+		return -1;
+	return value;
+}
+#endif
 static void hmp_attr_add(
 	const char *name,
 	int *value,
@@ -3943,6 +4007,16 @@ static int hmp_attr_init(void)
 		NULL,
 		hmp_freqinvar_from_sysfs);
 #endif
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+	hmp_attr_add("packing_enable",
+		&hmp_packing_enabled,
+		NULL,
+		hmp_freqinvar_from_sysfs);
+	hmp_attr_add("packing_limit",
+		&hmp_full_threshold,
+		NULL,
+		hmp_packing_from_sysfs);
+#endif
 	hmp_data.attr_group.name = "hmp";
 	hmp_data.attr_group.attrs = hmp_data.attributes;
 	ret = sysfs_create_group(kernel_kobj,
@@ -3951,9 +4025,24 @@ static int hmp_attr_init(void)
 }
 late_initcall(hmp_attr_init);
 #endif /* CONFIG_HMP_VARIABLE_SCALE */
-
+/*
+ * return the load of the lowest-loaded CPU in a given HMP domain
+ * min_cpu optionally points to an int to receive the CPU.
+ * affinity optionally points to a cpumask containing the
+ * CPUs to be considered. note:
+ *	+ min_cpu = NR_CPUS only if no CPUs are in the set of
+ *	  affinity && hmp_domain cpus
+ *	+ min_cpu will always otherwise equal one of the CPUs in
+ *	  the hmp domain
+ *	+ when more than one CPU has the same load, the one which
+ *	  is least-recently-disturbed by an HMP migration will be
+ *	  selected
+ *	+ if all CPUs are equally loaded or idle and the times are
+ *	  all the same, the first in the set will be used
+ *	+ if affinity is not set, cpu_online_mask is used
+ */
 static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd,
-						int *min_cpu)
+						int *min_cpu, struct cpumask *affinity)
 {
 	int cpu;
 	int min_cpu_runnable_temp = NR_CPUS;
@@ -3962,8 +4051,15 @@ static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd,
 	unsigned long min_runnable_load = INT_MAX;
 	unsigned long contrib;
 	struct sched_avg *avg;
+	struct cpumask temp_cpumask;
+	/*
+	 * only look at CPUs allowed if specified,
+	 * otherwise look at all online CPUs in the
+	 * right HMP domain
+	 */
+	cpumask_and(&temp_cpumask, &hmpd->cpus, affinity ? affinity : cpu_online_mask);
 
-	for_each_cpu_mask(cpu, hmpd->cpus) {
+	for_each_cpu_mask(cpu, temp_cpumask) {
 		avg = &cpu_rq(cpu)->avg;
 		/* used for both up and down migration */
 		curr_last_migration = avg->hmp_last_up_migration ?
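The two hmp_attr_add() calls above expose the packing knobs through the existing "hmp" attribute group, which hmp_attr_init() registers on kernel_kobj, i.e. under /sys/kernel/hmp/. A hedged user-space helper showing how they could be driven once the patch is applied (the paths are inferred from that registration; adjust for your tree):

```c
/*
 * Hypothetical user-space helper for the new packing tunables.
 * Paths follow from sysfs_create_group(kernel_kobj, ...) with
 * attr_group.name = "hmp" in hmp_attr_init().
 */
#include <stdio.h>

static int write_tunable(const char *path, int value)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return -1;
	}
	fprintf(f, "%d\n", value);
	return fclose(f);
}

int main(void)
{
	/* turn packing on and consider a little CPU full at load 650 */
	write_tunable("/sys/kernel/hmp/packing_enable", 1);
	write_tunable("/sys/kernel/hmp/packing_limit", 650);
	return 0;
}
```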
@@ -4025,27 +4121,36 @@ static inline unsigned int hmp_offload_down(int cpu, struct sched_entity *se)
 		return NR_CPUS;
 
 	/* Is there an idle CPU in the current domain */
-	min_usage = hmp_domain_min_load(hmp_cpu_domain(cpu), NULL);
-	if (min_usage == 0)
+	min_usage = hmp_domain_min_load(hmp_cpu_domain(cpu), NULL, NULL);
+	if (min_usage == 0) {
+		trace_sched_hmp_offload_abort(cpu, min_usage, "load");
 		return NR_CPUS;
+	}
 
 	/* Is the task alone on the cpu? */
-	if (cpu_rq(cpu)->cfs.h_nr_running < 2)
+	if (cpu_rq(cpu)->cfs.h_nr_running < 2) {
+		trace_sched_hmp_offload_abort(cpu,
+			cpu_rq(cpu)->cfs.h_nr_running, "nr_running");
 		return NR_CPUS;
+	}
 
 	/* Is the task actually starving? */
 	/* >=25% ratio running/runnable = starving */
-	if (hmp_task_starvation(se) > 768)
+	if (hmp_task_starvation(se) > 768) {
+		trace_sched_hmp_offload_abort(cpu, hmp_task_starvation(se),
+			"starvation");
 		return NR_CPUS;
+	}
 
 	/* Does the slower domain have any idle CPUs? */
-	min_usage = hmp_domain_min_load(hmp_slower_domain(cpu), &dest_cpu);
-	if (min_usage > 0)
-		return NR_CPUS;
+	min_usage = hmp_domain_min_load(hmp_slower_domain(cpu), &dest_cpu,
+			tsk_cpus_allowed(task_of(se)));
 
-	if (cpumask_test_cpu(dest_cpu, &hmp_slower_domain(cpu)->cpus))
+	if (min_usage == 0) {
+		trace_sched_hmp_offload_succeed(cpu, dest_cpu);
 		return dest_cpu;
-
+	} else
+		trace_sched_hmp_offload_abort(cpu,min_usage,"slowdomain");
 	return NR_CPUS;
 }
 #endif /* CONFIG_SCHED_HMP */
@@ -4077,30 +4182,13 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 #ifdef CONFIG_SCHED_HMP
 	/* always put non-kernel forking tasks on a big domain */
 	if (p->mm && (sd_flag & SD_BALANCE_FORK)) {
-		if(hmp_cpu_is_fastest(prev_cpu)) {
-			struct hmp_domain *hmpdom = list_entry(&hmp_cpu_domain(prev_cpu)->hmp_domains, struct hmp_domain, hmp_domains);
-			__always_unused int lowest_ratio = hmp_domain_min_load(hmpdom, &new_cpu);
-			if (new_cpu != NR_CPUS &&
-					cpumask_test_cpu(new_cpu,
-						tsk_cpus_allowed(p))) {
-				hmp_next_up_delay(&p->se, new_cpu);
-				return new_cpu;
-			} else {
-				new_cpu = cpumask_any_and(
-					&hmp_faster_domain(cpu)->cpus,
-					tsk_cpus_allowed(p));
-				if (new_cpu < nr_cpu_ids) {
-					hmp_next_up_delay(&p->se, new_cpu);
-					return new_cpu;
-				}
-			}
-		} else {
-			new_cpu = hmp_select_faster_cpu(p, prev_cpu);
-			if (new_cpu != NR_CPUS) {
-				hmp_next_up_delay(&p->se, new_cpu);
-				return new_cpu;
-			}
+		new_cpu = hmp_select_faster_cpu(p, prev_cpu);
+		if (new_cpu != NR_CPUS) {
+			hmp_next_up_delay(&p->se, new_cpu);
+			return new_cpu;
 		}
+		/* failed to perform HMP fork balance, use normal balance */
+		new_cpu = cpu;
 	}
 #endif
@@ -4179,16 +4267,24 @@ unlock:
 	rcu_read_unlock();
 
 #ifdef CONFIG_SCHED_HMP
+	prev_cpu = task_cpu(p);
+
 	if (hmp_up_migration(prev_cpu, &new_cpu, &p->se)) {
 		hmp_next_up_delay(&p->se, new_cpu);
-		trace_sched_hmp_migrate(p, new_cpu, 0);
+		trace_sched_hmp_migrate(p, new_cpu, HMP_MIGRATE_WAKEUP);
 		return new_cpu;
 	}
 	if (hmp_down_migration(prev_cpu, &p->se)) {
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+		new_cpu = hmp_best_little_cpu(p, prev_cpu);
+#else
 		new_cpu = hmp_select_slower_cpu(p, prev_cpu);
-		hmp_next_down_delay(&p->se, new_cpu);
-		trace_sched_hmp_migrate(p, new_cpu, 0);
-		return new_cpu;
+#endif
+		if (new_cpu != prev_cpu) {
+			hmp_next_down_delay(&p->se, new_cpu);
+			trace_sched_hmp_migrate(p, new_cpu, HMP_MIGRATE_WAKEUP);
+			return new_cpu;
+		}
 	}
 	/* Make sure that the task stays in its previous hmp domain */
 	if (!cpumask_test_cpu(new_cpu, &hmp_cpu_domain(prev_cpu)->cpus))
@@ -6155,16 +6251,49 @@ static struct {
 	unsigned long next_balance;	/* in jiffy units */
 } nohz ____cacheline_aligned;
 
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+/*
+ * Decide if the tasks on the busy CPUs in the
+ * littlest domain would benefit from an idle balance
+ */
+static int hmp_packing_ilb_needed(int cpu)
+{
+	struct hmp_domain *hmp;
+	/* always allow ilb on non-slowest domain */
+	if (!hmp_cpu_is_slowest(cpu))
+		return 1;
+
+	hmp = hmp_cpu_domain(cpu);
+	for_each_cpu_and(cpu, &hmp->cpus, nohz.idle_cpus_mask) {
+		/* only idle balance if a CPU is loaded over threshold */
+		if (cpu_rq(cpu)->avg.load_avg_ratio > hmp_full_threshold)
+			return 1;
+	}
+	return 0;
+}
+#endif
+
 static inline int find_new_ilb(int call_cpu)
 {
 	int ilb = cpumask_first(nohz.idle_cpus_mask);
 #ifdef CONFIG_SCHED_HMP
+	int ilb_needed = 1;
+
 	/* restrict nohz balancing to occur in the same hmp domain */
 	ilb = cpumask_first_and(nohz.idle_cpus_mask,
 			&((struct hmp_domain *)hmp_cpu_domain(call_cpu))->cpus);
+
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+	if (ilb < nr_cpu_ids)
+		ilb_needed = hmp_packing_ilb_needed(ilb);
 #endif
+
+	if (ilb_needed && ilb < nr_cpu_ids && idle_cpu(ilb))
+		return ilb;
+#else
 	if (ilb < nr_cpu_ids && idle_cpu(ilb))
 		return ilb;
+#endif
 
 	return nr_cpu_ids;
 }
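hmp_packing_ilb_needed() above vetoes a nohz idle kick on the littlest cluster unless one of its nohz-idle CPUs still carries tracked load above hmp_full_threshold, so packing is not undone by needless balancing. A rough sketch of that gate, mirroring the loop structure with plain arrays (my reading of the intent; treat as illustrative):

```c
/*
 * Sketch of the idle-balance gate; "idle_nohz" stands in for
 * membership of nohz.idle_cpus_mask, and load_avg_ratio for the
 * rq's tracked load.
 */
#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

struct cpu_state {
	bool idle_nohz;			/* in nohz.idle_cpus_mask */
	unsigned long load_avg_ratio;	/* rq tracked load */
};

/*
 * Mirror of hmp_packing_ilb_needed(): on the slowest domain, only
 * report an idle balance as useful if some nohz-idle CPU there is
 * still loaded past "full".
 */
static bool packing_ilb_needed(const struct cpu_state *domain, size_t n,
			       bool domain_is_slowest,
			       unsigned long full_threshold)
{
	if (!domain_is_slowest)
		return true;	/* never veto ilb on faster domains */

	for (size_t i = 0; i < n; i++)
		if (domain[i].idle_nohz &&
		    domain[i].load_avg_ratio > full_threshold)
			return true;
	return false;
}

int main(void)
{
	struct cpu_state littles[] = {
		{ true, 700 },	/* nohz-idle but rq load still high */
		{ false, 300 },
	};

	printf("ilb needed: %d\n",
	       packing_ilb_needed(littles, 2, true, 650));	/* prints 1 */
	return 0;
}
```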
@@ -6490,11 +6619,9 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) { }
 static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_entity *se)
 {
 	struct task_struct *p = task_of(se);
+	int temp_target_cpu;
 	u64 now;
 
-	if (target_cpu)
-		*target_cpu = NR_CPUS;
-
 	if (hmp_cpu_is_fastest(cpu))
 		return 0;
@@ -6517,13 +6644,12 @@ static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_enti
 	 * idle CPU or 1023 for any partly-busy one.
 	 * Be explicit about requirement for an idle CPU.
 	 */
-	if (hmp_domain_min_load(hmp_faster_domain(cpu), target_cpu) != 0)
-		return 0;
-
-	if (cpumask_intersects(&hmp_faster_domain(cpu)->cpus,
-			tsk_cpus_allowed(p)))
+	if (hmp_domain_min_load(hmp_faster_domain(cpu), &temp_target_cpu,
+			tsk_cpus_allowed(p)) == 0 && temp_target_cpu != NR_CPUS) {
+		if(target_cpu)
+			*target_cpu = temp_target_cpu;
 		return 1;
-
+	}
 	return 0;
 }
@@ -6533,8 +6659,14 @@ static unsigned int hmp_down_migration(int cpu, struct sched_entity *se)
 	struct task_struct *p = task_of(se);
 	u64 now;
 
-	if (hmp_cpu_is_slowest(cpu))
+	if (hmp_cpu_is_slowest(cpu)) {
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+		if(hmp_packing_enabled)
+			return 1;
+		else
+#endif
 		return 0;
+	}
 
 #ifdef CONFIG_SCHED_HMP_PRIO_FILTER
 	/* Filter by task priority */
@@ -6703,6 +6835,7 @@ static int hmp_active_task_migration_cpu_stop(void *data)
 	rcu_read_unlock();
 	double_unlock_balance(busiest_rq, target_rq);
 out_unlock:
+	put_task_struct(p);
 	busiest_rq->active_balance = 0;
 	raw_spin_unlock_irq(&busiest_rq->lock);
 	return 0;
@@ -6776,6 +6909,7 @@ static int hmp_idle_pull_cpu_stop(void *data)
 	rcu_read_unlock();
 	double_unlock_balance(busiest_rq, target_rq);
 out_unlock:
+	put_task_struct(p);
 	busiest_rq->active_balance = 0;
 	raw_spin_unlock_irq(&busiest_rq->lock);
 	return 0;
@@ -6821,11 +6955,12 @@ static void hmp_force_up_migration(int this_cpu)
 		p = task_of(curr);
 		if (hmp_up_migration(cpu, &target_cpu, curr)) {
 			if (!target->active_balance) {
+				get_task_struct(p);
 				target->active_balance = 1;
 				target->push_cpu = target_cpu;
 				target->migrate_task = p;
 				force = 1;
-				trace_sched_hmp_migrate(p, target->push_cpu, 1);
+				trace_sched_hmp_migrate(p, target->push_cpu, HMP_MIGRATE_FORCE);
 				hmp_next_up_delay(&p->se, target->push_cpu);
 			}
 		}
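The get_task_struct()/put_task_struct() pairs added in this and the surrounding hunks pin p while it sits in target->migrate_task waiting for a stopper callback that runs later; without the reference the task could exit first and the callback would touch freed memory. A minimal sketch of the pattern, with hypothetical helpers standing in for the kernel primitives:

```c
/*
 * Minimal sketch of the reference-counting pattern the patch adds
 * around deferred migration. task_get()/task_put() are hypothetical
 * stand-ins for get_task_struct()/put_task_struct(), and the
 * "worker" stands in for the stop-machine callback.
 */
#include <stdatomic.h>
#include <stdlib.h>

struct task {
	atomic_int usage;
	/* ... */
};

static void task_get(struct task *t) { atomic_fetch_add(&t->usage, 1); }

static void task_put(struct task *t)
{
	if (atomic_fetch_sub(&t->usage, 1) == 1)
		free(t);	/* last reference dropped */
}

/* producer side: queue the task for a worker that runs later */
static struct task *queue_migration(struct task *t)
{
	task_get(t);	/* pin the task until the worker is done */
	return t;	/* handed to the asynchronous worker */
}

/* consumer side: the deferred worker, mirroring *_cpu_stop() */
static void migration_worker(struct task *t)
{
	/* ... move the task between runqueues ... */
	task_put(t);	/* matches the task_get() at queue time */
}

int main(void)
{
	struct task *t = malloc(sizeof(*t));

	atomic_init(&t->usage, 1);		/* the task's own reference */
	migration_worker(queue_migration(t));	/* worker drops its ref */
	task_put(t);				/* final ref gone: freed */
	return 0;
}
```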
@@ -6836,12 +6971,14 @@
 			 * require extensive book keeping.
 			 */
 			curr = hmp_get_lightest_task(orig, 1);
+			p = task_of(curr);
 			target->push_cpu = hmp_offload_down(cpu, curr);
 			if (target->push_cpu < NR_CPUS) {
+				get_task_struct(p);
 				target->active_balance = 1;
 				target->migrate_task = p;
 				force = 1;
-				trace_sched_hmp_migrate(p, target->push_cpu, 2);
+				trace_sched_hmp_migrate(p, target->push_cpu, HMP_MIGRATE_OFFLOAD);
 				hmp_next_down_delay(&p->se, target->push_cpu);
 			}
 		}
@@ -6916,11 +7053,12 @@
 	/* now we have a candidate */
 	raw_spin_lock_irqsave(&target->lock, flags);
 	if (!target->active_balance && task_rq(p) == target) {
+		get_task_struct(p);
 		target->active_balance = 1;
 		target->push_cpu = this_cpu;
 		target->migrate_task = p;
 		force = 1;
-		trace_sched_hmp_migrate(p, target->push_cpu, 3);
+		trace_sched_hmp_migrate(p, target->push_cpu, HMP_MIGRATE_IDLE_PULL);
 		hmp_next_up_delay(&p->se, target->push_cpu);
 	}
 	raw_spin_unlock_irqrestore(&target->lock, flags);
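The HMP_MIGRATE_* names used above replace the former literal 0/1/2/3 passed to trace_sched_hmp_migrate(). Their actual definition lives in the scheduler trace header, which this diff does not include; from the one-for-one substitutions a plausible form is:

```c
/*
 * Assumed definitions, reconstructed from the 0->WAKEUP, 1->FORCE,
 * 2->OFFLOAD, 3->IDLE_PULL substitutions in this patch; the real
 * ones are in the trace header, not shown here.
 */
#define HMP_MIGRATE_WAKEUP	0	/* wakeup-time up/down migration */
#define HMP_MIGRATE_FORCE	1	/* forced up-migration */
#define HMP_MIGRATE_OFFLOAD	2	/* offload to the little domain */
#define HMP_MIGRATE_IDLE_PULL	3	/* big CPU idle-pulls a heavy task */
```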