path: root/kernel/sched/fair.c
author    Mark Brown <broonie@linaro.org>  2013-10-11 19:26:24 +0100
committer Mark Brown <broonie@linaro.org>  2013-10-11 19:26:24 +0100
commit    fa4b900fcaa6092a43fd60fe7dd72f8df654bcde (patch)
tree      a093c2bf5bdeda9b4b18dd00c029904998a9fa0c /kernel/sched/fair.c
parent    a3dfd8c06351968f2ec42feb1ae8dfab8b481225 (diff)
parent    b574d25f35fb523c471535299ccab2bc7f6ed3ea (diff)
Merge remote-tracking branch 'lsk/v3.10/topic/big.LITTLE' into linux-linaro-lsk
Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r--  kernel/sched/fair.c  302
1 file changed, 220 insertions, 82 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1d171effac2..643da90f3a7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1226,11 +1226,7 @@ struct hmp_global_attr {
int (*from_sysfs)(int);
};
-#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
-#define HMP_DATA_SYSFS_MAX 4
-#else
-#define HMP_DATA_SYSFS_MAX 3
-#endif
+#define HMP_DATA_SYSFS_MAX 8
struct hmp_data_struct {
#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
@@ -1688,6 +1684,7 @@ static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
__update_tg_runnable_avg(&rq->avg, &rq->cfs);
trace_sched_rq_runnable_ratio(cpu_of(rq), rq->avg.load_avg_ratio);
trace_sched_rq_runnable_load(cpu_of(rq), rq->cfs.runnable_load_avg);
+ trace_sched_rq_nr_running(cpu_of(rq), rq->nr_running, rq->nr_iowait.counter);
}
/* Add the load generated by se into cfs_rq's child load-average */
@@ -3664,25 +3661,46 @@ static struct sched_entity *hmp_get_lightest_task(
* Migration thresholds should be in the range [0..1023]
* hmp_up_threshold: min. load required for migrating tasks to a faster cpu
* hmp_down_threshold: max. load allowed for tasks migrating to a slower cpu
- * The default values (512, 256) offer good responsiveness, but may need
- * tweaking suit particular needs.
*
* hmp_up_prio: Only up migrate task with high priority (<hmp_up_prio)
* hmp_next_up_threshold: Delay before next up migration (1024 ~= 1 ms)
* hmp_next_down_threshold: Delay before next down migration (1024 ~= 1 ms)
+ *
+ * Small Task Packing:
+ * We can choose to fill the littlest CPUs in an HMP system rather than
+ * the typical spreading mechanic. This behavior is controllable using
+ * two variables.
+ * hmp_packing_enabled: runtime control over pack/spread
+ * hmp_full_threshold: Consider a CPU with this much unweighted load full
*/
-unsigned int hmp_up_threshold = 512;
-unsigned int hmp_down_threshold = 256;
+unsigned int hmp_up_threshold = 700;
+unsigned int hmp_down_threshold = 512;
#ifdef CONFIG_SCHED_HMP_PRIO_FILTER
unsigned int hmp_up_prio = NICE_TO_PRIO(CONFIG_SCHED_HMP_PRIO_FILTER_VAL);
#endif
unsigned int hmp_next_up_threshold = 4096;
unsigned int hmp_next_down_threshold = 4096;
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+unsigned int hmp_packing_enabled = 1;
+#ifndef CONFIG_ARCH_VEXPRESS_TC2
+unsigned int hmp_full_threshold = (NICE_0_LOAD * 9) / 8;
+#else
+/* TC2 has a sharp consumption curve @ around 800Mhz, so
+ we aim to spread the load around that frequency. */
+unsigned int hmp_full_threshold = 650; /* 80% of the 800Mhz freq * NICE_0_LOAD */
+#endif
+#endif
+
static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_entity *se);
static unsigned int hmp_down_migration(int cpu, struct sched_entity *se);
static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd,
- int *min_cpu);
+ int *min_cpu, struct cpumask *affinity);
+
+static inline struct hmp_domain *hmp_smallest_domain(void)
+{
+ return list_entry(hmp_domains.prev, struct hmp_domain, hmp_domains);
+}
/* Check if cpu is in fastest hmp_domain */
static inline unsigned int hmp_cpu_is_fastest(int cpu)
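As an aside on how the raised thresholds above are used: a task's tracked load ratio (0..1023) is compared against hmp_up_threshold to qualify for a move to a faster CPU and against hmp_down_threshold to qualify for a move to a slower one. Below is a minimal userspace sketch of that comparison using the new default values; plain integers stand in for struct sched_avg, and the exact boundary handling in the scheduler may differ.

/* Illustrative sketch only, not part of the patch. */
#include <stdio.h>

static const unsigned int hmp_up_threshold = 700;    /* new default */
static const unsigned int hmp_down_threshold = 512;  /* new default */

/* "min. load required for migrating tasks to a faster cpu" */
static int qualifies_for_up_migration(unsigned int load_avg_ratio)
{
	return load_avg_ratio >= hmp_up_threshold;
}

/* "max. load allowed for tasks migrating to a slower cpu" */
static int qualifies_for_down_migration(unsigned int load_avg_ratio)
{
	return load_avg_ratio <= hmp_down_threshold;
}

int main(void)
{
	printf("load 800: up=%d\n", qualifies_for_up_migration(800));     /* 1 */
	printf("load 300: down=%d\n", qualifies_for_down_migration(300)); /* 1 */
	return 0;
}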
@@ -3722,22 +3740,23 @@ static inline struct hmp_domain *hmp_faster_domain(int cpu)
/*
* Selects a cpu in previous (faster) hmp_domain
- * Note that cpumask_any_and() returns the first cpu in the cpumask
*/
static inline unsigned int hmp_select_faster_cpu(struct task_struct *tsk,
int cpu)
{
int lowest_cpu=NR_CPUS;
- __always_unused int lowest_ratio = hmp_domain_min_load(hmp_faster_domain(cpu), &lowest_cpu);
- /*
- * If the lowest-loaded CPU in the domain is allowed by the task affinity
- * select that one, otherwise select one which is allowed
- */
- if(lowest_cpu != NR_CPUS && cpumask_test_cpu(lowest_cpu,tsk_cpus_allowed(tsk)))
- return lowest_cpu;
+ __always_unused int lowest_ratio;
+ struct hmp_domain *hmp;
+
+ if (hmp_cpu_is_fastest(cpu))
+ hmp = hmp_cpu_domain(cpu);
else
- return cpumask_any_and(&hmp_faster_domain(cpu)->cpus,
- tsk_cpus_allowed(tsk));
+ hmp = hmp_faster_domain(cpu);
+
+ lowest_ratio = hmp_domain_min_load(hmp, &lowest_cpu,
+ tsk_cpus_allowed(tsk));
+
+ return lowest_cpu;
}
/*
@@ -3756,18 +3775,54 @@ static inline unsigned int hmp_select_slower_cpu(struct task_struct *tsk,
else
hmp = hmp_slower_domain(cpu);
- lowest_ratio = hmp_domain_min_load(hmp, &lowest_cpu);
- /*
- * If the lowest-loaded CPU in the domain is allowed by the task affinity
- * select that one, otherwise select one which is allowed
- */
- if(lowest_cpu != NR_CPUS && cpumask_test_cpu(lowest_cpu,tsk_cpus_allowed(tsk)))
- return lowest_cpu;
- else
- return cpumask_any_and(&hmp_slower_domain(cpu)->cpus,
- tsk_cpus_allowed(tsk));
+ lowest_ratio = hmp_domain_min_load(hmp, &lowest_cpu,
+ tsk_cpus_allowed(tsk));
+
+ return lowest_cpu;
}
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+/*
+ * Select the 'best' candidate little CPU to wake up on.
+ * Implements a packing strategy which examines CPU in
+ * logical CPU order, and selects the first which will
+ * have at least 10% capacity available, according to
+ * both tracked load of the runqueue and the task.
+ */
+static inline unsigned int hmp_best_little_cpu(struct task_struct *tsk,
+ int cpu) {
+ int tmp_cpu;
+ unsigned long estimated_load;
+ struct hmp_domain *hmp;
+ struct sched_avg *avg;
+ struct cpumask allowed_hmp_cpus;
+
+ if(!hmp_packing_enabled ||
+ tsk->se.avg.load_avg_ratio > ((NICE_0_LOAD * 90)/100))
+ return hmp_select_slower_cpu(tsk, cpu);
+
+ if (hmp_cpu_is_slowest(cpu))
+ hmp = hmp_cpu_domain(cpu);
+ else
+ hmp = hmp_slower_domain(cpu);
+ /* respect affinity */
+ cpumask_and(&allowed_hmp_cpus, &hmp->cpus,
+ tsk_cpus_allowed(tsk));
+
+ for_each_cpu_mask(tmp_cpu, allowed_hmp_cpus) {
+ avg = &cpu_rq(tmp_cpu)->avg;
+ /* estimate new rq load if we add this task */
+ estimated_load = avg->load_avg_ratio +
+ tsk->se.avg.load_avg_ratio;
+ if (estimated_load <= hmp_full_threshold) {
+ cpu = tmp_cpu;
+ break;
+ }
+ }
+ /* if no match was found, the task uses the initial value */
+ return cpu;
+}
+#endif
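A userspace sketch of the packing walk implemented by hmp_best_little_cpu() above: visit little CPUs in logical order and take the first whose tracked runqueue load, plus the waking task's load, stays at or under hmp_full_threshold. The kernel version additionally honours the task's affinity mask and falls back to hmp_select_slower_cpu() for tasks above ~90% of NICE_0_LOAD; the array below is a made-up stand-in for per-runqueue sched_avg state.

/* Illustrative sketch only, not part of the patch. */
#include <stdio.h>

#define NR_LITTLE_CPUS 4

static const unsigned int hmp_full_threshold = 650;   /* TC2 default above */

static int best_little_cpu(const unsigned int rq_load[NR_LITTLE_CPUS],
			   unsigned int task_load, int fallback_cpu)
{
	int cpu;

	for (cpu = 0; cpu < NR_LITTLE_CPUS; cpu++) {
		unsigned int estimated_load = rq_load[cpu] + task_load;

		if (estimated_load <= hmp_full_threshold)
			return cpu;        /* first CPU with enough headroom */
	}
	return fallback_cpu;               /* every little CPU looks full */
}

int main(void)
{
	unsigned int rq_load[NR_LITTLE_CPUS] = { 700, 640, 100, 0 };

	/* A task of load 120 skips CPUs 0 and 1 and packs onto CPU 2. */
	printf("chosen cpu: %d\n", best_little_cpu(rq_load, 120, 0));
	return 0;
}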
static inline void hmp_next_up_delay(struct sched_entity *se, int cpu)
{
/* hack - always use clock from first online CPU */
@@ -3891,6 +3946,15 @@ static int hmp_freqinvar_from_sysfs(int value)
return value;
}
#endif
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+/* packing value must be non-negative */
+static int hmp_packing_from_sysfs(int value)
+{
+ if (value < 0)
+ return -1;
+ return value;
+}
+#endif
static void hmp_attr_add(
const char *name,
int *value,
@@ -3943,6 +4007,16 @@ static int hmp_attr_init(void)
NULL,
hmp_freqinvar_from_sysfs);
#endif
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+ hmp_attr_add("packing_enable",
+ &hmp_packing_enabled,
+ NULL,
+ hmp_freqinvar_from_sysfs);
+ hmp_attr_add("packing_limit",
+ &hmp_full_threshold,
+ NULL,
+ hmp_packing_from_sysfs);
+#endif
hmp_data.attr_group.name = "hmp";
hmp_data.attr_group.attrs = hmp_data.attributes;
ret = sysfs_create_group(kernel_kobj,
@@ -3951,9 +4025,24 @@ static int hmp_attr_init(void)
}
late_initcall(hmp_attr_init);
#endif /* CONFIG_HMP_VARIABLE_SCALE */
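The two new attributes registered above live in the existing "hmp" group created on kernel_kobj, so with CONFIG_SCHED_HMP_LITTLE_PACKING they should surface as /sys/kernel/hmp/packing_enable and /sys/kernel/hmp/packing_limit (path assumed from the group setup above). A small sketch of driving them from userspace, with minimal error handling:

/* Illustrative sketch only, not part of the patch. */
#include <stdio.h>

static int write_sysfs(const char *path, const char *value)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;    /* knob absent (config off) or insufficient permissions */
	fputs(value, f);
	fclose(f);
	return 0;
}

int main(void)
{
	write_sysfs("/sys/kernel/hmp/packing_enable", "1");    /* pack rather than spread */
	write_sysfs("/sys/kernel/hmp/packing_limit", "650");   /* unweighted "full" load */
	return 0;
}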
-
+/*
+ * return the load of the lowest-loaded CPU in a given HMP domain
+ * min_cpu optionally points to an int to receive the CPU.
+ * affinity optionally points to a cpumask containing the
+ * CPUs to be considered. note:
+ * + min_cpu = NR_CPUS only if no CPUs are in the set of
+ * affinity && hmp_domain cpus
+ * + min_cpu will always otherwise equal one of the CPUs in
+ * the hmp domain
+ * + when more than one CPU has the same load, the one which
+ * is least-recently-disturbed by an HMP migration will be
+ * selected
+ * + if all CPUs are equally loaded or idle and the times are
+ * all the same, the first in the set will be used
+ * + if affinity is not set, cpu_online_mask is used
+ */
static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd,
- int *min_cpu)
+ int *min_cpu, struct cpumask *affinity)
{
int cpu;
int min_cpu_runnable_temp = NR_CPUS;
@@ -3962,8 +4051,15 @@ static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd,
unsigned long min_runnable_load = INT_MAX;
unsigned long contrib;
struct sched_avg *avg;
+ struct cpumask temp_cpumask;
+ /*
+ * only look at CPUs allowed if specified,
+ * otherwise look at all online CPUs in the
+ * right HMP domain
+ */
+ cpumask_and(&temp_cpumask, &hmpd->cpus, affinity ? affinity : cpu_online_mask);
- for_each_cpu_mask(cpu, hmpd->cpus) {
+ for_each_cpu_mask(cpu, temp_cpumask) {
avg = &cpu_rq(cpu)->avg;
/* used for both up and down migration */
curr_last_migration = avg->hmp_last_up_migration ?
@@ -4025,27 +4121,36 @@ static inline unsigned int hmp_offload_down(int cpu, struct sched_entity *se)
return NR_CPUS;
/* Is there an idle CPU in the current domain */
- min_usage = hmp_domain_min_load(hmp_cpu_domain(cpu), NULL);
- if (min_usage == 0)
+ min_usage = hmp_domain_min_load(hmp_cpu_domain(cpu), NULL, NULL);
+ if (min_usage == 0) {
+ trace_sched_hmp_offload_abort(cpu, min_usage, "load");
return NR_CPUS;
+ }
/* Is the task alone on the cpu? */
- if (cpu_rq(cpu)->cfs.h_nr_running < 2)
+ if (cpu_rq(cpu)->cfs.h_nr_running < 2) {
+ trace_sched_hmp_offload_abort(cpu,
+ cpu_rq(cpu)->cfs.h_nr_running, "nr_running");
return NR_CPUS;
+ }
/* Is the task actually starving? */
/* >=25% ratio running/runnable = starving */
- if (hmp_task_starvation(se) > 768)
+ if (hmp_task_starvation(se) > 768) {
+ trace_sched_hmp_offload_abort(cpu, hmp_task_starvation(se),
+ "starvation");
return NR_CPUS;
+ }
/* Does the slower domain have any idle CPUs? */
- min_usage = hmp_domain_min_load(hmp_slower_domain(cpu), &dest_cpu);
- if (min_usage > 0)
- return NR_CPUS;
+ min_usage = hmp_domain_min_load(hmp_slower_domain(cpu), &dest_cpu,
+ tsk_cpus_allowed(task_of(se)));
- if (cpumask_test_cpu(dest_cpu, &hmp_slower_domain(cpu)->cpus))
+ if (min_usage == 0) {
+ trace_sched_hmp_offload_succeed(cpu, dest_cpu);
return dest_cpu;
-
+ } else
+ trace_sched_hmp_offload_abort(cpu,min_usage,"slowdomain");
return NR_CPUS;
}
#endif /* CONFIG_SCHED_HMP */
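The common thread in the hunks above is that hmp_domain_min_load() now takes an optional affinity mask: the candidate set becomes the domain's CPUs intersected with either that mask or cpu_online_mask, which is what lets callers such as hmp_select_faster_cpu(), hmp_select_slower_cpu() and hmp_offload_down() drop their own affinity fallbacks. A bitmask sketch of that candidate-set computation, with plain unsigned longs standing in for struct cpumask:

/* Illustrative sketch only, not part of the patch. */
#include <stdio.h>

static unsigned long candidate_cpus(unsigned long domain_cpus,
				    unsigned long online_cpus,
				    const unsigned long *affinity)
{
	/* mirrors: cpumask_and(&temp, &hmpd->cpus, affinity ? affinity : cpu_online_mask) */
	return domain_cpus & (affinity ? *affinity : online_cpus);
}

int main(void)
{
	unsigned long little_domain = 0x0f;  /* CPUs 0-3 */
	unsigned long online        = 0x0e;  /* CPU 0 offline */
	unsigned long task_affinity = 0x05;  /* task bound to CPUs 0 and 2 */

	printf("no affinity:   0x%lx\n",
	       candidate_cpus(little_domain, online, NULL));            /* 0xe */
	printf("with affinity: 0x%lx\n",
	       candidate_cpus(little_domain, online, &task_affinity));  /* 0x5 */
	return 0;
}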
@@ -4077,30 +4182,13 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
#ifdef CONFIG_SCHED_HMP
/* always put non-kernel forking tasks on a big domain */
if (p->mm && (sd_flag & SD_BALANCE_FORK)) {
- if(hmp_cpu_is_fastest(prev_cpu)) {
- struct hmp_domain *hmpdom = list_entry(&hmp_cpu_domain(prev_cpu)->hmp_domains, struct hmp_domain, hmp_domains);
- __always_unused int lowest_ratio = hmp_domain_min_load(hmpdom, &new_cpu);
- if (new_cpu != NR_CPUS &&
- cpumask_test_cpu(new_cpu,
- tsk_cpus_allowed(p))) {
- hmp_next_up_delay(&p->se, new_cpu);
- return new_cpu;
- } else {
- new_cpu = cpumask_any_and(
- &hmp_faster_domain(cpu)->cpus,
- tsk_cpus_allowed(p));
- if (new_cpu < nr_cpu_ids) {
- hmp_next_up_delay(&p->se, new_cpu);
- return new_cpu;
- }
- }
- } else {
- new_cpu = hmp_select_faster_cpu(p, prev_cpu);
- if (new_cpu != NR_CPUS) {
- hmp_next_up_delay(&p->se, new_cpu);
- return new_cpu;
- }
+ new_cpu = hmp_select_faster_cpu(p, prev_cpu);
+ if (new_cpu != NR_CPUS) {
+ hmp_next_up_delay(&p->se, new_cpu);
+ return new_cpu;
}
+ /* failed to perform HMP fork balance, use normal balance */
+ new_cpu = cpu;
}
#endif
@@ -4179,16 +4267,24 @@ unlock:
rcu_read_unlock();
#ifdef CONFIG_SCHED_HMP
+ prev_cpu = task_cpu(p);
+
if (hmp_up_migration(prev_cpu, &new_cpu, &p->se)) {
hmp_next_up_delay(&p->se, new_cpu);
- trace_sched_hmp_migrate(p, new_cpu, 0);
+ trace_sched_hmp_migrate(p, new_cpu, HMP_MIGRATE_WAKEUP);
return new_cpu;
}
if (hmp_down_migration(prev_cpu, &p->se)) {
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+ new_cpu = hmp_best_little_cpu(p, prev_cpu);
+#else
new_cpu = hmp_select_slower_cpu(p, prev_cpu);
- hmp_next_down_delay(&p->se, new_cpu);
- trace_sched_hmp_migrate(p, new_cpu, 0);
- return new_cpu;
+#endif
+ if (new_cpu != prev_cpu) {
+ hmp_next_down_delay(&p->se, new_cpu);
+ trace_sched_hmp_migrate(p, new_cpu, HMP_MIGRATE_WAKEUP);
+ return new_cpu;
+ }
}
/* Make sure that the task stays in its previous hmp domain */
if (!cpumask_test_cpu(new_cpu, &hmp_cpu_domain(prev_cpu)->cpus))
@@ -6155,16 +6251,49 @@ static struct {
unsigned long next_balance; /* in jiffy units */
} nohz ____cacheline_aligned;
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+/*
+ * Decide if the tasks on the busy CPUs in the
+ * littlest domain would benefit from an idle balance
+ */
+static int hmp_packing_ilb_needed(int cpu)
+{
+ struct hmp_domain *hmp;
+ /* always allow ilb on non-slowest domain */
+ if (!hmp_cpu_is_slowest(cpu))
+ return 1;
+
+ hmp = hmp_cpu_domain(cpu);
+ for_each_cpu_and(cpu, &hmp->cpus, nohz.idle_cpus_mask) {
+ /* only idle balance if a CPU is loaded over threshold */
+ if (cpu_rq(cpu)->avg.load_avg_ratio > hmp_full_threshold)
+ return 1;
+ }
+ return 0;
+}
+#endif
+
static inline int find_new_ilb(int call_cpu)
{
int ilb = cpumask_first(nohz.idle_cpus_mask);
#ifdef CONFIG_SCHED_HMP
+ int ilb_needed = 1;
+
/* restrict nohz balancing to occur in the same hmp domain */
ilb = cpumask_first_and(nohz.idle_cpus_mask,
&((struct hmp_domain *)hmp_cpu_domain(call_cpu))->cpus);
+
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+ if (ilb < nr_cpu_ids)
+ ilb_needed = hmp_packing_ilb_needed(ilb);
#endif
+
+ if (ilb_needed && ilb < nr_cpu_ids && idle_cpu(ilb))
+ return ilb;
+#else
if (ilb < nr_cpu_ids && idle_cpu(ilb))
return ilb;
+#endif
return nr_cpu_ids;
}
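A sketch of the idle-balance gate added above for the littlest domain: an idle CPU is only nominated as the nohz balancer if some CPU in that domain is loaded past the packing threshold; otherwise the packed CPUs are left alone. For brevity this scans every little CPU, whereas the kernel version restricts the scan to the domain's CPUs currently in nohz.idle_cpus_mask.

/* Illustrative sketch only, not part of the patch. */
#include <stdio.h>

#define NR_LITTLE_CPUS 4

static const unsigned int hmp_full_threshold = 650;

static int packing_ilb_needed(const unsigned int rq_load[NR_LITTLE_CPUS])
{
	int cpu;

	for (cpu = 0; cpu < NR_LITTLE_CPUS; cpu++)
		if (rq_load[cpu] > hmp_full_threshold)
			return 1;   /* someone is over the packing limit */
	return 0;                   /* packed comfortably, leave idle CPUs asleep */
}

int main(void)
{
	unsigned int packed[NR_LITTLE_CPUS]     = { 600, 640, 0, 0 };
	unsigned int overflowed[NR_LITTLE_CPUS] = { 900, 640, 0, 0 };

	printf("packed:     ilb=%d\n", packing_ilb_needed(packed));      /* 0 */
	printf("overflowed: ilb=%d\n", packing_ilb_needed(overflowed));  /* 1 */
	return 0;
}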
@@ -6490,11 +6619,9 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) { }
static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_entity *se)
{
struct task_struct *p = task_of(se);
+ int temp_target_cpu;
u64 now;
- if (target_cpu)
- *target_cpu = NR_CPUS;
-
if (hmp_cpu_is_fastest(cpu))
return 0;
@@ -6517,13 +6644,12 @@ static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_enti
* idle CPU or 1023 for any partly-busy one.
* Be explicit about requirement for an idle CPU.
*/
- if (hmp_domain_min_load(hmp_faster_domain(cpu), target_cpu) != 0)
- return 0;
-
- if (cpumask_intersects(&hmp_faster_domain(cpu)->cpus,
- tsk_cpus_allowed(p)))
+ if (hmp_domain_min_load(hmp_faster_domain(cpu), &temp_target_cpu,
+ tsk_cpus_allowed(p)) == 0 && temp_target_cpu != NR_CPUS) {
+ if(target_cpu)
+ *target_cpu = temp_target_cpu;
return 1;
-
+ }
return 0;
}
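The rewritten check above folds the old load test and the separate affinity test into one call: a big CPU is only reported as a target when hmp_domain_min_load() sees an idle CPU (load 0) within the task's affinity, i.e. temp_target_cpu != NR_CPUS. A small sketch of that gate with stand-in values:

/* Illustrative sketch only, not part of the patch. */
#include <stdio.h>

#define NR_CPUS 8

static int up_migration_target(unsigned int big_min_load, int big_min_cpu,
			       int *target_cpu)
{
	/* idle (0) and a real, affinity-allowed CPU found */
	if (big_min_load == 0 && big_min_cpu != NR_CPUS) {
		if (target_cpu)
			*target_cpu = big_min_cpu;
		return 1;
	}
	return 0;
}

int main(void)
{
	int target = NR_CPUS;
	int ok;

	ok = up_migration_target(1023, 4, &target);           /* all bigs busy */
	printf("busy bigs: ok=%d\n", ok);                      /* 0 */

	ok = up_migration_target(0, 5, &target);               /* CPU 5 idle */
	printf("idle big:  ok=%d target=%d\n", ok, target);    /* 1, 5 */
	return 0;
}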
@@ -6533,8 +6659,14 @@ static unsigned int hmp_down_migration(int cpu, struct sched_entity *se)
struct task_struct *p = task_of(se);
u64 now;
- if (hmp_cpu_is_slowest(cpu))
+ if (hmp_cpu_is_slowest(cpu)) {
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+ if(hmp_packing_enabled)
+ return 1;
+ else
+#endif
return 0;
+ }
#ifdef CONFIG_SCHED_HMP_PRIO_FILTER
/* Filter by task priority */
@@ -6703,6 +6835,7 @@ static int hmp_active_task_migration_cpu_stop(void *data)
rcu_read_unlock();
double_unlock_balance(busiest_rq, target_rq);
out_unlock:
+ put_task_struct(p);
busiest_rq->active_balance = 0;
raw_spin_unlock_irq(&busiest_rq->lock);
return 0;
@@ -6776,6 +6909,7 @@ static int hmp_idle_pull_cpu_stop(void *data)
rcu_read_unlock();
double_unlock_balance(busiest_rq, target_rq);
out_unlock:
+ put_task_struct(p);
busiest_rq->active_balance = 0;
raw_spin_unlock_irq(&busiest_rq->lock);
return 0;
@@ -6821,11 +6955,12 @@ static void hmp_force_up_migration(int this_cpu)
p = task_of(curr);
if (hmp_up_migration(cpu, &target_cpu, curr)) {
if (!target->active_balance) {
+ get_task_struct(p);
target->active_balance = 1;
target->push_cpu = target_cpu;
target->migrate_task = p;
force = 1;
- trace_sched_hmp_migrate(p, target->push_cpu, 1);
+ trace_sched_hmp_migrate(p, target->push_cpu, HMP_MIGRATE_FORCE);
hmp_next_up_delay(&p->se, target->push_cpu);
}
}
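The trace_sched_hmp_migrate() call sites in this file now pass named reasons instead of bare literals. The constants themselves are defined elsewhere in the series, not in this file; the sketch below only records the mapping implied by the replacements here, and the numeric values are an assumption carried over from the literals they replace.

/* Assumed mapping, for reference only; see the trace header in the series. */
enum hmp_migrate_reason_sketch {
	HMP_MIGRATE_WAKEUP    = 0,   /* was trace_sched_hmp_migrate(p, cpu, 0) */
	HMP_MIGRATE_FORCE     = 1,   /* was trace_sched_hmp_migrate(p, cpu, 1) */
	HMP_MIGRATE_OFFLOAD   = 2,   /* was trace_sched_hmp_migrate(p, cpu, 2) */
	HMP_MIGRATE_IDLE_PULL = 3,   /* was trace_sched_hmp_migrate(p, cpu, 3) */
};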
@@ -6836,12 +6971,14 @@ static void hmp_force_up_migration(int this_cpu)
* require extensive book keeping.
*/
curr = hmp_get_lightest_task(orig, 1);
+ p = task_of(curr);
target->push_cpu = hmp_offload_down(cpu, curr);
if (target->push_cpu < NR_CPUS) {
+ get_task_struct(p);
target->active_balance = 1;
target->migrate_task = p;
force = 1;
- trace_sched_hmp_migrate(p, target->push_cpu, 2);
+ trace_sched_hmp_migrate(p, target->push_cpu, HMP_MIGRATE_OFFLOAD);
hmp_next_down_delay(&p->se, target->push_cpu);
}
}
@@ -6916,11 +7053,12 @@ static unsigned int hmp_idle_pull(int this_cpu)
/* now we have a candidate */
raw_spin_lock_irqsave(&target->lock, flags);
if (!target->active_balance && task_rq(p) == target) {
+ get_task_struct(p);
target->active_balance = 1;
target->push_cpu = this_cpu;
target->migrate_task = p;
force = 1;
- trace_sched_hmp_migrate(p, target->push_cpu, 3);
+ trace_sched_hmp_migrate(p, target->push_cpu, HMP_MIGRATE_IDLE_PULL);
hmp_next_up_delay(&p->se, target->push_cpu);
}
raw_spin_unlock_irqrestore(&target->lock, flags);
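The get_task_struct()/put_task_struct() pairs added across these hunks pin the chosen task while a forced migration request sits in rq->migrate_task waiting for the CPU stopper: the reference is taken where the request is queued (hmp_force_up_migration(), hmp_idle_pull()) and dropped at the stoppers' out_unlock labels. A userspace sketch of that pairing, with a plain counter standing in for the task usage refcount and made-up helper names:

/* Illustrative sketch only, not part of the patch. */
#include <assert.h>
#include <stdio.h>

struct task { int usage; };

static void get_task_struct(struct task *p) { p->usage++; }
static void put_task_struct(struct task *p) { p->usage--; }

struct rq {
	struct task *migrate_task;
	int active_balance;
};

/* stands in for the active_balance + stop_one_cpu_nowait() setup */
static void queue_forced_migration(struct rq *rq, struct task *p)
{
	get_task_struct(p);          /* p must stay valid until the stopper runs */
	rq->active_balance = 1;
	rq->migrate_task = p;
}

/* stands in for hmp_active_task_migration_cpu_stop() / hmp_idle_pull_cpu_stop() */
static void migration_stopper(struct rq *rq)
{
	/* ... try to move rq->migrate_task, possibly bailing out early ... */
	put_task_struct(rq->migrate_task);   /* balances queue_forced_migration() */
	rq->migrate_task = NULL;
	rq->active_balance = 0;
}

int main(void)
{
	struct task t = { .usage = 1 };
	struct rq rq = { NULL, 0 };

	queue_forced_migration(&rq, &t);
	migration_stopper(&rq);
	assert(t.usage == 1);                /* reference taken and released */
	printf("usage back to %d\n", t.usage);
	return 0;
}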