path: root/kernel/sched/fair.c
author    Mark Brown <broonie@linaro.org>  2013-10-11 19:26:24 +0100
committer Mark Brown <broonie@linaro.org>  2013-10-11 19:26:24 +0100
commit    fa4b900fcaa6092a43fd60fe7dd72f8df654bcde (patch)
tree      a093c2bf5bdeda9b4b18dd00c029904998a9fa0c /kernel/sched/fair.c
parent    a3dfd8c06351968f2ec42feb1ae8dfab8b481225 (diff)
parent    b574d25f35fb523c471535299ccab2bc7f6ed3ea (diff)
Merge remote-tracking branch 'lsk/v3.10/topic/big.LITTLE' into linux-linaro-lsk
Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r--  kernel/sched/fair.c  302
1 file changed, 220 insertions, 82 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1d171effac2..643da90f3a7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1226,11 +1226,7 @@ struct hmp_global_attr {
int (*from_sysfs)(int);
};
-#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
-#define HMP_DATA_SYSFS_MAX 4
-#else
-#define HMP_DATA_SYSFS_MAX 3
-#endif
+#define HMP_DATA_SYSFS_MAX 8
struct hmp_data_struct {
#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
@@ -1688,6 +1684,7 @@ static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
__update_tg_runnable_avg(&rq->avg, &rq->cfs);
trace_sched_rq_runnable_ratio(cpu_of(rq), rq->avg.load_avg_ratio);
trace_sched_rq_runnable_load(cpu_of(rq), rq->cfs.runnable_load_avg);
+ trace_sched_rq_nr_running(cpu_of(rq), rq->nr_running, rq->nr_iowait.counter);
}
/* Add the load generated by se into cfs_rq's child load-average */
@@ -3664,25 +3661,46 @@ static struct sched_entity *hmp_get_lightest_task(
* Migration thresholds should be in the range [0..1023]
* hmp_up_threshold: min. load required for migrating tasks to a faster cpu
* hmp_down_threshold: max. load allowed for tasks migrating to a slower cpu
- * The default values (512, 256) offer good responsiveness, but may need
- * tweaking suit particular needs.
*
* hmp_up_prio: Only up migrate task with high priority (<hmp_up_prio)
* hmp_next_up_threshold: Delay before next up migration (1024 ~= 1 ms)
* hmp_next_down_threshold: Delay before next down migration (1024 ~= 1 ms)
+ *
+ * Small Task Packing:
+ * We can choose to fill the littlest CPUs in an HMP system rather than
+ * the typical spreading mechanic. This behavior is controllable using
+ * two variables.
+ * hmp_packing_enabled: runtime control over pack/spread
+ * hmp_full_threshold: Consider a CPU with this much unweighted load full
*/
-unsigned int hmp_up_threshold = 512;
-unsigned int hmp_down_threshold = 256;
+unsigned int hmp_up_threshold = 700;
+unsigned int hmp_down_threshold = 512;
#ifdef CONFIG_SCHED_HMP_PRIO_FILTER
unsigned int hmp_up_prio = NICE_TO_PRIO(CONFIG_SCHED_HMP_PRIO_FILTER_VAL);
#endif
unsigned int hmp_next_up_threshold = 4096;
unsigned int hmp_next_down_threshold = 4096;
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+unsigned int hmp_packing_enabled = 1;
+#ifndef CONFIG_ARCH_VEXPRESS_TC2
+unsigned int hmp_full_threshold = (NICE_0_LOAD * 9) / 8;
+#else
+/* TC2 has a sharp consumption curve @ around 800Mhz, so
+ we aim to spread the load around that frequency. */
+unsigned int hmp_full_threshold = 650; /* 80% of the 800Mhz freq * NICE_0_LOAD */
+#endif
+#endif
+
static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_entity *se);
static unsigned int hmp_down_migration(int cpu, struct sched_entity *se);
static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd,
- int *min_cpu);
+ int *min_cpu, struct cpumask *affinity);
+
+static inline struct hmp_domain *hmp_smallest_domain(void)
+{
+ return list_entry(hmp_domains.prev, struct hmp_domain, hmp_domains);
+}
/* Check if cpu is in fastest hmp_domain */
static inline unsigned int hmp_cpu_is_fastest(int cpu)
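As an aside on how the raised thresholds above are used: a task's tracked load ratio (0..1023) is compared against hmp_up_threshold to qualify for a move to a faster CPU and against hmp_down_threshold to qualify for a move to a slower one. Below is a minimal userspace sketch of that comparison using the new default values; plain integers stand in for struct sched_avg, and the exact boundary handling in the scheduler may differ.

/* Illustrative sketch only, not part of the patch. */
#include <stdio.h>

static const unsigned int hmp_up_threshold = 700;    /* new default */
static const unsigned int hmp_down_threshold = 512;  /* new default */

/* "min. load required for migrating tasks to a faster cpu" */
static int qualifies_for_up_migration(unsigned int load_avg_ratio)
{
	return load_avg_ratio >= hmp_up_threshold;
}

/* "max. load allowed for tasks migrating to a slower cpu" */
static int qualifies_for_down_migration(unsigned int load_avg_ratio)
{
	return load_avg_ratio <= hmp_down_threshold;
}

int main(void)
{
	printf("load 800: up=%d\n", qualifies_for_up_migration(800));     /* 1 */
	printf("load 300: down=%d\n", qualifies_for_down_migration(300)); /* 1 */
	return 0;
}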
@@ -3722,22 +3740,23 @@ static inline struct hmp_domain *hmp_faster_domain(int cpu)
/*
* Selects a cpu in previous (faster) hmp_domain
- * Note that cpumask_any_and() returns the first cpu in the cpumask
*/
static inline unsigned int hmp_select_faster_cpu(struct task_struct *tsk,
int cpu)
{
int lowest_cpu=NR_CPUS;
- __always_unused int lowest_ratio = hmp_domain_min_load(hmp_faster_domain(cpu), &lowest_cpu);
- /*
- * If the lowest-loaded CPU in the domain is allowed by the task affinity
- * select that one, otherwise select one which is allowed
- */
- if(lowest_cpu != NR_CPUS && cpumask_test_cpu(lowest_cpu,tsk_cpus_allowed(tsk)))
- return lowest_cpu;
+ __always_unused int lowest_ratio;
+ struct hmp_domain *hmp;
+
+ if (hmp_cpu_is_fastest(cpu))
+ hmp = hmp_cpu_domain(cpu);
else
- return cpumask_any_and(&hmp_faster_domain(cpu)->cpus,
- tsk_cpus_allowed(tsk));
+ hmp = hmp_faster_domain(cpu);
+
+ lowest_ratio = hmp_domain_min_load(hmp, &lowest_cpu,
+ tsk_cpus_allowed(tsk));
+
+ return lowest_cpu;
}
/*
@@ -3756,18 +3775,54 @@ static inline unsigned int hmp_select_slower_cpu(struct task_struct *tsk,
else
hmp = hmp_slower_domain(cpu);
- lowest_ratio = hmp_domain_min_load(hmp, &lowest_cpu);
- /*
- * If the lowest-loaded CPU in the domain is allowed by the task affinity
- * select that one, otherwise select one which is allowed
- */
- if(lowest_cpu != NR_CPUS && cpumask_test_cpu(lowest_cpu,tsk_cpus_allowed(tsk)))
- return lowest_cpu;
- else
- return cpumask_any_and(&hmp_slower_domain(cpu)->cpus,
- tsk_cpus_allowed(tsk));
+ lowest_ratio = hmp_domain_min_load(hmp, &lowest_cpu,
+ tsk_cpus_allowed(tsk));
+
+ return lowest_cpu;
}
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+/*
+ * Select the 'best' candidate little CPU to wake up on.
+ * Implements a packing strategy which examines CPU in
+ * logical CPU order, and selects the first which will
+ * have at least 10% capacity available, according to
+ * both tracked load of the runqueue and the task.
+ */
+static inline unsigned int hmp_best_little_cpu(struct task_struct *tsk,
+ int cpu) {
+ int tmp_cpu;
+ unsigned long estimated_load;
+ struct hmp_domain *hmp;
+ struct sched_avg *avg;
+ struct cpumask allowed_hmp_cpus;
+
+ if(!hmp_packing_enabled ||
+ tsk->se.avg.load_avg_ratio > ((NICE_0_LOAD * 90)/100))
+ return hmp_select_slower_cpu(tsk, cpu);
+
+ if (hmp_cpu_is_slowest(cpu))
+ hmp = hmp_cpu_domain(cpu);
+ else
+ hmp = hmp_slower_domain(cpu);
+ /* respect affinity */
+ cpumask_and(&allowed_hmp_cpus, &hmp->cpus,
+ tsk_cpus_allowed(tsk));
+
+ for_each_cpu_mask(tmp_cpu, allowed_hmp_cpus) {
+ avg = &cpu_rq(tmp_cpu)->avg;
+ /* estimate new rq load if we add this task */
+ estimated_load = avg->load_avg_ratio +
+ tsk->se.avg.load_avg_ratio;
+ if (estimated_load <= hmp_full_threshold) {
+ cpu = tmp_cpu;
+ break;
+ }
+ }
+ /* if no match was found, the task uses the initial value */
+ return cpu;
+}
+#endif
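A userspace sketch of the packing walk implemented by hmp_best_little_cpu() above: visit little CPUs in logical order and take the first whose tracked runqueue load, plus the waking task's load, stays at or under hmp_full_threshold. The kernel version additionally honours the task's affinity mask and falls back to hmp_select_slower_cpu() for tasks above ~90% of NICE_0_LOAD; the array below is a made-up stand-in for per-runqueue sched_avg state.

/* Illustrative sketch only, not part of the patch. */
#include <stdio.h>

#define NR_LITTLE_CPUS 4

static const unsigned int hmp_full_threshold = 650;   /* TC2 default above */

static int best_little_cpu(const unsigned int rq_load[NR_LITTLE_CPUS],
			   unsigned int task_load, int fallback_cpu)
{
	int cpu;

	for (cpu = 0; cpu < NR_LITTLE_CPUS; cpu++) {
		unsigned int estimated_load = rq_load[cpu] + task_load;

		if (estimated_load <= hmp_full_threshold)
			return cpu;        /* first CPU with enough headroom */
	}
	return fallback_cpu;               /* every little CPU looks full */
}

int main(void)
{
	unsigned int rq_load[NR_LITTLE_CPUS] = { 700, 640, 100, 0 };

	/* A task of load 120 skips CPUs 0 and 1 and packs onto CPU 2. */
	printf("chosen cpu: %d\n", best_little_cpu(rq_load, 120, 0));
	return 0;
}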
static inline void hmp_next_up_delay(struct sched_entity *se, int cpu)
{
/* hack - always use clock from first online CPU */
@@ -3891,6 +3946,15 @@ static int hmp_freqinvar_from_sysfs(int value)
return value;
}
#endif
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+/* packing value must be non-negative */
+static int hmp_packing_from_sysfs(int value)
+{
+ if (value < 0)
+ return -1;
+ return value;
+}
+#endif
static void hmp_attr_add(
const char *name,
int *value,
@@ -3943,6 +4007,16 @@ static int hmp_attr_init(void)
NULL,
hmp_freqinvar_from_sysfs);
#endif
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+ hmp_attr_add("packing_enable",
+ &hmp_packing_enabled,
+ NULL,
+ hmp_freqinvar_from_sysfs);
+ hmp_attr_add("packing_limit",
+ &hmp_full_threshold,
+ NULL,
+ hmp_packing_from_sysfs);
+#endif
hmp_data.attr_group.name = "hmp";
hmp_data.attr_group.attrs = hmp_data.attributes;
ret = sysfs_create_group(kernel_kobj,
@@ -3951,9 +4025,24 @@ static int hmp_attr_init(void)
}
late_initcall(hmp_attr_init);
#endif /* CONFIG_HMP_VARIABLE_SCALE */
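The two new attributes registered above live in the existing "hmp" group created on kernel_kobj, so with CONFIG_SCHED_HMP_LITTLE_PACKING they should surface as /sys/kernel/hmp/packing_enable and /sys/kernel/hmp/packing_limit (path assumed from the group setup above). A small sketch of driving them from userspace, with minimal error handling:

/* Illustrative sketch only, not part of the patch. */
#include <stdio.h>

static int write_sysfs(const char *path, const char *value)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;    /* knob absent (config off) or insufficient permissions */
	fputs(value, f);
	fclose(f);
	return 0;
}

int main(void)
{
	write_sysfs("/sys/kernel/hmp/packing_enable", "1");    /* pack rather than spread */
	write_sysfs("/sys/kernel/hmp/packing_limit", "650");   /* unweighted "full" load */
	return 0;
}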
-
+/*
+ * return the load of the lowest-loaded CPU in a given HMP domain
+ * min_cpu optionally points to an int to receive the CPU.
+ * affinity optionally points to a cpumask containing the
+ * CPUs to be considered. note:
+ * + min_cpu = NR_CPUS only if no CPUs are in the set of
+ * affinity && hmp_domain cpus
+ * + min_cpu will always otherwise equal one of the CPUs in
+ * the hmp domain
+ * + when more than one CPU has the same load, the one which
+ * is least-recently-disturbed by an HMP migration will be
+ * selected
+ * + if all CPUs are equally loaded or idle and the times are
+ * all the same, the first in the set will be used
+ * + if affinity is not set, cpu_online_mask is used
+ */
static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd,
- int *min_cpu)
+ int *min_cpu, struct cpumask *affinity)
{
int cpu;
int min_cpu_runnable_temp = NR_CPUS;
@@ -3962,8 +4051,15 @@ static inline unsigned int hmp_domain_min_load(struct hmp_domain *hmpd,
unsigned long min_runnable_load = INT_MAX;
unsigned long contrib;
struct sched_avg *avg;
+ struct cpumask temp_cpumask;
+ /*
+ * only look at CPUs allowed if specified,
+ * otherwise look at all online CPUs in the
+ * right HMP domain
+ */
+ cpumask_and(&temp_cpumask, &hmpd->cpus, affinity ? affinity : cpu_online_mask);
- for_each_cpu_mask(cpu, hmpd->cpus) {
+ for_each_cpu_mask(cpu, temp_cpumask) {
avg = &cpu_rq(cpu)->avg;
/* used for both up and down migration */
curr_last_migration = avg->hmp_last_up_migration ?
@@ -4025,27 +4121,36 @@ static inline unsigned int hmp_offload_down(int cpu, struct sched_entity *se)
return NR_CPUS;
/* Is there an idle CPU in the current domain */
- min_usage = hmp_domain_min_load(hmp_cpu_domain(cpu), NULL);
- if (min_usage == 0)
+ min_usage = hmp_domain_min_load(hmp_cpu_domain(cpu), NULL, NULL);
+ if (min_usage == 0) {
+ trace_sched_hmp_offload_abort(cpu, min_usage, "load");
return NR_CPUS;
+ }
/* Is the task alone on the cpu? */
- if (cpu_rq(cpu)->cfs.h_nr_running < 2)
+ if (cpu_rq(cpu)->cfs.h_nr_running < 2) {
+ trace_sched_hmp_offload_abort(cpu,
+ cpu_rq(cpu)->cfs.h_nr_running, "nr_running");
return NR_CPUS;
+ }
/* Is the task actually starving? */
/* >=25% ratio running/runnable = starving */
- if (hmp_task_starvation(se) > 768)
+ if (hmp_task_starvation(se) > 768) {
+ trace_sched_hmp_offload_abort(cpu, hmp_task_starvation(se),
+ "starvation");
return NR_CPUS;
+ }
/* Does the slower domain have any idle CPUs? */
- min_usage = hmp_domain_min_load(hmp_slower_domain(cpu), &dest_cpu);
- if (min_usage > 0)
- return NR_CPUS;
+ min_usage = hmp_domain_min_load(hmp_slower_domain(cpu), &dest_cpu,
+ tsk_cpus_allowed(task_of(se)));
- if (cpumask_test_cpu(dest_cpu, &hmp_slower_domain(cpu)->cpus))
+ if (min_usage == 0) {
+ trace_sched_hmp_offload_succeed(cpu, dest_cpu);
return dest_cpu;
-
+ } else
+ trace_sched_hmp_offload_abort(cpu,min_usage,"slowdomain");
return NR_CPUS;
}
#endif /* CONFIG_SCHED_HMP */
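The common thread in the hunks above is that hmp_domain_min_load() now takes an optional affinity mask: the candidate set becomes the domain's CPUs intersected with either that mask or cpu_online_mask, which is what lets callers such as hmp_select_faster_cpu(), hmp_select_slower_cpu() and hmp_offload_down() drop their own affinity fallbacks. A bitmask sketch of that candidate-set computation, with plain unsigned longs standing in for struct cpumask:

/* Illustrative sketch only, not part of the patch. */
#include <stdio.h>

static unsigned long candidate_cpus(unsigned long domain_cpus,
				    unsigned long online_cpus,
				    const unsigned long *affinity)
{
	/* mirrors: cpumask_and(&temp, &hmpd->cpus, affinity ? affinity : cpu_online_mask) */
	return domain_cpus & (affinity ? *affinity : online_cpus);
}

int main(void)
{
	unsigned long little_domain = 0x0f;  /* CPUs 0-3 */
	unsigned long online        = 0x0e;  /* CPU 0 offline */
	unsigned long task_affinity = 0x05;  /* task bound to CPUs 0 and 2 */

	printf("no affinity:   0x%lx\n",
	       candidate_cpus(little_domain, online, NULL));            /* 0xe */
	printf("with affinity: 0x%lx\n",
	       candidate_cpus(little_domain, online, &task_affinity));  /* 0x5 */
	return 0;
}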
@@ -4077,30 +4182,13 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
#ifdef CONFIG_SCHED_HMP
/* always put non-kernel forking tasks on a big domain */
if (p->mm && (sd_flag & SD_BALANCE_FORK)) {
- if(hmp_cpu_is_fastest(prev_cpu)) {
- struct hmp_domain *hmpdom = list_entry(&hmp_cpu_domain(prev_cpu)->hmp_domains, struct hmp_domain, hmp_domains);
- __always_unused int lowest_ratio = hmp_domain_min_load(hmpdom, &new_cpu);
- if (new_cpu != NR_CPUS &&
- cpumask_test_cpu(new_cpu,
- tsk_cpus_allowed(p))) {
- hmp_next_up_delay(&p->se, new_cpu);
- return new_cpu;
- } else {
- new_cpu = cpumask_any_and(
- &hmp_faster_domain(cpu)->cpus,
- tsk_cpus_allowed(p));
- if (new_cpu < nr_cpu_ids) {
- hmp_next_up_delay(&p->se, new_cpu);
- return new_cpu;
- }
- }
- } else {
- new_cpu = hmp_select_faster_cpu(p, prev_cpu);
- if (new_cpu != NR_CPUS) {
- hmp_next_up_delay(&p->se, new_cpu);
- return new_cpu;
- }
+ new_cpu = hmp_select_faster_cpu(p, prev_cpu);
+ if (new_cpu != NR_CPUS) {
+ hmp_next_up_delay(&p->se, new_cpu);
+ return new_cpu;
}
+ /* failed to perform HMP fork balance, use normal balance */
+ new_cpu = cpu;
}
#endif
@@ -4179,16 +4267,24 @@ unlock:
rcu_read_unlock();
#ifdef CONFIG_SCHED_HMP
+ prev_cpu = task_cpu(p);
+
if (hmp_up_migration(prev_cpu, &new_cpu, &p->se)) {
hmp_next_up_delay(&p->se, new_cpu);
- trace_sched_hmp_migrate(p, new_cpu, 0);
+ trace_sched_hmp_migrate(p, new_cpu, HMP_MIGRATE_WAKEUP);
return new_cpu;
}
if (hmp_down_migration(prev_cpu, &p->se)) {
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+ new_cpu = hmp_best_little_cpu(p, prev_cpu);
+#else
new_cpu = hmp_select_slower_cpu(p, prev_cpu);
- hmp_next_down_delay(&p->se, new_cpu);
- trace_sched_hmp_migrate(p, new_cpu, 0);
- return new_cpu;
+#endif
+ if (new_cpu != prev_cpu) {
+ hmp_next_down_delay(&p->se, new_cpu);
+ trace_sched_hmp_migrate(p, new_cpu, HMP_MIGRATE_WAKEUP);
+ return new_cpu;
+ }
}
/* Make sure that the task stays in its previous hmp domain */
if (!cpumask_test_cpu(new_cpu, &hmp_cpu_domain(prev_cpu)->cpus))
@@ -6155,16 +6251,49 @@ static struct {
unsigned long next_balance; /* in jiffy units */
} nohz ____cacheline_aligned;
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+/*
+ * Decide if the tasks on the busy CPUs in the
+ * littlest domain would benefit from an idle balance
+ */
+static int hmp_packing_ilb_needed(int cpu)
+{
+ struct hmp_domain *hmp;
+ /* always allow ilb on non-slowest domain */
+ if (!hmp_cpu_is_slowest(cpu))
+ return 1;
+
+ hmp = hmp_cpu_domain(cpu);
+ for_each_cpu_and(cpu, &hmp->cpus, nohz.idle_cpus_mask) {
+ /* only idle balance if a CPU is loaded over threshold */
+ if (cpu_rq(cpu)->avg.load_avg_ratio > hmp_full_threshold)
+ return 1;
+ }
+ return 0;
+}
+#endif
+
static inline int find_new_ilb(int call_cpu)
{
int ilb = cpumask_first(nohz.idle_cpus_mask);
#ifdef CONFIG_SCHED_HMP
+ int ilb_needed = 1;
+
/* restrict nohz balancing to occur in the same hmp domain */
ilb = cpumask_first_and(nohz.idle_cpus_mask,
&((struct hmp_domain *)hmp_cpu_domain(call_cpu))->cpus);
+
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+ if (ilb < nr_cpu_ids)
+ ilb_needed = hmp_packing_ilb_needed(ilb);
#endif
+
+ if (ilb_needed && ilb < nr_cpu_ids && idle_cpu(ilb))
+ return ilb;
+#else
if (ilb < nr_cpu_ids && idle_cpu(ilb))
return ilb;
+#endif
return nr_cpu_ids;
}
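A sketch of the idle-balance gate added above for the littlest domain: an idle CPU is only nominated as the nohz balancer if some CPU in that domain is loaded past the packing threshold; otherwise the packed CPUs are left alone. For brevity this scans every little CPU, whereas the kernel version restricts the scan to the domain's CPUs currently in nohz.idle_cpus_mask.

/* Illustrative sketch only, not part of the patch. */
#include <stdio.h>

#define NR_LITTLE_CPUS 4

static const unsigned int hmp_full_threshold = 650;

static int packing_ilb_needed(const unsigned int rq_load[NR_LITTLE_CPUS])
{
	int cpu;

	for (cpu = 0; cpu < NR_LITTLE_CPUS; cpu++)
		if (rq_load[cpu] > hmp_full_threshold)
			return 1;   /* someone is over the packing limit */
	return 0;                   /* packed comfortably, leave idle CPUs asleep */
}

int main(void)
{
	unsigned int packed[NR_LITTLE_CPUS]     = { 600, 640, 0, 0 };
	unsigned int overflowed[NR_LITTLE_CPUS] = { 900, 640, 0, 0 };

	printf("packed:     ilb=%d\n", packing_ilb_needed(packed));      /* 0 */
	printf("overflowed: ilb=%d\n", packing_ilb_needed(overflowed));  /* 1 */
	return 0;
}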
@@ -6490,11 +6619,9 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) { }
static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_entity *se)
{
struct task_struct *p = task_of(se);
+ int temp_target_cpu;
u64 now;
- if (target_cpu)
- *target_cpu = NR_CPUS;
-
if (hmp_cpu_is_fastest(cpu))
return 0;
@@ -6517,13 +6644,12 @@ static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_enti
* idle CPU or 1023 for any partly-busy one.
* Be explicit about requirement for an idle CPU.
*/
- if (hmp_domain_min_load(hmp_faster_domain(cpu), target_cpu) != 0)
- return 0;
-
- if (cpumask_intersects(&hmp_faster_domain(cpu)->cpus,
- tsk_cpus_allowed(p)))
+ if (hmp_domain_min_load(hmp_faster_domain(cpu), &temp_target_cpu,
+ tsk_cpus_allowed(p)) == 0 && temp_target_cpu != NR_CPUS) {
+ if(target_cpu)
+ *target_cpu = temp_target_cpu;
return 1;
-
+ }
return 0;
}
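The rewritten check above folds the old load test and the separate affinity test into one call: a big CPU is only reported as a target when hmp_domain_min_load() sees an idle CPU (load 0) within the task's affinity, i.e. temp_target_cpu != NR_CPUS. A small sketch of that gate with stand-in values:

/* Illustrative sketch only, not part of the patch. */
#include <stdio.h>

#define NR_CPUS 8

static int up_migration_target(unsigned int big_min_load, int big_min_cpu,
			       int *target_cpu)
{
	/* idle (0) and a real, affinity-allowed CPU found */
	if (big_min_load == 0 && big_min_cpu != NR_CPUS) {
		if (target_cpu)
			*target_cpu = big_min_cpu;
		return 1;
	}
	return 0;
}

int main(void)
{
	int target = NR_CPUS;
	int ok;

	ok = up_migration_target(1023, 4, &target);           /* all bigs busy */
	printf("busy bigs: ok=%d\n", ok);                      /* 0 */

	ok = up_migration_target(0, 5, &target);               /* CPU 5 idle */
	printf("idle big:  ok=%d target=%d\n", ok, target);    /* 1, 5 */
	return 0;
}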
@@ -6533,8 +6659,14 @@ static unsigned int hmp_down_migration(int cpu, struct sched_entity *se)
struct task_struct *p = task_of(se);
u64 now;
- if (hmp_cpu_is_slowest(cpu))
+ if (hmp_cpu_is_slowest(cpu)) {
+#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
+ if(hmp_packing_enabled)
+ return 1;
+ else
+#endif
return 0;
+ }
#ifdef CONFIG_SCHED_HMP_PRIO_FILTER
/* Filter by task priority */
@@ -6703,6 +6835,7 @@ static int hmp_active_task_migration_cpu_stop(void *data)
rcu_read_unlock();
double_unlock_balance(busiest_rq, target_rq);
out_unlock:
+ put_task_struct(p);
busiest_rq->active_balance = 0;
raw_spin_unlock_irq(&busiest_rq->lock);
return 0;
@@ -6776,6 +6909,7 @@ static int hmp_idle_pull_cpu_stop(void *data)
rcu_read_unlock();
double_unlock_balance(busiest_rq, target_rq);
out_unlock:
+ put_task_struct(p);
busiest_rq->active_balance = 0;
raw_spin_unlock_irq(&busiest_rq->lock);
return 0;
@@ -6821,11 +6955,12 @@ static void hmp_force_up_migration(int this_cpu)
p = task_of(curr);
if (hmp_up_migration(cpu, &target_cpu, curr)) {
if (!target->active_balance) {
+ get_task_struct(p);
target->active_balance = 1;
target->push_cpu = target_cpu;
target->migrate_task = p;
force = 1;
- trace_sched_hmp_migrate(p, target->push_cpu, 1);
+ trace_sched_hmp_migrate(p, target->push_cpu, HMP_MIGRATE_FORCE);
hmp_next_up_delay(&p->se, target->push_cpu);
}
}
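The trace_sched_hmp_migrate() call sites in this file now pass named reasons instead of bare literals. The constants themselves are defined elsewhere in the series, not in this file; the sketch below only records the mapping implied by the replacements here, and the numeric values are an assumption carried over from the literals they replace.

/* Assumed mapping, for reference only; see the trace header in the series. */
enum hmp_migrate_reason_sketch {
	HMP_MIGRATE_WAKEUP    = 0,   /* was trace_sched_hmp_migrate(p, cpu, 0) */
	HMP_MIGRATE_FORCE     = 1,   /* was trace_sched_hmp_migrate(p, cpu, 1) */
	HMP_MIGRATE_OFFLOAD   = 2,   /* was trace_sched_hmp_migrate(p, cpu, 2) */
	HMP_MIGRATE_IDLE_PULL = 3,   /* was trace_sched_hmp_migrate(p, cpu, 3) */
};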
@@ -6836,12 +6971,14 @@ static void hmp_force_up_migration(int this_cpu)
* require extensive book keeping.
*/
curr = hmp_get_lightest_task(orig, 1);
+ p = task_of(curr);
target->push_cpu = hmp_offload_down(cpu, curr);
if (target->push_cpu < NR_CPUS) {
+ get_task_struct(p);
target->active_balance = 1;
target->migrate_task = p;
force = 1;
- trace_sched_hmp_migrate(p, target->push_cpu, 2);
+ trace_sched_hmp_migrate(p, target->push_cpu, HMP_MIGRATE_OFFLOAD);
hmp_next_down_delay(&p->se, target->push_cpu);
}
}
@@ -6916,11 +7053,12 @@ static unsigned int hmp_idle_pull(int this_cpu)
/* now we have a candidate */
raw_spin_lock_irqsave(&target->lock, flags);
if (!target->active_balance && task_rq(p) == target) {
+ get_task_struct(p);
target->active_balance = 1;
target->push_cpu = this_cpu;
target->migrate_task = p;
force = 1;
- trace_sched_hmp_migrate(p, target->push_cpu, 3);
+ trace_sched_hmp_migrate(p, target->push_cpu, HMP_MIGRATE_IDLE_PULL);
hmp_next_up_delay(&p->se, target->push_cpu);
}
raw_spin_unlock_irqrestore(&target->lock, flags);
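The get_task_struct()/put_task_struct() pairs added across these hunks pin the chosen task while a forced migration request sits in rq->migrate_task waiting for the CPU stopper: the reference is taken where the request is queued (hmp_force_up_migration(), hmp_idle_pull()) and dropped at the stoppers' out_unlock labels. A userspace sketch of that pairing, with a plain counter standing in for the task usage refcount and made-up helper names:

/* Illustrative sketch only, not part of the patch. */
#include <assert.h>
#include <stdio.h>

struct task { int usage; };

static void get_task_struct(struct task *p) { p->usage++; }
static void put_task_struct(struct task *p) { p->usage--; }

struct rq {
	struct task *migrate_task;
	int active_balance;
};

/* stands in for the active_balance + stop_one_cpu_nowait() setup */
static void queue_forced_migration(struct rq *rq, struct task *p)
{
	get_task_struct(p);          /* p must stay valid until the stopper runs */
	rq->active_balance = 1;
	rq->migrate_task = p;
}

/* stands in for hmp_active_task_migration_cpu_stop() / hmp_idle_pull_cpu_stop() */
static void migration_stopper(struct rq *rq)
{
	/* ... try to move rq->migrate_task, possibly bailing out early ... */
	put_task_struct(rq->migrate_task);   /* balances queue_forced_migration() */
	rq->migrate_task = NULL;
	rq->active_balance = 0;
}

int main(void)
{
	struct task t = { .usage = 1 };
	struct rq rq = { NULL, 0 };

	queue_forced_migration(&rq, &t);
	migration_stopper(&rq);
	assert(t.usage == 1);                /* reference taken and released */
	printf("usage back to %d\n", t.usage);
	return 0;
}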