author     Daniel Lezcano <daniel.lezcano@linaro.org>  2015-04-09 16:42:03 +0200
committer  Daniel Lezcano <daniel.lezcano@linaro.org>  2015-04-09 16:42:03 +0200
commit     aaa76620788fbe3483786a1436bd6f7abaa22bf2 (patch)
tree       faa4c7c9998af5e508c7b43fdf4cc6cb5f2316e7
parent     8cb3a2dfb116925da331ce743b81fde4d9201267 (diff)
parent     1e646f272f5bd635385106cf12ac34f5f9d83b1a (diff)

Merge branch 'sched/4.1' into qcom/next
-rw-r--r--  drivers/cpuidle/cpuidle.c  |  41
-rw-r--r--  include/linux/cpuidle.h    |   1
-rw-r--r--  kernel/sched/fair.c        |  61
-rw-r--r--  kernel/sched/features.h    |   5
4 files changed, 91 insertions(+), 17 deletions(-)
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 080bd2dbde4b..5e6c6bec97de 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -158,21 +158,54 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
int entered_state;
struct cpuidle_state *target_state = &drv->states[index];
- ktime_t time_start, time_end;
s64 diff;
trace_cpu_idle_rcuidle(index, dev->cpu);
- time_start = ktime_get();
+ /*
+ * Store the idle start time for this cpu. This information
+ * will be used by cpuidle to measure how long the cpu has
+ * been idle, and by the scheduler to avoid waking it up too
+ * early.
+ */
+ target_state->idle_stamp = ktime_to_us(ktime_get());
+
+ /*
+ * Enter the low level idle routine. This call will block
+ * until an interrupt occurs, marking the end of the idle
+ * period.
+ */
entered_state = target_state->enter(dev, drv, index);
- time_end = ktime_get();
+ /*
+ * Measure the duration of the idle period as soon as
+ * possible. It MUST be done before re-enabling interrupts,
+ * so that the interrupt handling time is not added to the
+ * idle time measurement.
+ */
+ diff = ktime_to_us(ktime_sub_us(ktime_get(), target_state->idle_stamp));
+
+ /*
+ * Reset the idle time stamp, otherwise the scheduler may think the
+ * cpu is idle while it is in the process of waking up.
+ */
+ target_state->idle_stamp = 0;
+
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
+ /*
+ * cpuidle_enter_coupled() uses the cpuidle_enter function.
+ * Don't re-enable interrupts here; let the coupled code wait
+ * for all cpus to sync and re-enable interrupts from there.
+ */
if (!cpuidle_state_is_coupled(dev, drv, entered_state))
local_irq_enable();
- diff = ktime_to_us(ktime_sub(time_end, time_start));
+ /*
+ * The idle duration will be cast to an integer; prevent
+ * overflow by capping it at INT_MAX.
+ */
if (diff > INT_MAX)
diff = INT_MAX;
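
The hunk above replaces the two local ktime_t samples with a single per-state idle_stamp held in microseconds: the start time is stored before entering the idle state, the elapsed time is computed from it right after wakeup and before interrupts are re-enabled, the stamp is then cleared, and the result is clamped to INT_MAX. A rough userspace analog of that measure-and-clamp pattern, using clock_gettime() in place of the kernel's ktime helpers (illustrative sketch only, not kernel code):

#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

/* Monotonic time in microseconds, standing in for ktime_to_us(ktime_get()). */
static int64_t now_us(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (int64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000;
}

int main(void)
{
        int64_t idle_stamp = now_us();  /* analog of target_state->idle_stamp */
        int64_t diff;

        usleep(2000);                   /* stands in for target_state->enter() */

        diff = now_us() - idle_stamp;   /* measure before anything else runs */
        idle_stamp = 0;                 /* clear so observers don't see a stale stamp */

        if (diff > INT_MAX)             /* same clamp as cpuidle_enter_state() */
                diff = INT_MAX;

        printf("idle period: %lld us\n", (long long)diff);
        return 0;
}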
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 306178d7309f..2facce63ddbf 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -44,6 +44,7 @@ struct cpuidle_state {
int power_usage; /* in mW */
unsigned int target_residency; /* in US */
bool disabled; /* disabled on all CPUs */
+ u64 idle_stamp;
int (*enter) (struct cpuidle_device *dev,
struct cpuidle_driver *drv,
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index bcfe32088b37..8cb822a23e99 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4640,21 +4640,48 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
if (idle_cpu(i)) {
struct rq *rq = cpu_rq(i);
struct cpuidle_state *idle = idle_get_state(rq);
- if (idle && idle->exit_latency < min_exit_latency) {
+
+ if (idle) {
+
/*
- * We give priority to a CPU whose idle state
- * has the smallest exit latency irrespective
- * of any idle timestamp.
+ * When we want to save energy,
+ * exclude a cpu that has not yet
+ * reached the break-even point of
+ * its idle state, provided another
+ * cpu has already been selected.
*/
- min_exit_latency = idle->exit_latency;
- latest_idle_timestamp = rq->idle_stamp;
- shallowest_idle_cpu = i;
- } else if ((!idle || idle->exit_latency == min_exit_latency) &&
- rq->idle_stamp > latest_idle_timestamp) {
+ if (sched_feat(ENERGY_IDLE) && shallowest_idle_cpu != -1 &&
+ ((ktime_to_us(ktime_get()) - idle->idle_stamp <
+ idle->target_residency)))
+ continue;
+
+ if (idle->exit_latency < min_exit_latency) {
+ /*
+ * We give priority to a CPU
+ * whose idle state has the
+ * smallest exit latency
+ * irrespective of any idle
+ * timestamp
+ */
+ min_exit_latency = idle->exit_latency;
+ latest_idle_timestamp = idle->idle_stamp;
+ shallowest_idle_cpu = i;
+ } else if (idle->exit_latency == min_exit_latency &&
+ idle->idle_stamp > latest_idle_timestamp) {
+ /*
+ * If the CPU is in the same
+ * idle state, choose the more
+ * recent one as it might have
+ * a warmer cache
+ */
+ latest_idle_timestamp = idle->idle_stamp;
+ shallowest_idle_cpu = i;
+ }
+ } else if (rq->idle_stamp > latest_idle_timestamp) {
/*
- * If equal or no active idle state, then
- * the most recently idled CPU might have
- * a warmer cache.
+ * If there is no active idle state,
+ * then the most recently idled CPU
+ * might have a warmer cache.
*/
latest_idle_timestamp = rq->idle_stamp;
shallowest_idle_cpu = i;
@@ -4668,7 +4695,15 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
}
}
- return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu;
+ /*
+ * If we want to save energy and there is a non-idle cpu other
+ * than the current one, use it instead of waking up an idle
+ * cpu; otherwise use the shallowest idle cpu.
+ */
+ if (sched_feat(ENERGY_IDLE) && least_loaded_cpu != this_cpu)
+ return least_loaded_cpu;
+ else
+ return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu;
}
/*
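
Taken together, the new find_idlest_cpu() logic skips, when the ENERGY_IDLE feature is set and a candidate has already been found, any idle cpu whose time asleep is still shorter than its idle state's target_residency, i.e. a cpu that has not yet reached the break-even point of its idle state. A stripped-down, standalone sketch of that filter (hypothetical struct and function names, not the scheduler's actual data structures):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical per-cpu idle information, mirroring the fields used above. */
struct idle_info {
        unsigned int exit_latency;      /* us needed to leave the idle state */
        unsigned int target_residency;  /* us needed to break even */
        uint64_t     idle_stamp;        /* us timestamp of idle entry, 0 if busy */
};

/*
 * Pick the "shallowest" idle cpu; when energy_idle is set and a candidate
 * already exists, skip cpus that have not yet reached break-even.
 */
int pick_idle_cpu(const struct idle_info *cpus, int nr, uint64_t now,
                  bool energy_idle)
{
        unsigned int min_exit_latency = ~0u;
        uint64_t latest_stamp = 0;
        int best = -1;

        for (int i = 0; i < nr; i++) {
                const struct idle_info *c = &cpus[i];

                if (!c->idle_stamp)
                        continue;               /* cpu is not idle */

                /* Below break-even: waking it would waste the entry cost. */
                if (energy_idle && best != -1 &&
                    now - c->idle_stamp < c->target_residency)
                        continue;

                if (c->exit_latency < min_exit_latency) {
                        min_exit_latency = c->exit_latency;
                        latest_stamp = c->idle_stamp;
                        best = i;
                } else if (c->exit_latency == min_exit_latency &&
                           c->idle_stamp > latest_stamp) {
                        /* Same depth: prefer the most recently idled cpu. */
                        latest_stamp = c->idle_stamp;
                        best = i;
                }
        }
        return best;
}

int main(void)
{
        struct idle_info cpus[] = {
                { .exit_latency = 10, .target_residency = 100, .idle_stamp = 500 },
                { .exit_latency =  2, .target_residency =  20, .idle_stamp = 990 },
                { .exit_latency =  0, .target_residency =   0, .idle_stamp =   0 },
        };

        printf("picked cpu %d\n", pick_idle_cpu(cpus, 3, 1000, true));
        return 0;
}

With energy saving enabled, cpu 1 is skipped here even though it has the smallest exit latency, because it has only been idle for 10 of its 20 us target residency, so cpu 0 is picked.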
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 90284d117fe6..c5deec70cbfe 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -61,6 +61,11 @@ SCHED_FEAT(RT_RUNTIME_SHARE, true)
SCHED_FEAT(LB_MIN, false)
/*
+ * Apply an aggressive energy-saving policy when idle
+ */
+SCHED_FEAT(ENERGY_IDLE, false)
+
+/*
* Apply the automatic NUMA scheduling policy. Enabled automatically
* at runtime if running on a NUMA machine. Can be controlled via
* numa_balancing=
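
The SCHED_FEAT() entries in this file form an X-macro list that the scheduler expands into feature bits, and call sites guard behaviour with sched_feat(ENERGY_IDLE) as in the fair.c hunk above; with CONFIG_SCHED_DEBUG the flags can also be toggled at run time through /sys/kernel/debug/sched_features. A stripped-down standalone analog of that X-macro pattern (simplified sketch; the kernel's real implementation may also use static keys):

#include <stdbool.h>
#include <stdio.h>

/* X-macro list of features, mirroring SCHED_FEAT() entries in features.h. */
#define FEATURES(F)            \
        F(LB_MIN,      false)  \
        F(ENERGY_IDLE, false)  \
        F(NUMA,        true)

/* Expand the list into one bit index per feature... */
enum {
#define F_ENUM(name, dflt) FEAT_##name,
        FEATURES(F_ENUM)
#undef F_ENUM
        FEAT_NR
};

/* ...and into the default mask of enabled features. */
static unsigned long features =
#define F_MASK(name, dflt) ((dflt) ? 1UL << FEAT_##name : 0) |
        FEATURES(F_MASK)
#undef F_MASK
        0;

/* Analog of sched_feat(): test whether a feature bit is currently set. */
#define feat_enabled(name) (!!(features & (1UL << FEAT_##name)))

int main(void)
{
        printf("ENERGY_IDLE: %d\n", feat_enabled(ENERGY_IDLE));
        features |= 1UL << FEAT_ENERGY_IDLE;    /* analog of a runtime toggle */
        printf("ENERGY_IDLE: %d\n", feat_enabled(ENERGY_IDLE));
        return 0;
}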