 Documentation/arm/small_task_packing.txt | 136 +++++++++++++++++++
 arch/arm/Kconfig                         |  31 ++++-
 include/trace/events/sched.h             |  49 +++++++
 kernel/sched/fair.c                      | 218 ++++++++++++++------
 4 files changed, 381 insertions(+), 53 deletions(-)
diff --git a/Documentation/arm/small_task_packing.txt b/Documentation/arm/small_task_packing.txt
new file mode 100644
index 000000000000..43f0a8b80234
--- /dev/null
+++ b/Documentation/arm/small_task_packing.txt
@@ -0,0 +1,136 @@
+Small Task Packing in the big.LITTLE MP Reference Patch Set
+
+What is small task packing?
+----
+Simply that the scheduler will fit as many small tasks on a single CPU
+as possible before using other CPUs. A small task is defined as one
+whose tracked load is less than 90% of a NICE_0 task. This is a change
+from the usual behavior since the scheduler will normally use an idle
+CPU for a waking task unless that task is considered cache hot.
+
+
+How is it implemented?
+----
+Since a small task by definition spends most of its time sleeping,
+its placement can be decided each time it wakes. The main requirement
+for packing small tasks is therefore to select a partly-busy CPU at
+wakeup rather than looking for an idle CPU. We use the tracked load of
+the CPU runqueue to determine how heavily loaded each CPU is, and the
+tracked load of the task to determine whether it will fit on the CPU.
+We always start with the lowest-numbered CPU in a sched domain and
+stop looking when we find a CPU with enough space for the task.
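+
+As a rough illustration, the wakeup-time selection amounts to the
+following sketch. The helper names here are illustrative placeholders
+rather than the actual functions in fair.c; hmp_full_threshold is the
+real 'full CPU' tunable described later:
+
+	/* sketch: choose a CPU for small task 'p' among little CPUs */
+	static int small_task_select_cpu(struct task_struct *p,
+					 const struct cpumask *littles)
+	{
+		int cpu;
+
+		for_each_cpu(cpu, littles) {
+			/* runqueue tracked load plus the task's own load */
+			if (rq_tracked_load(cpu) + task_tracked_load(p) <
+					hmp_full_threshold)
+				return cpu;	/* first CPU with space wins */
+		}
+		/* no CPU has space: fall back to the normal idle search */
+		return fallback_idle_cpu(p);
+	}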
+
+Some further tweaks are necessary to suppress load balancing when the
+CPU is not fully loaded, otherwise the scheduler attempts to spread
+tasks evenly across the domain.
+
+
+How does it interact with the HMP patches?
+----
+Firstly, we only enable packing in the little domain. The big domain
+is intended to spread tasks amongst the available CPUs
+one-task-per-CPU, whereas the little domain attempts to use as
+little power as possible while servicing its tasks.
+
+Secondly, since we offload big tasks onto little CPUs in order to try
+to devote one CPU to each task, we have a threshold above which we do
+not try to pack a task and instead will select an idle CPU if possible.
+This maintains maximum forward progress for busy tasks temporarily
+demoted from big CPUs.
+
+
+Can the behaviour be tuned?
+----
+Yes. The load level at which a CPU is considered 'full' can be
+modified in the source and is exposed through sysfs as
+/sys/kernel/hmp/packing_limit, which can be changed at runtime.
+The presence of the packing behaviour is controlled by
+CONFIG_SCHED_HMP_LITTLE_PACKING, and packing can be disabled at
+runtime using /sys/kernel/hmp/packing_enable.
+The definition of a small task is hard-coded as 90% of NICE_0_LOAD
+and cannot be modified at runtime.
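+
+For example, packing can be switched off at runtime, or the 'full'
+level lowered to 450 (the value used in the worked example later in
+this document), with:
+
+	echo 0 > /sys/kernel/hmp/packing_enable
+	echo 450 > /sys/kernel/hmp/packing_limit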
+
+
+Why do I need to tune it?
+----
+The optimal configuration is likely to be different depending upon the
+design and manufacturing of your SoC.
+
+In the main, there are two system effects from enabling small task
+packing.
+
+1. CPU operating point may increase
+2. wakeup latency of tasks may be increased
+
+There are also likely to be secondary effects from loading one CPU
+rather than spreading tasks.
+
+Note that all of these system effects are dependent upon the workload
+under consideration.
+
+
+CPU Operating Point
+----
+The primary impact of loading one CPU with a number of light tasks is to
+increase the compute requirement of that CPU since it is no longer idle
+as often. Increased compute requirement causes an increase in the
+frequency of the CPU through CPUfreq.
+
+Consider this example:
+We have a system with 3 CPUs which can operate at any frequency between
+350MHz and 1GHz. The system has 6 tasks which would each produce 10%
+load at 1GHz. The scheduler has frequency-invariant load scaling
+enabled. Our DVFS governor aims for 80% utilization at the chosen
+frequency.
+
+Without task packing, these tasks will be spread out amongst all CPUs
+such that each has 2. Each CPU will be roughly 20% loaded, and the
+frequency of the package will remain at the 350MHz minimum.
+
+With task packing set to the default packing_limit, all of these
+tasks will sit on one CPU and require a package frequency of ~750MHz
+to reach 80% utilization.
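+
+In other words, the required frequency is the total 1GHz-relative
+load divided by the governor's target utilization:
+
+	total load    = 6 tasks * 10% = 60% of a 1GHz CPU
+	required freq = 0.6GHz / 0.8  = 0.75GHz (~750MHz)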
+
+When a package operates on a single frequency domain, all CPUs in that
+package share frequency and voltage.
+
+Depending upon the SoC implementation there can be a significant amount
+of energy lost to leakage from idle CPUs. The decision about how
+loaded a CPU must be to be considered 'full' is therefore controllable
+through sysfs (/sys/kernel/hmp/packing_limit) and directly in the code.
+
+Continuing the example, let's set packing_limit to 450, which means
+we will pack tasks until the total load of all running tasks >= 450.
+In practice, this is very similar to a 56%-idle 1GHz CPU
+(1 - 450/1024).
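+
+On the same 0..1023 scale, each example task contributes a load of
+about 102 (10% of 1024): four tasks total roughly 410, still below
+the 450 limit, while adding a fifth would take the total past 450,
+so it has to overflow onto another CPU.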
+
+Now we are only able to place 4 tasks on CPU0, and two will overflow
+onto CPU1. CPU0 will have a load of 40% and CPU1 a load of 20%. To
+still hit 80% utilization, the package now only needs to operate at
+0.4/0.8 = 0.5GHz, i.e. 500MHz rather than ~750MHz, and CPU2 is no
+longer needed and can be power-gated.
+
+In order to use less energy, the saving from power-gating CPU2 must be
+more than the energy spent running CPU0 for the extra cycles. This
+depends upon the SoC implementation.
+
+This is obviously a contrived example requiring all the tasks to
+be runnable at the same time, but it illustrates the point.
+
+
+Wakeup Latency
+----
+This is an unavoidable consequence of trying to pack tasks together
+rather than giving them a CPU each. If you cannot find an acceptable
+level of wakeup latency, you should turn packing off.
+
+Cyclictest is a good test application for determining the added latency
+when configuring packing.
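+
+A possible starting point (these are standard cyclictest options:
+-t starts one measurement thread per CPU, -p sets the SCHED_FIFO
+priority and -i the wakeup interval in microseconds):
+
+	cyclictest -t -p 80 -i 1000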
+
+
+Why is it turned off for the VersatileExpress V2P_CA15A7 CoreTile?
+----
+Simply, this core tile only has power gating for the whole A7 package.
+When small task packing is enabled, all our low-energy use cases
+normally fit onto one A7 CPU. We therefore end up with 2 mostly-idle
+CPUs and one mostly-busy CPU. This decreases the amount of time
+available where the whole package is idle and can be turned off.
+
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 426e541171ff..1116be551be5 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1510,6 +1510,17 @@ config SCHED_HMP
There is currently no support for migration of task groups, hence
!SCHED_AUTOGROUP. Furthermore, normal load-balancing must be disabled
between cpus of different type (DISABLE_CPU_SCHED_DOMAIN_BALANCE).
+ When turned on, this option adds a /sys/kernel/hmp directory which
+ contains the following files:
+ up_threshold - the load average threshold used for up migration
+ (0 - 1023)
+ down_threshold - the load average threshold used for down migration
+ (0 - 1023)
+ hmp_domains - a list of cpumasks for the present HMP domains,
+ starting with the 'biggest' and ending with the
+ 'smallest'.
+ Note that both the threshold files can be written at runtime to
+ control scheduler behaviour.
config SCHED_HMP_PRIO_FILTER
bool "(EXPERIMENTAL) Filter HMP migrations by task priority"
@@ -1544,28 +1555,24 @@ config HMP_VARIABLE_SCALE
bool "Allows changing the load tracking scale through sysfs"
depends on SCHED_HMP
help
- When turned on, this option exports the thresholds and load average
- period value for the load tracking patches through sysfs.
+ When turned on, this option exports the load average period value
+ for the load tracking patches through sysfs.
The values can be modified to change the rate of load accumulation
- and the thresholds used for HMP migration.
- The load_avg_period_ms is the time in ms to reach a load average of
- 0.5 for an idle task of 0 load average ratio that start a busy loop.
- The up_threshold and down_threshold is the value to go to a faster
- CPU or to go back to a slower cpu.
- The {up,down}_threshold are devided by 1024 before being compared
- to the load average.
- For examples, with load_avg_period_ms = 128 and up_threshold = 512,
+ used for HMP migration. 'load_avg_period_ms' is the time in ms it
+ takes a task that starts with a load average ratio of 0 and then
+ becomes 100% busy to reach a load average of 0.5.
+ For example, with load_avg_period_ms = 128 and up_threshold = 512,
a running task with a load of 0 will be migrated to a bigger CPU after
128ms, because after 128ms its load_avg_ratio is 0.5 and the real
up_threshold is 0.5.
This patch has the same behavior as changing the Y of the load
average computation to
(1002/1024)^(LOAD_AVG_PERIOD/load_avg_period_ms)
- but it remove intermadiate overflows in computation.
+ but removes intermediate overflows in computation.
config HMP_FREQUENCY_INVARIANT_SCALE
bool "(EXPERIMENTAL) Frequency-Invariant Tracked Load for HMP"
- depends on HMP_VARIABLE_SCALE && CPU_FREQ
+ depends on SCHED_HMP && CPU_FREQ
help
Scales the current load contribution in line with the frequency
of the CPU that the task was executed on.
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 66dc53bca19a..2afcb71857fd 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -580,6 +580,55 @@ TRACE_EVENT(sched_task_usage_ratio,
);
/*
+ * Tracepoint for HMP (CONFIG_SCHED_HMP) forced task migrations,
+ * recording whether the migrated task was running at the time.
+ */
+TRACE_EVENT(sched_hmp_migrate_force_running,
+
+ TP_PROTO(struct task_struct *tsk, int running),
+
+ TP_ARGS(tsk, running),
+
+ TP_STRUCT__entry(
+ __array(char, comm, TASK_COMM_LEN)
+ __field(int, running)
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+ __entry->running = running;
+ ),
+
+ TP_printk("running=%d comm=%s",
+ __entry->running, __entry->comm)
+);
+
+/*
+ * Tracepoint for HMP (CONFIG_SCHED_HMP) task migrations pulled
+ * by a CPU that is about to go idle, recording whether the
+ * migrated task was running at the time.
+ */
+TRACE_EVENT(sched_hmp_migrate_idle_running,
+
+ TP_PROTO(struct task_struct *tsk, int running),
+
+ TP_ARGS(tsk, running),
+
+ TP_STRUCT__entry(
+ __array(char, comm, TASK_COMM_LEN)
+ __field(int, running)
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+ __entry->running = running;
+ ),
+
+ TP_printk("running=%d comm=%s",
+ __entry->running, __entry->comm)
+);
+
+/*
* Tracepoint for HMP (CONFIG_SCHED_HMP) task migrations.
*/
#define HMP_MIGRATE_WAKEUP 0
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 643da90f3a7a..22913a60001d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -31,7 +31,6 @@
#include <linux/task_work.h>
#include <trace/events/sched.h>
-#ifdef CONFIG_HMP_VARIABLE_SCALE
#include <linux/sysfs.h>
#include <linux/vmalloc.h>
#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
@@ -40,7 +39,6 @@
*/
#include <linux/cpufreq.h>
#endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */
-#endif /* CONFIG_HMP_VARIABLE_SCALE */
#include "sched.h"
@@ -1212,8 +1210,6 @@ static u32 __compute_runnable_contrib(u64 n)
return contrib + runnable_avg_yN_sum[n];
}
-#ifdef CONFIG_HMP_VARIABLE_SCALE
-
#define HMP_VARIABLE_SCALE_SHIFT 16ULL
struct hmp_global_attr {
struct attribute attr;
@@ -1224,6 +1220,7 @@ struct hmp_global_attr {
int *value;
int (*to_sysfs)(int);
int (*from_sysfs)(int);
+ ssize_t (*to_sysfs_text)(char *buf, int buf_size);
};
#define HMP_DATA_SYSFS_MAX 8
@@ -1294,7 +1291,6 @@ struct cpufreq_extents {
static struct cpufreq_extents freq_scale[CONFIG_NR_CPUS];
#endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */
-#endif /* CONFIG_HMP_VARIABLE_SCALE */
/* We can represent the historical contribution to runnable average as the
* coefficients of a geometric series. To do this we sub-divide our runnable
@@ -1340,9 +1336,8 @@ static __always_inline int __update_entity_runnable_avg(u64 now,
#endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */
delta = now - sa->last_runnable_update;
-#ifdef CONFIG_HMP_VARIABLE_SCALE
+
delta = hmp_variable_scale_convert(delta);
-#endif
/*
* This should only happen when time goes backwards, which it
* unfortunately does during sched clock init when we swap over to TSC.
@@ -3843,7 +3838,6 @@ static inline void hmp_next_down_delay(struct sched_entity *se, int cpu)
cpu_rq(cpu)->avg.hmp_last_up_migration = 0;
}
-#ifdef CONFIG_HMP_VARIABLE_SCALE
/*
* Heterogenous multiprocessor (HMP) optimizations
*
@@ -3876,27 +3870,35 @@ static inline void hmp_next_down_delay(struct sched_entity *se, int cpu)
* The scale factor hmp_data.multiplier is a fixed point
* number: (32-HMP_VARIABLE_SCALE_SHIFT).HMP_VARIABLE_SCALE_SHIFT
*/
-static u64 hmp_variable_scale_convert(u64 delta)
+static inline u64 hmp_variable_scale_convert(u64 delta)
{
+#ifdef CONFIG_HMP_VARIABLE_SCALE
u64 high = delta >> 32ULL;
u64 low = delta & 0xffffffffULL;
low *= hmp_data.multiplier;
high *= hmp_data.multiplier;
return (low >> HMP_VARIABLE_SCALE_SHIFT)
+ (high << (32ULL - HMP_VARIABLE_SCALE_SHIFT));
+#else
+ return delta;
+#endif
}
static ssize_t hmp_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
- ssize_t ret = 0;
struct hmp_global_attr *hmp_attr =
container_of(attr, struct hmp_global_attr, attr);
- int temp = *(hmp_attr->value);
+ int temp;
+
+ if (hmp_attr->to_sysfs_text != NULL)
+ return hmp_attr->to_sysfs_text(buf, PAGE_SIZE);
+
+ temp = *(hmp_attr->value);
if (hmp_attr->to_sysfs != NULL)
temp = hmp_attr->to_sysfs(temp);
- ret = sprintf(buf, "%d\n", temp);
- return ret;
+
+ return (ssize_t)sprintf(buf, "%d\n", temp);
}
static ssize_t hmp_store(struct kobject *a, struct attribute *attr,
@@ -3925,11 +3927,31 @@ static ssize_t hmp_store(struct kobject *a, struct attribute *attr,
return ret;
}
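+/*
+ * Emit the cpumask of each registered HMP domain into 'outbuf',
+ * biggest domain first, space-separated and newline-terminated.
+ * Returns the number of characters written.
+ */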
+static ssize_t hmp_print_domains(char *outbuf, int outbufsize)
+{
+ char buf[64];
+ const char nospace[] = "%s", space[] = " %s";
+ const char *fmt = nospace;
+ struct hmp_domain *domain;
+ struct list_head *pos;
+ int outpos = 0;
+ list_for_each(pos, &hmp_domains) {
+ domain = list_entry(pos, struct hmp_domain, hmp_domains);
+ if (cpumask_scnprintf(buf, 64, &domain->possible_cpus)) {
+ outpos += sprintf(outbuf+outpos, fmt, buf);
+ fmt = space;
+ }
+ }
+ strcat(outbuf, "\n");
+ return outpos+1;
+}
+
+#ifdef CONFIG_HMP_VARIABLE_SCALE
static int hmp_period_tofrom_sysfs(int value)
{
return (LOAD_AVG_PERIOD << HMP_VARIABLE_SCALE_SHIFT) / value;
}
-
+#endif
/* max value for threshold is 1024 */
static int hmp_theshold_from_sysfs(int value)
{
@@ -3937,9 +3959,10 @@ static int hmp_theshold_from_sysfs(int value)
return -1;
return value;
}
-#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
-/* freqinvar control is only 0,1 off/on */
-static int hmp_freqinvar_from_sysfs(int value)
+#if defined(CONFIG_SCHED_HMP_LITTLE_PACKING) || \
+ defined(CONFIG_HMP_FREQUENCY_INVARIANT_SCALE)
+/* toggle control is only 0,1 off/on */
+static int hmp_toggle_from_sysfs(int value)
{
if (value < 0 || value > 1)
return -1;
@@ -3959,7 +3982,9 @@ static void hmp_attr_add(
const char *name,
int *value,
int (*to_sysfs)(int),
- int (*from_sysfs)(int))
+ int (*from_sysfs)(int),
+ ssize_t (*to_sysfs_text)(char *, int),
+ umode_t mode)
{
int i = 0;
while (hmp_data.attributes[i] != NULL) {
@@ -3967,13 +3992,17 @@ static void hmp_attr_add(
if (i >= HMP_DATA_SYSFS_MAX)
return;
}
- hmp_data.attr[i].attr.mode = 0644;
+ if (mode)
+ hmp_data.attr[i].attr.mode = mode;
+ else
+ hmp_data.attr[i].attr.mode = 0644;
hmp_data.attr[i].show = hmp_show;
hmp_data.attr[i].store = hmp_store;
hmp_data.attr[i].attr.name = name;
hmp_data.attr[i].value = value;
hmp_data.attr[i].to_sysfs = to_sysfs;
hmp_data.attr[i].from_sysfs = from_sysfs;
+ hmp_data.attr[i].to_sysfs_text = to_sysfs_text;
hmp_data.attributes[i] = &hmp_data.attr[i].attr;
hmp_data.attributes[i + 1] = NULL;
}
@@ -3982,40 +4011,59 @@ static int hmp_attr_init(void)
{
int ret;
memset(&hmp_data, 0, sizeof(hmp_data));
+ hmp_attr_add("hmp_domains",
+ NULL,
+ NULL,
+ NULL,
+ hmp_print_domains,
+ 0444);
+ hmp_attr_add("up_threshold",
+ &hmp_up_threshold,
+ NULL,
+ hmp_theshold_from_sysfs,
+ NULL,
+ 0);
+ hmp_attr_add("down_threshold",
+ &hmp_down_threshold,
+ NULL,
+ hmp_theshold_from_sysfs,
+ NULL,
+ 0);
+#ifdef CONFIG_HMP_VARIABLE_SCALE
/* by default load_avg_period_ms == LOAD_AVG_PERIOD
* meaning no change
*/
hmp_data.multiplier = hmp_period_tofrom_sysfs(LOAD_AVG_PERIOD);
-
hmp_attr_add("load_avg_period_ms",
&hmp_data.multiplier,
hmp_period_tofrom_sysfs,
- hmp_period_tofrom_sysfs);
- hmp_attr_add("up_threshold",
- &hmp_up_threshold,
- NULL,
- hmp_theshold_from_sysfs);
- hmp_attr_add("down_threshold",
- &hmp_down_threshold,
+ hmp_period_tofrom_sysfs,
NULL,
- hmp_theshold_from_sysfs);
+ 0);
+#endif
#ifdef CONFIG_HMP_FREQUENCY_INVARIANT_SCALE
/* default frequency-invariant scaling ON */
hmp_data.freqinvar_load_scale_enabled = 1;
hmp_attr_add("frequency_invariant_load_scale",
&hmp_data.freqinvar_load_scale_enabled,
NULL,
- hmp_freqinvar_from_sysfs);
+ hmp_toggle_from_sysfs,
+ NULL,
+ 0);
#endif
#ifdef CONFIG_SCHED_HMP_LITTLE_PACKING
hmp_attr_add("packing_enable",
&hmp_packing_enabled,
NULL,
- hmp_freqinvar_from_sysfs);
+ hmp_toggle_from_sysfs,
+ NULL,
+ 0);
hmp_attr_add("packing_limit",
&hmp_full_threshold,
NULL,
- hmp_packing_from_sysfs);
+ hmp_packing_from_sysfs,
+ NULL,
+ 0);
#endif
hmp_data.attr_group.name = "hmp";
hmp_data.attr_group.attrs = hmp_data.attributes;
@@ -4024,7 +4072,6 @@ static int hmp_attr_init(void)
return 0;
}
late_initcall(hmp_attr_init);
-#endif /* CONFIG_HMP_VARIABLE_SCALE */
/*
* return the load of the lowest-loaded CPU in a given HMP domain
* min_cpu optionally points to an int to receive the CPU.
@@ -6915,6 +6962,69 @@ out_unlock:
return 0;
}
+/*
+ * Move a task in a runnable state to another CPU.
+ *
+ * Modelled on 'active_load_balance_cpu_stop' with slight
+ * modifications to the locking and pre-transfer checks. Note that
+ * rq->lock must be held before calling.
+ */
+static void hmp_migrate_runnable_task(struct rq *rq)
+{
+ struct sched_domain *sd;
+ int src_cpu = cpu_of(rq);
+ struct rq *src_rq = rq;
+ int dst_cpu = rq->push_cpu;
+ struct rq *dst_rq = cpu_rq(dst_cpu);
+ struct task_struct *p = rq->migrate_task;
+ /*
+ * One last check to make sure nobody else is playing
+ * with the source rq.
+ */
+ if (src_rq->active_balance)
+ return;
+
+ if (src_rq->nr_running <= 1)
+ return;
+
+ if (task_rq(p) != src_rq)
+ return;
+ /*
+ * Not sure if this applies here but one can never
+ * be too cautious
+ */
+ BUG_ON(src_rq == dst_rq);
+
+ double_lock_balance(src_rq, dst_rq);
+
+ rcu_read_lock();
+ for_each_domain(dst_cpu, sd) {
+ if (cpumask_test_cpu(src_cpu, sched_domain_span(sd)))
+ break;
+ }
+
+ if (likely(sd)) {
+ struct lb_env env = {
+ .sd = sd,
+ .dst_cpu = dst_cpu,
+ .dst_rq = dst_rq,
+ .src_cpu = src_cpu,
+ .src_rq = src_rq,
+ .idle = CPU_IDLE,
+ };
+
+ schedstat_inc(sd, alb_count);
+
+ if (move_specific_task(&env, p))
+ schedstat_inc(sd, alb_pushed);
+ else
+ schedstat_inc(sd, alb_failed);
+ }
+
+ rcu_read_unlock();
+ double_unlock_balance(src_rq, dst_rq);
+}
+
static DEFINE_SPINLOCK(hmp_force_migration);
/*
@@ -6927,13 +7037,14 @@ static void hmp_force_up_migration(int this_cpu)
struct sched_entity *curr, *orig;
struct rq *target;
unsigned long flags;
- unsigned int force;
+ unsigned int force, got_target;
struct task_struct *p;
if (!spin_trylock(&hmp_force_migration))
return;
for_each_online_cpu(cpu) {
force = 0;
+ got_target = 0;
target = cpu_rq(cpu);
raw_spin_lock_irqsave(&target->lock, flags);
curr = target->cfs.curr;
@@ -6956,15 +7067,14 @@ static void hmp_force_up_migration(int this_cpu)
if (hmp_up_migration(cpu, &target_cpu, curr)) {
if (!target->active_balance) {
get_task_struct(p);
- target->active_balance = 1;
target->push_cpu = target_cpu;
target->migrate_task = p;
- force = 1;
+ got_target = 1;
trace_sched_hmp_migrate(p, target->push_cpu, HMP_MIGRATE_FORCE);
hmp_next_up_delay(&p->se, target->push_cpu);
}
}
- if (!force && !target->active_balance) {
+ if (!got_target && !target->active_balance) {
/*
* For now we just check the currently running task.
* Selecting the lightest task for offloading will
@@ -6975,14 +7085,29 @@ static void hmp_force_up_migration(int this_cpu)
target->push_cpu = hmp_offload_down(cpu, curr);
if (target->push_cpu < NR_CPUS) {
get_task_struct(p);
- target->active_balance = 1;
target->migrate_task = p;
- force = 1;
+ got_target = 1;
trace_sched_hmp_migrate(p, target->push_cpu, HMP_MIGRATE_OFFLOAD);
hmp_next_down_delay(&p->se, target->push_cpu);
}
}
+ /*
+ * We have a target with no active_balance. If the task
+ * is not currently running, move it now; otherwise let the
+ * CPU stopper take care of it.
+ */
+ if (got_target && !target->active_balance) {
+ if (!task_running(target, p)) {
+ trace_sched_hmp_migrate_force_running(p, 0);
+ hmp_migrate_runnable_task(target);
+ } else {
+ target->active_balance = 1;
+ force = 1;
+ }
+ }
+
raw_spin_unlock_irqrestore(&target->lock, flags);
+
if (force)
stop_one_cpu_nowait(cpu_of(target),
hmp_active_task_migration_cpu_stop,
@@ -7002,7 +7127,7 @@ static unsigned int hmp_idle_pull(int this_cpu)
int cpu;
struct sched_entity *curr, *orig;
struct hmp_domain *hmp_domain = NULL;
- struct rq *target, *rq;
+ struct rq *target = NULL, *rq;
unsigned long flags, ratio = 0;
unsigned int force = 0;
struct task_struct *p = NULL;
@@ -7054,14 +7179,25 @@ static unsigned int hmp_idle_pull(int this_cpu)
raw_spin_lock_irqsave(&target->lock, flags);
if (!target->active_balance && task_rq(p) == target) {
get_task_struct(p);
- target->active_balance = 1;
target->push_cpu = this_cpu;
target->migrate_task = p;
- force = 1;
trace_sched_hmp_migrate(p, target->push_cpu, HMP_MIGRATE_IDLE_PULL);
hmp_next_up_delay(&p->se, target->push_cpu);
+ /*
+ * If the task isn't running, move it right away.
+ * Otherwise set up the active_balance mechanism and let
+ * the CPU stopper do its job.
+ */
+ if (!task_running(target, p)) {
+ trace_sched_hmp_migrate_idle_running(p, 0);
+ hmp_migrate_runnable_task(target);
+ } else {
+ target->active_balance = 1;
+ force = 1;
+ }
}
raw_spin_unlock_irqrestore(&target->lock, flags);
+
if (force) {
stop_one_cpu_nowait(cpu_of(target),
hmp_idle_pull_cpu_stop,