author    Vincent Guittot <vincent.guittot@linaro.org>  2015-02-02 13:38:27 +0100
committer Vincent Guittot <vincent.guittot@linaro.org>  2015-02-02 13:38:27 +0100
commit    535883342fd96de91597048a78158faf8e2c59c6 (patch)
tree      b13e25cf53e374416a1efc1b01e185ff8ccf7446
parent    15bc600daf64ec7478988611650bb18c5a03664d (diff)
parent    30ae92e267453cc86953200c0f78db0b0995d631 (diff)
Merge branch 'test-sched-cpuidle-tc2' into test-sched-all-tc2
-rw-r--r--  drivers/acpi/processor_idle.c        10
-rw-r--r--  drivers/ata/libata-sff.c              9
-rw-r--r--  drivers/ata/pata_mpiix.c              2
-rw-r--r--  drivers/cpuidle/Kconfig               4
-rw-r--r--  drivers/cpuidle/cpuidle.c            92
-rw-r--r--  drivers/cpuidle/driver.c             32
-rw-r--r--  drivers/cpuidle/governors/Makefile    1
-rw-r--r--  drivers/cpuidle/governors/irq.c      33
-rw-r--r--  drivers/cpuidle/governors/ladder.c   32
-rw-r--r--  drivers/cpuidle/governors/menu.c    205
-rw-r--r--  drivers/cpuidle/sysfs.c             156
-rw-r--r--  drivers/idle/intel_idle.c             2
-rw-r--r--  include/linux/cpuidle.h              35
-rw-r--r--  include/linux/interrupt.h             1
-rw-r--r--  include/linux/irq.h                   6
-rw-r--r--  include/linux/irqdesc.h               5
-rw-r--r--  include/trace/events/irq.h           45
-rw-r--r--  kernel/irq/Kconfig                    5
-rw-r--r--  kernel/irq/Makefile                   1
-rw-r--r--  kernel/irq/internals.h               18
-rw-r--r--  kernel/irq/irqdesc.c                  2
-rw-r--r--  kernel/irq/manage.c                   4
-rw-r--r--  kernel/irq/proc.c                    67
-rw-r--r--  kernel/irq/timings.c                338
-rw-r--r--  kernel/sched/fair.c                  58
-rw-r--r--  kernel/sched/features.h               5
-rw-r--r--  kernel/sched/idle.c                  55
27 files changed, 989 insertions(+), 234 deletions(-)
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 87b704e41877..5d0bcd21a822 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -782,7 +782,7 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev,
if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) &&
!pr->flags.has_cst &&
!(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
- return acpi_idle_enter_c1(dev, drv, CPUIDLE_DRIVER_STATE_START);
+ return acpi_idle_enter_c1(dev, drv, 0);
#endif
/*
@@ -830,7 +830,7 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev,
if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) &&
!pr->flags.has_cst &&
!(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
- return acpi_idle_enter_c1(dev, drv, CPUIDLE_DRIVER_STATE_START);
+ return acpi_idle_enter_c1(dev, drv, 0);
#endif
if (!cx->bm_sts_skip && acpi_idle_bm_check()) {
@@ -905,7 +905,7 @@ struct cpuidle_driver acpi_idle_driver = {
static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr,
struct cpuidle_device *dev)
{
- int i, count = CPUIDLE_DRIVER_STATE_START;
+ int i, count = 0;
struct acpi_processor_cx *cx;
if (!pr->flags.power_setup_done)
@@ -950,7 +950,7 @@ static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr,
*/
static int acpi_processor_setup_cpuidle_states(struct acpi_processor *pr)
{
- int i, count = CPUIDLE_DRIVER_STATE_START;
+ int i, count = 0;
struct acpi_processor_cx *cx;
struct cpuidle_state *state;
struct cpuidle_driver *drv = &acpi_idle_driver;
@@ -985,6 +985,8 @@ static int acpi_processor_setup_cpuidle_states(struct acpi_processor *pr)
state->flags = 0;
switch (cx->type) {
case ACPI_STATE_C1:
+ if (cx->entry_method != ACPI_CSTATE_FFH)
+ state->flags |= CPUIDLE_FLAG_TIME_INVALID;
state->enter = acpi_idle_enter_c1;
state->enter_dead = acpi_idle_play_dead;
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index db90aa35cb71..062297326f07 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -2440,7 +2440,8 @@ int ata_pci_sff_activate_host(struct ata_host *host,
int i;
rc = devm_request_irq(dev, pdev->irq, irq_handler,
- IRQF_SHARED, drv_name, host);
+ IRQF_SHARED | IRQF_TIMINGS,
+ drv_name, host);
if (rc)
goto out;
@@ -2452,7 +2453,8 @@ int ata_pci_sff_activate_host(struct ata_host *host,
} else if (legacy_mode) {
if (!ata_port_is_dummy(host->ports[0])) {
rc = devm_request_irq(dev, ATA_PRIMARY_IRQ(pdev),
- irq_handler, IRQF_SHARED,
+ irq_handler,
+ IRQF_SHARED | IRQF_TIMINGS,
drv_name, host);
if (rc)
goto out;
@@ -2463,7 +2465,8 @@ int ata_pci_sff_activate_host(struct ata_host *host,
if (!ata_port_is_dummy(host->ports[1])) {
rc = devm_request_irq(dev, ATA_SECONDARY_IRQ(pdev),
- irq_handler, IRQF_SHARED,
+ irq_handler,
+ IRQF_SHARED | IRQF_TIMINGS,
drv_name, host);
if (rc)
goto out;
diff --git a/drivers/ata/pata_mpiix.c b/drivers/ata/pata_mpiix.c
index 202b4d601393..b7663242cd60 100644
--- a/drivers/ata/pata_mpiix.c
+++ b/drivers/ata/pata_mpiix.c
@@ -208,7 +208,7 @@ static int mpiix_init_one(struct pci_dev *dev, const struct pci_device_id *id)
ata_sff_std_ports(&ap->ioaddr);
/* activate host */
- return ata_host_activate(host, irq, ata_sff_interrupt, IRQF_SHARED,
+ return ata_host_activate(host, irq, ata_sff_interrupt, IRQF_SHARED | IRQF_TIMINGS,
&mpiix_sht);
}
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
index c5029c1209b4..1aae78bc9d95 100644
--- a/drivers/cpuidle/Kconfig
+++ b/drivers/cpuidle/Kconfig
@@ -25,6 +25,10 @@ config CPU_IDLE_GOV_MENU
bool "Menu governor (for tickless system)"
default y
+config CPU_IDLE_GOV_IRQ
+ bool "Irq governor (for tickless system)"
+ default y
+
config DT_IDLE_STATES
bool
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 125150dc6e81..1924f4e0a3e7 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -8,16 +8,12 @@
* This code is licenced under the GPL.
*/
-#include <linux/clockchips.h>
#include <linux/kernel.h>
#include <linux/mutex.h>
-#include <linux/sched.h>
#include <linux/notifier.h>
#include <linux/pm_qos.h>
#include <linux/cpu.h>
#include <linux/cpuidle.h>
-#include <linux/ktime.h>
-#include <linux/hrtimer.h>
#include <linux/module.h>
#include <trace/events/power.h>
@@ -58,7 +54,7 @@ int cpuidle_play_dead(void)
return -ENODEV;
/* Find lowest-power state that supports long-term idle */
- for (i = drv->state_count - 1; i >= CPUIDLE_DRIVER_STATE_START; i--)
+ for (i = drv->state_count - 1; i >= 0; i--)
if (drv->states[i].enter_dead)
return drv->states[i].enter_dead(dev, i);
@@ -81,24 +77,33 @@ void cpuidle_use_deepest_state(bool enable)
}
/**
- * cpuidle_find_deepest_state - Find the state of the greatest exit latency.
+ * cpuidle_find_state - Find an idle state given the constraints
+ *
* @drv: cpuidle driver for a given CPU.
* @dev: cpuidle device for a given CPU.
+ *
+ * Returns the index of the deepest state fulfilling the sleep time and
+ * latency constraints passed as parameters, or -1 if no state matches
+ *
*/
-static int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
- struct cpuidle_device *dev)
+int cpuidle_find_state(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+ unsigned int sleep_time, unsigned int latency_req)
{
- unsigned int latency_req = 0;
- int i, ret = CPUIDLE_DRIVER_STATE_START - 1;
+ int i, ret = -1;
- for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) {
+ for (i = 0; i < drv->state_count; i++) {
struct cpuidle_state *s = &drv->states[i];
struct cpuidle_state_usage *su = &dev->states_usage[i];
- if (s->disabled || su->disable || s->exit_latency <= latency_req)
+ if (s->disabled || su->disable)
+ continue;
+
+ if (s->target_residency > sleep_time)
+ continue;
+
+ if (s->exit_latency > latency_req)
continue;
- latency_req = s->exit_latency;
ret = i;
}
return ret;
@@ -116,21 +121,54 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
int entered_state;
struct cpuidle_state *target_state = &drv->states[index];
- ktime_t time_start, time_end;
s64 diff;
trace_cpu_idle_rcuidle(index, dev->cpu);
- time_start = ktime_get();
+ /*
+ * Store the idle start time for this cpu. This information
+ * will be used by cpuidle to measure how long the cpu has
+ * been idle and by the scheduler to avoid waking it up too
+ * early
+ */
+ target_state->idle_stamp = ktime_to_us(ktime_get());
+
+ /*
+ * Enter the low level idle routine. This call will block
+ * until an interrupt occurs, marking the end of the idle
+ * period
+ */
entered_state = target_state->enter(dev, drv, index);
- time_end = ktime_get();
+ /*
+ * Measure the duration of the idle period as soon as
+ * possible. It MUST be done before re-enabling the interrupts
+ * in order to avoid including the interrupt handling duration
+ * in the idle time measurement
+ */
+ diff = ktime_to_us(ktime_sub_us(ktime_get(), target_state->idle_stamp));
+
+ /*
+ * Reset the idle time stamp, otherwise the scheduler would
+ * consider the cpu to be idle while it is actually waking up
+ */
+ target_state->idle_stamp = 0;
+
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
+ /*
+ * cpuidle_enter_coupled() uses the cpuidle_enter function.
+ * Don't re-enable the interrupts here; let the enter_coupled
+ * function wait for all cpus to sync and re-enable the
+ * interrupts from there
+ */
if (!cpuidle_state_is_coupled(dev, drv, entered_state))
local_irq_enable();
- diff = ktime_to_us(ktime_sub(time_end, time_start));
+ /*
+ * The idle duration will be cast to an integer; prevent an
+ * overflow by capping it at INT_MAX
+ */
if (diff > INT_MAX)
diff = INT_MAX;
@@ -143,6 +181,16 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
*/
dev->states_usage[entered_state].time += dev->last_residency;
dev->states_usage[entered_state].usage++;
+
+ if (diff < drv->states[entered_state].target_residency) {
+ atomic_inc(&dev->over_estimate);
+ } else if (entered_state < (drv->state_count - 1) &&
+ diff >=
+ drv->states[entered_state + 1].target_residency) {
+ atomic_inc(&dev->under_estimate);
+ } else {
+ atomic_inc(&dev->right_estimate);
+ }
} else {
dev->last_residency = 0;
}
@@ -155,10 +203,13 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
*
* @drv: the cpuidle driver
* @dev: the cpuidle device
+ * @latency_req: the latency constraint when choosing an idle state
+ * @next_timer_event: the duration until the timer expires
*
* Returns the index of the idle state.
*/
-int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+ int latency_req, s64 next_timer_event)
{
if (off || !initialized)
return -ENODEV;
@@ -167,9 +218,10 @@ int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
return -EBUSY;
if (unlikely(use_deepest_state))
- return cpuidle_find_deepest_state(drv, dev);
+ return cpuidle_find_state(drv, dev, UINT_MAX, UINT_MAX);
- return cpuidle_curr_governor->select(drv, dev);
+ return cpuidle_curr_governor->select(drv, dev, latency_req,
+ next_timer_event);
}
/**
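For reference, a minimal user-space sketch (not part of this patch) of the over/right/under estimate accounting added to cpuidle_enter_state() above. The state table, thresholds and helper names are made up for illustration; only the classification logic mirrors the kernel code.

	#include <stdio.h>

	/* hypothetical residency table, in microseconds */
	struct state { const char *name; unsigned int target_residency; };

	static const struct state states[] = {
		{ "WFI", 1 }, { "C1", 100 }, { "C2", 1000 },
	};
	#define NSTATES (int)(sizeof(states) / sizeof(states[0]))

	/*
	 * Mirrors the classification in cpuidle_enter_state():
	 *  -1 over estimate  (slept less than the chosen state needs)
	 *   0 right estimate (the chosen state was the best fit)
	 *  +1 under estimate (the next deeper state would also have fit)
	 */
	static int classify(int entered, long diff_us)
	{
		if (diff_us < states[entered].target_residency)
			return -1;
		if (entered < NSTATES - 1 &&
		    diff_us >= states[entered + 1].target_residency)
			return 1;
		return 0;
	}

	int main(void)
	{
		printf("%d %d %d\n",
		       classify(1, 50), classify(1, 300), classify(1, 2000));
		return 0;	/* prints: -1 0 1 */
	}
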
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index 2697e87d5b34..16d67aa1c801 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -178,36 +178,6 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv)
}
}
-#ifdef CONFIG_ARCH_HAS_CPU_RELAX
-static int poll_idle(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int index)
-{
- local_irq_enable();
- if (!current_set_polling_and_test()) {
- while (!need_resched())
- cpu_relax();
- }
- current_clr_polling();
-
- return index;
-}
-
-static void poll_idle_init(struct cpuidle_driver *drv)
-{
- struct cpuidle_state *state = &drv->states[0];
-
- snprintf(state->name, CPUIDLE_NAME_LEN, "POLL");
- snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE");
- state->exit_latency = 0;
- state->target_residency = 0;
- state->power_usage = -1;
- state->enter = poll_idle;
- state->disabled = false;
-}
-#else
-static void poll_idle_init(struct cpuidle_driver *drv) {}
-#endif /* !CONFIG_ARCH_HAS_CPU_RELAX */
-
/**
* __cpuidle_register_driver: register the driver
* @drv: a valid pointer to a struct cpuidle_driver
@@ -241,8 +211,6 @@ static int __cpuidle_register_driver(struct cpuidle_driver *drv)
on_each_cpu_mask(drv->cpumask, cpuidle_setup_broadcast_timer,
(void *)CLOCK_EVT_NOTIFY_BROADCAST_ON, 1);
- poll_idle_init(drv);
-
return 0;
}
diff --git a/drivers/cpuidle/governors/Makefile b/drivers/cpuidle/governors/Makefile
index 1b512722689f..8804ee2f550c 100644
--- a/drivers/cpuidle/governors/Makefile
+++ b/drivers/cpuidle/governors/Makefile
@@ -4,3 +4,4 @@
obj-$(CONFIG_CPU_IDLE_GOV_LADDER) += ladder.o
obj-$(CONFIG_CPU_IDLE_GOV_MENU) += menu.o
+obj-$(CONFIG_CPU_IDLE_GOV_IRQ) += irq.o
diff --git a/drivers/cpuidle/governors/irq.c b/drivers/cpuidle/governors/irq.c
new file mode 100644
index 000000000000..de99f4545255
--- /dev/null
+++ b/drivers/cpuidle/governors/irq.c
@@ -0,0 +1,33 @@
+/*
+ * irq.c - the irq governor
+ *
+ * Copyright (C) 2014 Daniel Lezcano <daniel.lezcano@linaro.org>
+ *
+*/
+#include <linux/ktime.h>
+#include <linux/irq.h>
+#include <linux/cpuidle.h>
+
+static int select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+ int latency_req, s64 next_timer_event)
+{
+ s64 next_irq_event = irqt_get_next_prediction(dev->cpu);
+ s64 next_event = next_irq_event ?
+ min(next_irq_event, next_timer_event) : next_timer_event;
+
+ return cpuidle_find_state(drv, dev, next_event, latency_req);
+}
+
+static struct cpuidle_governor irq_governor = {
+ .name = "irq",
+ .rating = 30,
+ .select = select,
+ .owner = THIS_MODULE,
+};
+
+static int __init irq_init(void)
+{
+ return cpuidle_register_governor(&irq_governor);
+}
+
+postcore_initcall(irq_init);
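A stand-alone sketch (illustration only, not kernel code) of the decision the irq governor makes above: a zero IRQ prediction means "no data", so only the timer deadline bounds the expected sleep; otherwise the earlier of the two events wins.

	#include <stdio.h>

	static long long next_event(long long next_irq_us, long long next_timer_us)
	{
		if (!next_irq_us)			/* no prediction available */
			return next_timer_us;
		return next_irq_us < next_timer_us ? next_irq_us : next_timer_us;
	}

	int main(void)
	{
		printf("%lld\n", next_event(0, 5000));		/* 5000: timer only */
		printf("%lld\n", next_event(800, 5000));	/* 800: IRQ expected first */
		return 0;
	}
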
diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
index 401c0106ed34..5a76dff631b9 100644
--- a/drivers/cpuidle/governors/ladder.c
+++ b/drivers/cpuidle/governors/ladder.c
@@ -64,22 +64,21 @@ static inline void ladder_do_selection(struct ladder_device *ldev,
* @dev: the CPU
*/
static int ladder_select_state(struct cpuidle_driver *drv,
- struct cpuidle_device *dev)
+ struct cpuidle_device *dev,
+ int latency_req, s64 next_timer_event)
{
struct ladder_device *ldev = this_cpu_ptr(&ladder_devices);
struct ladder_device_state *last_state;
int last_residency, last_idx = ldev->last_state_idx;
- int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
-
- /* Special case when user has set very strict latency requirement */
- if (unlikely(latency_req == 0)) {
- ladder_do_selection(ldev, last_idx, 0);
- return 0;
- }
last_state = &ldev->states[last_idx];
- last_residency = cpuidle_get_last_residency(dev) - drv->states[last_idx].exit_latency;
+ if (!(drv->states[last_idx].flags & CPUIDLE_FLAG_TIME_INVALID))
+ last_residency = cpuidle_get_last_residency(dev) -
+ drv->states[last_idx].exit_latency;
+ else
+ last_residency = last_state->threshold.promotion_time + 1;
/* consider promotion */
if (last_idx < drv->state_count - 1 &&
@@ -96,13 +95,13 @@ static int ladder_select_state(struct cpuidle_driver *drv,
}
/* consider demotion */
- if (last_idx > CPUIDLE_DRIVER_STATE_START &&
+ if (last_idx > 0 &&
(drv->states[last_idx].disabled ||
dev->states_usage[last_idx].disable ||
drv->states[last_idx].exit_latency > latency_req)) {
int i;
- for (i = last_idx - 1; i > CPUIDLE_DRIVER_STATE_START; i--) {
+ for (i = last_idx - 1; i > 0; i--) {
if (drv->states[i].exit_latency <= latency_req)
break;
}
@@ -110,7 +109,7 @@ static int ladder_select_state(struct cpuidle_driver *drv,
return i;
}
- if (last_idx > CPUIDLE_DRIVER_STATE_START &&
+ if (last_idx > 0 &&
last_residency < last_state->threshold.demotion_time) {
last_state->stats.demotion_count++;
last_state->stats.promotion_count = 0;
@@ -137,9 +136,9 @@ static int ladder_enable_device(struct cpuidle_driver *drv,
struct ladder_device_state *lstate;
struct cpuidle_state *state;
- ldev->last_state_idx = CPUIDLE_DRIVER_STATE_START;
+ ldev->last_state_idx = 0;
- for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) {
+ for (i = 0; i < drv->state_count; i++) {
state = &drv->states[i];
lstate = &ldev->states[i];
@@ -151,7 +150,7 @@ static int ladder_enable_device(struct cpuidle_driver *drv,
if (i < drv->state_count - 1)
lstate->threshold.promotion_time = state->exit_latency;
- if (i > CPUIDLE_DRIVER_STATE_START)
+ if (i > 0)
lstate->threshold.demotion_time = state->exit_latency;
}
@@ -166,8 +165,7 @@ static int ladder_enable_device(struct cpuidle_driver *drv,
static void ladder_reflect(struct cpuidle_device *dev, int index)
{
struct ladder_device *ldev = this_cpu_ptr(&ladder_devices);
- if (index > 0)
- ldev->last_state_idx = index;
+ ldev->last_state_idx = index;
}
static struct cpuidle_governor ladder_governor = {
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 40580794e23d..ac2be02de5c6 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -13,10 +13,6 @@
#include <linux/kernel.h>
#include <linux/cpuidle.h>
#include <linux/pm_qos.h>
-#include <linux/time.h>
-#include <linux/ktime.h>
-#include <linux/hrtimer.h>
-#include <linux/tick.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include <linux/module.h>
@@ -188,7 +184,6 @@ static inline int performance_multiplier(unsigned long nr_iowaiters, unsigned lo
static DEFINE_PER_CPU(struct menu_device, menu_devices);
-static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
/* This implements DIV_ROUND_CLOSEST but avoids 64 bit division */
static u64 div_round64(u64 dividend, u32 divisor)
@@ -196,13 +191,87 @@ static u64 div_round64(u64 dividend, u32 divisor)
return div_u64(dividend + (divisor / 2), divisor);
}
+/**
+ * menu_update - attempts to guess what happened after entry
+ * @drv: cpuidle driver containing state data
+ * @dev: the CPU
+ */
+static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+{
+ struct menu_device *data = this_cpu_ptr(&menu_devices);
+ int last_idx = data->last_state_idx;
+ struct cpuidle_state *target = &drv->states[last_idx];
+ unsigned int measured_us;
+ unsigned int new_factor;
+
+ /*
+ * Try to figure out how much time passed between entry to low
+ * power state and occurrence of the wakeup event.
+ *
+ * If the entered idle state didn't support residency measurements,
+ * we are basically lost in the dark how much time passed.
+ * As a compromise, assume we slept for the whole expected time.
+ *
+ * Any measured amount of time will include the exit latency.
+ * Since we are interested in when the wakeup begun, not when it
+ * was completed, we must subtract the exit latency. However, if
+ * the measured amount of time is less than the exit latency,
+ * assume the state was never reached and the exit latency is 0.
+ */
+ if (unlikely(target->flags & CPUIDLE_FLAG_TIME_INVALID)) {
+ /* Use timer value as is */
+ measured_us = data->next_timer_us;
+
+ } else {
+ /* Use measured value */
+ measured_us = cpuidle_get_last_residency(dev);
+
+ /* Deduct exit latency */
+ if (measured_us > target->exit_latency)
+ measured_us -= target->exit_latency;
+
+ /* Make sure our coefficients do not exceed unity */
+ if (measured_us > data->next_timer_us)
+ measured_us = data->next_timer_us;
+ }
+
+ /* Update our correction ratio */
+ new_factor = data->correction_factor[data->bucket];
+ new_factor -= new_factor / DECAY;
+
+ if (data->next_timer_us > 0 && measured_us < MAX_INTERESTING)
+ new_factor += RESOLUTION * measured_us / data->next_timer_us;
+ else
+ /*
+ * we were idle so long that we count it as a perfect
+ * prediction
+ */
+ new_factor += RESOLUTION;
+
+ /*
+ * We don't want 0 as factor; we always want at least
+ * a tiny bit of estimated time. Fortunately, due to rounding,
+ * new_factor will stay nonzero regardless of measured_us values
+ * and the compiler can eliminate this test as long as DECAY > 1.
+ */
+ if (DECAY == 1 && unlikely(new_factor == 0))
+ new_factor = 1;
+
+ data->correction_factor[data->bucket] = new_factor;
+
+ /* update the repeating-pattern data */
+ data->intervals[data->interval_ptr++] = measured_us;
+ if (data->interval_ptr >= INTERVALS)
+ data->interval_ptr = 0;
+}
+
/*
* Try detecting repeating patterns by keeping track of the last 8
* intervals, and checking if the standard deviation of that set
* of points is below a threshold. If it is... then use the
* average of these 8 points as the estimated value.
*/
-static void get_typical_interval(struct menu_device *data)
+static unsigned int get_typical_interval(struct menu_device *data)
{
int i, divisor;
unsigned int max, thresh;
@@ -259,11 +328,8 @@ again:
if (likely(stddev <= ULONG_MAX)) {
stddev = int_sqrt(stddev);
if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3))
- || stddev <= 20) {
- if (data->next_timer_us > avg)
- data->predicted_us = avg;
- return;
- }
+ || stddev <= 20)
+ return avg;
}
/*
@@ -276,7 +342,7 @@ again:
* with sporadic activity with a bunch of short pauses.
*/
if ((divisor * 4) <= INTERVALS * 3)
- return;
+ return 0;
thresh = max - 1;
goto again;
@@ -287,12 +353,12 @@ again:
* @drv: cpuidle driver containing state data
* @dev: the CPU
*/
-static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+ int latency_req, s64 next_timer_event)
{
struct menu_device *data = this_cpu_ptr(&menu_devices);
- int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
- int i;
unsigned int interactivity_req;
+ unsigned int interactivity_override_us;
unsigned long nr_iowaiters, cpu_load;
if (data->needs_update) {
@@ -300,14 +366,10 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
data->needs_update = 0;
}
- data->last_state_idx = CPUIDLE_DRIVER_STATE_START - 1;
-
- /* Special case when user has set very strict latency requirement */
- if (unlikely(latency_req == 0))
- return 0;
+ data->last_state_idx = 0;
/* determine the expected residency time, round up */
- data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length());
+ data->next_timer_us = next_timer_event;
get_iowait_load(&nr_iowaiters, &cpu_load);
data->bucket = which_bucket(data->next_timer_us, nr_iowaiters);
@@ -321,7 +383,10 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
data->correction_factor[data->bucket],
RESOLUTION * DECAY);
- get_typical_interval(data);
+ interactivity_override_us = get_typical_interval(data);
+ if (interactivity_override_us &&
+ data->next_timer_us > interactivity_override_us)
+ data->predicted_us = interactivity_override_us;
/*
* Performance multiplier defines a minimum predicted idle
@@ -333,31 +398,11 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
latency_req = interactivity_req;
/*
- * We want to default to C1 (hlt), not to busy polling
- * unless the timer is happening really really soon.
- */
- if (data->next_timer_us > 5 &&
- !drv->states[CPUIDLE_DRIVER_STATE_START].disabled &&
- dev->states_usage[CPUIDLE_DRIVER_STATE_START].disable == 0)
- data->last_state_idx = CPUIDLE_DRIVER_STATE_START;
-
- /*
* Find the idle state with the lowest power while satisfying
* our constraints.
*/
- for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) {
- struct cpuidle_state *s = &drv->states[i];
- struct cpuidle_state_usage *su = &dev->states_usage[i];
-
- if (s->disabled || su->disable)
- continue;
- if (s->target_residency > data->predicted_us)
- continue;
- if (s->exit_latency > latency_req)
- continue;
-
- data->last_state_idx = i;
- }
+ data->last_state_idx = cpuidle_find_state(drv, dev, data->predicted_us,
+ latency_req);
return data->last_state_idx;
}
@@ -374,77 +419,7 @@ static void menu_reflect(struct cpuidle_device *dev, int index)
{
struct menu_device *data = this_cpu_ptr(&menu_devices);
data->last_state_idx = index;
- if (index >= 0)
- data->needs_update = 1;
-}
-
-/**
- * menu_update - attempts to guess what happened after entry
- * @drv: cpuidle driver containing state data
- * @dev: the CPU
- */
-static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
-{
- struct menu_device *data = this_cpu_ptr(&menu_devices);
- int last_idx = data->last_state_idx;
- struct cpuidle_state *target = &drv->states[last_idx];
- unsigned int measured_us;
- unsigned int new_factor;
-
- /*
- * Try to figure out how much time passed between entry to low
- * power state and occurrence of the wakeup event.
- *
- * If the entered idle state didn't support residency measurements,
- * we use them anyway if they are short, and if long,
- * truncate to the whole expected time.
- *
- * Any measured amount of time will include the exit latency.
- * Since we are interested in when the wakeup begun, not when it
- * was completed, we must subtract the exit latency. However, if
- * the measured amount of time is less than the exit latency,
- * assume the state was never reached and the exit latency is 0.
- */
-
- /* measured value */
- measured_us = cpuidle_get_last_residency(dev);
-
- /* Deduct exit latency */
- if (measured_us > target->exit_latency)
- measured_us -= target->exit_latency;
-
- /* Make sure our coefficients do not exceed unity */
- if (measured_us > data->next_timer_us)
- measured_us = data->next_timer_us;
-
- /* Update our correction ratio */
- new_factor = data->correction_factor[data->bucket];
- new_factor -= new_factor / DECAY;
-
- if (data->next_timer_us > 0 && measured_us < MAX_INTERESTING)
- new_factor += RESOLUTION * measured_us / data->next_timer_us;
- else
- /*
- * we were idle so long that we count it as a perfect
- * prediction
- */
- new_factor += RESOLUTION;
-
- /*
- * We don't want 0 as factor; we always want at least
- * a tiny bit of estimated time. Fortunately, due to rounding,
- * new_factor will stay nonzero regardless of measured_us values
- * and the compiler can eliminate this test as long as DECAY > 1.
- */
- if (DECAY == 1 && unlikely(new_factor == 0))
- new_factor = 1;
-
- data->correction_factor[data->bucket] = new_factor;
-
- /* update the repeating-pattern data */
- data->intervals[data->interval_ptr++] = measured_us;
- if (data->interval_ptr >= INTERVALS)
- data->interval_ptr = 0;
+ data->needs_update = 1;
}
/**
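A stand-alone model of the correction-factor update performed in menu_update() above. DECAY, RESOLUTION and MAX_INTERESTING are assumed to keep their usual menu.c values (8, 1024 and 50000), which are not visible in this hunk.

	#include <stdio.h>

	#define DECAY		8
	#define RESOLUTION	1024
	#define MAX_INTERESTING	50000

	static unsigned int update_factor(unsigned int factor,
					  unsigned int measured_us,
					  unsigned int next_timer_us)
	{
		factor -= factor / DECAY;	/* age the previous estimate */
		if (next_timer_us > 0 && measured_us < MAX_INTERESTING)
			factor += RESOLUTION * measured_us / next_timer_us;
		else
			factor += RESOLUTION;	/* slept it all: perfect prediction */
		return factor;
	}

	int main(void)
	{
		unsigned int factor = RESOLUTION;
		int i;

		/*
		 * A CPU that always wakes up after half of the programmed timer
		 * converges towards DECAY * RESOLUTION / 2 = 4096, i.e. the
		 * governor learns to predict half of next_timer_us.
		 */
		for (i = 0; i < 32; i++)
			factor = update_factor(factor, 500, 1000);
		printf("%u\n", factor);		/* close to 4096 */
		return 0;
	}
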
diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
index 97c5903b4606..f446bd0fd9bd 100644
--- a/drivers/cpuidle/sysfs.c
+++ b/drivers/cpuidle/sysfs.c
@@ -439,6 +439,154 @@ static void cpuidle_remove_state_sysfs(struct cpuidle_device *device)
cpuidle_free_state_kobj(device, i);
}
+#define kobj_to_stats_kobj(k) container_of(k, struct cpuidle_stats_kobj, kobj)
+#define attr_to_stats_attr(a) container_of(a, struct cpuidle_stats_attr, attr)
+
+#define define_show_stats_function(_name) \
+ static ssize_t show_stats_##_name(struct cpuidle_device *dev, \
+ char *buf) \
+ { \
+ return sprintf(buf, "%d\n", atomic_read(&dev->_name)); \
+ }
+
+#define define_store_stats_function(_name) \
+ static ssize_t store_stats_##_name(struct cpuidle_device *dev, \
+ const char *buf, size_t size) \
+ { \
+ unsigned long long value; \
+ int err; \
+ if (!capable(CAP_SYS_ADMIN)) \
+ return -EPERM; \
+ err = kstrtoull(buf, 0, &value); \
+ if (err) \
+ return err; \
+ \
+ atomic_set(&dev->_name, value); \
+ return size; \
+ }
+
+#define define_one_stats_rw(_name, show, store) \
+ static struct cpuidle_stats_attr attr_stats_##_name = \
+ __ATTR(_name, 0644, show, store)
+
+struct cpuidle_stats_kobj {
+ struct cpuidle_device *dev;
+ struct completion kobj_unregister;
+ struct kobject kobj;
+};
+
+struct cpuidle_stats_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct cpuidle_device *, char *);
+ ssize_t (*store)(struct cpuidle_device *, const char *, size_t);
+};
+
+static void cpuidle_stats_sysfs_release(struct kobject *kobj)
+{
+ struct cpuidle_stats_kobj *stats_kobj = kobj_to_stats_kobj(kobj);
+ complete(&stats_kobj->kobj_unregister);
+}
+
+static ssize_t cpuidle_stats_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ int ret = -EIO;
+ struct cpuidle_stats_kobj *stats_kobj = kobj_to_stats_kobj(kobj);
+ struct cpuidle_stats_attr *dattr = attr_to_stats_attr(attr);
+
+ if (dattr->show)
+ ret = dattr->show(stats_kobj->dev, buf);
+
+ return ret;
+}
+
+static ssize_t cpuidle_stats_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf, size_t size)
+{
+ int ret = -EIO;
+ struct cpuidle_stats_kobj *stats_kobj = kobj_to_stats_kobj(kobj);
+ struct cpuidle_stats_attr *dattr = attr_to_stats_attr(attr);
+
+ if (dattr->store)
+ ret = dattr->store(stats_kobj->dev, buf, size);
+
+ return ret;
+}
+
+define_show_stats_function(right_estimate);
+define_store_stats_function(right_estimate);
+
+define_show_stats_function(under_estimate);
+define_store_stats_function(under_estimate);
+
+define_show_stats_function(over_estimate);
+define_store_stats_function(over_estimate);
+
+define_one_stats_rw(right_estimate,
+ show_stats_right_estimate,
+ store_stats_right_estimate);
+
+define_one_stats_rw(under_estimate,
+ show_stats_under_estimate,
+ store_stats_under_estimate);
+
+define_one_stats_rw(over_estimate,
+ show_stats_over_estimate,
+ store_stats_over_estimate);
+
+static const struct sysfs_ops cpuidle_stats_sysfs_ops = {
+ .show = cpuidle_stats_show,
+ .store = cpuidle_stats_store,
+};
+
+static struct attribute *cpuidle_stats_default_attrs[] = {
+ &attr_stats_right_estimate.attr,
+ &attr_stats_under_estimate.attr,
+ &attr_stats_over_estimate.attr,
+ NULL
+};
+
+static struct kobj_type ktype_stats_cpuidle = {
+ .sysfs_ops = &cpuidle_stats_sysfs_ops,
+ .default_attrs = cpuidle_stats_default_attrs,
+ .release = cpuidle_stats_sysfs_release,
+};
+
+static int cpuidle_add_stats_sysfs(struct cpuidle_device *dev)
+{
+ struct cpuidle_stats_kobj *kstats;
+ struct cpuidle_device_kobj *kdev = dev->kobj_dev;
+ int ret;
+
+ kstats = kzalloc(sizeof(*kstats), GFP_KERNEL);
+ if (!kstats)
+ return -ENOMEM;
+
+ kstats->dev = dev;
+ init_completion(&kstats->kobj_unregister);
+
+ ret = kobject_init_and_add(&kstats->kobj, &ktype_stats_cpuidle,
+ &kdev->kobj, "stats");
+ if (ret) {
+ kfree(kstats);
+ return ret;
+ }
+
+ kobject_uevent(&kstats->kobj, KOBJ_ADD);
+ dev->kobj_stats = kstats;
+
+ return ret;
+}
+
+static void cpuidle_remove_stats_sysfs(struct cpuidle_device *dev)
+{
+ struct cpuidle_stats_kobj *kstats = dev->kobj_stats;
+ kobject_put(&kstats->kobj);
+ wait_for_completion(&kstats->kobj_unregister);
+ kfree(kstats);
+}
+
#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
#define kobj_to_driver_kobj(k) container_of(k, struct cpuidle_driver_kobj, kobj)
#define attr_to_driver_attr(a) container_of(a, struct cpuidle_driver_attr, attr)
@@ -589,6 +737,13 @@ int cpuidle_add_device_sysfs(struct cpuidle_device *device)
ret = cpuidle_add_driver_sysfs(device);
if (ret)
cpuidle_remove_state_sysfs(device);
+
+ ret = cpuidle_add_stats_sysfs(device);
+ if (ret) {
+ cpuidle_remove_driver_sysfs(device);
+ cpuidle_remove_state_sysfs(device);
+ }
+
return ret;
}
@@ -598,6 +753,7 @@ int cpuidle_add_device_sysfs(struct cpuidle_device *device)
*/
void cpuidle_remove_device_sysfs(struct cpuidle_device *device)
{
+ cpuidle_remove_stats_sysfs(device);
cpuidle_remove_driver_sysfs(device);
cpuidle_remove_state_sysfs(device);
}
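Assuming the usual sysfs layout for cpuidle devices, the stats kobject added above should show up as /sys/devices/system/cpu/cpuN/cpuidle/stats/{right,under,over}_estimate. A hypothetical reader; the path is inferred from the kobject hierarchy, not stated in the patch.

	#include <stdio.h>

	int main(void)
	{
		/* path assumed from the kobject hierarchy, not stated in the patch */
		FILE *f = fopen("/sys/devices/system/cpu/cpu0/cpuidle/stats/right_estimate", "r");
		char buf[32];

		if (!f)
			return 1;
		if (fgets(buf, sizeof(buf), f))
			printf("right_estimate: %s", buf);
		fclose(f);
		return 0;
	}
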
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 9cceacb92f9d..f735355e0df5 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -845,8 +845,6 @@ static int __init intel_idle_cpuidle_driver_init(void)
intel_idle_state_table_update();
- drv->state_count = 1;
-
for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
int num_substates, mwait_hint, mwait_cstate;
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index ab70f3bc44ad..e1f4914409b3 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -44,6 +44,7 @@ struct cpuidle_state {
int power_usage; /* in mW */
unsigned int target_residency; /* in US */
bool disabled; /* disabled on all CPUs */
+ u64 idle_stamp;
int (*enter) (struct cpuidle_device *dev,
struct cpuidle_driver *drv,
@@ -53,6 +54,7 @@ struct cpuidle_state {
};
/* Idle State Flags */
+#define CPUIDLE_FLAG_TIME_INVALID (0x01) /* residency time is not measurable */
#define CPUIDLE_FLAG_COUPLED (0x02) /* state applies to multiple cpus */
#define CPUIDLE_FLAG_TIMER_STOP (0x04) /* timer is stopped on this state */
@@ -61,6 +63,7 @@ struct cpuidle_state {
struct cpuidle_device_kobj;
struct cpuidle_state_kobj;
struct cpuidle_driver_kobj;
+struct cpuidle_stats_kobj;
struct cpuidle_device {
unsigned int registered:1;
@@ -73,8 +76,13 @@ struct cpuidle_device {
struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX];
struct cpuidle_driver_kobj *kobj_driver;
struct cpuidle_device_kobj *kobj_dev;
+ struct cpuidle_stats_kobj *kobj_stats;
struct list_head device_list;
+ atomic_t right_estimate;
+ atomic_t under_estimate;
+ atomic_t over_estimate;
+
#ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
int safe_state_index;
cpumask_t coupled_cpus;
@@ -88,6 +96,8 @@ DECLARE_PER_CPU(struct cpuidle_device, cpuidle_dev);
/**
* cpuidle_get_last_residency - retrieves the last state's residency time
* @dev: the target CPU
+ *
+ * NOTE: this value is invalid if CPUIDLE_FLAG_TIME_INVALID is set
*/
static inline int cpuidle_get_last_residency(struct cpuidle_device *dev)
{
@@ -118,8 +128,14 @@ struct cpuidle_driver {
#ifdef CONFIG_CPU_IDLE
extern void disable_cpuidle(void);
+extern int cpuidle_find_state(struct cpuidle_driver *drv,
+ struct cpuidle_device *dev,
+ unsigned int sleep_time,
+ unsigned int latency_req);
+
extern int cpuidle_select(struct cpuidle_driver *drv,
- struct cpuidle_device *dev);
+ struct cpuidle_device *dev,
+ int latency_req, s64 next_timer_event);
extern int cpuidle_enter(struct cpuidle_driver *drv,
struct cpuidle_device *dev, int index);
extern void cpuidle_reflect(struct cpuidle_device *dev, int index);
@@ -146,8 +162,14 @@ extern void cpuidle_use_deepest_state(bool enable);
extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev);
#else
static inline void disable_cpuidle(void) { }
+static inline int cpuidle_find_state(struct cpuidle_driver *drv,
+ struct cpuidle_device *dev,
+ unsigned int sleep_time,
+ unsigned int latency_req)
+{return -ENODEV; }
static inline int cpuidle_select(struct cpuidle_driver *drv,
- struct cpuidle_device *dev)
+ struct cpuidle_device *dev,
+ int latency_req, s64 next_timer_event)
{return -ENODEV; }
static inline int cpuidle_enter(struct cpuidle_driver *drv,
struct cpuidle_device *dev, int index)
@@ -202,7 +224,8 @@ struct cpuidle_governor {
struct cpuidle_device *dev);
int (*select) (struct cpuidle_driver *drv,
- struct cpuidle_device *dev);
+ struct cpuidle_device *dev,
+ int latency_req, s64 next_timer_event);
void (*reflect) (struct cpuidle_device *dev, int index);
struct module *owner;
@@ -215,10 +238,4 @@ static inline int cpuidle_register_governor(struct cpuidle_governor *gov)
{return 0;}
#endif
-#ifdef CONFIG_ARCH_HAS_CPU_RELAX
-#define CPUIDLE_DRIVER_STATE_START 1
-#else
-#define CPUIDLE_DRIVER_STATE_START 0
-#endif
-
#endif /* _LINUX_CPUIDLE_H */
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index d9b05b5bf8c7..bb7dddc33918 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -59,6 +59,7 @@
* resume time.
*/
#define IRQF_DISABLED 0x00000020
+#define IRQF_TIMINGS 0x00000040
#define IRQF_SHARED 0x00000080
#define IRQF_PROBE_SHARED 0x00000100
#define __IRQF_TIMER 0x00000200
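A sketch of how any driver would opt an interrupt into the new timing statistics, in the same way the libata changes above do; the handler and device names are placeholders, not part of the patch.

	#include <linux/interrupt.h>

	static irqreturn_t my_handler(int irq, void *dev_id)
	{
		/* normal interrupt handling goes here */
		return IRQ_HANDLED;
	}

	static int my_setup_irq(int my_irq, void *my_dev)
	{
		/*
		 * IRQF_TIMINGS asks the core to keep per-IRQ interval
		 * statistics that the cpuidle "irq" governor can query later.
		 */
		return request_irq(my_irq, my_handler, IRQF_SHARED | IRQF_TIMINGS,
				   "my-device", my_dev);
	}
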
diff --git a/include/linux/irq.h b/include/linux/irq.h
index d09ec7a1243e..8150c1cfc7f4 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -862,4 +862,10 @@ static inline u32 irq_reg_readl(struct irq_chip_generic *gc,
return readl(gc->reg_base + reg_offset);
}
+#ifdef CONFIG_IRQ_TIMINGS
+extern s64 irqt_get_next_prediction(int cpu);
+#else
+static inline s64 irqt_get_next_prediction(int cpu) { return 0; }
+#endif
+
#endif /* _LINUX_IRQ_H */
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index faf433af425e..3d723a946071 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -14,11 +14,13 @@ struct module;
struct irq_desc;
struct irq_domain;
struct pt_regs;
+struct irqt_stat;
/**
* struct irq_desc - interrupt descriptor
* @irq_data: per irq and chip data passed down to chip functions
* @kstat_irqs: irq stats per cpu
+ * @irq_timings: irq occurrence timing statistics
* @handle_irq: highlevel irq-events handler
* @preflow_handler: handler called before the flow handler (currently used by sparc)
* @action: the irq action chain
@@ -49,6 +51,9 @@ struct pt_regs;
struct irq_desc {
struct irq_data irq_data;
unsigned int __percpu *kstat_irqs;
+#ifdef CONFIG_IRQ_TIMINGS
+ struct irqt_stat *irq_timings;
+#endif
irq_flow_handler_t handle_irq;
#ifdef CONFIG_IRQ_PREFLOW_FASTEOI
irq_preflow_handler_t preflow_handler;
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
index 3608bebd3d9c..53910cf3917d 100644
--- a/include/trace/events/irq.h
+++ b/include/trace/events/irq.h
@@ -84,6 +84,51 @@ TRACE_EVENT(irq_handler_exit,
__entry->irq, __entry->ret ? "handled" : "unhandled")
);
+#ifdef CONFIG_IRQ_TIMINGS
+/**
+ * irq_timings - provide updated IRQ timing statistics
+ * @irq: irq number
+ * @interval: time interval since last irq
+ * @variance: time interval variance
+ * @mean: mean interval
+ * @good: current count of predictable irqs
+ * @bad: current count of unpredictable irqs
+ *
+ * Note: the variance is listed before the mean value to satisfy
+ * alignment constraints on 64-bit values.
+ */
+TRACE_EVENT(irq_timings,
+
+ TP_PROTO(int irq, u32 interval, u64 variance, u32 mean,
+ u32 good, u32 bad),
+
+ TP_ARGS(irq, interval, variance, mean, good, bad),
+
+ TP_STRUCT__entry(
+ __field( int, irq )
+ __field( u32, interval )
+ __field( u64, variance )
+ __field( u32, mean )
+ __field( u32, good )
+ __field( u32, bad )
+ ),
+
+ TP_fast_assign(
+ __entry->irq = irq;
+ __entry->interval = interval;
+ __entry->variance = variance;
+ __entry->mean = mean;
+ __entry->good = good;
+ __entry->bad = bad;
+ ),
+
+ TP_printk("irq=%d intv=%u mean=%u variance=%llu (%u vs %u)",
+ __entry->irq, __entry->interval, __entry->mean,
+ (unsigned long long)__entry->variance,
+ __entry->good, __entry->bad)
+);
+#endif
+
DECLARE_EVENT_CLASS(softirq,
TP_PROTO(unsigned int vec_nr),
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 9a76e3beda54..3a134b685552 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -100,4 +100,9 @@ config SPARSE_IRQ
If you don't know what to do here, say N.
+# Support for IRQ timing stats and prediction, mainly for cpuidle usage
+config IRQ_TIMINGS
+ bool
+ default y
+
endmenu
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index d12123526e2b..fad4b6d2768a 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -7,3 +7,4 @@ obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o
obj-$(CONFIG_PM_SLEEP) += pm.o
obj-$(CONFIG_GENERIC_MSI_IRQ) += msi.o
+obj-$(CONFIG_IRQ_TIMINGS) += timings.o
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index df553b0af936..a0e7522b2b4f 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -110,6 +110,23 @@ static inline void unregister_handler_proc(unsigned int irq,
struct irqaction *action) { }
#endif
+#ifdef CONFIG_IRQ_TIMINGS
+extern void __init irqt_init(void);
+extern void irqt_process(unsigned int irq, struct irqt_stat *s);
+static inline void irqt_event(int irq, struct irq_desc *desc)
+{
+ if (desc->irq_timings)
+ irqt_process(irq, desc->irq_timings);
+}
+extern int irqt_register(struct irq_desc *desc);
+extern void irqt_unregister(struct irq_desc *desc);
+#else
+static inline void irqt_init(void) { }
+static inline void irqt_event(int irq, struct irq_desc *desc) { }
+static inline int irqt_register(struct irq_desc *desc) { return 0; }
+static inline void irqt_unregister(struct irq_desc *desc) { }
+#endif
+
extern int irq_select_affinity_usr(unsigned int irq, struct cpumask *mask);
extern void irq_set_thread_affinity(struct irq_desc *desc);
@@ -197,6 +214,7 @@ static inline void kstat_incr_irqs_this_cpu(unsigned int irq, struct irq_desc *d
{
__this_cpu_inc(*desc->kstat_irqs);
__this_cpu_inc(kstat.irqs_sum);
+ irqt_event(irq, desc);
}
#ifdef CONFIG_PM_SLEEP
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 99793b9b6d23..f31471ebce36 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -232,6 +232,7 @@ int __init early_irq_init(void)
int i, initcnt, node = first_online_node;
struct irq_desc *desc;
+ irqt_init();
init_irq_default_affinity();
/* Let arch update nr_irqs and return the nr of preallocated irqs */
@@ -270,6 +271,7 @@ int __init early_irq_init(void)
int count, i, node = first_online_node;
struct irq_desc *desc;
+ irqt_init();
init_irq_default_affinity();
printk(KERN_INFO "NR_IRQS:%d\n", NR_IRQS);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 80692373abd6..88b487b355ae 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1231,6 +1231,9 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
register_handler_proc(irq, new);
free_cpumask_var(mask);
+ if (new->flags & IRQF_TIMINGS)
+ irqt_register(desc);
+
return 0;
mismatch:
@@ -1328,6 +1331,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
if (!desc->action) {
irq_shutdown(desc);
irq_release_resources(desc);
+ irqt_unregister(desc);
}
#ifdef CONFIG_SMP
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 9dc9bfd8a678..4cda809b8512 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -10,8 +10,10 @@
#include <linux/gfp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
+#include <linux/uaccess.h>
#include "internals.h"
@@ -283,6 +285,62 @@ static const struct file_operations irq_spurious_proc_fops = {
.release = single_release,
};
+static int irq_timings_proc_show(struct seq_file *m, void *v)
+{
+ struct irq_desc *desc = irq_to_desc((long) m->private);
+
+ seq_printf(m, "%d\n", desc->irq_timings ? 1 : 0);
+
+ return 0;
+}
+
+static int irq_timings_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, irq_timings_proc_show, PDE_DATA(inode));
+}
+
+static ssize_t irq_timings_proc_write(struct file *file,
+ const char __user *buffer, size_t count, loff_t *pos)
+{
+ unsigned long enable;
+ int ret;
+ int irq = (int)(long)PDE_DATA(file_inode(file));
+ struct irq_desc *desc = irq_to_desc(irq);
+ char *buf;
+
+ buf = kzalloc(count + 1, GFP_KERNEL); /* keep room for a NUL terminator */
+ if (!buf)
+ return -ENOMEM;
+
+ ret = -EFAULT;
+ if (copy_from_user(buf, buffer, count))
+ goto out;
+
+ ret = kstrtoul(buf, 0, &enable);
+ if (ret < 0)
+ goto out;
+
+ if (enable) {
+ ret = irqt_register(desc);
+ } else {
+ unsigned long flags;
+ raw_spin_lock_irqsave(&desc->lock, flags);
+ irqt_unregister(desc);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+ }
+out:
+ kfree(buf);
+ return ret ? ret : count;
+}
+
+static const struct file_operations irq_timings_proc_fops = {
+ .open = irq_timings_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .write = irq_timings_proc_write,
+};
+
#define MAX_NAMELEN 128
static int name_unique(unsigned int irq, struct irqaction *new_action)
@@ -358,6 +416,11 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
proc_create_data("spurious", 0444, desc->dir,
&irq_spurious_proc_fops, (void *)(long)irq);
+#ifdef CONFIG_IRQ_TIMINGS
+ /* create /proc/irq/<irq>/timings */
+ proc_create_data("timings", 0644, desc->dir,
+ &irq_timings_proc_fops, (void *)(long)irq);
+#endif
}
void unregister_irq_proc(unsigned int irq, struct irq_desc *desc)
@@ -373,7 +436,9 @@ void unregister_irq_proc(unsigned int irq, struct irq_desc *desc)
remove_proc_entry("node", desc->dir);
#endif
remove_proc_entry("spurious", desc->dir);
-
+#ifdef CONFIG_IRQ_TIMINGS
+ remove_proc_entry("timings", desc->dir);
+#endif
memset(name, 0, MAX_NAMELEN);
sprintf(name, "%u", irq);
remove_proc_entry(name, root_irq_dir);
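Based on the proc handlers above, writing a non-zero value to /proc/irq/<irq>/timings enables the statistics for an already requested IRQ and zero disables them. A hypothetical user-space helper (the IRQ number is only an example):

	#include <stdio.h>

	static int set_irq_timings(int irq, int enable)
	{
		char path[64];
		FILE *f;

		snprintf(path, sizeof(path), "/proc/irq/%d/timings", irq);
		f = fopen(path, "w");
		if (!f)
			return -1;
		fprintf(f, "%d\n", enable);
		return fclose(f);
	}

	int main(void)
	{
		return set_irq_timings(19, 1) ? 1 : 0;
	}
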
diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c
new file mode 100644
index 000000000000..27994cea4a99
--- /dev/null
+++ b/kernel/irq/timings.c
@@ -0,0 +1,338 @@
+/*
+ * IRQ occurrence timing statistics
+ *
+ * Created by: Nicolas Pitre, November 2014
+ * Copyright: (C) 2014-2015 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/irq.h>
+#include <linux/ktime.h>
+#include <linux/list.h>
+#include <linux/math64.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "internals.h"
+
+#include <trace/events/irq.h>
+
+
+/*
+ * This is the size of the IRQ interval window used to compute the
+ * mean interval and its variance. This has to be at least 3 to still
+ * make sense. Higher values may improve prediction confidence but more
+ * false negatives are to be expected.
+ */
+#define IRQT_INTERVAL_WINDOW 3
+
+
+struct irqt_prediction {
+ struct list_head node;
+ ktime_t time; /* expected occurrence time */
+ int cpu; /* CPU this prediction was queued for */
+};
+
+struct irqt_stat {
+ ktime_t last_time; /* previous IRQ occurrence */
+ u64 n_M2; /* IRQ interval variance (n scaled) */
+ u32 n_mean; /* IRQ mean interval (n scaled) */
+ u32 intervals[IRQT_INTERVAL_WINDOW];
+ /* window of recent IRQ intervals */
+ unsigned int w_ptr; /* current window pointer */
+ u32 predictable; /* # of IRQs that were predictable */
+ u32 unpredictable; /* # of IRQs that were not */
+ struct irqt_prediction prediction;
+};
+
+static DEFINE_PER_CPU(struct list_head, irqt_predictions);
+static DEFINE_PER_CPU(raw_spinlock_t, irqt_predictions_lock);
+
+void __init irqt_init(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ INIT_LIST_HEAD(&per_cpu(irqt_predictions, cpu));
+ raw_spin_lock_init(&per_cpu(irqt_predictions_lock, cpu));
+ }
+}
+
+/*
+ * Purge past events.
+ * Caller must take care of locking.
+ */
+static void irqt_purge(ktime_t now, struct list_head *head)
+{
+ struct irqt_prediction *entry, *n;
+
+ list_for_each_entry_safe(entry, n, head, node) {
+ if (ktime_after(entry->time, now))
+ break;
+ list_del_init(&entry->node);
+ }
+}
+
+/*
+ * Enqueue the next predicted event for this IRQ on this CPU.
+ * We are in interrupt context with IRQs disabled.
+ */
+static void irqt_enqueue_prediction(ktime_t now, struct irqt_stat *s)
+{
+ int this_cpu = raw_smp_processor_id();
+ int prev_cpu = s->prediction.cpu;
+ struct list_head *head = &per_cpu(irqt_predictions, this_cpu);
+ u32 predicted_interval = s->n_mean / IRQT_INTERVAL_WINDOW;
+ struct irqt_prediction *list_entry, *new_entry;
+ raw_spinlock_t *lock;
+
+ if (unlikely(prev_cpu != this_cpu && prev_cpu != -1)) {
+ lock = &per_cpu(irqt_predictions_lock, prev_cpu);
+ raw_spin_lock(lock);
+ list_del_init(&s->prediction.node);
+ raw_spin_unlock(lock);
+ }
+
+ lock = &per_cpu(irqt_predictions_lock, this_cpu);
+ raw_spin_lock(lock);
+ irqt_purge(now, head);
+ __list_del_entry(&s->prediction.node);
+ new_entry = &s->prediction;
+ new_entry->time = ktime_add_us(now, predicted_interval);
+ new_entry->cpu = this_cpu;
+ list_for_each_entry(list_entry, head, node)
+ if (ktime_after(new_entry->time, list_entry->time))
+ break;
+ list_add_tail(&new_entry->node, &list_entry->node);
+ raw_spin_unlock(lock);
+}
+
+/**
+ * irqt_get_next_prediction - get relative time before next predicted IRQ
+ *
+ * @cpu: the CPU number for which a prediction is wanted
+ *
+ * This returns the relative time in microsecs before the next expected IRQ
+ * on the given CPU, or zero if no prediction is available. Those predictions
+ * are not guaranteed to be reliable and will fail from time to time, e.g.
+ * when the predicted IRQ simply never comes.
+ */
+s64 irqt_get_next_prediction(int cpu)
+{
+ raw_spinlock_t *lock = &per_cpu(irqt_predictions_lock, cpu);
+ struct list_head *head = &per_cpu(irqt_predictions, cpu);
+ unsigned long flags;
+ ktime_t now;
+ struct irqt_prediction *next;
+ s64 result;
+
+ raw_spin_lock_irqsave(lock, flags);
+ now = ktime_get();
+ irqt_purge(now, head);
+ next = list_first_entry_or_null(head, struct irqt_prediction, node);
+ result = next ? ktime_us_delta(next->time, now) : 0;
+ raw_spin_unlock_irqrestore(lock, flags);
+ return result;
+}
+
+/*
+ * irqt_process - update timing interval statistics for the given IRQ
+ *
+ * @irq: the IRQ number
+ * @stat: the corresponding IRQ timing stats record
+ *
+ * This is assumed to be called in IRQ context with desc->lock held and
+ * IRQs turned off.
+ */
+void irqt_process(unsigned int irq, struct irqt_stat *s)
+{
+ ktime_t now = ktime_get();
+ ktime_t ktime_interval = ktime_sub(now, s->last_time);
+ u32 oldX, newX, n = IRQT_INTERVAL_WINDOW;
+ s32 delta, n_dold, n_dnew;
+
+ s->last_time = now;
+
+ /* An interval needs at least two events */
+ if (unlikely(ktime_equal(now, ktime_interval)))
+ return;
+
+ /*
+ * There is no point attempting predictions on interrupts more
+ * than 1 second apart. This has no benefit for sleep state
+ * selection and increases the risk of overflowing our variance
+ * computation. Reset all stats in that case.
+ */
+ if (unlikely(ktime_after(ktime_interval, ktime_set(1, 0)))) {
+ s->n_mean = 0;
+ return;
+ }
+
+ /* microsecs is good enough */
+ newX = ktime_to_us(ktime_interval);
+
+ /* Seed the stats with the first interval */
+ if (unlikely(!s->n_mean)) {
+ int i;
+ s->n_M2 = 0;
+ s->n_mean = newX * n;
+ for (i = 0; i < IRQT_INTERVAL_WINDOW; i++)
+ s->intervals[i] = newX;
+ s->predictable = s->unpredictable = 0;
+ return;
+ }
+
+ /* Replace the oldest interval in our window */
+ oldX = s->intervals[s->w_ptr];
+ s->intervals[s->w_ptr] = newX;
+ s->w_ptr = (s->w_ptr + 1) % IRQT_INTERVAL_WINDOW;
+
+ /*
+ * The variance gives us an instantaneous deviation from the
+ * mean interval value. Given x a new inter-IRQ interval and n the
+ * number of such intervals to date:
+ *
+ * n = n + 1
+ * delta = x - mean
+ * mean = mean + delta/n
+ * M2 = M2 + delta*(x - mean)
+ *
+ * variance = M2/(n - 1)
+ *
+ * We want to update the variance over a window of recent intervals
+ * in order to stay current with changing IRQ patterns. To remove
+ * the contribution from a sample x:
+ *
+ * n = n - 1
+ * delta = x - mean
+ * mean = mean - delta/n
+ * M2 = M2 - delta*(x - mean)
+ *
+ * Combining those equations, we update both the mean and
+ * variance by removing the contribution from the oldest window
+ * sample and adding the latest one at the same time:
+ *
+ * delta = newX - oldX
+ * dold = oldX - mean
+ * mean = mean + delta/n
+ * dnew = newX - mean
+ * M2 = M2 + delta * (dold + dnew)
+ *
+ * Ref:
+ * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
+ *
+ * However this is unstable if performed with integer math due to
+ * the accumulation of bit truncation errors caused by the division.
+ * To avoid that, let's factor out the division. Assuming
+ * n_mean = n * mean:
+ *
+ * delta = newX - oldX
+ * n_dold = n * oldX - n_mean
+ * n_mean = n_mean + delta
+ * n_dnew = n * newX - n_mean
+ * n_M2 = n_M2 + delta * (n_dold + n_dnew)
+ *
+ * variance = n_M2/n / (n - 1)
+ *
+ * To make things as efficient as possible, we keep our window
+ * size constant: n = IRQT_INTERVAL_WINDOW.
+ */
+ delta = newX - oldX;
+ n_dold = n*oldX - s->n_mean;
+ s->n_mean += delta;
+ n_dnew = n*newX - s->n_mean;
+ s->n_M2 += (s64)delta * (n_dold + n_dnew);
+
+ /*
+ * Let's determine if this interrupt actually happened after a
+ * periodic interval. We treat a standard deviation greater than
+ * the mean value as a signal that the current interval is no longer
+ * stable enough to be predictable.
+ *
+ * mean < SD --> mean < sqrt(variance) --> mean^2 < variance
+ *
+ * n_mean/n * n_mean/n < n_M2/n / (n - 1) -->
+ * n_mean * n_mean * (n - 1) < n_M2 * n
+ */
+ if ((u64)s->n_mean * s->n_mean * (n - 1) > s->n_M2 * n) {
+ s->predictable++;
+ if (s->predictable >= IRQT_INTERVAL_WINDOW)
+ irqt_enqueue_prediction(now, s);
+ } else {
+ s->predictable = 0;
+ s->unpredictable++;
+ }
+
+ trace_irq_timings(irq, newX, div_u64(s->n_M2, n*(n-1)), s->n_mean/n,
+ s->predictable, s->unpredictable);
+}
+
+/*
+ * Called from __setup_irq() after successful registration of a new action
+ * handler.
+ */
+int irqt_register(struct irq_desc *desc)
+{
+ struct irqt_stat *s;
+ unsigned long flags;
+ int ret;
+
+ if (desc->irq_timings)
+ return 0;
+
+ s = kzalloc(sizeof(*s), GFP_KERNEL);
+ if (!s)
+ return -ENOMEM;
+ INIT_LIST_HEAD(&s->prediction.node);
+ s->prediction.cpu = -1;
+
+ raw_spin_lock_irqsave(&desc->lock, flags);
+ if (desc->irq_timings) {
+ /* someone else raced ahead of us */
+ ret = 0;
+ } else if (!desc->action) {
+ /* unused IRQ? */
+ ret = -ENXIO;
+ } else if (irq_settings_is_per_cpu(desc)) {
+ /* we're not set for per-CPU accounting */
+ pr_warn("IRQ %d: can't do timing stats on per-CPU IRQs\n",
+ desc->action->irq);
+ ret = -ENOSYS;
+ } else {
+ desc->irq_timings = s;
+ s = NULL;
+ ret = 0;
+ }
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+ if (s)
+ kfree(s);
+ return ret;
+}
+
+/*
+ * Called from __free_irq() when there is no longer any handler attached
+ * to the IRQ descriptor. Must be called with desc->lock held.
+ */
+void irqt_unregister(struct irq_desc *desc)
+{
+ struct irqt_stat *s;
+ int cpu;
+ raw_spinlock_t *lock;
+
+ assert_raw_spin_locked(&desc->lock);
+ if (!desc->irq_timings)
+ return;
+ s = desc->irq_timings;
+ desc->irq_timings = NULL;
+ cpu = s->prediction.cpu;
+ if (cpu != -1) {
+ lock = &per_cpu(irqt_predictions_lock, cpu);
+ raw_spin_lock(lock);
+ __list_del_entry(&s->prediction.node);
+ raw_spin_unlock(lock);
+ }
+ kfree(s);
+}
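The windowed, n-scaled mean/variance update documented in irqt_process() can be exercised in isolation. A stand-alone model (window size 3 as in IRQT_INTERVAL_WINDOW; the ktime plumbing and the prediction queue are left out):

	#include <stdio.h>
	#include <stdint.h>

	#define N 3

	struct win {
		uint32_t intervals[N];
		unsigned int ptr;
		uint32_t n_mean;	/* N * mean */
		int64_t n_M2;		/* N * (N - 1) * variance */
	};

	static void win_seed(struct win *w, uint32_t x)
	{
		for (int i = 0; i < N; i++)
			w->intervals[i] = x;
		w->ptr = 0;
		w->n_mean = x * N;
		w->n_M2 = 0;
	}

	static void win_add(struct win *w, uint32_t newx)
	{
		uint32_t oldx = w->intervals[w->ptr];
		int32_t delta, n_dold, n_dnew;

		w->intervals[w->ptr] = newx;
		w->ptr = (w->ptr + 1) % N;

		delta = newx - oldx;
		n_dold = N * oldx - w->n_mean;
		w->n_mean += delta;
		n_dnew = N * newx - w->n_mean;
		w->n_M2 += (int64_t)delta * (n_dold + n_dnew);
	}

	/* "predictable" test from the patch: mean^2 * (N-1) > M2 * N <=> mean > SD */
	static int predictable(const struct win *w)
	{
		return (int64_t)w->n_mean * w->n_mean * (N - 1) > w->n_M2 * N;
	}

	int main(void)
	{
		struct win w;

		win_seed(&w, 1000);
		win_add(&w, 1010);
		win_add(&w, 990);
		/* prints: mean=1000 us variance=100 predictable=1 */
		printf("mean=%u us variance=%lld predictable=%d\n",
		       w.n_mean / N,
		       (long long)(w.n_M2 / (N * (N - 1))), predictable(&w));
		return 0;
	}
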
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b4976ab65b39..ce4b31bc7f28 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4968,21 +4968,45 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
if (idle_cpu(i)) {
struct rq *rq = cpu_rq(i);
struct cpuidle_state *idle = idle_get_state(rq);
- if (idle && idle->exit_latency < min_exit_latency) {
+
+ if (idle) {
+
/*
- * We give priority to a CPU whose idle state
- * has the smallest exit latency irrespective
- * of any idle timestamp.
+ * When we want to save energy, exclude a cpu which has not
+ * yet reached the break even point of its idle state
*/
- min_exit_latency = idle->exit_latency;
- latest_idle_timestamp = rq->idle_stamp;
- shallowest_idle_cpu = i;
- } else if ((!idle || idle->exit_latency == min_exit_latency) &&
- rq->idle_stamp > latest_idle_timestamp) {
+ if (sched_feat(ENERGY_IDLE) &&
+ ((ktime_to_us(ktime_get()) - idle->idle_stamp <
+ idle->target_residency)))
+ continue;
+
+ if (idle->exit_latency < min_exit_latency) {
+ /*
+ * We give priority to a CPU
+ * whose idle state has the
+ * smallest exit latency
+ * irrespective of any idle
+ * timestamp.
+ */
+ min_exit_latency = idle->exit_latency;
+ latest_idle_timestamp = idle->idle_stamp;
+ shallowest_idle_cpu = i;
+ } else if (idle->exit_latency == min_exit_latency &&
+ idle->idle_stamp > latest_idle_timestamp) {
+ /*
+ * If the CPU is in the same
+ * idle state, choose the more
+ * recent one as it might have
+ * a warmer cache
+ */
+ latest_idle_timestamp = idle->idle_stamp;
+ shallowest_idle_cpu = i;
+ }
+ } else if (rq->idle_stamp > latest_idle_timestamp) {
/*
- * If equal or no active idle state, then
- * the most recently idled CPU might have
- * a warmer cache.
+ * If no active idle state, then the
+ * most recent idled CPU might have a
+ * warmer cache
*/
latest_idle_timestamp = rq->idle_stamp;
shallowest_idle_cpu = i;
@@ -4996,7 +5020,15 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
}
}
- return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu;
+ /*
+ * If we want to save energy and there is a non idle cpu other
+ * than the current one, use it so we do not wake up an idle
+ * cpu; otherwise use the shallowest idle cpu
+ */
+ if (sched_feat(ENERGY_IDLE) && least_loaded_cpu != this_cpu)
+ return least_loaded_cpu;
+ else
+ return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu;
}
/*
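A toy illustration of the ENERGY_IDLE break-even test added above: an idle cpu only stays a candidate once it has been idle at least as long as the target residency of the state it entered (all values are microseconds, made up for the example).

	#include <stdio.h>
	#include <stdbool.h>

	static bool past_break_even(long long now_us, long long idle_stamp_us,
				    unsigned int target_residency_us)
	{
		return now_us - idle_stamp_us >= target_residency_us;
	}

	int main(void)
	{
		/* entered a state needing 1500us of residency only 1000us ago: skipped */
		printf("%d\n", past_break_even(10000, 9000, 1500));	/* 0 */
		/* same state but already idle for 2000us: eligible again */
		printf("%d\n", past_break_even(10000, 8000, 1500));	/* 1 */
		return 0;
	}
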
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 90284d117fe6..b14f8ebcf4b1 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -61,6 +61,11 @@ SCHED_FEAT(RT_RUNTIME_SHARE, true)
SCHED_FEAT(LB_MIN, false)
/*
+ * Apply an aggressive energy saving policy when idle
+ */
+SCHED_FEAT(ENERGY_IDLE, true)
+
+/*
* Apply the automatic NUMA scheduling policy. Enabled automatically
* at runtime if running on a NUMA machine. Can be controlled via
* numa_balancing=
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index c47fce75e666..ffef99b2ad03 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -5,6 +5,7 @@
#include <linux/cpu.h>
#include <linux/cpuidle.h>
#include <linux/tick.h>
+#include <linux/pm_qos.h>
#include <linux/mm.h>
#include <linux/stackprotector.h>
@@ -42,18 +43,6 @@ static int __init cpu_idle_nopoll_setup(char *__unused)
__setup("hlt", cpu_idle_nopoll_setup);
#endif
-static inline int cpu_idle_poll(void)
-{
- rcu_idle_enter();
- trace_cpu_idle_rcuidle(0, smp_processor_id());
- local_irq_enable();
- while (!tif_need_resched())
- cpu_relax();
- trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
- rcu_idle_exit();
- return 1;
-}
-
/* Weak implementations for optional arch specific functions */
void __weak arch_cpu_idle_prepare(void) { }
void __weak arch_cpu_idle_enter(void) { }
@@ -65,6 +54,23 @@ void __weak arch_cpu_idle(void)
local_irq_enable();
}
+void __weak arch_cpu_idle_poll(void)
+{
+ local_irq_enable();
+ while (!tif_need_resched())
+ cpu_relax();
+}
+
+static inline int cpu_idle_poll(void)
+{
+ rcu_idle_enter();
+ trace_cpu_idle_rcuidle(0, smp_processor_id());
+ arch_cpu_idle_poll();
+ trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
+ rcu_idle_exit();
+ return 1;
+}
+
/**
* cpuidle_idle_call - the main idle function
*
@@ -74,7 +80,7 @@ void __weak arch_cpu_idle(void)
* set, and it returns with polling set. If it ever stops polling, it
* must clear the polling bit.
*/
-static void cpuidle_idle_call(void)
+static void cpuidle_idle_call(unsigned int latency_req, s64 next_timer_event)
{
struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
@@ -107,7 +113,7 @@ static void cpuidle_idle_call(void)
* Ask the cpuidle framework to choose a convenient idle state.
* Fall back to the default arch idle method on errors.
*/
- next_state = cpuidle_select(drv, dev);
+ next_state = cpuidle_select(drv, dev, latency_req, next_timer_event);
if (next_state < 0) {
use_default:
/*
@@ -166,7 +172,8 @@ use_default:
/*
* Give the governor an opportunity to reflect on the outcome
*/
- cpuidle_reflect(dev, entered_state);
+ if (entered_state >= 0)
+ cpuidle_reflect(dev, entered_state);
exit_idle:
__current_set_polling();
@@ -188,6 +195,9 @@ exit_idle:
*/
static void cpu_idle_loop(void)
{
+ unsigned int latency_req;
+ s64 next_timer_event;
+
while (1) {
/*
* If the arch has a polling bit, we maintain an invariant:
@@ -211,19 +221,30 @@ static void cpu_idle_loop(void)
local_irq_disable();
arch_cpu_idle_enter();
+ latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
+
+ next_timer_event =
+ ktime_to_us(tick_nohz_get_sleep_length());
+
/*
* In poll mode we reenable interrupts and spin.
*
+ * If the latency req is zero, we don't want to
+ * enter any idle state and we jump to the poll
+ * function directly
+ *
* Also if we detected in the wakeup from idle
* path that the tick broadcast device expired
* for us, we don't want to go deep idle as we
* know that the IPI is going to arrive right
* away
*/
- if (cpu_idle_force_poll || tick_check_broadcast_expired())
+ if (!latency_req || cpu_idle_force_poll ||
+ tick_check_broadcast_expired())
cpu_idle_poll();
else
- cpuidle_idle_call();
+ cpuidle_idle_call(latency_req,
+ next_timer_event);
arch_cpu_idle_exit();
}