 arch/x86/kernel/smp.c              |  20
 drivers/cpuidle/Kconfig            |   4
 drivers/cpuidle/cpuidle.c          |  21
 drivers/cpuidle/governors/Makefile |   1
 drivers/cpuidle/governors/ladder.c |  11
 drivers/cpuidle/governors/menu.c   |  15
 drivers/cpuidle/governors/select.c |  55
 drivers/cpuidle/sysfs.c            | 156
 include/linux/cpuidle.h            |  23
 include/linux/sched.h              |  21
 include/linux/sched/sysctl.h       |   8
 include/trace/events/io_latency.h  |  32
 init/Kconfig                       |  11
 kernel/exit.c                      |   1
 kernel/fork.c                      |   5
 kernel/sched/Makefile              |   2
 kernel/sched/core.c                |   7
 kernel/sched/fair.c                |   4
 kernel/sched/idle.c                |  33
 kernel/sched/idle_debug.c          | 126
 kernel/sched/idle_debug.h          |  29
 kernel/sched/io_latency.c          | 442
 kernel/sched/io_latency.h          |  38
 kernel/sysctl.c                    |  11
 lib/Kconfig.debug                  |   9
 25 files changed, 1052 insertions(+), 33 deletions(-)
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index be8e1bde07aa..d193609beabc 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -31,6 +31,16 @@
 #include <asm/apic.h>
 #include <asm/nmi.h>
 #include <asm/trace/irq_vectors.h>
+
+#define CREATE_TRACE_POINTS
+/*
+ * Those were defined in <asm/trace/irq_vectors.h> and cause problems
+ * when including <trace/events/ipi.h>.
+ */
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#include <trace/events/ipi.h>
+
 /*
  * Some notes on x86 processor bugs affecting SMP operation:
  *
@@ -124,11 +134,13 @@ static void native_smp_send_reschedule(int cpu)
                WARN_ON(1);
                return;
        }
+       trace_ipi_raise(cpumask_of(cpu), tracepoint_string("RESCHEDULE"));
        apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
 }
 
 void native_send_call_func_single_ipi(int cpu)
 {
+       trace_ipi_raise(cpumask_of(cpu), tracepoint_string("CALL_FUNCTION_SINGLE"));
        apic->send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
 }
 
@@ -136,6 +148,8 @@ void native_send_call_func_ipi(const struct cpumask *mask)
 {
        cpumask_var_t allbutself;
 
+       trace_ipi_raise(mask, tracepoint_string("CALL_FUNCTION"));
+
        if (!alloc_cpumask_var(&allbutself, GFP_ATOMIC)) {
                apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
                return;
@@ -252,8 +266,10 @@ finish:
  */
 static inline void __smp_reschedule_interrupt(void)
 {
+       trace_ipi_entry(tracepoint_string("RESCHEDULE"));
        inc_irq_stat(irq_resched_count);
        scheduler_ipi();
+       trace_ipi_exit(tracepoint_string("RESCHEDULE"));
 }
 
 __visible void smp_reschedule_interrupt(struct pt_regs *regs)
@@ -291,8 +307,10 @@ __visible void smp_trace_reschedule_interrupt(struct pt_regs *regs)
 
 static inline void __smp_call_function_interrupt(void)
 {
+       trace_ipi_entry(tracepoint_string("CALL_FUNCTION"));
        generic_smp_call_function_interrupt();
        inc_irq_stat(irq_call_count);
+       trace_ipi_exit(tracepoint_string("CALL_FUNCTION"));
 }
 
 __visible void smp_call_function_interrupt(struct pt_regs *regs)
@@ -313,8 +331,10 @@ __visible void smp_trace_call_function_interrupt(struct pt_regs *regs)
 
 static inline void __smp_call_function_single_interrupt(void)
 {
+       trace_ipi_entry(tracepoint_string("CALL_FUNCTION_SINGLE"));
        generic_smp_call_function_single_interrupt();
        inc_irq_stat(irq_call_count);
+       trace_ipi_exit(tracepoint_string("CALL_FUNCTION_SINGLE"));
 }
 
 __visible void smp_call_function_single_interrupt(struct pt_regs *regs)
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
index c5029c1209b4..6deb2473c69c 100644
--- a/drivers/cpuidle/Kconfig
+++ b/drivers/cpuidle/Kconfig
@@ -25,6 +25,10 @@ config CPU_IDLE_GOV_MENU
        bool "Menu governor (for tickless system)"
        default y
 
+config CPU_IDLE_GOV_SELECT
+       bool "Select governor (for tickless system)"
+       default y
+
 config DT_IDLE_STATES
        bool
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 125150dc6e81..a79c4db22ffa 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -8,16 +8,12 @@
  * This code is licenced under the GPL.
  */
 
-#include <linux/clockchips.h>
 #include <linux/kernel.h>
 #include <linux/mutex.h>
-#include <linux/sched.h>
 #include <linux/notifier.h>
 #include <linux/pm_qos.h>
 #include <linux/cpu.h>
 #include <linux/cpuidle.h>
-#include <linux/ktime.h>
-#include <linux/hrtimer.h>
 #include <linux/module.h>
 #include <trace/events/power.h>
 
@@ -122,6 +118,8 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
        trace_cpu_idle_rcuidle(index, dev->cpu);
        time_start = ktime_get();
 
+       target_state->idle_start = ktime_to_us(time_start);
+
        entered_state = target_state->enter(dev, drv, index);
        time_end = ktime_get();
 
@@ -134,6 +132,8 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
        if (diff > INT_MAX)
                diff = INT_MAX;
 
+       target_state->idle_start = 0;
+
        dev->last_residency = (int) diff;
 
        if (entered_state >= 0) {
@@ -143,6 +143,14 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
                 */
                dev->states_usage[entered_state].time += dev->last_residency;
                dev->states_usage[entered_state].usage++;
+               if (dev->last_residency < drv->states[entered_state].target_residency)
+                       atomic_inc(&dev->over_estimate);
+               else if (entered_state < (drv->state_count - 1) &&
+                        dev->last_residency >=
+                        drv->states[entered_state + 1].target_residency)
+                       atomic_inc(&dev->under_estimate);
+               else
+                       atomic_inc(&dev->right_estimate);
        } else {
                dev->last_residency = 0;
        }
@@ -158,7 +166,8 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
  *
  * Returns the index of the idle state.
  */
-int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+                  struct cpuidle_times *times)
 {
        if (off || !initialized)
                return -ENODEV;
@@ -169,7 +178,7 @@ int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
        if (unlikely(use_deepest_state))
                return cpuidle_find_deepest_state(drv, dev);
 
-       return cpuidle_curr_governor->select(drv, dev);
+       return cpuidle_curr_governor->select(drv, dev, times);
 }
 
 /**
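To make the three counters concrete (the classification above is written to match the semantics of idle_debug.c later in this patch; the numbers here are hypothetical, not from the patch): take a state with a 100us target residency whose next deeper neighbour requires 500us. A measured residency under 100us counts as an over-estimate (the CPU woke too early for the state chosen), 500us or more as an under-estimate (a deeper state would have fit), and anything in between as a right estimate.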
diff --git a/drivers/cpuidle/governors/Makefile b/drivers/cpuidle/governors/Makefile
index 1b512722689f..fa455206fede 100644
--- a/drivers/cpuidle/governors/Makefile
+++ b/drivers/cpuidle/governors/Makefile
@@ -4,3 +4,4 @@
 
 obj-$(CONFIG_CPU_IDLE_GOV_LADDER) += ladder.o
 obj-$(CONFIG_CPU_IDLE_GOV_MENU) += menu.o
+obj-$(CONFIG_CPU_IDLE_GOV_SELECT) += select.o
diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
index 06b57c4c4d80..4f5ec8ade22e 100644
--- a/drivers/cpuidle/governors/ladder.c
+++ b/drivers/cpuidle/governors/ladder.c
@@ -64,18 +64,13 @@ static inline void ladder_do_selection(struct ladder_device *ldev,
  * @dev: the CPU
  */
 static int ladder_select_state(struct cpuidle_driver *drv,
-                              struct cpuidle_device *dev)
+                              struct cpuidle_device *dev,
+                              struct cpuidle_times *times)
 {
        struct ladder_device *ldev = this_cpu_ptr(&ladder_devices);
        struct ladder_device_state *last_state;
+       int latency_req = times->latency_req;
        int last_residency, last_idx = ldev->last_state_idx;
-       int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
-
-       /* Special case when user has set very strict latency requirement */
-       if (unlikely(latency_req == 0)) {
-               ladder_do_selection(ldev, last_idx, 0);
-               return 0;
-       }
 
        last_state = &ldev->states[last_idx];
 
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 710a233b9b0d..e360b08ea44a 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -13,10 +13,6 @@
 #include <linux/kernel.h>
 #include <linux/cpuidle.h>
 #include <linux/pm_qos.h>
-#include <linux/time.h>
-#include <linux/ktime.h>
-#include <linux/hrtimer.h>
-#include <linux/tick.h>
 #include <linux/sched.h>
 #include <linux/math64.h>
 #include <linux/module.h>
@@ -287,10 +283,11 @@ again:
  * @drv: cpuidle driver containing state data
  * @dev: the CPU
  */
-static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+                      struct cpuidle_times *times)
 {
        struct menu_device *data = this_cpu_ptr(&menu_devices);
-       int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
+       int latency_req = times->latency_req;
        int i;
        unsigned int interactivity_req;
        unsigned long nr_iowaiters, cpu_load;
@@ -302,12 +299,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 
        data->last_state_idx = CPUIDLE_DRIVER_STATE_START - 1;
 
-       /* Special case when user has set very strict latency requirement */
-       if (unlikely(latency_req == 0))
-               return 0;
-
        /* determine the expected residency time, round up */
-       data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length());
+       data->next_timer_us = times->next_timer_event;
 
        get_iowait_load(&nr_iowaiters, &cpu_load);
        data->bucket = which_bucket(data->next_timer_us, nr_iowaiters);
diff --git a/drivers/cpuidle/governors/select.c b/drivers/cpuidle/governors/select.c
new file mode 100644
index 000000000000..2193b7870b34
--- /dev/null
+++ b/drivers/cpuidle/governors/select.c
@@ -0,0 +1,55 @@
+/*
+ * select.c - the select governor
+ *
+ * Copyright (C) 2014 Daniel Lezcano <daniel.lezcano@linaro.org>
+ *
+ */
+
+#include <linux/cpuidle.h>
+
+static int select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+                 struct cpuidle_times *times)
+{
+       int i, index = 0, latency_req = times->latency_req;
+       unsigned int next_event;
+
+       /*
+        * If the guessed next IO event is zero, there is no IO
+        * pending, so we ignore it in the equation
+        */
+       next_event = times->next_io_event ?
+               min(times->next_io_event, times->next_timer_event) :
+               times->next_timer_event;
+
+       for (i = 0; i < drv->state_count; i++) {
+
+               struct cpuidle_state *s = &drv->states[i];
+               struct cpuidle_state_usage *su = &dev->states_usage[i];
+
+               if (s->disabled || su->disable)
+                       continue;
+               if (s->target_residency > next_event)
+                       continue;
+               if (s->exit_latency > latency_req)
+                       continue;
+
+               index = i;
+       }
+
+       return index;
+}
+
+static struct cpuidle_governor select_governor = {
+       .name = "select",
+       .rating = 30,
+       .select = select,
+       .owner = THIS_MODULE,
+};
+
+static int __init select_init(void)
+{
+       return cpuidle_register_governor(&select_governor);
+}
+
+postcore_initcall(select_init);
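To see the selection loop in action, take a hypothetical driver with three states — C1 (target residency 20us, exit latency 5us), C2 (200us, 50us) and C3 (1000us, 300us) — a QoS latency budget of 100us, a next timer event in 3000us and a predicted IO completion in 400us. The governor computes next_event = min(400, 3000) = 400us, keeps C1 and C2, rejects C3 (target residency too long and exit latency too high), and returns the index of C2: the deepest state compatible with both constraints. Two notes on the registration: with a rating of 30 the select governor outranks menu (20) and ladder (10), so it becomes the default wherever it is compiled in; and on kernels of this vintage it can typically be chosen at runtime through /sys/devices/system/cpu/cpuidle/current_governor when the system is booted with the cpuidle_sysfs_switch parameter.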
diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
index 97c5903b4606..f446bd0fd9bd 100644
--- a/drivers/cpuidle/sysfs.c
+++ b/drivers/cpuidle/sysfs.c
@@ -439,6 +439,154 @@ static void cpuidle_remove_state_sysfs(struct cpuidle_device *device)
                cpuidle_free_state_kobj(device, i);
 }
 
+#define kobj_to_stats_kobj(k) container_of(k, struct cpuidle_stats_kobj, kobj)
+#define attr_to_stats_attr(a) container_of(a, struct cpuidle_stats_attr, attr)
+
+#define define_show_stats_function(_name)                               \
+       static ssize_t show_stats_##_name(struct cpuidle_device *dev,   \
+                                         char *buf)                    \
+       {                                                               \
+               return sprintf(buf, "%d\n", atomic_read(&dev->_name)); \
+       }
+
+#define define_store_stats_function(_name)                              \
+       static ssize_t store_stats_##_name(struct cpuidle_device *dev,  \
+                                          const char *buf, size_t size) \
+       {                                                               \
+               unsigned long long value;                               \
+               int err;                                                \
+               if (!capable(CAP_SYS_ADMIN))                            \
+                       return -EPERM;                                  \
+               err = kstrtoull(buf, 0, &value);                        \
+               if (err)                                                \
+                       return err;                                     \
+                                                                       \
+               atomic_set(&dev->_name, value);                         \
+               return size;                                            \
+       }
+
+#define define_one_stats_rw(_name, show, store)                         \
+       static struct cpuidle_stats_attr attr_stats_##_name =           \
+               __ATTR(_name, 0644, show, store)
+
+struct cpuidle_stats_kobj {
+       struct cpuidle_device *dev;
+       struct completion kobj_unregister;
+       struct kobject kobj;
+};
+
+struct cpuidle_stats_attr {
+       struct attribute attr;
+       ssize_t (*show)(struct cpuidle_device *, char *);
+       ssize_t (*store)(struct cpuidle_device *, const char *, size_t);
+};
+
+static void cpuidle_stats_sysfs_release(struct kobject *kobj)
+{
+       struct cpuidle_stats_kobj *stats_kobj = kobj_to_stats_kobj(kobj);
+
+       complete(&stats_kobj->kobj_unregister);
+}
+
+static ssize_t cpuidle_stats_show(struct kobject *kobj, struct attribute *attr,
+                                 char *buf)
+{
+       int ret = -EIO;
+       struct cpuidle_stats_kobj *stats_kobj = kobj_to_stats_kobj(kobj);
+       struct cpuidle_stats_attr *dattr = attr_to_stats_attr(attr);
+
+       if (dattr->show)
+               ret = dattr->show(stats_kobj->dev, buf);
+
+       return ret;
+}
+
+static ssize_t cpuidle_stats_store(struct kobject *kobj,
+                                  struct attribute *attr,
+                                  const char *buf, size_t size)
+{
+       int ret = -EIO;
+       struct cpuidle_stats_kobj *stats_kobj = kobj_to_stats_kobj(kobj);
+       struct cpuidle_stats_attr *dattr = attr_to_stats_attr(attr);
+
+       if (dattr->store)
+               ret = dattr->store(stats_kobj->dev, buf, size);
+
+       return ret;
+}
+
+define_show_stats_function(right_estimate);
+define_store_stats_function(right_estimate);
+
+define_show_stats_function(under_estimate);
+define_store_stats_function(under_estimate);
+
+define_show_stats_function(over_estimate);
+define_store_stats_function(over_estimate);
+
+define_one_stats_rw(right_estimate,
+                   show_stats_right_estimate,
+                   store_stats_right_estimate);
+
+define_one_stats_rw(under_estimate,
+                   show_stats_under_estimate,
+                   store_stats_under_estimate);
+
+define_one_stats_rw(over_estimate,
+                   show_stats_over_estimate,
+                   store_stats_over_estimate);
+
+static const struct sysfs_ops cpuidle_stats_sysfs_ops = {
+       .show = cpuidle_stats_show,
+       .store = cpuidle_stats_store,
+};
+
+static struct attribute *cpuidle_stats_default_attrs[] = {
+       &attr_stats_right_estimate.attr,
+       &attr_stats_under_estimate.attr,
+       &attr_stats_over_estimate.attr,
+       NULL
+};
+
+static struct kobj_type ktype_stats_cpuidle = {
+       .sysfs_ops = &cpuidle_stats_sysfs_ops,
+       .default_attrs = cpuidle_stats_default_attrs,
+       .release = cpuidle_stats_sysfs_release,
+};
+
+static int cpuidle_add_stats_sysfs(struct cpuidle_device *dev)
+{
+       struct cpuidle_stats_kobj *kstats;
+       struct cpuidle_device_kobj *kdev = dev->kobj_dev;
+       int ret;
+
+       kstats = kzalloc(sizeof(*kstats), GFP_KERNEL);
+       if (!kstats)
+               return -ENOMEM;
+
+       kstats->dev = dev;
+       init_completion(&kstats->kobj_unregister);
+
+       ret = kobject_init_and_add(&kstats->kobj, &ktype_stats_cpuidle,
+                                  &kdev->kobj, "stats");
+       if (ret) {
+               kfree(kstats);
+               return ret;
+       }
+
+       kobject_uevent(&kstats->kobj, KOBJ_ADD);
+       dev->kobj_stats = kstats;
+
+       return ret;
+}
+
+static void cpuidle_remove_stats_sysfs(struct cpuidle_device *dev)
+{
+       struct cpuidle_stats_kobj *kstats = dev->kobj_stats;
+
+       kobject_put(&kstats->kobj);
+       wait_for_completion(&kstats->kobj_unregister);
+       kfree(kstats);
+}
+
 #ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
 #define kobj_to_driver_kobj(k) container_of(k, struct cpuidle_driver_kobj, kobj)
 #define attr_to_driver_attr(a) container_of(a, struct cpuidle_driver_attr, attr)
@@ -589,6 +737,15 @@ int cpuidle_add_device_sysfs(struct cpuidle_device *device)
        ret = cpuidle_add_driver_sysfs(device);
-       if (ret)
+       if (ret) {
                cpuidle_remove_state_sysfs(device);
+               return ret;
+       }
+
+       ret = cpuidle_add_stats_sysfs(device);
+       if (ret) {
+               cpuidle_remove_driver_sysfs(device);
+               cpuidle_remove_state_sysfs(device);
+       }
+
        return ret;
 }
 
@@ -598,6 +753,7 @@ int cpuidle_add_device_sysfs(struct cpuidle_device *device)
  */
 void cpuidle_remove_device_sysfs(struct cpuidle_device *device)
 {
+       cpuidle_remove_stats_sysfs(device);
        cpuidle_remove_driver_sysfs(device);
        cpuidle_remove_state_sysfs(device);
 }
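Each registered cpuidle device now carries the three counters under a "stats" kobject next to its idle state directories. Below is a minimal userspace sketch for reading them on CPU0; the sysfs path is an assumption based on where cpuidle exposes per-device attributes:

        /* read the per-cpu prediction counters (assumed path) */
        #include <stdio.h>

        static long read_counter(const char *name)
        {
                char path[256];
                long val = -1;
                FILE *f;

                snprintf(path, sizeof(path),
                         "/sys/devices/system/cpu/cpu0/cpuidle/stats/%s", name);
                f = fopen(path, "r");
                if (!f)
                        return -1;
                if (fscanf(f, "%ld", &val) != 1)
                        val = -1;
                fclose(f);
                return val;
        }

        int main(void)
        {
                printf("right=%ld under=%ld over=%ld\n",
                       read_counter("right_estimate"),
                       read_counter("under_estimate"),
                       read_counter("over_estimate"));
                return 0;
        }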
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 25e0df6155a4..3ac0ded7d75a 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -44,7 +44,7 @@ struct cpuidle_state {
        int             power_usage; /* in mW */
        unsigned int    target_residency; /* in US */
        bool            disabled; /* disabled on all CPUs */
-
+       s64             idle_start;
        int (*enter)    (struct cpuidle_device *dev,
                        struct cpuidle_driver *drv,
                        int index);
@@ -62,6 +62,7 @@ struct cpuidle_state {
 struct cpuidle_device_kobj;
 struct cpuidle_state_kobj;
 struct cpuidle_driver_kobj;
+struct cpuidle_stats_kobj;
 
 struct cpuidle_device {
        unsigned int            registered:1;
@@ -74,8 +75,13 @@ struct cpuidle_device {
        struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX];
        struct cpuidle_driver_kobj *kobj_driver;
        struct cpuidle_device_kobj *kobj_dev;
+       struct cpuidle_stats_kobj *kobj_stats;
        struct list_head        device_list;
 
+       atomic_t right_estimate;
+       atomic_t under_estimate;
+       atomic_t over_estimate;
+
 #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
        int                     safe_state_index;
        cpumask_t               coupled_cpus;
@@ -83,6 +89,12 @@ struct cpuidle_device {
 #endif
 };
 
+struct cpuidle_times {
+       unsigned int latency_req;
+       unsigned int next_timer_event;
+       unsigned int next_io_event;
+};
+
 DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
 DECLARE_PER_CPU(struct cpuidle_device, cpuidle_dev);
 
@@ -122,7 +134,8 @@ struct cpuidle_driver {
 extern void disable_cpuidle(void);
 
 extern int cpuidle_select(struct cpuidle_driver *drv,
-                         struct cpuidle_device *dev);
+                         struct cpuidle_device *dev,
+                         struct cpuidle_times *times);
 extern int cpuidle_enter(struct cpuidle_driver *drv,
                         struct cpuidle_device *dev, int index);
 extern void cpuidle_reflect(struct cpuidle_device *dev, int index);
@@ -150,7 +163,8 @@ extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev)
 #else
 static inline void disable_cpuidle(void) { }
 static inline int cpuidle_select(struct cpuidle_driver *drv,
-                                struct cpuidle_device *dev)
+                                struct cpuidle_device *dev,
+                                struct cpuidle_times *times)
 {return -ENODEV; }
 static inline int cpuidle_enter(struct cpuidle_driver *drv,
                                struct cpuidle_device *dev, int index)
@@ -205,7 +219,8 @@ struct cpuidle_governor {
                        struct cpuidle_device *dev);
 
        int  (*select)  (struct cpuidle_driver *drv,
-                       struct cpuidle_device *dev);
+                       struct cpuidle_device *dev,
+                       struct cpuidle_times *times);
        void (*reflect) (struct cpuidle_device *dev, int index);
 
        struct module           *owner;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e60a100d8713..fc3a7cf107ec 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1284,6 +1284,24 @@ enum perf_event_task_context {
        perf_nr_task_contexts,
 };
 
+
+#ifdef CONFIG_SCHED_IO_LATENCY
+struct io_latency_node {
+       struct rb_node node;
+       unsigned int avg_latency;
+       ktime_t start_time;
+       ktime_t end_time;
+       struct list_head bucket_list;
+};
+
+void exit_io_latency(struct task_struct *tsk);
+#else
+static inline void exit_io_latency(struct task_struct *tsk)
+{
+       ;
+}
+#endif
+
 struct task_struct {
        volatile long state;    /* -1 unrunnable, 0 runnable, >0 stopped */
        void *stack;
@@ -1708,6 +1726,9 @@ struct task_struct {
        unsigned int    sequential_io;
        unsigned int    sequential_io_avg;
 #endif
+#ifdef CONFIG_SCHED_IO_LATENCY
+       struct io_latency_node io_latency;
+#endif
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
        unsigned long   task_state_change;
 #endif
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 596a0e007c62..2d73bd23e206 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -48,6 +48,14 @@ enum sched_tunable_scaling {
 };
 extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
 
+#ifdef CONFIG_SMP
+extern unsigned int sysctl_sched_energy_option;
+
+int sched_proc_energy_option_handler(struct ctl_table *table, int write,
+                                    void __user *buffer, size_t *lenp,
+                                    loff_t *ppos);
+#endif
+
 extern unsigned int sysctl_numa_balancing_scan_delay;
 extern unsigned int sysctl_numa_balancing_scan_period_min;
 extern unsigned int sysctl_numa_balancing_scan_period_max;
diff --git a/include/trace/events/io_latency.h b/include/trace/events/io_latency.h
new file mode 100644
index 000000000000..ab679fcd8d27
--- /dev/null
+++ b/include/trace/events/io_latency.h
@@ -0,0 +1,32 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM io_latency
+
+#if !defined(_TRACE_IO_LATENCY_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_IO_LATENCY_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(io_latency_entry,
+
+       TP_PROTO(u64 latency, u64 avg_latency),
+
+       TP_ARGS(latency, avg_latency),
+
+       TP_STRUCT__entry(
+               __field(        u64,    latency         )
+               __field(        u64,    avg_latency     )
+       ),
+
+       TP_fast_assign(
+               __entry->latency = latency;
+               __entry->avg_latency = avg_latency;
+       ),
+
+       TP_printk("latency=%llu, avg latency=%llu",
+                 __entry->latency, __entry->avg_latency)
+);
+
+#endif /* _TRACE_IO_LATENCY_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
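All three cpuidle_times fields are in microseconds: latency_req is the PM_QOS_CPU_DMA_LATENCY constraint, next_timer_event comes from tick_nohz_get_sleep_length(), and next_io_event is the guessed IO completion time, with 0 meaning no IO is pending. The new io_latency_entry tracepoint is enabled like any other event, typically via /sys/kernel/debug/tracing/events/io_latency/io_latency_entry/enable — the exact path depends on where debugfs is mounted.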
diff --git a/init/Kconfig b/init/Kconfig
index 3ee28ae02cc8..b849c0947dd0 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1230,6 +1230,17 @@ config SCHED_AUTOGROUP
          desktop applications.  Task group autogeneration is currently based
          upon task session.
 
+config SCHED_IO_LATENCY
+       bool "IO latency tracking for the scheduler"
+       depends on SMP
+       help
+         This option tracks, for each task and each CPU, the average
+         time the task has been blocked on IO. It gives the scheduler
+         more information about how long a CPU is likely to stay idle
+         and lets it make better scheduling decisions.
+
+         If unsure, say Y.
+
 config SYSFS_DEPRECATED
        bool "Enable deprecated sysfs features to support old userspace tools"
        depends on SYSFS
diff --git a/kernel/exit.c b/kernel/exit.c
index 232c4bc8bcc9..8e4e75d5efaa 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -759,6 +759,7 @@ void do_exit(long code)
        exit_task_namespaces(tsk);
        exit_task_work(tsk);
        exit_thread();
+       exit_io_latency(tsk);
 
        /*
         * Flush inherited counters to the parent - before the parent
diff --git a/kernel/fork.c b/kernel/fork.c
index 9b7d746d6d62..13b5cbf53628 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -351,7 +351,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 #endif
        tsk->splice_pipe = NULL;
        tsk->task_frag.page = NULL;
-
+#ifdef CONFIG_SCHED_IO_LATENCY
+       tsk->io_latency.avg_latency = 0;
+       INIT_LIST_HEAD(&tsk->io_latency.bucket_list);
+#endif
        account_kernel_stack(ti, 1);
 
        return tsk;
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 7cd404cd5608..5d5380090741 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -19,4 +19,6 @@ obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
 obj-$(CONFIG_SCHEDSTATS) += stats.o
 obj-$(CONFIG_SCHED_DEBUG) += debug.o
 obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
+obj-$(CONFIG_SCHED_IO_LATENCY) += io_latency.o
+obj-$(CONFIG_SCHED_IDLE_DEBUG) += idle_debug.o
 obj-$(CONFIG_CPU_FREQ_GOV_ENERGY_MODEL) += energy_model.o
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 02ec4a5b5fd3..4db493445cd6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -84,6 +84,7 @@
 #endif
 
 #include "sched.h"
+#include "io_latency.h"
 #include "../workqueue_internal.h"
 #include "../smpboot.h"
 
@@ -4396,7 +4397,9 @@ void __sched io_schedule(void)
        atomic_inc(&rq->nr_iowait);
        blk_flush_plug(current);
        current->in_iowait = 1;
+       io_latency_begin(rq, current);
        schedule();
+       io_latency_end(rq, current);
        current->in_iowait = 0;
        atomic_dec(&rq->nr_iowait);
        delayacct_blkio_end();
@@ -4412,7 +4415,9 @@ long __sched io_schedule_timeout(long timeout)
        atomic_inc(&rq->nr_iowait);
        blk_flush_plug(current);
        current->in_iowait = 1;
+       io_latency_begin(rq, current);
        ret = schedule_timeout(timeout);
+       io_latency_end(rq, current);
        current->in_iowait = 0;
        atomic_dec(&rq->nr_iowait);
        delayacct_blkio_end();
@@ -7167,6 +7172,8 @@ void __init sched_init(void)
        autogroup_init(&init_task);
 #endif /* CONFIG_CGROUP_SCHED */
 
+       io_latency_init();
+
        for_each_possible_cpu(i) {
                struct rq *rq;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 21043e9b31ae..5f3056215e41 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -62,6 +62,10 @@ unsigned int normalized_sysctl_sched_latency = 6000000ULL;
 enum sched_tunable_scaling sysctl_sched_tunable_scaling
        = SCHED_TUNABLESCALING_LOG;
 
+#ifdef CONFIG_SMP
+unsigned int sysctl_sched_energy_option = 0; /* Experimental code, disabled by default */
+#endif
+
 /*
  * Minimal preemption granularity for CPU-bound tasks:
  * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
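On SMP kernels the new flag surfaces as /proc/sys/kernel/sched_energy_option, clamped to 0 or 1 by proc_dointvec_minmax (see the kernel/sysctl.c hunk near the end of this patch). Two small observations, based only on the hunks shown here: nothing in this diff reads sysctl_sched_energy_option yet, and the sched_proc_energy_option_handler declared in sched/sysctl.h is not the handler wired into the table.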
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index c47fce75e666..9fed4d593773 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -4,7 +4,8 @@
 #include <linux/sched.h>
 #include <linux/cpu.h>
 #include <linux/cpuidle.h>
-#include <linux/tick.h>
+#include <linux/ktime.h>
+#include <linux/pm_qos.h>
 #include <linux/mm.h>
 #include <linux/stackprotector.h>
 
@@ -13,6 +14,8 @@
 #include <trace/events/power.h>
 
 #include "sched.h"
+#include "io_latency.h"
+#include "idle_debug.h"
 
 static int __read_mostly cpu_idle_force_poll;
 
@@ -78,8 +81,9 @@ static void cpuidle_idle_call(void)
 {
        struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
        struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
+       struct cpuidle_times times;
        int next_state, entered_state;
-       unsigned int broadcast;
+       bool broadcast;
 
        /*
         * Check if the idle task must be rescheduled. If it is the
@@ -103,11 +107,29 @@ static void cpuidle_idle_call(void)
         */
        rcu_idle_enter();
 
+       times.latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
+
+       /*
+        * The latency requirement does not allow any latency: jump to
+        * the default idle function without entering the cpuidle code
+        */
+       if (times.latency_req == 0)
+               goto use_default;
+
+       /*
+        * Retrieve the next timer event
+        */
+       times.next_timer_event = ktime_to_us(tick_nohz_get_sleep_length());
+
+       /*
+        * Retrieve the next guessed IO event
+        */
+       times.next_io_event = io_latency_get_sleep_length(this_rq());
+
        /*
         * Ask the cpuidle framework to choose a convenient idle state.
         * Fall back to the default arch idle method on errors.
         */
-       next_state = cpuidle_select(drv, dev);
+       next_state = cpuidle_select(drv, dev, &times);
        if (next_state < 0) {
 use_default:
                /*
@@ -160,6 +182,11 @@ use_default:
        /* The cpu is no longer idle or about to enter idle. */
        idle_set_state(this_rq(), NULL);
 
+       /*
+        * Update the prediction rating
+        */
+       idle_debug_prediction_update(drv, dev, &times, entered_state);
+
        if (broadcast)
                clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT,
                                   &dev->cpu);
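A design point worth spelling out: the latency_req == 0 special case that used to live in both ladder and menu (removed earlier in this patch) now sits in the idle loop itself. When PM QoS forbids any exit latency the CPU jumps straight to the architecture's default idle routine via use_default and the cpuidle machinery is bypassed entirely, so governors may assume a non-zero latency budget.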
diff --git a/kernel/sched/idle_debug.c b/kernel/sched/idle_debug.c
new file mode 100644
index 000000000000..9cb17452729d
--- /dev/null
+++ b/kernel/sched/idle_debug.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2014 ARM/Linaro
+ *
+ * Author: Daniel Lezcano <daniel.lezcano@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Maintainer: Daniel Lezcano <daniel.lezcano@linaro.org>
+ */
+
+#include <linux/cpuidle.h>
+#include <linux/debugfs.h>
+#include <linux/atomic.h>
+#include <linux/init.h>
+
+static atomic_t idle_predictions_under_estimate;
+static atomic_t idle_predictions_over_estimate;
+static atomic_t idle_predictions_success;
+
+void idle_debug_prediction_update(struct cpuidle_driver *drv,
+                                 struct cpuidle_device *dev,
+                                 struct cpuidle_times *times, int index)
+{
+       int residency, target_residency;
+       int i;
+
+       /*
+        * The cpuidle driver was not able to enter an idle state: the
+        * last_residency is then zero and it does not make sense to
+        * update the prediction accuracy.
+        */
+       residency = dev->last_residency;
+       if (!residency)
+               return;
+
+       target_residency = drv->states[index].target_residency;
+
+       /*
+        * The last residency is smaller than the target residency: we
+        * over-estimated the sleep time.
+        */
+       if (residency < target_residency) {
+               atomic_inc(&idle_predictions_over_estimate);
+               return;
+       }
+
+       /*
+        * This state is not the deepest one; look at the next state's
+        * target residency to check whether we could have gone deeper
+        * into idle.
+        */
+       for (i = index + 1; i < drv->state_count; i++) {
+
+               /*
+                * Ignore the disabled states
+                */
+               if (drv->states[i].disabled || dev->states_usage[i].disable)
+                       continue;
+
+               /*
+                * Ignore the states which do not fit the latency
+                * constraint. As the idle states array is ordered, a
+                * deeper idle state has a greater exit latency, so
+                * there is no need to continue the loop: none of the
+                * next idle states will fit the latency requirement
+                * either.
+                */
+               if (drv->states[i].exit_latency > times->latency_req)
+                       break;
+
+               /*
+                * The residency is greater than the next state's
+                * target residency: we under-estimated the sleep time
+                * and could have slept deeper.
+                */
+               if (residency > drv->states[i].target_residency) {
+                       atomic_inc(&idle_predictions_under_estimate);
+                       return;
+               }
+
+               /*
+                * No need to look at the deeper idle states: their
+                * target residency is greater than the one we just
+                * compared against.
+                */
+               break;
+       }
+
+       atomic_inc(&idle_predictions_success);
+}
+
+static int __init idle_debug(void)
+{
+       struct dentry *dsched, *didle;
+       int ret = -1;
+
+       dsched = debugfs_create_dir("sched", NULL);
+       if (!dsched)
+               return -1;
+
+       didle = debugfs_create_dir("idle", dsched);
+       if (!didle)
+               goto out;
+
+       if (!debugfs_create_atomic_t("predictions_under_estimate", 0600, didle,
+                                    &idle_predictions_under_estimate))
+               goto out;
+
+       if (!debugfs_create_atomic_t("predictions_over_estimate", 0600, didle,
+                                    &idle_predictions_over_estimate))
+               goto out;
+
+       if (!debugfs_create_atomic_t("predictions_success", 0600, didle,
+                                    &idle_predictions_success))
+               goto out;
+
+       ret = 0;
+out:
+       if (ret)
+               debugfs_remove_recursive(dsched);
+
+       return ret;
+}
+
+core_initcall(idle_debug);
diff --git a/kernel/sched/idle_debug.h b/kernel/sched/idle_debug.h
new file mode 100644
index 000000000000..3fca132d5fc3
--- /dev/null
+++ b/kernel/sched/idle_debug.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2014 ARM/Linaro
+ *
+ * Author: Daniel Lezcano <daniel.lezcano@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Maintainer: Daniel Lezcano <daniel.lezcano@linaro.org>
+ */
+
+struct cpuidle_device;
+struct cpuidle_driver;
+struct cpuidle_times;
+
+#ifdef CONFIG_SCHED_IDLE_DEBUG
+extern void idle_debug_prediction_update(struct cpuidle_driver *drv,
+                                        struct cpuidle_device *dev,
+                                        struct cpuidle_times *times,
+                                        int index);
+#else
+static inline void idle_debug_prediction_update(struct cpuidle_driver *drv,
+                                               struct cpuidle_device *dev,
+                                               struct cpuidle_times *times,
+                                               int index)
+{
+       ;
+}
+#endif
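With CONFIG_SCHED_IDLE_DEBUG enabled, the counters appear as predictions_success, predictions_over_estimate and predictions_under_estimate under /sys/kernel/debug/sched/idle/ (mode 0600, so root only). A simple accuracy figure for the governor in use is predictions_success divided by the sum of the three counters.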
diff --git a/kernel/sched/io_latency.c b/kernel/sched/io_latency.c
new file mode 100644
index 000000000000..4f902d0c626f
--- /dev/null
+++ b/kernel/sched/io_latency.c
@@ -0,0 +1,442 @@
+/*
+ * Copyright (c) 2014 ARM/Linaro
+ *
+ * Author: Daniel Lezcano <daniel.lezcano@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/percpu.h>
+#include <linux/ktime.h>
+#include <linux/rbtree.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "sched.h"
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/io_latency.h>
+
+struct io_latency_tree {
+       spinlock_t lock;
+       struct rb_root tree;
+       struct io_latency_node *left_most;
+};
+
+/*
+ * This is the resolution of the statistics, in usec: the latency
+ * covered by a bucket is BUCKET_INTERVAL * index.
+ * The finer the resolution, the less often the prediction hits the
+ * right bucket. Some measured hit rates:
+ *
+ * For 1ms:
+ *  SSD 6Gb/s        : 99.7%
+ *  SD card class 10 : 97.7%
+ *  SD card class 4  : 54.3%
+ *  HDD on USB       : 93.6%
+ *
+ * For 500us:
+ *  SSD 6Gb/s        : 99.9%
+ *  SD card class 10 : 96.8%
+ *  SD card class 4  : 55.8%
+ *  HDD on USB       : 86.3%
+ *
+ * For 200us:
+ *  SSD 6Gb/s        : 99.7%
+ *  SD card class 10 : 95.5%
+ *  SD card class 4  : 29.5%
+ *  HDD on USB       : 66.3%
+ *
+ * For 100us:
+ *  SSD 6Gb/s        : 85.7%
+ *  SD card class 10 : 67.63%
+ *  SD card class 4  : 31.4%
+ *  HDD on USB       : 44.97%
+ *
+ * Aiming at 100% is not necessarily good, because the goal is to hit
+ * the correct idle state: too coarse a resolution groups different
+ * latencies into one big interval, which may then straddle a cpuidle
+ * state's target residency.
+ */
+#define BUCKET_INTERVAL 200
+
+/*
+ * Number of successive hits on the same bucket. This is the threshold
+ * that triggers moving the element to the beginning of the list,
+ * giving it more weight in the statistics when guessing the next
+ * latency.
+ */
+#define BUCKET_SUCCESSIVE 5
+
+/*
+ * What is a bucket ?
+ *
+ * A bucket is an interval of latency. This interval is defined with
+ * BUCKET_INTERVAL: the bucket index tells which latency interval we
+ * are in. For example, with an index of 2 and a bucket interval of
+ * 1000 usec, the bucket covers the latencies from 2000 to 2999 usec.
+ */
+struct bucket {
+       int hits;
+       int successive_hits;
+       int index;
+       int average;
+       struct list_head list;
+};
+
+static struct kmem_cache *bucket_cachep;
+
+static DEFINE_PER_CPU(struct io_latency_tree, latency_trees);
+
+/**
+ * io_latency_bucket_find - Find the bucket associated with an index
+ *
+ * @tsk: the task whose bucket list is searched
+ * @index: the index of the bucket to find
+ *
+ * Returns the bucket associated with the index, NULL if no bucket is
+ * found
+ */
+static struct bucket *io_latency_bucket_find(struct task_struct *tsk, int index)
+{
+       struct list_head *list;
+       struct bucket *bucket = NULL;
+       struct list_head *bucket_list = &tsk->io_latency.bucket_list;
+
+       list_for_each(list, bucket_list) {
+
+               bucket = list_entry(list, struct bucket, list);
+
+               if (bucket->index == index)
+                       return bucket;
+       }
+
+       return NULL;
+}
+
+/**
+ * io_latency_bucket_alloc - Allocate a bucket
+ *
+ * @index: index of the bucket to allocate
+ *
+ * Allocate and initialize a bucket structure
+ *
+ * Returns a pointer to a bucket, or NULL if the allocation failed
+ */
+static struct bucket *io_latency_bucket_alloc(int index)
+{
+       struct bucket *bucket;
+
+       bucket = kmem_cache_alloc(bucket_cachep, GFP_KERNEL);
+       if (bucket) {
+               bucket->hits = 0;
+               bucket->successive_hits = 0;
+               bucket->index = index;
+               bucket->average = 0;
+               INIT_LIST_HEAD(&bucket->list);
+       }
+
+       return bucket;
+}
+
+/**
+ * io_latency_guessed_bucket - try to predict the next bucket
+ *
+ * @tsk: the task to get the bucket list from
+ *
+ * The list is ordered by history: the first element is the one with
+ * the most *successive* hits. This function is called each time a new
+ * latency is inserted. The algorithm is pretty simple: the first
+ * element is the one most likely to occur next, so it gets the
+ * biggest weight, the second one a smaller weight, and so on.
+ *
+ * The bucket with the maximum score (number of hits weighted by its
+ * position in the list) is the bucket most likely to occur next.
+ *
+ * Returns a pointer to the bucket structure, NULL if there are no
+ * buckets in the list
+ */
+static struct bucket *io_latency_guessed_bucket(struct task_struct *tsk)
+{
+       int weight = 0;
+       int score, score_max = 0;
+       struct bucket *bucket, *winner = NULL;
+       struct list_head *list = NULL;
+       struct list_head *bucket_list = &tsk->io_latency.bucket_list;
+
+       if (list_empty(bucket_list))
+               return NULL;
+
+       list_for_each(list, bucket_list) {
+
+               bucket = list_entry(list, struct bucket, list);
+
+               /*
+                * The list is ordered by history: the first element
+                * has more weight than the next one
+                */
+               score = bucket->hits / ((2 * weight) + 1);
+
+               weight++;
+
+               if (score < score_max)
+                       continue;
+
+               score_max = score;
+               winner = bucket;
+       }
+
+       return winner;
+}
+
+/*
+ * io_latency_bucket_index - Returns the bucket index for the specified latency
+ *
+ * @latency: the latency fitting a bucket with the specified index
+ *
+ * Returns an integer with the bucket's index
+ */
+static int io_latency_bucket_index(int latency)
+{
+       return latency / BUCKET_INTERVAL;
+}
+
+/*
+ * io_latency_bucket_fill - Compute and fill the bucket list
+ *
+ * @tsk: the task completing an IO
+ * @latency: the latency of the IO
+ *
+ * The dynamic of the list is the following:
+ *  - Each new element is inserted at the end of the list
+ *  - Each element hit BUCKET_SUCCESSIVE times in a row is moved to
+ *    the beginning of the list
+ *
+ * Returns 0 on success, -1 if a bucket allocation failed
+ */
+static int io_latency_bucket_fill(struct task_struct *tsk, int latency)
+{
+       int diff, index = io_latency_bucket_index(latency);
+       struct bucket *bucket;
+
+       /*
+        * Find the bucket associated with the index
+        */
+       bucket = io_latency_bucket_find(tsk, index);
+       if (!bucket) {
+               bucket = io_latency_bucket_alloc(index);
+               if (!bucket)
+                       return -1;
+
+               list_add_tail(&bucket->list, &tsk->io_latency.bucket_list);
+       }
+
+       /*
+        * Increase the number of times this bucket has been hit
+        */
+       bucket->hits++;
+       bucket->successive_hits++;
+
+       /*
+        * Compute a sliding average (weight 1/64) of the latency in
+        * this bucket
+        */
+       diff = latency - bucket->average;
+       bucket->average += (diff >> 6);
+
+       /*
+        * We hit the same bucket a successive number of times; move
+        * it to the beginning of the list
+        */
+       if (bucket->successive_hits == BUCKET_SUCCESSIVE) {
+               list_move(&bucket->list, &tsk->io_latency.bucket_list);
+               bucket->successive_hits = 1;
+       }
+
+       return 0;
+}
+
+/*
+ * exit_io_latency - free resources when the task exits
+ *
+ * @tsk: the exiting task
+ *
+ */
+void exit_io_latency(struct task_struct *tsk)
+{
+       struct list_head *bucket_list = &tsk->io_latency.bucket_list;
+       struct list_head *tmp, *list;
+       struct bucket *bucket;
+
+       list_for_each_safe(list, tmp, bucket_list) {
+
+               list_del(list);
+               bucket = list_entry(list, struct bucket, list);
+               kmem_cache_free(bucket_cachep, bucket);
+       }
+}
+
+/**
+ * io_latency_init - initialization routine
+ *
+ * Initializes the cache pool and the io latency rb trees.
+ */
+void io_latency_init(void)
+{
+       int cpu;
+       struct io_latency_tree *latency_tree;
+       struct rb_root *root;
+
+       bucket_cachep = KMEM_CACHE(bucket, SLAB_PANIC);
+
+       for_each_possible_cpu(cpu) {
+               latency_tree = &per_cpu(latency_trees, cpu);
+               latency_tree->left_most = NULL;
+               spin_lock_init(&latency_tree->lock);
+               root = &latency_tree->tree;
+               root->rb_node = NULL;
+       }
+}
+
+/**
+ * io_latency_get_sleep_length - compute the expected sleep time
+ *
+ * @rq: the runqueue associated with the cpu
+ *
+ * Returns the minimal estimated remaining sleep time of the pending
+ * IOs
+ */
+s64 io_latency_get_sleep_length(struct rq *rq)
+{
+       int cpu = rq->cpu;
+       struct io_latency_tree *latency_tree = &per_cpu(latency_trees, cpu);
+       struct io_latency_node *node;
+       ktime_t now = ktime_get();
+       s64 diff;
+
+       node = latency_tree->left_most;
+
+       if (!node)
+               return 0;
+
+       diff = ktime_to_us(ktime_sub(now, node->start_time));
+       diff = node->avg_latency - diff;
+
+       /* The estimate was wrong, return 0 */
+       if (diff < 0)
+               return 0;
+
+       return diff;
+}
+
+/**
+ * io_latency_avg - compute the sliding average of the IO latency
+ *
+ * @tsk: the task whose latency average is updated
+ *
+ */
+static void io_latency_avg(struct task_struct *tsk)
+{
+       struct io_latency_node *node = &tsk->io_latency;
+       s64 latency = ktime_to_us(ktime_sub(node->end_time, node->start_time));
+       struct bucket *bucket;
+
+       io_latency_bucket_fill(tsk, latency);
+
+       bucket = io_latency_guessed_bucket(tsk);
+       if (bucket)
+               node->avg_latency = bucket->average;
+}
+
+/**
+ * io_latency_begin - insert the node in the rb tree
+ *
+ * @rq: the runqueue the task is running on
+ * @tsk: the task being blocked on an IO
+ *
+ * Inserts the node in the rbtree in an ordered manner. If this task
+ * has the smallest expected io latency of all the tasks blocked on
+ * IO, it becomes the left-most node and the shortcut is updated.
+ * Stores the start time of the io schedule.
+ *
+ */
+int io_latency_begin(struct rq *rq, struct task_struct *tsk)
+{
+       int cpu = rq->cpu;
+       struct io_latency_tree *latency_tree = &per_cpu(latency_trees, cpu);
+       struct rb_root *root = &latency_tree->tree;
+       struct io_latency_node *node = &tsk->io_latency;
+       struct rb_node **new = &root->rb_node, *parent = NULL;
+       struct io_latency_node *lat;
+       int leftmost = 1;
+
+       node->start_time = ktime_get();
+
+       spin_lock(&latency_tree->lock);
+
+       while (*new) {
+               lat = rb_entry(*new, struct io_latency_node, node);
+
+               parent = *new;
+
+               if (lat->avg_latency > node->avg_latency)
+                       new = &parent->rb_left;
+               else {
+                       new = &parent->rb_right;
+                       leftmost = 0;
+               }
+       }
+
+       if (leftmost)
+               latency_tree->left_most = node;
+
+       rb_link_node(&node->node, parent, new);
+       rb_insert_color(&node->node, root);
+
+       spin_unlock(&latency_tree->lock);
+
+       return 0;
+}
+
+/**
+ * io_latency_end - remove the node from the rb tree
+ *
+ * @rq: the runqueue the task belongs to
+ * @tsk: the task woken up after an IO completion
+ *
+ * Removes the node from the rb tree of this cpu. If it is the
+ * left-most node, the shortcut is updated to the next node. Records
+ * the end time of the IO and updates the task's average io latency.
+ */
+void io_latency_end(struct rq *rq, struct task_struct *tsk)
+{
+       int cpu = rq->cpu;
+       struct io_latency_tree *latency_tree = &per_cpu(latency_trees, cpu);
+       struct rb_root *root = &latency_tree->tree;
+       struct io_latency_node *old = &tsk->io_latency;
+
+       old->end_time = ktime_get();
+
+       spin_lock(&latency_tree->lock);
+
+       if (latency_tree->left_most == old) {
+               struct rb_node *next_node =
+                       rb_next(&latency_tree->left_most->node);
+               latency_tree->left_most = next_node ?
+                       rb_entry(next_node, struct io_latency_node, node) :
+                       NULL;
+       }
+
+       rb_erase(&old->node, root);
+
+       spin_unlock(&latency_tree->lock);
+
+       io_latency_avg(tsk);
+
+       trace_io_latency_entry(
+               ktime_to_us(ktime_sub(old->end_time, old->start_time)),
+               old->avg_latency);
+}
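To make the bucket arithmetic of io_latency.c concrete, here is a small standalone userspace sketch of its three formulas — the bucket index, the 1/64 sliding average and the position-weighted score. The sample values are invented for illustration:

        #include <stdio.h>

        #define BUCKET_INTERVAL 200

        int main(void)
        {
                int latencies[] = { 1350, 1280, 1400, 1320, 1360 };
                int hits[] = { 40, 12, 3 };     /* hits per bucket, in history order */
                int average = 0;
                int i;

                /* index: 1350us / 200us -> bucket 6, i.e. the [1200us, 1399us] range */
                printf("index(1350us) = %d\n", 1350 / BUCKET_INTERVAL);

                /* sliding average: average += (latency - average) >> 6 */
                for (i = 0; i < 5; i++)
                        average += (latencies[i] - average) >> 6;
                printf("average after 5 samples = %dus\n", average);

                /* guessed bucket: score = hits / (2 * position + 1), highest wins */
                for (i = 0; i < 3; i++)
                        printf("bucket at position %d: score = %d\n",
                               i, hits[i] / (2 * i + 1));

                return 0;
        }

Note how slowly the 1/64 average converges — five samples around 1350us only pull it up to about 100us, the price paid for a very stable estimate — and how strongly the scoring favours the head of the list: 40 hits at position 0 score 40, while 12 hits at position 1 score only 4.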
Retrieves the end time after the io has complete and update + * the io latency average time + */ +void io_latency_end(struct rq *rq, struct task_struct *tsk) +{ + int cpu = rq->cpu; + struct io_latency_tree *latency_tree = &per_cpu(latency_trees, cpu); + struct rb_root *root = &latency_tree->tree; + struct io_latency_node *old = &tsk->io_latency; + + old->end_time = ktime_get(); + + spin_lock(&latency_tree->lock); + + if (latency_tree->left_most == old) { + struct rb_node *next_node = + rb_next(&latency_tree->left_most->node); + latency_tree->left_most = + rb_entry(next_node, struct io_latency_node, node); + } + + rb_erase(&old->node, root); + + spin_unlock(&latency_tree->lock); + + io_latency_avg(tsk); + + trace_io_latency_entry( + ktime_to_us(ktime_sub(old->end_time, old->start_time)), + old->avg_latency); +} diff --git a/kernel/sched/io_latency.h b/kernel/sched/io_latency.h new file mode 100644 index 000000000000..abe063ee6417 --- /dev/null +++ b/kernel/sched/io_latency.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2014 ARM/Linaro + * + * Author: Daniel Lezcano <daniel.lezcano@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Maintainer: Daniel Lezcano <daniel.lezcano@linaro.org> + */ + +#ifdef CONFIG_SCHED_IO_LATENCY +extern void io_latency_init(void); +extern int io_latency_begin(struct rq *rq, struct task_struct *tsk); +extern void io_latency_end(struct rq *rq, struct task_struct *tsk); +extern int io_latency_get_sleep_length(struct rq *rq); +#else +static inline void io_latency_init(void) +{ + ; +} + +static inline int io_latency_begin(struct rq *rq, struct task_struct *tsk) +{ + return 0; +} + +static inline void io_latency_end(struct rq *rq, struct task_struct *tsk) +{ + ; +} + +static inline int io_latency_get_sleep_length(struct rq *rq) +{ + return 0; +} +#endif diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 15f2511a1b7c..947663d2935a 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -284,6 +284,17 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#ifdef CONFIG_SMP + { + .procname = "sched_energy_option", + .data = &sysctl_sched_energy_option, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, +#endif #ifdef CONFIG_SCHED_DEBUG { .procname = "sched_min_granularity_ns", diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 4e35a5d767ed..6e5b5ba795db 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -812,6 +812,15 @@ config SCHED_DEBUG that can help debug the scheduler. The runtime overhead of this option is minimal. +config SCHED_IDLE_DEBUG + bool "Collect idle prediction accuracy" + depends on CPU_IDLE && DEBUG_FS + default n + help + If you say Y here, the /sys/kernel/debug/sched/idle directory + will be provided with some idle predictions statistics success. + The overhead is negligeable + config SCHEDSTATS bool "Collect scheduler statistics" depends on DEBUG_KERNEL && PROC_FS |