diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cgroup.c | 59 | ||||
-rw-r--r-- | kernel/cpu.c | 24 | ||||
-rw-r--r-- | kernel/debug/kdb/kdb_io.c | 12 | ||||
-rw-r--r-- | kernel/fork.c | 3 | ||||
-rw-r--r-- | kernel/panic.c | 8 | ||||
-rw-r--r-- | kernel/power/Kconfig | 16 | ||||
-rw-r--r-- | kernel/power/Makefile | 3 | ||||
-rw-r--r-- | kernel/power/process.c | 22 | ||||
-rw-r--r-- | kernel/power/suspend.c | 37 | ||||
-rw-r--r-- | kernel/power/suspend_time.c | 111 | ||||
-rw-r--r-- | kernel/power/wakeup_reason.c | 225 | ||||
-rw-r--r-- | kernel/printk/printk.c | 8 | ||||
-rw-r--r-- | kernel/sched/core.c | 15 | ||||
-rw-r--r-- | kernel/sched/fair.c | 1 | ||||
-rw-r--r-- | kernel/sys.c | 173 | ||||
-rw-r--r-- | kernel/sysctl.c | 39 | ||||
-rw-r--r-- | kernel/trace/Kconfig | 3 | ||||
-rw-r--r-- | kernel/trace/Makefile | 1 | ||||
-rw-r--r-- | kernel/trace/gpu-traces.c | 23 | ||||
-rw-r--r-- | kernel/trace/trace.c | 99 | ||||
-rw-r--r-- | kernel/trace/trace.h | 2 | ||||
-rw-r--r-- | kernel/trace/trace_functions_graph.c | 43 | ||||
-rw-r--r-- | kernel/trace/trace_output.c | 184 | ||||
-rw-r--r-- | kernel/watchdog.c | 123 |
24 files changed, 1178 insertions, 56 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 359da3abb004..0e98a35e6b6a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2375,6 +2375,44 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp, return ret; } +int subsys_cgroup_allow_attach(struct cgroup_subsys_state *css, struct cgroup_taskset *tset) +{ + const struct cred *cred = current_cred(), *tcred; + struct task_struct *task; + + if (capable(CAP_SYS_NICE)) + return 0; + + cgroup_taskset_for_each(task, tset) { + tcred = __task_cred(task); + + if (current != task && !uid_eq(cred->euid, tcred->uid) && + !uid_eq(cred->euid, tcred->suid)) + return -EACCES; + } + + return 0; +} + +static int cgroup_allow_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) +{ + struct cgroup_subsys_state *css; + int i; + int ret; + + for_each_css(css, i, cgrp) { + if (css->ss->allow_attach) { + ret = css->ss->allow_attach(css, tset); + if (ret) + return ret; + } else { + return -EACCES; + } + } + + return 0; +} + /* * Find the task_struct of the task to attach by vpid and pass it along to the * function to attach either it or all tasks in its threadgroup. Will lock @@ -2413,9 +2451,24 @@ retry_find_task: if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && !uid_eq(cred->euid, tcred->uid) && !uid_eq(cred->euid, tcred->suid)) { - rcu_read_unlock(); - ret = -EACCES; - goto out_unlock_cgroup; + /* + * if the default permission check fails, give each + * cgroup a chance to extend the permission check + */ + struct cgroup_taskset tset = { + .src_csets = LIST_HEAD_INIT(tset.src_csets), + .dst_csets = LIST_HEAD_INIT(tset.dst_csets), + .csets = &tset.src_csets, + }; + struct css_set *cset; + cset = task_css_set(tsk); + list_add(&cset->mg_node, &tset.src_csets); + ret = cgroup_allow_attach(cgrp, &tset); + list_del(&tset.src_csets); + if (ret) { + rcu_read_unlock(); + goto out_unlock_cgroup; + } } } else tsk = current; diff --git a/kernel/cpu.c b/kernel/cpu.c index 94bbe4695232..3ba480f598a2 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -23,6 +23,8 @@ #include <linux/tick.h> #include <trace/events/power.h> +#include <trace/events/sched.h> + #include "smpboot.h" #ifdef CONFIG_SMP @@ -428,6 +430,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) out_release: cpu_hotplug_done(); + trace_sched_cpu_hotplug(cpu, err, 0); if (!err) cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu); return err; @@ -531,6 +534,7 @@ out_notify: __cpu_notify(CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL); out: cpu_hotplug_done(); + trace_sched_cpu_hotplug(cpu, ret, 1); return ret; } @@ -823,3 +827,23 @@ void init_cpu_online(const struct cpumask *src) { cpumask_copy(to_cpumask(cpu_online_bits), src); } + +static ATOMIC_NOTIFIER_HEAD(idle_notifier); + +void idle_notifier_register(struct notifier_block *n) +{ + atomic_notifier_chain_register(&idle_notifier, n); +} +EXPORT_SYMBOL_GPL(idle_notifier_register); + +void idle_notifier_unregister(struct notifier_block *n) +{ + atomic_notifier_chain_unregister(&idle_notifier, n); +} +EXPORT_SYMBOL_GPL(idle_notifier_unregister); + +void idle_notifier_call_chain(unsigned long val) +{ + atomic_notifier_call_chain(&idle_notifier, val, NULL); +} +EXPORT_SYMBOL_GPL(idle_notifier_call_chain); diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index fc1ef736253c..0b891286a150 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -216,7 +216,7 @@ static char *kdb_read(char *buffer, size_t bufsize) int i; int diag, dtab_count; int key; - + static int last_crlf; diag = kdbgetintenv("DTABCOUNT", &dtab_count); if (diag) @@ -237,6 +237,9 @@ poll_again: return buffer; if (key != 9) tab = 0; + if (key != 10 && key != 13) + last_crlf = 0; + switch (key) { case 8: /* backspace */ if (cp > buffer) { @@ -254,7 +257,12 @@ poll_again: *cp = tmp; } break; - case 13: /* enter */ + case 10: /* new line */ + case 13: /* carriage return */ + /* handle \n after \r */ + if (last_crlf && last_crlf != key) + break; + last_crlf = key; *lastchar++ = '\n'; *lastchar++ = '\0'; if (!KDB_STATE(KGDB_TRANS)) { diff --git a/kernel/fork.c b/kernel/fork.c index 8209fa2d36ef..bd0f76fd95be 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -791,7 +791,8 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) mm = get_task_mm(task); if (mm && mm != current->mm && - !ptrace_may_access(task, mode)) { + !ptrace_may_access(task, mode) && + !capable(CAP_SYS_RESOURCE)) { mmput(mm); mm = ERR_PTR(-EACCES); } diff --git a/kernel/panic.c b/kernel/panic.c index a4f7820f5930..3c329009f9be 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -28,6 +28,9 @@ #define PANIC_TIMER_STEP 100 #define PANIC_BLINK_SPD 18 +/* Machine specific panic information string */ +char *mach_panic_string; + int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE; static unsigned long tainted_mask; static int pause_on_oops; @@ -411,6 +414,11 @@ late_initcall(init_oops_id); void print_oops_end_marker(void) { init_oops_id(); + + if (mach_panic_string) + printk(KERN_WARNING "Board Information: %s\n", + mach_panic_string); + pr_warn("---[ end trace %016llx ]---\n", (unsigned long long)oops_id); } diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 9e302315e33d..b85b931c0e5f 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -18,6 +18,15 @@ config SUSPEND_FREEZER Turning OFF this setting is NOT recommended! If in doubt, say Y. +config WAKELOCK + bool "Android's method of preventing suspend" + default y + ---help--- + This allows applications to prevent the CPU from suspending while + they need it. + + Say Y if you are running an android userspace. + config HIBERNATE_CALLBACKS bool @@ -301,3 +310,10 @@ config PM_GENERIC_DOMAINS_OF config CPU_PM bool + +config SUSPEND_TIME + bool "Log time spent in suspend" + ---help--- + Prints the time spent in suspend in the kernel log, and + keeps statistics on the time spent in suspend in + /sys/kernel/debug/suspend_time diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 29472bff11ef..74c713ba61b0 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -11,5 +11,8 @@ obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ block_io.o obj-$(CONFIG_PM_AUTOSLEEP) += autosleep.o obj-$(CONFIG_PM_WAKELOCKS) += wakelock.o +obj-$(CONFIG_SUSPEND_TIME) += suspend_time.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o + +obj-$(CONFIG_SUSPEND) += wakeup_reason.o diff --git a/kernel/power/process.c b/kernel/power/process.c index 564f786df470..e7f1f736a5b6 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -18,6 +18,7 @@ #include <linux/workqueue.h> #include <linux/kmod.h> #include <trace/events/power.h> +#include <linux/wakeup_reason.h> /* * Timeout for stopping processes @@ -35,6 +36,9 @@ static int try_to_freeze_tasks(bool user_only) unsigned int elapsed_msecs; bool wakeup = false; int sleep_usecs = USEC_PER_MSEC; +#ifdef CONFIG_PM_SLEEP + char suspend_abort[MAX_SUSPEND_ABORT_LEN]; +#endif do_gettimeofday(&start); @@ -64,6 +68,11 @@ static int try_to_freeze_tasks(bool user_only) break; if (pm_wakeup_pending()) { +#ifdef CONFIG_PM_SLEEP + pm_get_active_wakeup_sources(suspend_abort, + MAX_SUSPEND_ABORT_LEN); + log_suspend_abort_reason(suspend_abort); +#endif wakeup = true; break; } @@ -83,15 +92,17 @@ static int try_to_freeze_tasks(bool user_only) do_div(elapsed_msecs64, NSEC_PER_MSEC); elapsed_msecs = elapsed_msecs64; - if (todo) { + if (wakeup) { pr_cont("\n"); - pr_err("Freezing of tasks %s after %d.%03d seconds " - "(%d tasks refusing to freeze, wq_busy=%d):\n", - wakeup ? "aborted" : "failed", + pr_err("Freezing of tasks aborted after %d.%03d seconds", + elapsed_msecs / 1000, elapsed_msecs % 1000); + } else if (todo) { + pr_cont("\n"); + pr_err("Freezing of tasks failed after %d.%03d seconds" + " (%d tasks refusing to freeze, wq_busy=%d):\n", elapsed_msecs / 1000, elapsed_msecs % 1000, todo - wq_busy, wq_busy); - if (!wakeup) { read_lock(&tasklist_lock); for_each_process_thread(g, p) { if (p != current && !freezer_should_skip(p) @@ -99,7 +110,6 @@ static int try_to_freeze_tasks(bool user_only) sched_show_task(p); } read_unlock(&tasklist_lock); - } } else { pr_cont("(elapsed %d.%03d seconds) ", elapsed_msecs / 1000, elapsed_msecs % 1000); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 8d7a1ef72758..3320efb7b655 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -26,9 +26,11 @@ #include <linux/suspend.h> #include <linux/syscore_ops.h> #include <linux/ftrace.h> +#include <linux/rtc.h> #include <trace/events/power.h> #include <linux/compiler.h> #include <linux/moduleparam.h> +#include <linux/wakeup_reason.h> #include "power.h" @@ -309,7 +311,8 @@ void __weak arch_suspend_enable_irqs(void) */ static int suspend_enter(suspend_state_t state, bool *wakeup) { - int error; + char suspend_abort[MAX_SUSPEND_ABORT_LEN]; + int error, last_dev; error = platform_suspend_prepare(state); if (error) @@ -317,7 +320,11 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) error = dpm_suspend_late(PMSG_SUSPEND); if (error) { + last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1; + last_dev %= REC_FAILED_NUM; printk(KERN_ERR "PM: late suspend of devices failed\n"); + log_suspend_abort_reason("%s device failed to power down", + suspend_stats.failed_devs[last_dev]); goto Platform_finish; } error = platform_suspend_prepare_late(state); @@ -326,7 +333,11 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) error = dpm_suspend_noirq(PMSG_SUSPEND); if (error) { + last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1; + last_dev %= REC_FAILED_NUM; printk(KERN_ERR "PM: noirq suspend of devices failed\n"); + log_suspend_abort_reason("noirq suspend of %s device failed", + suspend_stats.failed_devs[last_dev]); goto Platform_early_resume; } error = platform_suspend_prepare_noirq(state); @@ -350,8 +361,10 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) } error = disable_nonboot_cpus(); - if (error || suspend_test(TEST_CPUS)) + if (error || suspend_test(TEST_CPUS)) { + log_suspend_abort_reason("Disabling non-boot cpus failed"); goto Enable_cpus; + } arch_suspend_disable_irqs(); BUG_ON(!irqs_disabled()); @@ -366,6 +379,11 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) trace_suspend_resume(TPS("machine_suspend"), state, false); events_check_enabled = false; + } else if (*wakeup) { + pm_get_active_wakeup_sources(suspend_abort, + MAX_SUSPEND_ABORT_LEN); + log_suspend_abort_reason(suspend_abort); + error = -EBUSY; } syscore_resume(); } @@ -412,6 +430,7 @@ int suspend_devices_and_enter(suspend_state_t state) error = dpm_suspend_start(PMSG_SUSPEND); if (error) { pr_err("PM: Some devices failed to suspend, or early wake event detected\n"); + log_suspend_abort_reason("Some devices failed to suspend, or early wake event detected"); goto Recover_platform; } suspend_test_finish("suspend devices"); @@ -510,6 +529,18 @@ static int enter_state(suspend_state_t state) return error; } +static void pm_suspend_marker(char *annotation) +{ + struct timespec ts; + struct rtc_time tm; + + getnstimeofday(&ts); + rtc_time_to_tm(ts.tv_sec, &tm); + pr_info("PM: suspend %s %d-%02d-%02d %02d:%02d:%02d.%09lu UTC\n", + annotation, tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec, ts.tv_nsec); +} + /** * pm_suspend - Externally visible function for suspending the system. * @state: System sleep state to enter. @@ -524,6 +555,7 @@ int pm_suspend(suspend_state_t state) if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX) return -EINVAL; + pm_suspend_marker("entry"); error = enter_state(state); if (error) { suspend_stats.fail++; @@ -531,6 +563,7 @@ int pm_suspend(suspend_state_t state) } else { suspend_stats.success++; } + pm_suspend_marker("exit"); return error; } EXPORT_SYMBOL(pm_suspend); diff --git a/kernel/power/suspend_time.c b/kernel/power/suspend_time.c new file mode 100644 index 000000000000..d2a65da9f22c --- /dev/null +++ b/kernel/power/suspend_time.c @@ -0,0 +1,111 @@ +/* + * debugfs file to track time spent in suspend + * + * Copyright (c) 2011, Google, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/debugfs.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/seq_file.h> +#include <linux/syscore_ops.h> +#include <linux/time.h> + +static struct timespec suspend_time_before; +static unsigned int time_in_suspend_bins[32]; + +#ifdef CONFIG_DEBUG_FS +static int suspend_time_debug_show(struct seq_file *s, void *data) +{ + int bin; + seq_printf(s, "time (secs) count\n"); + seq_printf(s, "------------------\n"); + for (bin = 0; bin < 32; bin++) { + if (time_in_suspend_bins[bin] == 0) + continue; + seq_printf(s, "%4d - %4d %4u\n", + bin ? 1 << (bin - 1) : 0, 1 << bin, + time_in_suspend_bins[bin]); + } + return 0; +} + +static int suspend_time_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, suspend_time_debug_show, NULL); +} + +static const struct file_operations suspend_time_debug_fops = { + .open = suspend_time_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init suspend_time_debug_init(void) +{ + struct dentry *d; + + d = debugfs_create_file("suspend_time", 0755, NULL, NULL, + &suspend_time_debug_fops); + if (!d) { + pr_err("Failed to create suspend_time debug file\n"); + return -ENOMEM; + } + + return 0; +} + +late_initcall(suspend_time_debug_init); +#endif + +static int suspend_time_syscore_suspend(void) +{ + read_persistent_clock(&suspend_time_before); + + return 0; +} + +static void suspend_time_syscore_resume(void) +{ + struct timespec after; + + read_persistent_clock(&after); + + after = timespec_sub(after, suspend_time_before); + + time_in_suspend_bins[fls(after.tv_sec)]++; + + pr_info("Suspended for %lu.%03lu seconds\n", after.tv_sec, + after.tv_nsec / NSEC_PER_MSEC); +} + +static struct syscore_ops suspend_time_syscore_ops = { + .suspend = suspend_time_syscore_suspend, + .resume = suspend_time_syscore_resume, +}; + +static int suspend_time_syscore_init(void) +{ + register_syscore_ops(&suspend_time_syscore_ops); + + return 0; +} + +static void suspend_time_syscore_exit(void) +{ + unregister_syscore_ops(&suspend_time_syscore_ops); +} +module_init(suspend_time_syscore_init); +module_exit(suspend_time_syscore_exit); diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c new file mode 100644 index 000000000000..252611fad2fe --- /dev/null +++ b/kernel/power/wakeup_reason.c @@ -0,0 +1,225 @@ +/* + * kernel/power/wakeup_reason.c + * + * Logs the reasons which caused the kernel to resume from + * the suspend mode. + * + * Copyright (C) 2014 Google, Inc. + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/wakeup_reason.h> +#include <linux/kernel.h> +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/kobject.h> +#include <linux/sysfs.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/notifier.h> +#include <linux/suspend.h> + + +#define MAX_WAKEUP_REASON_IRQS 32 +static int irq_list[MAX_WAKEUP_REASON_IRQS]; +static int irqcount; +static bool suspend_abort; +static char abort_reason[MAX_SUSPEND_ABORT_LEN]; +static struct kobject *wakeup_reason; +static DEFINE_SPINLOCK(resume_reason_lock); + +static ktime_t last_monotime; /* monotonic time before last suspend */ +static ktime_t curr_monotime; /* monotonic time after last suspend */ +static ktime_t last_stime; /* monotonic boottime offset before last suspend */ +static ktime_t curr_stime; /* monotonic boottime offset after last suspend */ + +static ssize_t last_resume_reason_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + int irq_no, buf_offset = 0; + struct irq_desc *desc; + spin_lock(&resume_reason_lock); + if (suspend_abort) { + buf_offset = sprintf(buf, "Abort: %s", abort_reason); + } else { + for (irq_no = 0; irq_no < irqcount; irq_no++) { + desc = irq_to_desc(irq_list[irq_no]); + if (desc && desc->action && desc->action->name) + buf_offset += sprintf(buf + buf_offset, "%d %s\n", + irq_list[irq_no], desc->action->name); + else + buf_offset += sprintf(buf + buf_offset, "%d\n", + irq_list[irq_no]); + } + } + spin_unlock(&resume_reason_lock); + return buf_offset; +} + +static ssize_t last_suspend_time_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct timespec sleep_time; + struct timespec total_time; + struct timespec suspend_resume_time; + + /* + * total_time is calculated from monotonic bootoffsets because + * unlike CLOCK_MONOTONIC it include the time spent in suspend state. + */ + total_time = ktime_to_timespec(ktime_sub(curr_stime, last_stime)); + + /* + * suspend_resume_time is calculated as monotonic (CLOCK_MONOTONIC) + * time interval before entering suspend and post suspend. + */ + suspend_resume_time = ktime_to_timespec(ktime_sub(curr_monotime, last_monotime)); + + /* sleep_time = total_time - suspend_resume_time */ + sleep_time = timespec_sub(total_time, suspend_resume_time); + + /* Export suspend_resume_time and sleep_time in pair here. */ + return sprintf(buf, "%lu.%09lu %lu.%09lu\n", + suspend_resume_time.tv_sec, suspend_resume_time.tv_nsec, + sleep_time.tv_sec, sleep_time.tv_nsec); +} + +static struct kobj_attribute resume_reason = __ATTR_RO(last_resume_reason); +static struct kobj_attribute suspend_time = __ATTR_RO(last_suspend_time); + +static struct attribute *attrs[] = { + &resume_reason.attr, + &suspend_time.attr, + NULL, +}; +static struct attribute_group attr_group = { + .attrs = attrs, +}; + +/* + * logs all the wake up reasons to the kernel + * stores the irqs to expose them to the userspace via sysfs + */ +void log_wakeup_reason(int irq) +{ + struct irq_desc *desc; + desc = irq_to_desc(irq); + if (desc && desc->action && desc->action->name) + printk(KERN_INFO "Resume caused by IRQ %d, %s\n", irq, + desc->action->name); + else + printk(KERN_INFO "Resume caused by IRQ %d\n", irq); + + spin_lock(&resume_reason_lock); + if (irqcount == MAX_WAKEUP_REASON_IRQS) { + spin_unlock(&resume_reason_lock); + printk(KERN_WARNING "Resume caused by more than %d IRQs\n", + MAX_WAKEUP_REASON_IRQS); + return; + } + + irq_list[irqcount++] = irq; + spin_unlock(&resume_reason_lock); +} + +int check_wakeup_reason(int irq) +{ + int irq_no; + int ret = false; + + spin_lock(&resume_reason_lock); + for (irq_no = 0; irq_no < irqcount; irq_no++) + if (irq_list[irq_no] == irq) { + ret = true; + break; + } + spin_unlock(&resume_reason_lock); + return ret; +} + +void log_suspend_abort_reason(const char *fmt, ...) +{ + va_list args; + + spin_lock(&resume_reason_lock); + + //Suspend abort reason has already been logged. + if (suspend_abort) { + spin_unlock(&resume_reason_lock); + return; + } + + suspend_abort = true; + va_start(args, fmt); + vsnprintf(abort_reason, MAX_SUSPEND_ABORT_LEN, fmt, args); + va_end(args); + spin_unlock(&resume_reason_lock); +} + +/* Detects a suspend and clears all the previous wake up reasons*/ +static int wakeup_reason_pm_event(struct notifier_block *notifier, + unsigned long pm_event, void *unused) +{ + switch (pm_event) { + case PM_SUSPEND_PREPARE: + spin_lock(&resume_reason_lock); + irqcount = 0; + suspend_abort = false; + spin_unlock(&resume_reason_lock); + /* monotonic time since boot */ + last_monotime = ktime_get(); + /* monotonic time since boot including the time spent in suspend */ + last_stime = ktime_get_boottime(); + break; + case PM_POST_SUSPEND: + /* monotonic time since boot */ + curr_monotime = ktime_get(); + /* monotonic time since boot including the time spent in suspend */ + curr_stime = ktime_get_boottime(); + break; + default: + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block wakeup_reason_pm_notifier_block = { + .notifier_call = wakeup_reason_pm_event, +}; + +/* Initializes the sysfs parameter + * registers the pm_event notifier + */ +int __init wakeup_reason_init(void) +{ + int retval; + + retval = register_pm_notifier(&wakeup_reason_pm_notifier_block); + if (retval) + printk(KERN_WARNING "[%s] failed to register PM notifier %d\n", + __func__, retval); + + wakeup_reason = kobject_create_and_add("wakeup_reasons", kernel_kobj); + if (!wakeup_reason) { + printk(KERN_WARNING "[%s] failed to create a sysfs kobject\n", + __func__); + return 1; + } + retval = sysfs_create_group(wakeup_reason, &attr_group); + if (retval) { + kobject_put(wakeup_reason); + printk(KERN_WARNING "[%s] failed to create a sysfs group %d\n", + __func__, retval); + } + return 0; +} + +late_initcall(wakeup_reason_init); diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 3c1aca0c3543..9306d059ed52 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -55,6 +55,10 @@ #include "console_cmdline.h" #include "braille.h" +#ifdef CONFIG_EARLY_PRINTK_DIRECT +extern void printascii(char *); +#endif + int console_printk[4] = { CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */ MESSAGE_LOGLEVEL_DEFAULT, /* default_message_loglevel */ @@ -1705,6 +1709,10 @@ asmlinkage int vprintk_emit(int facility, int level, } } +#ifdef CONFIG_EARLY_PRINTK_DIRECT + printascii(text); +#endif + if (level == LOGLEVEL_DEFAULT) level = default_message_loglevel; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4d870eb6086b..26d9b29fb4cc 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7286,6 +7286,14 @@ static inline int preempt_count_equals(int preempt_offset) return (nested == preempt_offset); } +static int __might_sleep_init_called; +int __init __might_sleep_init(void) +{ + __might_sleep_init_called = 1; + return 0; +} +early_initcall(__might_sleep_init); + void __might_sleep(const char *file, int line, int preempt_offset) { /* @@ -7310,8 +7318,10 @@ void ___might_sleep(const char *file, int line, int preempt_offset) rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */ if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && - !is_idle_task(current)) || - system_state != SYSTEM_RUNNING || oops_in_progress) + !is_idle_task(current)) || oops_in_progress) + return; + if (system_state != SYSTEM_RUNNING && + (!__might_sleep_init_called || system_state != SYSTEM_BOOTING)) return; if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) return; @@ -8382,6 +8392,7 @@ struct cgroup_subsys cpu_cgrp_subsys = { .fork = cpu_cgroup_fork, .can_attach = cpu_cgroup_can_attach, .attach = cpu_cgroup_attach, + .allow_attach = subsys_cgroup_allow_attach, .exit = cpu_cgroup_exit, .legacy_cftypes = cpu_files, .early_init = 1, diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 77690b653ca9..7cadbee6b2a9 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2997,6 +2997,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) } trace_sched_stat_blocked(tsk, delta); + trace_sched_blocked_reason(tsk); /* * Blocking time is in units of nanosecs, so shift by diff --git a/kernel/sys.c b/kernel/sys.c index 25ae8d2e65e2..b1a21622633c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -41,6 +41,8 @@ #include <linux/syscore_ops.h> #include <linux/version.h> #include <linux/ctype.h> +#include <linux/mm.h> +#include <linux/mempolicy.h> #include <linux/compat.h> #include <linux/syscalls.h> @@ -2049,10 +2051,158 @@ static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) } #endif +#ifdef CONFIG_MMU +static int prctl_update_vma_anon_name(struct vm_area_struct *vma, + struct vm_area_struct **prev, + unsigned long start, unsigned long end, + const char __user *name_addr) +{ + struct mm_struct *mm = vma->vm_mm; + int error = 0; + pgoff_t pgoff; + + if (name_addr == vma_get_anon_name(vma)) { + *prev = vma; + goto out; + } + + pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); + *prev = vma_merge(mm, *prev, start, end, vma->vm_flags, vma->anon_vma, + vma->vm_file, pgoff, vma_policy(vma), + name_addr); + if (*prev) { + vma = *prev; + goto success; + } + + *prev = vma; + + if (start != vma->vm_start) { + error = split_vma(mm, vma, start, 1); + if (error) + goto out; + } + + if (end != vma->vm_end) { + error = split_vma(mm, vma, end, 0); + if (error) + goto out; + } + +success: + if (!vma->vm_file) + vma->anon_name = name_addr; + +out: + if (error == -ENOMEM) + error = -EAGAIN; + return error; +} + +static int prctl_set_vma_anon_name(unsigned long start, unsigned long end, + unsigned long arg) +{ + unsigned long tmp; + struct vm_area_struct *vma, *prev; + int unmapped_error = 0; + int error = -EINVAL; + + /* + * If the interval [start,end) covers some unmapped address + * ranges, just ignore them, but return -ENOMEM at the end. + * - this matches the handling in madvise. + */ + vma = find_vma_prev(current->mm, start, &prev); + if (vma && start > vma->vm_start) + prev = vma; + + for (;;) { + /* Still start < end. */ + error = -ENOMEM; + if (!vma) + return error; + + /* Here start < (end|vma->vm_end). */ + if (start < vma->vm_start) { + unmapped_error = -ENOMEM; + start = vma->vm_start; + if (start >= end) + return error; + } + + /* Here vma->vm_start <= start < (end|vma->vm_end) */ + tmp = vma->vm_end; + if (end < tmp) + tmp = end; + + /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */ + error = prctl_update_vma_anon_name(vma, &prev, start, tmp, + (const char __user *)arg); + if (error) + return error; + start = tmp; + if (prev && start < prev->vm_end) + start = prev->vm_end; + error = unmapped_error; + if (start >= end) + return error; + if (prev) + vma = prev->vm_next; + else /* madvise_remove dropped mmap_sem */ + vma = find_vma(current->mm, start); + } +} + +static int prctl_set_vma(unsigned long opt, unsigned long start, + unsigned long len_in, unsigned long arg) +{ + struct mm_struct *mm = current->mm; + int error; + unsigned long len; + unsigned long end; + + if (start & ~PAGE_MASK) + return -EINVAL; + len = (len_in + ~PAGE_MASK) & PAGE_MASK; + + /* Check to see whether len was rounded up from small -ve to zero */ + if (len_in && !len) + return -EINVAL; + + end = start + len; + if (end < start) + return -EINVAL; + + if (end == start) + return 0; + + down_write(&mm->mmap_sem); + + switch (opt) { + case PR_SET_VMA_ANON_NAME: + error = prctl_set_vma_anon_name(start, end, arg); + break; + default: + error = -EINVAL; + } + + up_write(&mm->mmap_sem); + + return error; +} +#else /* CONFIG_MMU */ +static int prctl_set_vma(unsigned long opt, unsigned long start, + unsigned long len_in, unsigned long arg) +{ + return -EINVAL; +} +#endif + SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5) { struct task_struct *me = current; + struct task_struct *tsk; unsigned char comm[sizeof(me->comm)]; long error; @@ -2195,6 +2345,26 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_GET_TID_ADDRESS: error = prctl_get_tid_address(me, (int __user **)arg2); break; + case PR_SET_TIMERSLACK_PID: + if (task_pid_vnr(current) != (pid_t)arg3 && + !capable(CAP_SYS_NICE)) + return -EPERM; + rcu_read_lock(); + tsk = find_task_by_vpid((pid_t)arg3); + if (tsk == NULL) { + rcu_read_unlock(); + return -EINVAL; + } + get_task_struct(tsk); + rcu_read_unlock(); + if (arg2 <= 0) + tsk->timer_slack_ns = + tsk->default_timer_slack_ns; + else + tsk->timer_slack_ns = arg2; + put_task_struct(tsk); + error = 0; + break; case PR_SET_CHILD_SUBREAPER: me->signal->is_child_subreaper = !!arg2; break; @@ -2243,6 +2413,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_GET_FP_MODE: error = GET_FP_MODE(me); break; + case PR_SET_VMA: + error = prctl_set_vma(arg2, arg3, arg4, arg5); + break; default: error = -EINVAL; break; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c3eee4c6d6c1..21369891ef0f 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -103,6 +103,8 @@ extern char core_pattern[]; extern unsigned int core_pipe_limit; #endif extern int pid_max; +extern int extra_free_kbytes; +extern int min_free_order_shift; extern int pid_max_min, pid_max_max; extern int percpu_pagelist_fraction; extern int compat_log; @@ -1353,6 +1355,21 @@ static struct ctl_table vm_table[] = { .extra1 = &zero, }, { + .procname = "extra_free_kbytes", + .data = &extra_free_kbytes, + .maxlen = sizeof(extra_free_kbytes), + .mode = 0644, + .proc_handler = min_free_kbytes_sysctl_handler, + .extra1 = &zero, + }, + { + .procname = "min_free_order_shift", + .data = &min_free_order_shift, + .maxlen = sizeof(min_free_order_shift), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { .procname = "percpu_pagelist_fraction", .data = &percpu_pagelist_fraction, .maxlen = sizeof(percpu_pagelist_fraction), @@ -1528,6 +1545,28 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, +#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS + { + .procname = "mmap_rnd_bits", + .data = &mmap_rnd_bits, + .maxlen = sizeof(mmap_rnd_bits), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)&mmap_rnd_bits_min, + .extra2 = (void *)&mmap_rnd_bits_max, + }, +#endif +#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS + { + .procname = "mmap_rnd_compat_bits", + .data = &mmap_rnd_compat_bits, + .maxlen = sizeof(mmap_rnd_compat_bits), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)&mmap_rnd_compat_bits_min, + .extra2 = (void *)&mmap_rnd_compat_bits_max, + }, +#endif { } }; diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 3b9a48ae153a..4b8c9dd3dded 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -77,6 +77,9 @@ config EVENT_TRACING select CONTEXT_SWITCH_TRACER bool +config GPU_TRACEPOINTS + bool + config CONTEXT_SWITCH_TRACER bool diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 9b1044e936a6..ba04bf0c2653 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -64,6 +64,7 @@ obj-$(CONFIG_KGDB_KDB) += trace_kdb.o endif obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o +obj-$(CONFIG_GPU_TRACEPOINTS) += gpu-traces.o obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o diff --git a/kernel/trace/gpu-traces.c b/kernel/trace/gpu-traces.c new file mode 100644 index 000000000000..a4b3f00faee3 --- /dev/null +++ b/kernel/trace/gpu-traces.c @@ -0,0 +1,23 @@ +/* + * GPU tracepoints + * + * Copyright (C) 2013 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/module.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/gpu.h> + +EXPORT_TRACEPOINT_SYMBOL(gpu_sched_switch); +EXPORT_TRACEPOINT_SYMBOL(gpu_job_enqueue); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 05330494a0df..1e421aee5441 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -862,6 +862,7 @@ static const char *trace_options[] = { "irq-info", "markers", "function-trace", + "print-tgid", NULL }; @@ -1330,6 +1331,7 @@ void tracing_reset_all_online_cpus(void) #define SAVED_CMDLINES_DEFAULT 128 #define NO_CMDLINE_MAP UINT_MAX +static unsigned saved_tgids[SAVED_CMDLINES_DEFAULT]; static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED; struct saved_cmdlines_buffer { unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; @@ -1568,7 +1570,7 @@ static int trace_save_cmdline(struct task_struct *tsk) } set_cmdline(idx, tsk->comm); - + saved_tgids[idx] = tsk->tgid; arch_spin_unlock(&trace_cmdline_lock); return 1; @@ -1611,6 +1613,25 @@ void trace_find_cmdline(int pid, char comm[]) preempt_enable(); } +int trace_find_tgid(int pid) +{ + unsigned map; + int tgid; + + preempt_disable(); + arch_spin_lock(&trace_cmdline_lock); + map = savedcmd->map_pid_to_cmdline[pid]; + if (map != NO_CMDLINE_MAP) + tgid = saved_tgids[map]; + else + tgid = -1; + + arch_spin_unlock(&trace_cmdline_lock); + preempt_enable(); + + return tgid; +} + void tracing_record_cmdline(struct task_struct *tsk) { if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on()) @@ -2586,6 +2607,13 @@ static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m) "# | | | | |\n"); } +static void print_func_help_header_tgid(struct trace_buffer *buf, struct seq_file *m) +{ + print_event_info(buf, m); + seq_puts(m, "# TASK-PID TGID CPU# TIMESTAMP FUNCTION\n"); + seq_puts(m, "# | | | | | |\n"); +} + static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m) { print_event_info(buf, m); @@ -2598,6 +2626,18 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file "# | | | |||| | |\n"); } +static void print_func_help_header_irq_tgid(struct trace_buffer *buf, struct seq_file *m) +{ + print_event_info(buf, m); + seq_puts(m, "# _-----=> irqs-off\n"); + seq_puts(m, "# / _----=> need-resched\n"); + seq_puts(m, "# | / _---=> hardirq/softirq\n"); + seq_puts(m, "# || / _--=> preempt-depth\n"); + seq_puts(m, "# ||| / delay\n"); + seq_puts(m, "# TASK-PID TGID CPU# |||| TIMESTAMP FUNCTION\n"); + seq_puts(m, "# | | | | |||| | |\n"); +} + void print_trace_header(struct seq_file *m, struct trace_iterator *iter) { @@ -2899,9 +2939,15 @@ void trace_default_header(struct seq_file *m) } else { if (!(trace_flags & TRACE_ITER_VERBOSE)) { if (trace_flags & TRACE_ITER_IRQ_INFO) - print_func_help_header_irq(iter->trace_buffer, m); + if (trace_flags & TRACE_ITER_TGID) + print_func_help_header_irq_tgid(iter->trace_buffer, m); + else + print_func_help_header_irq(iter->trace_buffer, m); else - print_func_help_header(iter->trace_buffer, m); + if (trace_flags & TRACE_ITER_TGID) + print_func_help_header_tgid(iter->trace_buffer, m); + else + print_func_help_header(iter->trace_buffer, m); } } } @@ -4123,6 +4169,50 @@ static void trace_insert_enum_map(struct module *mod, } static ssize_t +tracing_saved_tgids_read(struct file *file, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char *file_buf; + char *buf; + int len = 0; + int pid; + int i; + + file_buf = kmalloc(SAVED_CMDLINES_DEFAULT*(16+1+16), GFP_KERNEL); + if (!file_buf) + return -ENOMEM; + + buf = file_buf; + + for (i = 0; i < SAVED_CMDLINES_DEFAULT; i++) { + int tgid; + int r; + + pid = savedcmd->map_cmdline_to_pid[i]; + if (pid == -1 || pid == NO_CMDLINE_MAP) + continue; + + tgid = trace_find_tgid(pid); + r = sprintf(buf, "%d %d\n", pid, tgid); + buf += r; + len += r; + } + + len = simple_read_from_buffer(ubuf, cnt, ppos, + file_buf, len); + + kfree(file_buf); + + return len; +} + +static const struct file_operations tracing_saved_tgids_fops = { + .open = tracing_open_generic, + .read = tracing_saved_tgids_read, + .llseek = generic_file_llseek, +}; + +static ssize_t tracing_set_trace_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { @@ -6659,6 +6749,9 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) trace_create_file("trace_marker", 0220, d_tracer, tr, &tracing_mark_fops); + trace_create_file("saved_tgids", 0444, d_tracer, + tr, &tracing_saved_tgids_fops); + trace_create_file("trace_clock", 0644, d_tracer, tr, &trace_clock_fops); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 921691c5cb04..597776f8e639 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -648,6 +648,7 @@ static inline void __trace_stack(struct trace_array *tr, unsigned long flags, extern cycle_t ftrace_now(int cpu); extern void trace_find_cmdline(int pid, char comm[]); +extern int trace_find_tgid(int pid); #ifdef CONFIG_DYNAMIC_FTRACE extern unsigned long ftrace_update_tot_cnt; @@ -930,6 +931,7 @@ enum trace_iterator_flags { TRACE_ITER_IRQ_INFO = 0x800000, TRACE_ITER_MARKERS = 0x1000000, TRACE_ITER_FUNCTION = 0x2000000, + TRACE_ITER_TGID = 0x4000000, }; /* diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index a51e79688455..8dc06b2d4e11 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -64,6 +64,9 @@ struct fgraph_data { #define TRACE_GRAPH_INDENT 2 +/* Flag options */ +#define TRACE_GRAPH_PRINT_FLAT 0x80 + static unsigned int max_depth; static struct tracer_opt trace_opts[] = { @@ -83,6 +86,8 @@ static struct tracer_opt trace_opts[] = { { TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) }, /* Display function name after trailing } */ { TRACER_OPT(funcgraph-tail, TRACE_GRAPH_PRINT_TAIL) }, + /* Use standard trace formatting rather than hierarchical */ + { TRACER_OPT(funcgraph-flat, TRACE_GRAPH_PRINT_FLAT) }, { } /* Empty entry */ }; @@ -1160,6 +1165,9 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags) int cpu = iter->cpu; int ret; + if (flags & TRACE_GRAPH_PRINT_FLAT) + return TRACE_TYPE_UNHANDLED; + if (data && per_cpu_ptr(data->cpu_data, cpu)->ignore) { per_cpu_ptr(data->cpu_data, cpu)->ignore = 0; return TRACE_TYPE_HANDLED; @@ -1217,13 +1225,6 @@ print_graph_function(struct trace_iterator *iter) return print_graph_function_flags(iter, tracer_flags.val); } -static enum print_line_t -print_graph_function_event(struct trace_iterator *iter, int flags, - struct trace_event *event) -{ - return print_graph_function(iter); -} - static void print_lat_header(struct seq_file *s, u32 flags) { static const char spaces[] = " " /* 16 spaces */ @@ -1290,6 +1291,11 @@ void print_graph_headers_flags(struct seq_file *s, u32 flags) { struct trace_iterator *iter = s->private; + if (flags & TRACE_GRAPH_PRINT_FLAT) { + trace_default_header(s); + return; + } + if (!(trace_flags & TRACE_ITER_CONTEXT_INFO)) return; @@ -1365,19 +1371,6 @@ func_graph_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) return 0; } -static struct trace_event_functions graph_functions = { - .trace = print_graph_function_event, -}; - -static struct trace_event graph_trace_entry_event = { - .type = TRACE_GRAPH_ENT, - .funcs = &graph_functions, -}; - -static struct trace_event graph_trace_ret_event = { - .type = TRACE_GRAPH_RET, - .funcs = &graph_functions -}; static struct tracer graph_trace __tracer_data = { .name = "function_graph", @@ -1454,16 +1447,6 @@ static __init int init_graph_trace(void) { max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1); - if (!register_ftrace_event(&graph_trace_entry_event)) { - pr_warning("Warning: could not register graph trace events\n"); - return 1; - } - - if (!register_ftrace_event(&graph_trace_ret_event)) { - pr_warning("Warning: could not register graph trace events\n"); - return 1; - } - return register_tracer(&graph_trace); } diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 25a086bcb700..d8c8cf2ceb9f 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -566,11 +566,21 @@ int trace_print_context(struct trace_iterator *iter) unsigned long long t; unsigned long secs, usec_rem; char comm[TASK_COMM_LEN]; + int tgid; trace_find_cmdline(entry->pid, comm); - trace_seq_printf(s, "%16s-%-5d [%03d] ", - comm, entry->pid, iter->cpu); + trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); + + if (trace_flags & TRACE_ITER_TGID) { + tgid = trace_find_tgid(entry->pid); + if (tgid < 0) + trace_seq_puts(s, "(-----) "); + else + trace_seq_printf(s, "(%5d) ", tgid); + } + + trace_seq_printf(s, "[%03d] ", iter->cpu); if (trace_flags & TRACE_ITER_IRQ_INFO) trace_print_lat_fmt(s, entry); @@ -884,6 +894,174 @@ static struct trace_event trace_fn_event = { .funcs = &trace_fn_funcs, }; +/* TRACE_GRAPH_ENT */ +static enum print_line_t trace_graph_ent_trace(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct trace_seq *s = &iter->seq; + struct ftrace_graph_ent_entry *field; + + trace_assign_type(field, iter->ent); + + trace_seq_puts(s, "graph_ent: func="); + if (trace_seq_has_overflowed(s)) + return TRACE_TYPE_PARTIAL_LINE; + + if (!seq_print_ip_sym(s, field->graph_ent.func, flags)) + return TRACE_TYPE_PARTIAL_LINE; + + trace_seq_puts(s, "\n"); + if (trace_seq_has_overflowed(s)) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t trace_graph_ent_raw(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct ftrace_graph_ent_entry *field; + + trace_assign_type(field, iter->ent); + + trace_seq_printf(&iter->seq, "%lx %d\n", + field->graph_ent.func, + field->graph_ent.depth); + if (trace_seq_has_overflowed(&iter->seq)) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t trace_graph_ent_hex(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct ftrace_graph_ent_entry *field; + struct trace_seq *s = &iter->seq; + + trace_assign_type(field, iter->ent); + + SEQ_PUT_HEX_FIELD(s, field->graph_ent.func); + SEQ_PUT_HEX_FIELD(s, field->graph_ent.depth); + + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t trace_graph_ent_bin(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct ftrace_graph_ent_entry *field; + struct trace_seq *s = &iter->seq; + + trace_assign_type(field, iter->ent); + + SEQ_PUT_FIELD(s, field->graph_ent.func); + SEQ_PUT_FIELD(s, field->graph_ent.depth); + + return TRACE_TYPE_HANDLED; +} + +static struct trace_event_functions trace_graph_ent_funcs = { + .trace = trace_graph_ent_trace, + .raw = trace_graph_ent_raw, + .hex = trace_graph_ent_hex, + .binary = trace_graph_ent_bin, +}; + +static struct trace_event trace_graph_ent_event = { + .type = TRACE_GRAPH_ENT, + .funcs = &trace_graph_ent_funcs, +}; + +/* TRACE_GRAPH_RET */ +static enum print_line_t trace_graph_ret_trace(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct trace_seq *s = &iter->seq; + struct trace_entry *entry = iter->ent; + struct ftrace_graph_ret_entry *field; + + trace_assign_type(field, entry); + + trace_seq_puts(s, "graph_ret: func="); + if (trace_seq_has_overflowed(s)) + return TRACE_TYPE_PARTIAL_LINE; + + if (!seq_print_ip_sym(s, field->ret.func, flags)) + return TRACE_TYPE_PARTIAL_LINE; + + trace_seq_puts(s, "\n"); + if (trace_seq_has_overflowed(s)) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t trace_graph_ret_raw(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct ftrace_graph_ret_entry *field; + + trace_assign_type(field, iter->ent); + + trace_seq_printf(&iter->seq, "%lx %lld %lld %ld %d\n", + field->ret.func, + field->ret.calltime, + field->ret.rettime, + field->ret.overrun, + field->ret.depth); + if (trace_seq_has_overflowed(&iter->seq)) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t trace_graph_ret_hex(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct ftrace_graph_ret_entry *field; + struct trace_seq *s = &iter->seq; + + trace_assign_type(field, iter->ent); + + SEQ_PUT_HEX_FIELD(s, field->ret.func); + SEQ_PUT_HEX_FIELD(s, field->ret.calltime); + SEQ_PUT_HEX_FIELD(s, field->ret.rettime); + SEQ_PUT_HEX_FIELD(s, field->ret.overrun); + SEQ_PUT_HEX_FIELD(s, field->ret.depth); + + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t trace_graph_ret_bin(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct ftrace_graph_ret_entry *field; + struct trace_seq *s = &iter->seq; + + trace_assign_type(field, iter->ent); + + SEQ_PUT_FIELD(s, field->ret.func); + SEQ_PUT_FIELD(s, field->ret.calltime); + SEQ_PUT_FIELD(s, field->ret.rettime); + SEQ_PUT_FIELD(s, field->ret.overrun); + SEQ_PUT_FIELD(s, field->ret.depth); + + return TRACE_TYPE_HANDLED; +} + +static struct trace_event_functions trace_graph_ret_funcs = { + .trace = trace_graph_ret_trace, + .raw = trace_graph_ret_raw, + .hex = trace_graph_ret_hex, + .binary = trace_graph_ret_bin, +}; + +static struct trace_event trace_graph_ret_event = { + .type = TRACE_GRAPH_RET, + .funcs = &trace_graph_ret_funcs, +}; + /* TRACE_CTX an TRACE_WAKE */ static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter, char *delim) @@ -1225,6 +1403,8 @@ static struct trace_event trace_print_event = { static struct trace_event *events[] __initdata = { &trace_fn_event, + &trace_graph_ent_event, + &trace_graph_ret_event, &trace_ctx_event, &trace_wake_event, &trace_stack_event, diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 581a68a04c64..21a61f836736 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -74,6 +74,11 @@ static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved); static DEFINE_PER_CPU(bool, hard_watchdog_warn); static DEFINE_PER_CPU(bool, watchdog_nmi_touch); static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); +#endif +#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU +static cpumask_t __read_mostly watchdog_cpus; +#endif +#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); #endif static unsigned long soft_lockup_nmi_warn; @@ -234,7 +239,7 @@ void touch_softlockup_watchdog_sync(void) __this_cpu_write(watchdog_touch_ts, 0); } -#ifdef CONFIG_HARDLOCKUP_DETECTOR +#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI /* watchdog detector functions */ static int is_hardlockup(void) { @@ -248,6 +253,76 @@ static int is_hardlockup(void) } #endif +#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU +static unsigned int watchdog_next_cpu(unsigned int cpu) +{ + cpumask_t cpus = watchdog_cpus; + unsigned int next_cpu; + + next_cpu = cpumask_next(cpu, &cpus); + if (next_cpu >= nr_cpu_ids) + next_cpu = cpumask_first(&cpus); + + if (next_cpu == cpu) + return nr_cpu_ids; + + return next_cpu; +} + +static int is_hardlockup_other_cpu(unsigned int cpu) +{ + unsigned long hrint = per_cpu(hrtimer_interrupts, cpu); + + if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint) + return 1; + + per_cpu(hrtimer_interrupts_saved, cpu) = hrint; + return 0; +} + +static void watchdog_check_hardlockup_other_cpu(void) +{ + unsigned int next_cpu; + + /* + * Test for hardlockups every 3 samples. The sample period is + * watchdog_thresh * 2 / 5, so 3 samples gets us back to slightly over + * watchdog_thresh (over by 20%). + */ + if (__this_cpu_read(hrtimer_interrupts) % 3 != 0) + return; + + /* check for a hardlockup on the next cpu */ + next_cpu = watchdog_next_cpu(smp_processor_id()); + if (next_cpu >= nr_cpu_ids) + return; + + smp_rmb(); + + if (per_cpu(watchdog_nmi_touch, next_cpu) == true) { + per_cpu(watchdog_nmi_touch, next_cpu) = false; + return; + } + + if (is_hardlockup_other_cpu(next_cpu)) { + /* only warn once */ + if (per_cpu(hard_watchdog_warn, next_cpu) == true) + return; + + if (hardlockup_panic) + panic("Watchdog detected hard LOCKUP on cpu %u", next_cpu); + else + WARN(1, "Watchdog detected hard LOCKUP on cpu %u", next_cpu); + + per_cpu(hard_watchdog_warn, next_cpu) = true; + } else { + per_cpu(hard_watchdog_warn, next_cpu) = false; + } +} +#else +static inline void watchdog_check_hardlockup_other_cpu(void) { return; } +#endif + static int is_softlockup(unsigned long touch_ts) { unsigned long now = get_timestamp(); @@ -260,7 +335,7 @@ static int is_softlockup(unsigned long touch_ts) return 0; } -#ifdef CONFIG_HARDLOCKUP_DETECTOR +#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI static struct perf_event_attr wd_hw_attr = { .type = PERF_TYPE_HARDWARE, @@ -310,7 +385,7 @@ static void watchdog_overflow_callback(struct perf_event *event, __this_cpu_write(hard_watchdog_warn, false); return; } -#endif /* CONFIG_HARDLOCKUP_DETECTOR */ +#endif /* CONFIG_HARDLOCKUP_DETECTOR_NMI */ static void watchdog_interrupt_count(void) { @@ -331,6 +406,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) /* kick the hardlockup detector */ watchdog_interrupt_count(); + /* test for hardlockups on the next cpu */ + watchdog_check_hardlockup_other_cpu(); + /* kick the softlockup detector */ wake_up_process(__this_cpu_read(softlockup_watchdog)); @@ -508,7 +586,7 @@ static void watchdog(unsigned int cpu) watchdog_nmi_disable(cpu); } -#ifdef CONFIG_HARDLOCKUP_DETECTOR +#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI /* * People like the simple clean cpu node info on boot. * Reduce the watchdog noise by only printing messages @@ -642,11 +720,46 @@ unlock: mutex_unlock(&watchdog_proc_mutex); } #else +#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU +static int watchdog_nmi_enable(unsigned int cpu) +{ + /* + * The new cpu will be marked online before the first hrtimer interrupt + * runs on it. If another cpu tests for a hardlockup on the new cpu + * before it has run its first hrtimer, it will get a false positive. + * Touch the watchdog on the new cpu to delay the first check for at + * least 3 sampling periods to guarantee one hrtimer has run on the new + * cpu. + */ + per_cpu(watchdog_nmi_touch, cpu) = true; + smp_wmb(); + cpumask_set_cpu(cpu, &watchdog_cpus); + return 0; +} + +static void watchdog_nmi_disable(unsigned int cpu) +{ + unsigned int next_cpu = watchdog_next_cpu(cpu); + + /* + * Offlining this cpu will cause the cpu before this one to start + * checking the one after this one. If this cpu just finished checking + * the next cpu and updating hrtimer_interrupts_saved, and then the + * previous cpu checks it within one sample period, it will trigger a + * false positive. Touch the watchdog on the next cpu to prevent it. + */ + if (next_cpu < nr_cpu_ids) + per_cpu(watchdog_nmi_touch, next_cpu) = true; + smp_wmb(); + cpumask_clear_cpu(cpu, &watchdog_cpus); +} +#else static int watchdog_nmi_enable(unsigned int cpu) { return 0; } static void watchdog_nmi_disable(unsigned int cpu) { return; } void watchdog_nmi_enable_all(void) {} void watchdog_nmi_disable_all(void) {} -#endif /* CONFIG_HARDLOCKUP_DETECTOR */ +#endif /* CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU */ +#endif /* CONFIG_HARDLOCKUP_DETECTOR_NMI */ static struct smp_hotplug_thread watchdog_threads = { .store = &softlockup_watchdog, |