aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup.c33
-rw-r--r--kernel/cpu.c20
-rw-r--r--kernel/debug/debug_core.c12
-rw-r--r--kernel/debug/kdb/kdb_io.c12
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/fork.c19
-rw-r--r--kernel/freezer.c12
-rw-r--r--kernel/futex.c3
-rw-r--r--kernel/hrtimer.c3
-rw-r--r--kernel/irq/pm.c12
-rw-r--r--kernel/lockdep.c17
-rw-r--r--kernel/panic.c13
-rw-r--r--kernel/power/Kconfig15
-rw-r--r--kernel/power/Makefile1
-rw-r--r--kernel/power/process.c26
-rw-r--r--kernel/power/suspend.c15
-rw-r--r--kernel/power/suspend_time.c111
-rw-r--r--kernel/power/wakelock.c7
-rw-r--r--kernel/sched/core.c31
-rw-r--r--kernel/signal.c2
-rw-r--r--kernel/sys.c152
-rw-r--r--kernel/sysctl.c17
-rw-r--r--kernel/time/alarmtimer.c39
-rw-r--r--kernel/trace/Kconfig3
-rw-r--r--kernel/trace/Makefile1
-rw-r--r--kernel/trace/gpu-traces.c23
-rw-r--r--kernel/trace/trace.c104
-rw-r--r--kernel/trace/trace.h2
-rw-r--r--kernel/trace/trace_functions_graph.c43
-rw-r--r--kernel/trace/trace_output.c182
-rw-r--r--kernel/watchdog.c123
31 files changed, 966 insertions, 89 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d0def7fc284..cd1c303214f 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2106,6 +2106,24 @@ out_free_group_list:
return retval;
}
+/*
+ * Ask every subsystem bound to @cgrp's hierarchy whether the current
+ * caller may attach the tasks in @tset.
+ *
+ * Returns 0 when every subsystem's ->allow_attach() hook returns 0,
+ * the first non-zero hook result otherwise, and -EACCES as soon as a
+ * subsystem without an ->allow_attach() hook is encountered.
+ */
+static int cgroup_allow_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+{
+	struct cgroup_subsys *ss;
+
+	for_each_subsys(cgrp->root, ss) {
+		int err;
+
+		/* A subsystem with no hook cannot grant the extra permission. */
+		if (!ss->allow_attach)
+			return -EACCES;
+
+		err = ss->allow_attach(cgrp, tset);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
/*
* Find the task_struct of the task to attach by vpid and pass it along to the
* function to attach either it or all tasks in its threadgroup. Will lock
@@ -2137,9 +2155,18 @@ retry_find_task:
if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
!uid_eq(cred->euid, tcred->uid) &&
!uid_eq(cred->euid, tcred->suid)) {
- rcu_read_unlock();
- ret = -EACCES;
- goto out_unlock_cgroup;
+ /*
+ * If the default permission check fails, give each
+ * cgroup subsystem a chance to extend the permission check.
+ */
+ struct cgroup_taskset tset = { };
+ tset.single.task = tsk;
+ tset.single.cgrp = cgrp;
+ ret = cgroup_allow_attach(cgrp, &tset);
+ if (ret) {
+ rcu_read_unlock();
+ goto out_unlock_cgroup;
+ }
}
} else
tsk = current;
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 198a38883e6..f2d0575f9a3 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -726,3 +726,23 @@ void init_cpu_online(const struct cpumask *src)
{
cpumask_copy(to_cpumask(cpu_online_bits), src);
}
+
+/* Atomic notifier chain backing the idle_notifier_*() helpers below. */
+static ATOMIC_NOTIFIER_HEAD(idle_notifier);
+
+/* Add notifier block @n to the idle notifier chain. */
+void idle_notifier_register(struct notifier_block *n)
+{
+ atomic_notifier_chain_register(&idle_notifier, n);
+}
+EXPORT_SYMBOL_GPL(idle_notifier_register);
+
+/* Remove notifier block @n from the idle notifier chain. */
+void idle_notifier_unregister(struct notifier_block *n)
+{
+ atomic_notifier_chain_unregister(&idle_notifier, n);
+}
+EXPORT_SYMBOL_GPL(idle_notifier_unregister);
+
+/*
+ * Invoke every registered idle notifier with event value @val.
+ * No data pointer is passed to the callbacks (NULL).
+ */
+void idle_notifier_call_chain(unsigned long val)
+{
+ atomic_notifier_call_chain(&idle_notifier, val, NULL);
+}
+EXPORT_SYMBOL_GPL(idle_notifier_call_chain);
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 0506d447aed..2d4438b14b4 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -86,6 +86,10 @@ static int kgdb_use_con;
bool dbg_is_early = true;
/* Next cpu to become the master debug core */
int dbg_switch_cpu;
+/* Flag for entering kdb when a panic occurs */
+static bool break_on_panic = true;
+/* Flag for entering kdb when an exception occurs */
+static bool break_on_exception = true;
/* Use kdb or gdbserver mode */
int dbg_kdb_mode = 1;
@@ -100,6 +104,8 @@ early_param("kgdbcon", opt_kgdb_con);
module_param(kgdb_use_con, int, 0644);
module_param(kgdbreboot, int, 0644);
+module_param(break_on_panic, bool, 0644);
+module_param(break_on_exception, bool, 0644);
/*
* Holds information about breakpoints in a kernel. These breakpoints are
@@ -678,6 +684,9 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
if (arch_kgdb_ops.enable_nmi)
arch_kgdb_ops.enable_nmi(0);
+ if (unlikely(signo != SIGTRAP && !break_on_exception))
+ return 1;
+
ks->cpu = raw_smp_processor_id();
ks->ex_vector = evector;
ks->signo = signo;
@@ -784,6 +793,9 @@ static int kgdb_panic_event(struct notifier_block *self,
unsigned long val,
void *data)
{
+ if (!break_on_panic)
+ return NOTIFY_DONE;
+
if (dbg_kdb_mode)
kdb_printf("PANIC: %s\n", (char *)data);
kgdb_breakpoint();
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 14ff4849262..4b0fb2fb779 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -216,7 +216,7 @@ static char *kdb_read(char *buffer, size_t bufsize)
int i;
int diag, dtab_count;
int key;
-
+ static int last_crlf;
diag = kdbgetintenv("DTABCOUNT", &dtab_count);
if (diag)
@@ -237,6 +237,9 @@ poll_again:
return buffer;
if (key != 9)
tab = 0;
+ if (key != 10 && key != 13)
+ last_crlf = 0;
+
switch (key) {
case 8: /* backspace */
if (cp > buffer) {
@@ -254,7 +257,12 @@ poll_again:
*cp = tmp;
}
break;
- case 13: /* enter */
+ case 10: /* new line */
+ case 13: /* carriage return */
+ /* handle \n after \r */
+ if (last_crlf && last_crlf != key)
+ break;
+ last_crlf = key;
*lastchar++ = '\n';
*lastchar++ = '\0';
if (!KDB_STATE(KGDB_TRANS)) {
diff --git a/kernel/exit.c b/kernel/exit.c
index 7bb73f9d09d..6a057750ebb 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -835,7 +835,7 @@ void do_exit(long code)
/*
* Make sure we are holding no locks:
*/
- debug_check_no_locks_held(tsk);
+ debug_check_no_locks_held();
/*
* We can do this unlocked here. The futex code uses this flag
* just to verify whether the pi state cleanup has been done
diff --git a/kernel/fork.c b/kernel/fork.c
index ff7be9dac4c..0aa1bb5c8d6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -198,6 +198,9 @@ struct kmem_cache *vm_area_cachep;
/* SLAB cache for mm_struct structures (tsk->mm) */
static struct kmem_cache *mm_cachep;
+/* Notifier list called when a task struct is freed */
+static ATOMIC_NOTIFIER_HEAD(task_free_notifier);
+
static void account_kernel_stack(struct thread_info *ti, int account)
{
struct zone *zone = page_zone(virt_to_page(ti));
@@ -231,6 +234,18 @@ static inline void put_signal_struct(struct signal_struct *sig)
free_signal_struct(sig);
}
+/*
+ * Register @n on the task_free_notifier chain.
+ * Returns the result of atomic_notifier_chain_register().
+ */
+int task_free_register(struct notifier_block *n)
+{
+ return atomic_notifier_chain_register(&task_free_notifier, n);
+}
+EXPORT_SYMBOL(task_free_register);
+
+/*
+ * Remove @n from the task_free_notifier chain.
+ * Returns the result of atomic_notifier_chain_unregister().
+ */
+int task_free_unregister(struct notifier_block *n)
+{
+ return atomic_notifier_chain_unregister(&task_free_notifier, n);
+}
+EXPORT_SYMBOL(task_free_unregister);
+
void __put_task_struct(struct task_struct *tsk)
{
WARN_ON(!tsk->exit_state);
@@ -242,6 +257,7 @@ void __put_task_struct(struct task_struct *tsk)
delayacct_tsk_free(tsk);
put_signal_struct(tsk->signal);
+ atomic_notifier_call_chain(&task_free_notifier, 0, tsk);
if (!profile_handoff_task(tsk))
free_task(tsk);
}
@@ -697,7 +713,8 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
mm = get_task_mm(task);
if (mm && mm != current->mm &&
- !ptrace_may_access(task, mode)) {
+ !ptrace_may_access(task, mode) &&
+ !capable(CAP_SYS_RESOURCE)) {
mmput(mm);
mm = ERR_PTR(-EACCES);
}
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 78758512b1e..5420f635111 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -116,6 +116,18 @@ bool freeze_task(struct task_struct *p)
{
unsigned long flags;
+ /*
+ * This check can race with freezer_do_not_count, but worst case that
+ * will result in an extra wakeup being sent to the task. It does not
+ * race with freezer_count(), the barriers in freezer_count() and
+ * freezer_should_skip() ensure that either freezer_count() sees
+ * freezing == true in try_to_freeze() and freezes, or
+ * freezer_should_skip() sees !PF_FREEZE_SKIP and freezes the task
+ * normally.
+ */
+ if (freezer_should_skip(p))
+ return false;
+
spin_lock_irqsave(&freezer_lock, flags);
if (!freezing(p) || frozen(p)) {
spin_unlock_irqrestore(&freezer_lock, flags);
diff --git a/kernel/futex.c b/kernel/futex.c
index a283b304107..221a58fc62f 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -62,6 +62,7 @@
#include <linux/ptrace.h>
#include <linux/sched/rt.h>
#include <linux/hugetlb.h>
+#include <linux/freezer.h>
#include <asm/futex.h>
@@ -1808,7 +1809,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
* is no timeout, or if it has yet to expire.
*/
if (!timeout || timeout->task)
- schedule();
+ freezable_schedule();
}
__set_current_state(TASK_RUNNING);
}
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 2288fbdada1..e268e269a69 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -47,6 +47,7 @@
#include <linux/sched/sysctl.h>
#include <linux/sched/rt.h>
#include <linux/timer.h>
+#include <linux/freezer.h>
#include <asm/uaccess.h>
@@ -1543,7 +1544,7 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
t->task = NULL;
if (likely(t->task))
- schedule();
+ freezable_schedule();
hrtimer_cancel(&t->timer);
mode = HRTIMER_MODE_ABS;
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index abcd6ca86cb..c72b7a43beb 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -103,14 +103,14 @@ int check_wakeup_irqs(void)
int irq;
for_each_irq_desc(irq, desc) {
- /*
- * Only interrupts which are marked as wakeup source
- * and have not been disabled before the suspend check
- * can abort suspend.
- */
if (irqd_is_wakeup_set(&desc->irq_data)) {
- if (desc->depth == 1 && desc->istate & IRQS_PENDING)
+ if (desc->istate & IRQS_PENDING) {
+ pr_info("Wakeup IRQ %d %s pending, suspend aborted\n",
+ irq,
+ desc->action && desc->action->name ?
+ desc->action->name : "");
return -EBUSY;
+ }
continue;
}
/*
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 1f3186b37fd..e16c45b9ee7 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -4090,7 +4090,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len)
}
EXPORT_SYMBOL_GPL(debug_check_no_locks_freed);
-static void print_held_locks_bug(struct task_struct *curr)
+static void print_held_locks_bug(void)
{
if (!debug_locks_off())
return;
@@ -4099,22 +4099,21 @@ static void print_held_locks_bug(struct task_struct *curr)
printk("\n");
printk("=====================================\n");
- printk("[ BUG: lock held at task exit time! ]\n");
+ printk("[ BUG: %s/%d still has locks held! ]\n",
+ current->comm, task_pid_nr(current));
print_kernel_ident();
printk("-------------------------------------\n");
- printk("%s/%d is exiting with locks still held!\n",
- curr->comm, task_pid_nr(curr));
- lockdep_print_held_locks(curr);
-
+ lockdep_print_held_locks(current);
printk("\nstack backtrace:\n");
dump_stack();
}
-void debug_check_no_locks_held(struct task_struct *task)
+void debug_check_no_locks_held(void)
{
- if (unlikely(task->lockdep_depth > 0))
- print_held_locks_bug(task);
+ if (unlikely(current->lockdep_depth > 0))
+ print_held_locks_bug();
}
+EXPORT_SYMBOL_GPL(debug_check_no_locks_held);
void debug_show_all_locks(void)
{
diff --git a/kernel/panic.c b/kernel/panic.c
index 167ec097ce8..126b2ef2eb6 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -26,13 +26,19 @@
#define PANIC_TIMER_STEP 100
#define PANIC_BLINK_SPD 18
+/* Machine specific panic information string */
+char *mach_panic_string;
+
int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE;
static unsigned long tainted_mask;
static int pause_on_oops;
static int pause_on_oops_flag;
static DEFINE_SPINLOCK(pause_on_oops_lock);
-int panic_timeout;
+#ifndef CONFIG_PANIC_TIMEOUT
+#define CONFIG_PANIC_TIMEOUT 0
+#endif
+int panic_timeout = CONFIG_PANIC_TIMEOUT;
EXPORT_SYMBOL_GPL(panic_timeout);
ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
@@ -375,6 +381,11 @@ late_initcall(init_oops_id);
void print_oops_end_marker(void)
{
init_oops_id();
+
+ if (mach_panic_string)
+ printk(KERN_WARNING "Board Information: %s\n",
+ mach_panic_string);
+
printk(KERN_WARNING "---[ end trace %016llx ]---\n",
(unsigned long long)oops_id);
}
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 46455961a88..7a297aeeca9 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -18,6 +18,14 @@ config SUSPEND_FREEZER
Turning OFF this setting is NOT recommended! If in doubt, say Y.
+config HAS_WAKELOCK
+ bool
+ default y
+
+config WAKELOCK
+ bool
+ default y
+
config HIBERNATE_CALLBACKS
bool
@@ -294,3 +302,10 @@ config PM_GENERIC_DOMAINS_RUNTIME
config CPU_PM
bool
depends on SUSPEND || CPU_IDLE
+
+config SUSPEND_TIME
+ bool "Log time spent in suspend"
+ ---help---
+ Prints the time spent in suspend in the kernel log, and
+ keeps statistics on the time spent in suspend in
+ /sys/kernel/debug/suspend_time
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 29472bff11e..8450b85d33c 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -11,5 +11,6 @@ obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \
block_io.o
obj-$(CONFIG_PM_AUTOSLEEP) += autosleep.o
obj-$(CONFIG_PM_WAKELOCKS) += wakelock.o
+obj-$(CONFIG_SUSPEND_TIME) += suspend_time.o
obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 98088e0e71e..fc0df848644 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -30,9 +30,10 @@ static int try_to_freeze_tasks(bool user_only)
unsigned int todo;
bool wq_busy = false;
struct timeval start, end;
- u64 elapsed_csecs64;
- unsigned int elapsed_csecs;
+ u64 elapsed_msecs64;
+ unsigned int elapsed_msecs;
bool wakeup = false;
+ int sleep_usecs = USEC_PER_MSEC;
do_gettimeofday(&start);
@@ -68,22 +69,25 @@ static int try_to_freeze_tasks(bool user_only)
/*
* We need to retry, but first give the freezing tasks some
- * time to enter the refrigerator.
+ * time to enter the refrigerator. Start with an initial
+ * 1 ms sleep followed by exponential backoff until 8 ms.
*/
- msleep(10);
+ usleep_range(sleep_usecs / 2, sleep_usecs);
+ if (sleep_usecs < 8 * USEC_PER_MSEC)
+ sleep_usecs *= 2;
}
do_gettimeofday(&end);
- elapsed_csecs64 = timeval_to_ns(&end) - timeval_to_ns(&start);
- do_div(elapsed_csecs64, NSEC_PER_SEC / 100);
- elapsed_csecs = elapsed_csecs64;
+ elapsed_msecs64 = timeval_to_ns(&end) - timeval_to_ns(&start);
+ do_div(elapsed_msecs64, NSEC_PER_MSEC);
+ elapsed_msecs = elapsed_msecs64;
if (todo) {
printk("\n");
- printk(KERN_ERR "Freezing of tasks %s after %d.%02d seconds "
+ printk(KERN_ERR "Freezing of tasks %s after %d.%03d seconds "
"(%d tasks refusing to freeze, wq_busy=%d):\n",
wakeup ? "aborted" : "failed",
- elapsed_csecs / 100, elapsed_csecs % 100,
+ elapsed_msecs / 1000, elapsed_msecs % 1000,
todo - wq_busy, wq_busy);
if (!wakeup) {
@@ -96,8 +100,8 @@ static int try_to_freeze_tasks(bool user_only)
read_unlock(&tasklist_lock);
}
} else {
- printk("(elapsed %d.%02d seconds) ", elapsed_csecs / 100,
- elapsed_csecs % 100);
+ printk("(elapsed %d.%03d seconds) ", elapsed_msecs / 1000,
+ elapsed_msecs % 1000);
}
return todo ? -EBUSY : 0;
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index bef86d121eb..454568e6c8d 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -25,6 +25,7 @@
#include <linux/suspend.h>
#include <linux/syscore_ops.h>
#include <linux/ftrace.h>
+#include <linux/rtc.h>
#include <trace/events/power.h>
#include "power.h"
@@ -358,6 +359,18 @@ static int enter_state(suspend_state_t state)
return error;
}
+/*
+ * Emit a kernel log line tagged with @annotation that carries the
+ * current time of day broken down into calendar fields, so that
+ * suspend entry and exit can be located in the log.
+ */
+static void pm_suspend_marker(char *annotation)
+{
+	struct rtc_time utc;
+	struct timespec now;
+
+	getnstimeofday(&now);
+	rtc_time_to_tm(now.tv_sec, &utc);
+
+	pr_info("PM: suspend %s %d-%02d-%02d %02d:%02d:%02d.%09lu UTC\n",
+		annotation,
+		utc.tm_year + 1900, utc.tm_mon + 1, utc.tm_mday,
+		utc.tm_hour, utc.tm_min, utc.tm_sec,
+		now.tv_nsec);
+}
+
/**
* pm_suspend - Externally visible function for suspending the system.
* @state: System sleep state to enter.
@@ -372,6 +385,7 @@ int pm_suspend(suspend_state_t state)
if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX)
return -EINVAL;
+ pm_suspend_marker("entry");
error = enter_state(state);
if (error) {
suspend_stats.fail++;
@@ -379,6 +393,7 @@ int pm_suspend(suspend_state_t state)
} else {
suspend_stats.success++;
}
+ pm_suspend_marker("exit");
return error;
}
EXPORT_SYMBOL(pm_suspend);
diff --git a/kernel/power/suspend_time.c b/kernel/power/suspend_time.c
new file mode 100644
index 00000000000..d2a65da9f22
--- /dev/null
+++ b/kernel/power/suspend_time.c
@@ -0,0 +1,111 @@
+/*
+ * debugfs file to track time spent in suspend
+ *
+ * Copyright (c) 2011, Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/seq_file.h>
+#include <linux/syscore_ops.h>
+#include <linux/time.h>
+
+static struct timespec suspend_time_before;
+static unsigned int time_in_suspend_bins[32];
+
+#ifdef CONFIG_DEBUG_FS
+/*
+ * seq_file show callback for the suspend_time debugfs file.
+ *
+ * Prints one row per non-empty histogram bin.  Bin N counts suspends
+ * whose length in seconds had fls() == N, so the row is labelled with
+ * the range 2^(N-1) .. 2^N (0 .. 1 for bin 0).
+ *
+ * The bounds are computed as unsigned values: with a signed int,
+ * "1 << 31" for the last bin would overflow.
+ */
+static int suspend_time_debug_show(struct seq_file *s, void *data)
+{
+	unsigned int bin;
+
+	seq_printf(s, "time (secs) count\n");
+	seq_printf(s, "------------------\n");
+	for (bin = 0; bin < ARRAY_SIZE(time_in_suspend_bins); bin++) {
+		if (time_in_suspend_bins[bin] == 0)
+			continue;
+		seq_printf(s, "%4u - %4u %4u\n",
+			   bin ? 1U << (bin - 1) : 0U, 1U << bin,
+			   time_in_suspend_bins[bin]);
+	}
+	return 0;
+}
+
+/* open() handler: bind the file to suspend_time_debug_show() via single_open(). */
+static int suspend_time_debug_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, suspend_time_debug_show, NULL);
+}
+
+/* File operations for the suspend_time debugfs file; only read paths are provided. */
+static const struct file_operations suspend_time_debug_fops = {
+ .open = suspend_time_debug_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+/*
+ * Create the "suspend_time" file at the root of debugfs.
+ *
+ * The file is created read-only (0444): its file_operations provide
+ * no write handler, and execute permission is meaningless for it.
+ *
+ * Returns 0 on success, -ENOMEM if the file could not be created.
+ */
+static int __init suspend_time_debug_init(void)
+{
+	struct dentry *d;
+
+	d = debugfs_create_file("suspend_time", 0444, NULL, NULL,
+				&suspend_time_debug_fops);
+	if (!d) {
+		pr_err("Failed to create suspend_time debug file\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+late_initcall(suspend_time_debug_init);
+#endif
+
+/*
+ * syscore suspend hook: store the value of read_persistent_clock() in
+ * suspend_time_before so the resume hook can compute the elapsed time.
+ * Always returns 0.
+ */
+static int suspend_time_syscore_suspend(void)
+{
+ read_persistent_clock(&suspend_time_before);
+
+ return 0;
+}
+
+/*
+ * syscore resume hook: compute how long the system was suspended,
+ * account it in the histogram and log it.
+ *
+ * The histogram bin is fls() of the elapsed seconds.  The value is
+ * sanitised first, because a raw fls(after.tv_sec) can return 32 and
+ * index one past the end of time_in_suspend_bins[]:
+ *  - a negative delta (persistent clock stepped backwards) has its
+ *    top bit set once converted to int;
+ *  - a tv_sec wider than int is truncated by fls().
+ * Negative deltas are counted in bin 0 and oversized ones in the last
+ * bin.
+ */
+static void suspend_time_syscore_resume(void)
+{
+	struct timespec after;
+	unsigned long secs;
+	unsigned int bin;
+
+	read_persistent_clock(&after);
+
+	after = timespec_sub(after, suspend_time_before);
+
+	secs = after.tv_sec > 0 ? after.tv_sec : 0;
+	if (secs > INT_MAX)
+		secs = INT_MAX;
+
+	/* fls() of a value <= INT_MAX is at most 31; clamp anyway. */
+	bin = min_t(unsigned int, fls(secs),
+		    ARRAY_SIZE(time_in_suspend_bins) - 1);
+	time_in_suspend_bins[bin]++;
+
+	pr_info("Suspended for %lu.%03lu seconds\n", after.tv_sec,
+		after.tv_nsec / NSEC_PER_MSEC);
+}
+
+/* syscore callbacks that bracket a suspend to measure its duration. */
+static struct syscore_ops suspend_time_syscore_ops = {
+ .suspend = suspend_time_syscore_suspend,
+ .resume = suspend_time_syscore_resume,
+};
+
+/* Register the suspend-time syscore callbacks. Always returns 0. */
+static int suspend_time_syscore_init(void)
+{
+ register_syscore_ops(&suspend_time_syscore_ops);
+
+ return 0;
+}
+
+/*
+ * Unregister the suspend-time syscore callbacks.
+ * NOTE(review): CONFIG_SUSPEND_TIME is declared as a bool option, so this
+ * exit path is presumably never run - confirm before relying on it.
+ */
+static void suspend_time_syscore_exit(void)
+{
+ unregister_syscore_ops(&suspend_time_syscore_ops);
+}
+module_init(suspend_time_syscore_init);
+module_exit(suspend_time_syscore_exit);
diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c
index 8f50de394d2..c8fba338007 100644
--- a/kernel/power/wakelock.c
+++ b/kernel/power/wakelock.c
@@ -9,7 +9,6 @@
* manipulate wakelocks on Android.
*/
-#include <linux/capability.h>
#include <linux/ctype.h>
#include <linux/device.h>
#include <linux/err.h>
@@ -189,9 +188,6 @@ int pm_wake_lock(const char *buf)
size_t len;
int ret = 0;
- if (!capable(CAP_BLOCK_SUSPEND))
- return -EPERM;
-
while (*str && !isspace(*str))
str++;
@@ -235,9 +231,6 @@ int pm_wake_unlock(const char *buf)
size_t len;
int ret = 0;
- if (!capable(CAP_BLOCK_SUSPEND))
- return -EPERM;
-
len = strlen(buf);
if (!len)
return -EINVAL;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 277e3557d0e..de9d360a65c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7130,13 +7130,24 @@ static inline int preempt_count_equals(int preempt_offset)
return (nested == preempt_offset);
}
+/*
+ * Set to 1 by the early initcall below.  __might_sleep() reads it: once
+ * it is set, sleep checks are no longer skipped while system_state is
+ * SYSTEM_BOOTING.
+ */
+static int __might_sleep_init_called;
+/* Early initcall that arms the __might_sleep() checks; always returns 0. */
+int __init __might_sleep_init(void)
+{
+ __might_sleep_init_called = 1;
+ return 0;
+}
+early_initcall(__might_sleep_init);
+
void __might_sleep(const char *file, int line, int preempt_offset)
{
static unsigned long prev_jiffy; /* ratelimiting */
rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
- system_state != SYSTEM_RUNNING || oops_in_progress)
+ oops_in_progress)
+ return;
+ if (system_state != SYSTEM_RUNNING &&
+ (!__might_sleep_init_called || system_state != SYSTEM_BOOTING))
return;
if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
return;
@@ -7742,6 +7753,23 @@ static void cpu_cgroup_css_offline(struct cgroup *cgrp)
sched_offline_group(tg);
}
+/*
+ * ->allow_attach() hook for the cpu cgroup subsystem.
+ *
+ * Every task in @tset must satisfy at least one of:
+ *  - it is the calling task itself,
+ *  - the caller has CAP_SYS_NICE,
+ *  - the caller's euid equals the task's uid or saved uid.
+ *
+ * Returns 0 if all tasks pass, -EACCES on the first one that does not.
+ *
+ * kuid_t values are compared with uid_eq() rather than "!=", matching
+ * the default permission check in the cgroup attach path; the raw
+ * operator does not build when kuid_t is a strict (struct) type.
+ *
+ * NOTE(review): __task_cred() presumably needs the RCU read lock held
+ * by the caller - confirm against the call site.
+ */
+static int
+cpu_cgroup_allow_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+{
+	const struct cred *cred = current_cred(), *tcred;
+	struct task_struct *task;
+
+	cgroup_taskset_for_each(task, cgrp, tset) {
+		tcred = __task_cred(task);
+
+		if ((current != task) && !capable(CAP_SYS_NICE) &&
+		    !uid_eq(cred->euid, tcred->uid) &&
+		    !uid_eq(cred->euid, tcred->suid))
+			return -EACCES;
+	}
+
+	return 0;
+}
+
static int cpu_cgroup_can_attach(struct cgroup *cgrp,
struct cgroup_taskset *tset)
{
@@ -8108,6 +8136,7 @@ struct cgroup_subsys cpu_cgroup_subsys = {
.css_offline = cpu_cgroup_css_offline,
.can_attach = cpu_cgroup_can_attach,
.attach = cpu_cgroup_attach,
+ .allow_attach = cpu_cgroup_allow_attach,
.exit = cpu_cgroup_exit,
.subsys_id = cpu_cgroup_subsys_id,
.base_cftypes = cpu_files,
diff --git a/kernel/signal.c b/kernel/signal.c
index 113411bfe8b..50e41075ac7 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2848,7 +2848,7 @@ int do_sigtimedwait(const sigset_t *which, siginfo_t *info,
recalc_sigpending();
spin_unlock_irq(&tsk->sighand->siglock);
- timeout = schedule_timeout_interruptible(timeout);
+ timeout = freezable_schedule_timeout_interruptible(timeout);
spin_lock_irq(&tsk->sighand->siglock);
__set_task_blocked(tsk, &tsk->real_blocked);
diff --git a/kernel/sys.c b/kernel/sys.c
index 2bbd9a73b54..126b7c939d1 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -42,6 +42,8 @@
#include <linux/syscore_ops.h>
#include <linux/version.h>
#include <linux/ctype.h>
+#include <linux/mm.h>
+#include <linux/mempolicy.h>
#include <linux/compat.h>
#include <linux/syscalls.h>
@@ -2099,6 +2101,153 @@ static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
}
#endif
+#ifdef CONFIG_MMU
+/*
+ * Apply the anonymous-mapping name @name_addr to the part of @vma that
+ * lies in [@start, @end).
+ *
+ * @prev is updated to the vma that now covers the range (either the
+ * result of a merge or @vma itself).  The vma is first offered to
+ * vma_merge(); if no merge happens it is split at @start and/or @end
+ * as needed so that the name applies only to the requested range.
+ *
+ * Returns 0 on success or the split_vma() error, with -ENOMEM
+ * translated to -EAGAIN.
+ */
+static int prctl_update_vma_anon_name(struct vm_area_struct *vma,
+ struct vm_area_struct **prev,
+ unsigned long start, unsigned long end,
+ const char __user *name_addr)
+{
+ struct mm_struct * mm = vma->vm_mm;
+ int error = 0;
+ pgoff_t pgoff;
+
+ /* Name already matches: nothing to change, just advance *prev. */
+ if (name_addr == vma_get_anon_name(vma)) {
+ *prev = vma;
+ goto out;
+ }
+
+ /* Page offset that corresponds to @start within this vma. */
+ pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
+ *prev = vma_merge(mm, *prev, start, end, vma->vm_flags, vma->anon_vma,
+ vma->vm_file, pgoff, vma_policy(vma),
+ name_addr);
+ if (*prev) {
+ /* Merged: continue with the vma returned by vma_merge(). */
+ vma = *prev;
+ goto success;
+ }
+
+ *prev = vma;
+
+ /* Split off the part of the vma that precedes @start. */
+ if (start != vma->vm_start) {
+ error = split_vma(mm, vma, start, 1);
+ if (error)
+ goto out;
+ }
+
+ /* Split off the part of the vma that follows @end. */
+ if (end != vma->vm_end) {
+ error = split_vma(mm, vma, end, 0);
+ if (error)
+ goto out;
+ }
+
+success:
+ /* Only vmas without a backing file get the name stored. */
+ if (!vma->vm_file)
+ vma->shared.anon_name = name_addr;
+
+out:
+ if (error == -ENOMEM)
+ error = -EAGAIN;
+ return error;
+}
+
+/*
+ * Set the anonymous-mapping name of every vma that intersects
+ * [@start, @end) to the user pointer passed in @arg.
+ *
+ * The range is walked one vma at a time; each step hands the helper
+ * only the slice of the range that lies inside the current vma
+ * ([start, tmp)), so a single update never spans two vmas.
+ *
+ * Returns 0 on success, -ENOMEM if any part of the range is unmapped
+ * (mapped parts are still updated), or the error returned by
+ * prctl_update_vma_anon_name().
+ */
+static int prctl_set_vma_anon_name(unsigned long start, unsigned long end,
+			unsigned long arg)
+{
+	unsigned long tmp;
+	struct vm_area_struct * vma, *prev;
+	int unmapped_error = 0;
+	int error = -EINVAL;
+
+	/*
+	 * If the interval [start,end) covers some unmapped address
+	 * ranges, just ignore them, but return -ENOMEM at the end.
+	 * - this matches the handling in madvise.
+	 */
+	vma = find_vma_prev(current->mm, start, &prev);
+	if (vma && start > vma->vm_start)
+		prev = vma;
+
+	for (;;) {
+		/* Still start < end. */
+		error = -ENOMEM;
+		if (!vma)
+			return error;
+
+		/* Here start < (end|vma->vm_end). */
+		if (start < vma->vm_start) {
+			unmapped_error = -ENOMEM;
+			start = vma->vm_start;
+			if (start >= end)
+				return error;
+		}
+
+		/* Here vma->vm_start <= start < (end|vma->vm_end) */
+		tmp = vma->vm_end;
+		if (end < tmp)
+			tmp = end;
+
+		/*
+		 * Here vma->vm_start <= start < tmp <= (end|vma->vm_end).
+		 * Pass tmp, not end: the update must stay within this vma.
+		 */
+		error = prctl_update_vma_anon_name(vma, &prev, start, tmp,
+				(const char __user *)arg);
+		if (error)
+			return error;
+		start = tmp;
+		if (prev && start < prev->vm_end)
+			start = prev->vm_end;
+		error = unmapped_error;
+		if (start >= end)
+			return error;
+		if (prev)
+			vma = prev->vm_next;
+		else	/* no previous vma to step from: look the next one up */
+			vma = find_vma(current->mm, start);
+	}
+}
+
+/*
+ * PR_SET_VMA backend: validate the [@start, @start + @len_in) range and
+ * dispatch on the sub-option @opt while holding mmap_sem for writing.
+ *
+ * Returns -EINVAL for an unaligned @start, a length that overflows
+ * when rounded up to whole pages, a range that wraps around, or an
+ * unknown @opt; 0 for an empty range; otherwise the result of the
+ * sub-option handler.
+ */
+static int prctl_set_vma(unsigned long opt, unsigned long start,
+		unsigned long len_in, unsigned long arg)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long len, end;
+	int error;
+
+	/* The range has to begin on a page boundary. */
+	if (start & ~PAGE_MASK)
+		return -EINVAL;
+
+	/* Round up to whole pages; a non-zero length becoming 0 wrapped. */
+	len = (len_in + ~PAGE_MASK) & PAGE_MASK;
+	if (len_in && !len)
+		return -EINVAL;
+
+	end = start + len;
+	if (end < start)
+		return -EINVAL;
+
+	/* Nothing to do for an empty range. */
+	if (end == start)
+		return 0;
+
+	down_write(&mm->mmap_sem);
+	if (opt == PR_SET_VMA_ANON_NAME)
+		error = prctl_set_vma_anon_name(start, end, arg);
+	else
+		error = -EINVAL;
+	up_write(&mm->mmap_sem);
+
+	return error;
+}
+#else /* CONFIG_MMU */
+/* Variant built when CONFIG_MMU is not set: PR_SET_VMA always fails with -EINVAL. */
+static int prctl_set_vma(unsigned long opt, unsigned long start,
+ unsigned long len_in, unsigned long arg)
+{
+ return -EINVAL;
+}
+#endif
+
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
unsigned long, arg4, unsigned long, arg5)
{
@@ -2262,6 +2411,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
if (arg2 || arg3 || arg4 || arg5)
return -EINVAL;
return current->no_new_privs ? 1 : 0;
+ case PR_SET_VMA:
+ error = prctl_set_vma(arg2, arg3, arg4, arg5);
+ break;
default:
error = -EINVAL;
break;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9edcf456e0f..6f562baf5f4 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -105,6 +105,8 @@ extern char core_pattern[];
extern unsigned int core_pipe_limit;
#endif
extern int pid_max;
+extern int extra_free_kbytes;
+extern int min_free_order_shift;
extern int pid_max_min, pid_max_max;
extern int percpu_pagelist_fraction;
extern int compat_log;
@@ -1266,6 +1268,21 @@ static struct ctl_table vm_table[] = {
.extra1 = &zero,
},
{
+ .procname = "extra_free_kbytes",
+ .data = &extra_free_kbytes,
+ .maxlen = sizeof(extra_free_kbytes),
+ .mode = 0644,
+ .proc_handler = min_free_kbytes_sysctl_handler,
+ .extra1 = &zero,
+ },
+ {
+ .procname = "min_free_order_shift",
+ .data = &min_free_order_shift,
+ .maxlen = sizeof(min_free_order_shift),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
.procname = "percpu_pagelist_fraction",
.data = &percpu_pagelist_fraction,
.maxlen = sizeof(percpu_pagelist_fraction),
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index a8f5084dcde..d41fcb46a40 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -199,6 +199,12 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
}
+/**
+ * alarm_expires_remaining - Time left until an alarm expires
+ * @alarm: ptr to alarm to query
+ *
+ * Returns the alarm's expiry time minus the current time of the
+ * alarm base selected by @alarm->type.
+ */
+ktime_t alarm_expires_remaining(const struct alarm *alarm)
+{
+ struct alarm_base *base = &alarm_bases[alarm->type];
+ return ktime_sub(alarm->node.expires, base->gettime());
+}
+
#ifdef CONFIG_RTC_CLASS
/**
* alarmtimer_suspend - Suspend time callback
@@ -305,7 +311,7 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
}
/**
- * alarm_start - Sets an alarm to fire
+ * alarm_start - Sets an absolute alarm to fire
* @alarm: ptr to alarm to set
* @start: time to run the alarm
*/
@@ -325,6 +331,31 @@ int alarm_start(struct alarm *alarm, ktime_t start)
}
/**
+ * alarm_start_relative - Sets a relative alarm to fire
+ * @alarm: ptr to alarm to set
+ * @start: time relative to now to run the alarm
+ */
+int alarm_start_relative(struct alarm *alarm, ktime_t start)
+{
+ struct alarm_base *base = &alarm_bases[alarm->type];
+
+ /* Convert the relative offset to an absolute time on this alarm's base. */
+ start = ktime_add(start, base->gettime());
+ return alarm_start(alarm, start);
+}
+
+/**
+ * alarm_restart - Re-arm an alarm at its stored expiry time
+ * @alarm: ptr to alarm to restart
+ *
+ * Under the alarm base lock, programs the alarm's hrtimer with
+ * @alarm->node.expires, restarts the hrtimer and enqueues the alarm
+ * on its base.
+ */
+void alarm_restart(struct alarm *alarm)
+{
+ struct alarm_base *base = &alarm_bases[alarm->type];
+ unsigned long flags;
+
+ spin_lock_irqsave(&base->lock, flags);
+ hrtimer_set_expires(&alarm->timer, alarm->node.expires);
+ hrtimer_restart(&alarm->timer);
+ alarmtimer_enqueue(base, alarm);
+ spin_unlock_irqrestore(&base->lock, flags);
+}
+
+/**
* alarm_try_to_cancel - Tries to cancel an alarm timer
* @alarm: ptr to alarm to be canceled
*
@@ -394,6 +425,12 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval)
return overrun;
}
+/**
+ * alarm_forward_now - Forward an alarm relative to the current time
+ * @alarm: ptr to alarm to forward
+ * @interval: interval passed through to alarm_forward()
+ *
+ * Calls alarm_forward() with "now" taken from the clock of the alarm
+ * base selected by @alarm->type, and returns its result.
+ */
+u64 alarm_forward_now(struct alarm *alarm, ktime_t interval)
+{
+ struct alarm_base *base = &alarm_bases[alarm->type];
+
+ return alarm_forward(alarm, base->gettime(), interval);
+}
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 015f85aaca0..e24c188cbbc 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -82,6 +82,9 @@ config EVENT_TRACING
select CONTEXT_SWITCH_TRACER
bool
+config GPU_TRACEPOINTS
+ bool
+
config CONTEXT_SWITCH_TRACER
bool
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index d7e2068e4b7..45012122fbb 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -60,5 +60,6 @@ obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
endif
obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o
+obj-$(CONFIG_GPU_TRACEPOINTS) += gpu-traces.o
libftrace-y := ftrace.o
diff --git a/kernel/trace/gpu-traces.c b/kernel/trace/gpu-traces.c
new file mode 100644
index 00000000000..a4b3f00faee
--- /dev/null
+++ b/kernel/trace/gpu-traces.c
@@ -0,0 +1,23 @@
+/*
+ * GPU tracepoints
+ *
+ * Copyright (C) 2013 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/module.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/gpu.h>
+
+EXPORT_TRACEPOINT_SYMBOL(gpu_sched_switch);
+EXPORT_TRACEPOINT_SYMBOL(gpu_job_enqueue);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 6dbdf277c8f..4c41e22f162 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -722,6 +722,7 @@ static const char *trace_options[] = {
"irq-info",
"markers",
"function-trace",
+ "print-tgid",
NULL
};
@@ -1234,6 +1235,7 @@ void tracing_reset_all_online_cpus(void)
static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
+static unsigned saved_tgids[SAVED_CMDLINES];
static int cmdline_idx;
static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
@@ -1437,6 +1439,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
}
memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
+ saved_tgids[idx] = tsk->tgid;
arch_spin_unlock(&trace_cmdline_lock);
}
@@ -1472,6 +1475,25 @@ void trace_find_cmdline(int pid, char comm[])
preempt_enable();
}
+/*
+ * Look up the thread group id that was recorded for @pid when its
+ * command line was last saved.
+ *
+ * Returns the saved tgid, or -1 when @pid is out of range for the
+ * pid map or has no saved entry.
+ */
+int trace_find_tgid(int pid)
+{
+	unsigned map;
+	int tgid = -1;
+
+	/*
+	 * @pid indexes map_pid_to_cmdline[], which only has
+	 * PID_MAX_DEFAULT + 1 slots; reject anything outside it.
+	 */
+	if (pid < 0 || pid > PID_MAX_DEFAULT)
+		return -1;
+
+	preempt_disable();
+	arch_spin_lock(&trace_cmdline_lock);
+	map = map_pid_to_cmdline[pid];
+	if (map != NO_CMDLINE_MAP)
+		tgid = saved_tgids[map];
+	arch_spin_unlock(&trace_cmdline_lock);
+	preempt_enable();
+
+	return tgid;
+}
+
void tracing_record_cmdline(struct task_struct *tsk)
{
if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
@@ -2428,6 +2450,13 @@ static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
seq_puts(m, "# | | | | |\n");
}
+static void print_func_help_header_tgid(struct trace_buffer *buf, struct seq_file *m)
+{
+ print_event_info(buf, m);
+ seq_puts(m, "# TASK-PID TGID CPU# TIMESTAMP FUNCTION\n");
+ seq_puts(m, "# | | | | | |\n");
+}
+
static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
{
print_event_info(buf, m);
@@ -2440,6 +2469,18 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file
seq_puts(m, "# | | | |||| | |\n");
}
+static void print_func_help_header_irq_tgid(struct trace_buffer *buf, struct seq_file *m)
+{
+ print_event_info(buf, m);
+ seq_puts(m, "# _-----=> irqs-off\n");
+ seq_puts(m, "# / _----=> need-resched\n");
+ seq_puts(m, "# | / _---=> hardirq/softirq\n");
+ seq_puts(m, "# || / _--=> preempt-depth\n");
+ seq_puts(m, "# ||| / delay\n");
+ seq_puts(m, "# TASK-PID TGID CPU# |||| TIMESTAMP FUNCTION\n");
+ seq_puts(m, "# | | | | |||| | |\n");
+}
+
void
print_trace_header(struct seq_file *m, struct trace_iterator *iter)
{
@@ -2740,9 +2781,15 @@ void trace_default_header(struct seq_file *m)
} else {
if (!(trace_flags & TRACE_ITER_VERBOSE)) {
if (trace_flags & TRACE_ITER_IRQ_INFO)
- print_func_help_header_irq(iter->trace_buffer, m);
+ if (trace_flags & TRACE_ITER_TGID)
+ print_func_help_header_irq_tgid(iter->trace_buffer, m);
+ else
+ print_func_help_header_irq(iter->trace_buffer, m);
else
- print_func_help_header(iter->trace_buffer, m);
+ if (trace_flags & TRACE_ITER_TGID)
+ print_func_help_header_tgid(iter->trace_buffer, m);
+ else
+ print_func_help_header(iter->trace_buffer, m);
}
}
}
@@ -3594,9 +3641,53 @@ tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
}
static const struct file_operations tracing_saved_cmdlines_fops = {
- .open = tracing_open_generic,
- .read = tracing_saved_cmdlines_read,
- .llseek = generic_file_llseek,
+ .open = tracing_open_generic,
+ .read = tracing_saved_cmdlines_read,
+ .llseek = generic_file_llseek,
+};
+
+static ssize_t
+tracing_saved_tgids_read(struct file *file, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ char *file_buf;
+ char *buf;
+ int len = 0;
+ int pid;
+ int i;
+
+ file_buf = kmalloc(SAVED_CMDLINES*(16+1+16), GFP_KERNEL);
+ if (!file_buf)
+ return -ENOMEM;
+
+ buf = file_buf;
+
+ for (i = 0; i < SAVED_CMDLINES; i++) {
+ int tgid;
+ int r;
+
+ pid = map_cmdline_to_pid[i];
+ if (pid == -1 || pid == NO_CMDLINE_MAP)
+ continue;
+
+ tgid = trace_find_tgid(pid);
+ r = sprintf(buf, "%d %d\n", pid, tgid);
+ buf += r;
+ len += r;
+ }
+
+ len = simple_read_from_buffer(ubuf, cnt, ppos,
+ file_buf, len);
+
+ kfree(file_buf);
+
+ return len;
+}
+
+static const struct file_operations tracing_saved_tgids_fops = {
+ .open = tracing_open_generic,
+ .read = tracing_saved_tgids_read,
+ .llseek = generic_file_llseek,
};
static ssize_t
@@ -6139,6 +6230,9 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
trace_create_file("trace_marker", 0220, d_tracer,
tr, &tracing_mark_fops);
+ trace_create_file("saved_tgids", 0444, d_tracer,
+ tr, &tracing_saved_tgids_fops);
+
trace_create_file("trace_clock", 0644, d_tracer, tr,
&trace_clock_fops);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 51b44483eb7..691cb4fba7e 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -653,6 +653,7 @@ static inline void __trace_stack(struct trace_array *tr, unsigned long flags,
extern cycle_t ftrace_now(int cpu);
extern void trace_find_cmdline(int pid, char comm[]);
+extern int trace_find_tgid(int pid);
#ifdef CONFIG_DYNAMIC_FTRACE
extern unsigned long ftrace_update_tot_cnt;
@@ -866,6 +867,7 @@ enum trace_iterator_flags {
TRACE_ITER_IRQ_INFO = 0x800000,
TRACE_ITER_MARKERS = 0x1000000,
TRACE_ITER_FUNCTION = 0x2000000,
+ TRACE_ITER_TGID = 0x4000000,
};
/*
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 8388bc99f2e..28dd40c2c42 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -46,6 +46,8 @@ struct fgraph_data {
#define TRACE_GRAPH_PRINT_DURATION 0x10
#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
#define TRACE_GRAPH_PRINT_IRQS 0x40
+#define TRACE_GRAPH_PRINT_FLAT 0x80
+
static unsigned int max_depth;
@@ -64,6 +66,8 @@ static struct tracer_opt trace_opts[] = {
{ TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
/* Display interrupts */
{ TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) },
+ /* Use standard trace formatting rather than hierarchical */
+ { TRACER_OPT(funcgraph-flat, TRACE_GRAPH_PRINT_FLAT) },
{ } /* Empty entry */
};
@@ -1234,6 +1238,9 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags)
int cpu = iter->cpu;
int ret;
+ if (flags & TRACE_GRAPH_PRINT_FLAT)
+ return TRACE_TYPE_UNHANDLED;
+
if (data && per_cpu_ptr(data->cpu_data, cpu)->ignore) {
per_cpu_ptr(data->cpu_data, cpu)->ignore = 0;
return TRACE_TYPE_HANDLED;
@@ -1291,13 +1298,6 @@ print_graph_function(struct trace_iterator *iter)
return print_graph_function_flags(iter, tracer_flags.val);
}
-static enum print_line_t
-print_graph_function_event(struct trace_iterator *iter, int flags,
- struct trace_event *event)
-{
- return print_graph_function(iter);
-}
-
static void print_lat_header(struct seq_file *s, u32 flags)
{
static const char spaces[] = " " /* 16 spaces */
@@ -1364,6 +1364,11 @@ void print_graph_headers_flags(struct seq_file *s, u32 flags)
{
struct trace_iterator *iter = s->private;
+ if (flags & TRACE_GRAPH_PRINT_FLAT) {
+ trace_default_header(s);
+ return;
+ }
+
if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
return;
@@ -1434,20 +1439,6 @@ static int func_graph_set_flag(u32 old_flags, u32 bit, int set)
return 0;
}
-static struct trace_event_functions graph_functions = {
- .trace = print_graph_function_event,
-};
-
-static struct trace_event graph_trace_entry_event = {
- .type = TRACE_GRAPH_ENT,
- .funcs = &graph_functions,
-};
-
-static struct trace_event graph_trace_ret_event = {
- .type = TRACE_GRAPH_RET,
- .funcs = &graph_functions
-};
-
static struct tracer graph_trace __read_mostly = {
.name = "function_graph",
.open = graph_trace_open,
@@ -1523,16 +1514,6 @@ static __init int init_graph_trace(void)
{
max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1);
- if (!register_ftrace_event(&graph_trace_entry_event)) {
- pr_warning("Warning: could not register graph trace events\n");
- return 1;
- }
-
- if (!register_ftrace_event(&graph_trace_ret_event)) {
- pr_warning("Warning: could not register graph trace events\n");
- return 1;
- }
-
return register_tracer(&graph_trace);
}
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index bb922d9ee51..a68e5e34c00 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -702,11 +702,25 @@ int trace_print_context(struct trace_iterator *iter)
unsigned long secs, usec_rem;
char comm[TASK_COMM_LEN];
int ret;
+ int tgid;
trace_find_cmdline(entry->pid, comm);
- ret = trace_seq_printf(s, "%16s-%-5d [%03d] ",
- comm, entry->pid, iter->cpu);
+ ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
+ if (!ret)
+ return 0;
+
+ if (trace_flags & TRACE_ITER_TGID) {
+ tgid = trace_find_tgid(entry->pid);
+ if (tgid < 0)
+ ret = trace_seq_puts(s, "(-----) ");
+ else
+ ret = trace_seq_printf(s, "(%5d) ", tgid);
+ if (!ret)
+ return 0;
+ }
+
+ ret = trace_seq_printf(s, "[%03d] ", iter->cpu);
if (!ret)
return 0;
@@ -1035,6 +1049,168 @@ static struct trace_event trace_fn_event = {
.funcs = &trace_fn_funcs,
};
+/* TRACE_GRAPH_ENT */
+static enum print_line_t trace_graph_ent_trace(struct trace_iterator *iter, int flags,
+ struct trace_event *event)
+{
+ struct trace_seq *s = &iter->seq;
+ struct ftrace_graph_ent_entry *field;
+
+ trace_assign_type(field, iter->ent);
+
+ if (!trace_seq_puts(s, "graph_ent: func="))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ if (!seq_print_ip_sym(s, field->graph_ent.func, flags))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ if (!trace_seq_puts(s, "\n"))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_graph_ent_raw(struct trace_iterator *iter, int flags,
+ struct trace_event *event)
+{
+ struct ftrace_graph_ent_entry *field;
+
+ trace_assign_type(field, iter->ent);
+
+ if (!trace_seq_printf(&iter->seq, "%lx %d\n",
+ field->graph_ent.func,
+ field->graph_ent.depth))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_graph_ent_hex(struct trace_iterator *iter, int flags,
+ struct trace_event *event)
+{
+ struct ftrace_graph_ent_entry *field;
+ struct trace_seq *s = &iter->seq;
+
+ trace_assign_type(field, iter->ent);
+
+ SEQ_PUT_HEX_FIELD_RET(s, field->graph_ent.func);
+ SEQ_PUT_HEX_FIELD_RET(s, field->graph_ent.depth);
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_graph_ent_bin(struct trace_iterator *iter, int flags,
+ struct trace_event *event)
+{
+ struct ftrace_graph_ent_entry *field;
+ struct trace_seq *s = &iter->seq;
+
+ trace_assign_type(field, iter->ent);
+
+ SEQ_PUT_FIELD_RET(s, field->graph_ent.func);
+ SEQ_PUT_FIELD_RET(s, field->graph_ent.depth);
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static struct trace_event_functions trace_graph_ent_funcs = {
+ .trace = trace_graph_ent_trace,
+ .raw = trace_graph_ent_raw,
+ .hex = trace_graph_ent_hex,
+ .binary = trace_graph_ent_bin,
+};
+
+static struct trace_event trace_graph_ent_event = {
+ .type = TRACE_GRAPH_ENT,
+ .funcs = &trace_graph_ent_funcs,
+};
+
+/* TRACE_GRAPH_RET */
+static enum print_line_t trace_graph_ret_trace(struct trace_iterator *iter, int flags,
+ struct trace_event *event)
+{
+ struct trace_seq *s = &iter->seq;
+ struct trace_entry *entry = iter->ent;
+ struct ftrace_graph_ret_entry *field;
+
+ trace_assign_type(field, entry);
+
+ if (!trace_seq_puts(s, "graph_ret: func="))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ if (!seq_print_ip_sym(s, field->ret.func, flags))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ if (!trace_seq_puts(s, "\n"))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+/* TRACE_GRAPH_RET raw output: "func calltime rettime overrun depth". */
+static enum print_line_t trace_graph_ret_raw(struct trace_iterator *iter, int flags,
+		struct trace_event *event)
+{
+	struct ftrace_graph_ret_entry *field;
+
+	trace_assign_type(field, iter->ent);
+
+	/* Bail out with a partial line only if trace_seq_printf() returns 0. */
+	if (!trace_seq_printf(&iter->seq, "%lx %lld %lld %ld %d\n",
+			      field->ret.func, field->ret.calltime,
+			      field->ret.rettime, field->ret.overrun,
+			      field->ret.depth))
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_graph_ret_hex(struct trace_iterator *iter, int flags,
+ struct trace_event *event)
+{
+ struct ftrace_graph_ret_entry *field;
+ struct trace_seq *s = &iter->seq;
+
+ trace_assign_type(field, iter->ent);
+
+ SEQ_PUT_HEX_FIELD_RET(s, field->ret.func);
+ SEQ_PUT_HEX_FIELD_RET(s, field->ret.calltime);
+ SEQ_PUT_HEX_FIELD_RET(s, field->ret.rettime);
+ SEQ_PUT_HEX_FIELD_RET(s, field->ret.overrun);
+ SEQ_PUT_HEX_FIELD_RET(s, field->ret.depth);
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_graph_ret_bin(struct trace_iterator *iter, int flags,
+ struct trace_event *event)
+{
+ struct ftrace_graph_ret_entry *field;
+ struct trace_seq *s = &iter->seq;
+
+ trace_assign_type(field, iter->ent);
+
+ SEQ_PUT_FIELD_RET(s, field->ret.func);
+ SEQ_PUT_FIELD_RET(s, field->ret.calltime);
+ SEQ_PUT_FIELD_RET(s, field->ret.rettime);
+ SEQ_PUT_FIELD_RET(s, field->ret.overrun);
+ SEQ_PUT_FIELD_RET(s, field->ret.depth);
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static struct trace_event_functions trace_graph_ret_funcs = {
+ .trace = trace_graph_ret_trace,
+ .raw = trace_graph_ret_raw,
+ .hex = trace_graph_ret_hex,
+ .binary = trace_graph_ret_bin,
+};
+
+static struct trace_event trace_graph_ret_event = {
+ .type = TRACE_GRAPH_RET,
+ .funcs = &trace_graph_ret_funcs,
+};
+
/* TRACE_CTX an TRACE_WAKE */
static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter,
char *delim)
@@ -1425,6 +1601,8 @@ static struct trace_event trace_print_event = {
static struct trace_event *events[] __initdata = {
&trace_fn_event,
+ &trace_graph_ent_event,
+ &trace_graph_ret_event,
&trace_ctx_event,
&trace_wake_event,
&trace_stack_event,
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 05039e348f0..e092e5a6cdd 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -45,6 +45,11 @@ static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
static DEFINE_PER_CPU(bool, hard_watchdog_warn);
static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
+#endif
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
+static cpumask_t __read_mostly watchdog_cpus;
+#endif
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
#endif
@@ -178,7 +183,7 @@ void touch_softlockup_watchdog_sync(void)
__raw_get_cpu_var(watchdog_touch_ts) = 0;
}
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
/* watchdog detector functions */
static int is_hardlockup(void)
{
@@ -192,6 +197,76 @@ static int is_hardlockup(void)
}
#endif
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
+static unsigned int watchdog_next_cpu(unsigned int cpu)
+{
+ cpumask_t cpus = watchdog_cpus;
+ unsigned int next_cpu;
+
+ next_cpu = cpumask_next(cpu, &cpus);
+ if (next_cpu >= nr_cpu_ids)
+ next_cpu = cpumask_first(&cpus);
+
+ if (next_cpu == cpu)
+ return nr_cpu_ids;
+
+ return next_cpu;
+}
+
+static int is_hardlockup_other_cpu(unsigned int cpu)
+{
+ unsigned long hrint = per_cpu(hrtimer_interrupts, cpu);
+
+ if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint)
+ return 1;
+
+ per_cpu(hrtimer_interrupts_saved, cpu) = hrint;
+ return 0;
+}
+
+static void watchdog_check_hardlockup_other_cpu(void)
+{
+ unsigned int next_cpu;
+
+ /*
+ * Test for hardlockups every 3 samples. The sample period is
+ * watchdog_thresh * 2 / 5, so 3 samples gets us back to slightly over
+ * watchdog_thresh (over by 20%).
+ */
+ if (__this_cpu_read(hrtimer_interrupts) % 3 != 0)
+ return;
+
+ /* check for a hardlockup on the next cpu */
+ next_cpu = watchdog_next_cpu(smp_processor_id());
+ if (next_cpu >= nr_cpu_ids)
+ return;
+
+ smp_rmb();
+
+ if (per_cpu(watchdog_nmi_touch, next_cpu) == true) {
+ per_cpu(watchdog_nmi_touch, next_cpu) = false;
+ return;
+ }
+
+ if (is_hardlockup_other_cpu(next_cpu)) {
+ /* only warn once */
+ if (per_cpu(hard_watchdog_warn, next_cpu) == true)
+ return;
+
+ if (hardlockup_panic)
+ panic("Watchdog detected hard LOCKUP on cpu %u", next_cpu);
+ else
+ WARN(1, "Watchdog detected hard LOCKUP on cpu %u", next_cpu);
+
+ per_cpu(hard_watchdog_warn, next_cpu) = true;
+ } else {
+ per_cpu(hard_watchdog_warn, next_cpu) = false;
+ }
+}
+#else
+static inline void watchdog_check_hardlockup_other_cpu(void) { return; }
+#endif
+
static int is_softlockup(unsigned long touch_ts)
{
unsigned long now = get_timestamp();
@@ -203,7 +278,7 @@ static int is_softlockup(unsigned long touch_ts)
return 0;
}
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
static struct perf_event_attr wd_hw_attr = {
.type = PERF_TYPE_HARDWARE,
@@ -251,7 +326,7 @@ static void watchdog_overflow_callback(struct perf_event *event,
__this_cpu_write(hard_watchdog_warn, false);
return;
}
-#endif /* CONFIG_HARDLOCKUP_DETECTOR */
+#endif /* CONFIG_HARDLOCKUP_DETECTOR_NMI */
static void watchdog_interrupt_count(void)
{
@@ -271,6 +346,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
/* kick the hardlockup detector */
watchdog_interrupt_count();
+ /* test for hardlockups on the next cpu */
+ watchdog_check_hardlockup_other_cpu();
+
/* kick the softlockup detector */
wake_up_process(__this_cpu_read(softlockup_watchdog));
@@ -395,7 +473,7 @@ static void watchdog(unsigned int cpu)
__touch_watchdog();
}
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
/*
* People like the simple clean cpu node info on boot.
* Reduce the watchdog noise by only printing messages
@@ -471,9 +549,44 @@ static void watchdog_nmi_disable(unsigned int cpu)
return;
}
#else
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
+static int watchdog_nmi_enable(unsigned int cpu)
+{
+ /*
+ * The new cpu will be marked online before the first hrtimer interrupt
+ * runs on it. If another cpu tests for a hardlockup on the new cpu
+ * before it has run its first hrtimer, it will get a false positive.
+ * Touch the watchdog on the new cpu to delay the first check for at
+ * least 3 sampling periods to guarantee one hrtimer has run on the new
+ * cpu.
+ */
+ per_cpu(watchdog_nmi_touch, cpu) = true;
+ smp_wmb();
+ cpumask_set_cpu(cpu, &watchdog_cpus);
+ return 0;
+}
+
+static void watchdog_nmi_disable(unsigned int cpu)
+{
+ unsigned int next_cpu = watchdog_next_cpu(cpu);
+
+ /*
+ * Offlining this cpu will cause the cpu before this one to start
+ * checking the one after this one. If this cpu just finished checking
+ * the next cpu and updating hrtimer_interrupts_saved, and then the
+ * previous cpu checks it within one sample period, it will trigger a
+ * false positive. Touch the watchdog on the next cpu to prevent it.
+ */
+ if (next_cpu < nr_cpu_ids)
+ per_cpu(watchdog_nmi_touch, next_cpu) = true;
+ smp_wmb();
+ cpumask_clear_cpu(cpu, &watchdog_cpus);
+}
+#else
static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
static void watchdog_nmi_disable(unsigned int cpu) { return; }
-#endif /* CONFIG_HARDLOCKUP_DETECTOR */
+#endif /* CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU */
+#endif /* CONFIG_HARDLOCKUP_DETECTOR_NMI */
/* prepare/enable/disable routines */
/* sysctl functions */