Diffstat (limited to 'kernel')
-rw-r--r--  kernel/events/hw_breakpoint.c |  4
-rw-r--r--  kernel/fork.c                 | 65
-rw-r--r--  kernel/hung_task.c            | 11
-rw-r--r--  kernel/irq/autoprobe.c        |  4
-rw-r--r--  kernel/irq/chip.c             | 42
-rw-r--r--  kernel/irq/internals.h        |  2
-rw-r--r--  kernel/irq/manage.c           | 46
-rw-r--r--  kernel/kprobes.c              | 12
-rw-r--r--  kernel/pid.c                  |  4
-rw-r--r--  kernel/printk.c               |  6
-rw-r--r--  kernel/sched/core.c           |  1
-rw-r--r--  kernel/sched/fair.c           |  2
12 files changed, 144 insertions, 55 deletions
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index b7971d6f38bf..ee706ce44aa0 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -651,10 +651,10 @@ int __init init_hw_breakpoint(void)
err_alloc:
for_each_possible_cpu(err_cpu) {
- if (err_cpu == cpu)
- break;
for (i = 0; i < TYPE_MAX; i++)
kfree(per_cpu(nr_task_bp_pinned[i], cpu));
+ if (err_cpu == cpu)
+ break;
}
return -ENOMEM;
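
The hunk above moves the early break below the kfree() loop, so the CPU that failed allocation also has its partially allocated per-type buffers freed before the unwind stops. A minimal userspace sketch of that unwind pattern, using made-up names (alloc_slots, NSLOTS, NTYPES) rather than the perf API:

#include <stdlib.h>

#define NSLOTS 4
#define NTYPES 2

static void *slot[NSLOTS][NTYPES];	/* zero-initialized: all NULL */

static int alloc_slots(void)
{
    int i, t, err_i;

    for (i = 0; i < NSLOTS; i++)
        for (t = 0; t < NTYPES; t++) {
            slot[i][t] = malloc(64);
            if (!slot[i][t])
                goto err;
        }
    return 0;
err:
    /* Unwind everything allocated so far, including slot i's partial row. */
    for (err_i = 0; err_i < NSLOTS; err_i++) {
        for (t = 0; t < NTYPES; t++)
            free(slot[err_i][t]);	/* free(NULL) is a no-op */
        if (err_i == i)
            break;			/* stop only after the failing slot */
    }
    return -1;
}

int main(void)
{
    /* On success the slots are simply left for the OS to reclaim at exit. */
    return alloc_slots() ? 1 : 0;
}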
diff --git a/kernel/fork.c b/kernel/fork.c
index b77fd559c78e..26a7a6707fa7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -66,6 +66,7 @@
#include <linux/user-return-notifier.h>
#include <linux/oom.h>
#include <linux/khugepaged.h>
+#include <linux/signalfd.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -667,6 +668,38 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
return mm;
}
+static void complete_vfork_done(struct task_struct *tsk)
+{
+ struct completion *vfork;
+
+ task_lock(tsk);
+ vfork = tsk->vfork_done;
+ if (likely(vfork)) {
+ tsk->vfork_done = NULL;
+ complete(vfork);
+ }
+ task_unlock(tsk);
+}
+
+static int wait_for_vfork_done(struct task_struct *child,
+ struct completion *vfork)
+{
+ int killed;
+
+ freezer_do_not_count();
+ killed = wait_for_completion_killable(vfork);
+ freezer_count();
+
+ if (killed) {
+ task_lock(child);
+ child->vfork_done = NULL;
+ task_unlock(child);
+ }
+
+ put_task_struct(child);
+ return killed;
+}
+
/* Please note the differences between mmput and mm_release.
* mmput is called whenever we stop holding onto a mm_struct,
* error success whatever.
@@ -682,8 +715,6 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
*/
void mm_release(struct task_struct *tsk, struct mm_struct *mm)
{
- struct completion *vfork_done = tsk->vfork_done;
-
/* Get rid of any futexes when releasing the mm */
#ifdef CONFIG_FUTEX
if (unlikely(tsk->robust_list)) {
@@ -703,17 +734,15 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
/* Get rid of any cached register state */
deactivate_mm(tsk, mm);
- /* notify parent sleeping on vfork() */
- if (vfork_done) {
- tsk->vfork_done = NULL;
- complete(vfork_done);
- }
+ if (tsk->vfork_done)
+ complete_vfork_done(tsk);
/*
* If we're exiting normally, clear a user-space tid field if
* requested. We leave this alone when dying by signal, to leave
* the value intact in a core dump, and to save the unnecessary
- * trouble otherwise. Userland only wants this done for a sys_exit.
+ * trouble, say, a killed vfork parent shouldn't touch this mm.
+ * Userland only wants this done for a sys_exit.
*/
if (tsk->clear_child_tid) {
if (!(tsk->flags & PF_SIGNALED) &&
@@ -935,8 +964,10 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
void __cleanup_sighand(struct sighand_struct *sighand)
{
- if (atomic_dec_and_test(&sighand->count))
+ if (atomic_dec_and_test(&sighand->count)) {
+ signalfd_cleanup(sighand);
kmem_cache_free(sighand_cachep, sighand);
+ }
}
@@ -1015,7 +1046,6 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
new_flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
new_flags |= PF_FORKNOEXEC;
- new_flags |= PF_STARTING;
p->flags = new_flags;
}
@@ -1545,16 +1575,9 @@ long do_fork(unsigned long clone_flags,
if (clone_flags & CLONE_VFORK) {
p->vfork_done = &vfork;
init_completion(&vfork);
+ get_task_struct(p);
}
- /*
- * We set PF_STARTING at creation in case tracing wants to
- * use this to distinguish a fully live task from one that
- * hasn't finished SIGSTOP raising yet. Now we clear it
- * and set the child going.
- */
- p->flags &= ~PF_STARTING;
-
wake_up_new_task(p);
/* forking complete and child started to run, tell ptracer */
@@ -1562,10 +1585,8 @@ long do_fork(unsigned long clone_flags,
ptrace_event(trace, nr);
if (clone_flags & CLONE_VFORK) {
- freezer_do_not_count();
- wait_for_completion(&vfork);
- freezer_count();
- ptrace_event(PTRACE_EVENT_VFORK_DONE, nr);
+ if (!wait_for_vfork_done(p, &vfork))
+ ptrace_event(PTRACE_EVENT_VFORK_DONE, nr);
}
} else {
nr = PTR_ERR(p);
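
Taken together, the fork.c hunks make the vfork wait killable: the parent pins the child with get_task_struct(), waits with wait_for_completion_killable(), and if it is killed it clears child->vfork_done under task_lock() so the exiting child cannot complete a completion that lives on the parent's stack. A rough pthreads sketch of that handshake, with invented names (struct child, complete_done, abandon_wait) that are not the kernel API:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct completion {
    pthread_mutex_t lock;
    pthread_cond_t cond;
    bool done;
};

struct child {
    pthread_mutex_t task_lock;      /* stands in for task_lock() */
    struct completion *vfork_done;  /* cleared by whichever side acts first */
};

/* Child side: signal the waiter only if it has not already given up. */
void complete_done(struct child *c)
{
    struct completion *vfork;

    pthread_mutex_lock(&c->task_lock);
    vfork = c->vfork_done;
    c->vfork_done = NULL;
    pthread_mutex_unlock(&c->task_lock);

    if (vfork) {
        pthread_mutex_lock(&vfork->lock);
        vfork->done = true;
        pthread_cond_signal(&vfork->cond);
        pthread_mutex_unlock(&vfork->lock);
    }
}

/* Parent side, "killed" path: detach so the child never touches our stack. */
void abandon_wait(struct child *c)
{
    pthread_mutex_lock(&c->task_lock);
    c->vfork_done = NULL;
    pthread_mutex_unlock(&c->task_lock);
}

static void *child_fn(void *arg)
{
    complete_done(arg);
    return NULL;
}

int main(void)
{
    struct completion vfork = {
        PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, false
    };
    struct child c = { PTHREAD_MUTEX_INITIALIZER, &vfork };
    pthread_t t;

    pthread_create(&t, NULL, child_fn, &c);

    /* Parent side, normal path: wait for the child to signal completion. */
    pthread_mutex_lock(&vfork.lock);
    while (!vfork.done)
        pthread_cond_wait(&vfork.cond, &vfork.lock);
    pthread_mutex_unlock(&vfork.lock);

    pthread_join(t, NULL);
    printf("vfork-style handshake completed\n");
    return 0;
}

The point mirrored here is that the completion pointer is cleared under a lock by whichever side acts first, so a killed waiter's stack is never signalled after it has gone away.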
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 2e48ec0c2e91..c21449f85a2a 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -119,15 +119,20 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
* For preemptible RCU it is sufficient to call rcu_read_unlock in order
* to exit the grace period. For classic RCU, a reschedule is required.
*/
-static void rcu_lock_break(struct task_struct *g, struct task_struct *t)
+static bool rcu_lock_break(struct task_struct *g, struct task_struct *t)
{
+ bool can_cont;
+
get_task_struct(g);
get_task_struct(t);
rcu_read_unlock();
cond_resched();
rcu_read_lock();
+ can_cont = pid_alive(g) && pid_alive(t);
put_task_struct(t);
put_task_struct(g);
+
+ return can_cont;
}
/*
@@ -154,9 +159,7 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
goto unlock;
if (!--batch_count) {
batch_count = HUNG_TASK_BATCHING;
- rcu_lock_break(g, t);
- /* Exit if t or g was unhashed during refresh. */
- if (t->state == TASK_DEAD || g->state == TASK_DEAD)
+ if (!rcu_lock_break(g, t))
goto unlock;
}
/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
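
rcu_lock_break() now reports whether both task references survived the lock break, replacing the caller's TASK_DEAD checks with a pid_alive() test made while the references are still held. A generic userspace sketch of the same "pin, drop the lock, retake, revalidate" shape, with illustrative names (lock_break, struct item) rather than kernel interfaces:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct item {
    int refcount;   /* crude stand-in for get_task_struct()/put_task_struct() */
    bool alive;     /* cleared elsewhere when the item is unhashed */
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Drop the list lock so others can make progress, retake it, and tell the
 * caller whether the two items it was iterating over are still valid.
 * The caller must hold list_lock on entry and gets it back on return.
 */
static bool lock_break(struct item *a, struct item *b)
{
    bool can_cont;

    a->refcount++;      /* pin both items across the break */
    b->refcount++;
    pthread_mutex_unlock(&list_lock);
    /* ... other threads may run here and unhash a or b ... */
    pthread_mutex_lock(&list_lock);
    can_cont = a->alive && b->alive;
    a->refcount--;
    b->refcount--;

    return can_cont;
}

int main(void)
{
    struct item g = { 0, true }, t = { 0, true };

    pthread_mutex_lock(&list_lock);
    if (!lock_break(&g, &t))
        printf("an item went away, abort the walk\n");
    else
        printf("both items survived, keep walking\n");
    pthread_mutex_unlock(&list_lock);
    return 0;
}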
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 342d8f44e401..0119b9d467ae 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -53,7 +53,7 @@ unsigned long probe_irq_on(void)
if (desc->irq_data.chip->irq_set_type)
desc->irq_data.chip->irq_set_type(&desc->irq_data,
IRQ_TYPE_PROBE);
- irq_startup(desc);
+ irq_startup(desc, false);
}
raw_spin_unlock_irq(&desc->lock);
}
@@ -70,7 +70,7 @@ unsigned long probe_irq_on(void)
raw_spin_lock_irq(&desc->lock);
if (!desc->action && irq_settings_can_probe(desc)) {
desc->istate |= IRQS_AUTODETECT | IRQS_WAITING;
- if (irq_startup(desc))
+ if (irq_startup(desc, false))
desc->istate |= IRQS_PENDING;
}
raw_spin_unlock_irq(&desc->lock);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index f7c543a801d9..fb7db75ee0c8 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -157,19 +157,22 @@ static void irq_state_set_masked(struct irq_desc *desc)
irqd_set(&desc->irq_data, IRQD_IRQ_MASKED);
}
-int irq_startup(struct irq_desc *desc)
+int irq_startup(struct irq_desc *desc, bool resend)
{
+ int ret = 0;
+
irq_state_clr_disabled(desc);
desc->depth = 0;
if (desc->irq_data.chip->irq_startup) {
- int ret = desc->irq_data.chip->irq_startup(&desc->irq_data);
+ ret = desc->irq_data.chip->irq_startup(&desc->irq_data);
irq_state_clr_masked(desc);
- return ret;
+ } else {
+ irq_enable(desc);
}
-
- irq_enable(desc);
- return 0;
+ if (resend)
+ check_irq_resend(desc, desc->irq_data.irq);
+ return ret;
}
void irq_shutdown(struct irq_desc *desc)
@@ -330,6 +333,24 @@ out_unlock:
}
EXPORT_SYMBOL_GPL(handle_simple_irq);
+/*
+ * Called unconditionally from handle_level_irq() and only for oneshot
+ * interrupts from handle_fasteoi_irq()
+ */
+static void cond_unmask_irq(struct irq_desc *desc)
+{
+ /*
+ * We need to unmask in the following cases:
+ * - Standard level irq (IRQF_ONESHOT is not set)
+ * - Oneshot irq which did not wake the thread (caused by a
+ * spurious interrupt or a primary handler handling it
+ * completely).
+ */
+ if (!irqd_irq_disabled(&desc->irq_data) &&
+ irqd_irq_masked(&desc->irq_data) && !desc->threads_oneshot)
+ unmask_irq(desc);
+}
+
/**
* handle_level_irq - Level type irq handler
* @irq: the interrupt number
@@ -362,8 +383,8 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
handle_irq_event(desc);
- if (!irqd_irq_disabled(&desc->irq_data) && !(desc->istate & IRQS_ONESHOT))
- unmask_irq(desc);
+ cond_unmask_irq(desc);
+
out_unlock:
raw_spin_unlock(&desc->lock);
}
@@ -417,6 +438,9 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
preflow_handler(desc);
handle_irq_event(desc);
+ if (desc->istate & IRQS_ONESHOT)
+ cond_unmask_irq(desc);
+
out_eoi:
desc->irq_data.chip->irq_eoi(&desc->irq_data);
out_unlock:
@@ -625,7 +649,7 @@ __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
irq_settings_set_noprobe(desc);
irq_settings_set_norequest(desc);
irq_settings_set_nothread(desc);
- irq_startup(desc);
+ irq_startup(desc, true);
}
out:
irq_put_desc_busunlock(desc, flags);
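
cond_unmask_irq() centralizes when a masked line may be unmasked at the end of the flow handler: the line must not be disabled, must still be masked, and no oneshot thread may still be running (desc->threads_oneshot must be zero). A small standalone sketch of that predicate over plain flag words, using invented names (IRQ_DISABLED, IRQ_MASKED, struct fake_desc), not the genirq internals:

#include <stdio.h>

#define IRQ_DISABLED 0x1    /* line administratively disabled */
#define IRQ_MASKED   0x2    /* line currently masked at the chip */

struct fake_desc {
    unsigned int state;
    unsigned long threads_oneshot; /* bits of oneshot threads still running */
};

static void unmask(struct fake_desc *d)
{
    d->state &= ~IRQ_MASKED;
    printf("unmasked\n");
}

/* Unmask only if enabled, still masked, and no oneshot thread is pending. */
static void cond_unmask(struct fake_desc *d)
{
    if (!(d->state & IRQ_DISABLED) &&
        (d->state & IRQ_MASKED) && !d->threads_oneshot)
        unmask(d);
    else
        printf("left masked\n");
}

int main(void)
{
    struct fake_desc level   = { IRQ_MASKED, 0 };   /* no thread was woken */
    struct fake_desc oneshot = { IRQ_MASKED, 0x1 }; /* thread still running */

    cond_unmask(&level);    /* unmasked right away */
    cond_unmask(&oneshot);  /* stays masked until the thread finishes */
    return 0;
}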
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index b7952316016a..40378ff877e7 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -67,7 +67,7 @@ extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp);
extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume);
-extern int irq_startup(struct irq_desc *desc);
+extern int irq_startup(struct irq_desc *desc, bool resend);
extern void irq_shutdown(struct irq_desc *desc);
extern void irq_enable(struct irq_desc *desc);
extern void irq_disable(struct irq_desc *desc);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index a9a9dbe49fea..0f0d4704ddd8 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -985,6 +985,11 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
/* add new interrupt at end of irq queue */
do {
+ /*
+ * Or all existing action->thread_mask bits,
+ * so we can find the next zero bit for this
+ * new action.
+ */
thread_mask |= old->thread_mask;
old_ptr = &old->next;
old = *old_ptr;
@@ -993,14 +998,41 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
}
/*
- * Setup the thread mask for this irqaction. Unlikely to have
- * 32 resp 64 irqs sharing one line, but who knows.
+ * Setup the thread mask for this irqaction for ONESHOT. For
+ * !ONESHOT irqs the thread mask is 0 so we can avoid a
+ * conditional in irq_wake_thread().
*/
- if (new->flags & IRQF_ONESHOT && thread_mask == ~0UL) {
- ret = -EBUSY;
- goto out_mask;
+ if (new->flags & IRQF_ONESHOT) {
+ /*
+ * Unlikely to have 32 resp 64 irqs sharing one line,
+ * but who knows.
+ */
+ if (thread_mask == ~0UL) {
+ ret = -EBUSY;
+ goto out_mask;
+ }
+ /*
+ * The thread_mask for the action is or'ed to
+ * desc->thread_active to indicate that the
+ * IRQF_ONESHOT thread handler has been woken, but not
+ * yet finished. The bit is cleared when a thread
+ * completes. When all threads of a shared interrupt
+ * line have completed desc->threads_active becomes
+ * zero and the interrupt line is unmasked. See
+ * handle.c:irq_wake_thread() for further information.
+ *
+ * If no thread is woken by primary (hard irq context)
+ * interrupt handlers, then desc->threads_active is
+ * also checked for zero to unmask the irq line in the
+ * affected hard irq flow handlers
+ * (handle_[fasteoi|level]_irq).
+ *
+ * The new action gets the first zero bit of
+ * thread_mask assigned. See the loop above which or's
+ * all existing action->thread_mask bits.
+ */
+ new->thread_mask = 1 << ffz(thread_mask);
}
- new->thread_mask = 1 << ffz(thread_mask);
if (!shared) {
init_waitqueue_head(&desc->wait_for_threads);
@@ -1027,7 +1059,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
desc->istate |= IRQS_ONESHOT;
if (irq_settings_can_autoenable(desc))
- irq_startup(desc);
+ irq_startup(desc, true);
else
/* Undo nested disables: */
desc->depth = 1;
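
The comment block added above describes how each IRQF_ONESHOT action on a shared line is handed a private bit: OR together the masks already taken, then allocate the first zero bit. A worked userspace sketch of that allocation, emulating ffz() with __builtin_ctzl() on the complemented mask (names here are illustrative only):

#include <stdio.h>

/* First zero bit of x, like the kernel's ffz(); x must not be all ones. */
static unsigned int ffz_bit(unsigned long x)
{
    return (unsigned int)__builtin_ctzl(~x);
}

int main(void)
{
    unsigned long thread_mask = 0;  /* OR of all existing action masks */
    unsigned long new_mask;
    int i;

    /* Register three sharing oneshot actions; each takes one private bit. */
    for (i = 0; i < 3; i++) {
        if (thread_mask == ~0UL) {
            /* Every bit is taken: __setup_irq() returns -EBUSY here. */
            fprintf(stderr, "line full\n");
            return 1;
        }
        new_mask = 1UL << ffz_bit(thread_mask);
        thread_mask |= new_mask;
        printf("action %d gets thread_mask 0x%lx (aggregate now 0x%lx)\n",
               i, new_mask, thread_mask);
    }
    return 0;
}

The per-action bit handed out here is what cond_unmask_irq() in the chip.c hunk consults through desc->threads_oneshot before it unmasks the line.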
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 9788c0ec6f43..c62b8546cc90 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1334,8 +1334,10 @@ int __kprobes register_kprobe(struct kprobe *p)
if (!kernel_text_address((unsigned long) p->addr) ||
in_kprobes_functions((unsigned long) p->addr) ||
ftrace_text_reserved(p->addr, p->addr) ||
- jump_label_text_reserved(p->addr, p->addr))
- goto fail_with_jump_label;
+ jump_label_text_reserved(p->addr, p->addr)) {
+ ret = -EINVAL;
+ goto cannot_probe;
+ }
/* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
p->flags &= KPROBE_FLAG_DISABLED;
@@ -1352,7 +1354,7 @@ int __kprobes register_kprobe(struct kprobe *p)
* its code to prohibit unexpected unloading.
*/
if (unlikely(!try_module_get(probed_mod)))
- goto fail_with_jump_label;
+ goto cannot_probe;
/*
* If the module freed .init.text, we couldn't insert
@@ -1361,7 +1363,7 @@ int __kprobes register_kprobe(struct kprobe *p)
if (within_module_init((unsigned long)p->addr, probed_mod) &&
probed_mod->state != MODULE_STATE_COMING) {
module_put(probed_mod);
- goto fail_with_jump_label;
+ goto cannot_probe;
}
/* ret will be updated by following code */
}
@@ -1409,7 +1411,7 @@ out:
return ret;
-fail_with_jump_label:
+cannot_probe:
preempt_enable();
jump_label_unlock();
return ret;
diff --git a/kernel/pid.c b/kernel/pid.c
index ce8e00deaccb..9f08dfabaf13 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -543,12 +543,12 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
*/
void __init pidhash_init(void)
{
- int i, pidhash_size;
+ unsigned int i, pidhash_size;
pid_hash = alloc_large_system_hash("PID", sizeof(*pid_hash), 0, 18,
HASH_EARLY | HASH_SMALL,
&pidhash_shift, NULL, 4096);
- pidhash_size = 1 << pidhash_shift;
+ pidhash_size = 1U << pidhash_shift;
for (i = 0; i < pidhash_size; i++)
INIT_HLIST_HEAD(&pid_hash[i]);
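
The pid.c hunk is a signedness fix: shifting a signed 1 into the sign bit is undefined behaviour in C, so both the table size and the loop index are kept in unsigned arithmetic. A tiny standalone illustration:

#include <stdio.h>

int main(void)
{
    unsigned int shift = 31;

    /*
     * With a plain int, "1 << 31" shifts into the sign bit, which is
     * undefined behaviour in C; the 1U form is well defined and keeps the
     * table size and the loop index in unsigned arithmetic.
     */
    unsigned int size = 1U << shift;

    printf("hash table size for shift %u: %u\n", shift, size);
    return 0;
}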
diff --git a/kernel/printk.c b/kernel/printk.c
index 13c0a1143f49..32690a0b7a18 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -702,6 +702,9 @@ static bool printk_time = 0;
#endif
module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
+static bool always_kmsg_dump;
+module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR);
+
/* Check if we have any console registered that can be called early in boot. */
static int have_callable_console(void)
{
@@ -1732,6 +1735,9 @@ void kmsg_dump(enum kmsg_dump_reason reason)
unsigned long l1, l2;
unsigned long flags;
+ if ((reason > KMSG_DUMP_OOPS) && !always_kmsg_dump)
+ return;
+
/* Theoretically, the log could move on after we do this, but
there's not a lot we can do about that. The new messages
will overwrite the start of what we dump. */
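
The new always_kmsg_dump parameter gates kmsg_dump(): any reason numerically greater than KMSG_DUMP_OOPS is skipped unless the parameter is set. A compact sketch of the same gating pattern with stand-in names (enum dump_reason, maybe_dump, always_dump), not the printk API:

#include <stdbool.h>
#include <stdio.h>

/* Stand-in severity scale; only the ordering matters for the gate. */
enum dump_reason { DUMP_OOPS, DUMP_RESTART, DUMP_POWEROFF };

static bool always_dump;    /* would be a boolean module parameter */

static void maybe_dump(enum dump_reason reason)
{
    /* Skip the less severe reasons unless explicitly asked for. */
    if (reason > DUMP_OOPS && !always_dump) {
        printf("reason %d: skipped\n", reason);
        return;
    }
    printf("reason %d: dumping the log buffer\n", reason);
}

int main(void)
{
    maybe_dump(DUMP_OOPS);      /* always dumped */
    maybe_dump(DUMP_RESTART);   /* skipped by default */

    always_dump = true;
    maybe_dump(DUMP_RESTART);   /* dumped once the knob is set */
    return 0;
}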
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5255c9d2e053..b342f57879e6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1932,7 +1932,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
local_irq_enable();
#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
finish_lock_switch(rq, prev);
- trace_sched_stat_sleeptime(current, rq->clock);
fire_sched_in_preempt_notifiers(current);
if (mm)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7c6414fc669d..aca16b843b7e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1003,6 +1003,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
if (unlikely(delta > se->statistics.sleep_max))
se->statistics.sleep_max = delta;
+ se->statistics.sleep_start = 0;
se->statistics.sum_sleep_runtime += delta;
if (tsk) {
@@ -1019,6 +1020,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
if (unlikely(delta > se->statistics.block_max))
se->statistics.block_max = delta;
+ se->statistics.block_start = 0;
se->statistics.sum_sleep_runtime += delta;
if (tsk) {