Diffstat (limited to 'kernel')
 kernel/cgroup.c                   |   7
 kernel/cpuset.c                   |   2
 kernel/events/core.c              |  99
 kernel/events/internal.h          |  10
 kernel/events/ring_buffer.c       |  37
 kernel/fork.c                     |  28
 kernel/irq/chip.c                 |  19
 kernel/irq/manage.c               | 158
 kernel/irq/proc.c                 |  19
 kernel/sched/core.c               |  47
 kernel/sched/fair.c               |  25
 kernel/sched/sched.h              |   5
 kernel/sched/work-simple.c        |   1
 kernel/signal.c                   |  13
 kernel/softirq.c                  |  38
 kernel/time/timekeeping.c         |   2
 kernel/trace/latency_hist.c       |   4
 kernel/trace/trace_irqsoff.c      |   4
 kernel/trace/trace_sched_switch.c |   2
 kernel/trace/trace_sched_wakeup.c |   2
 kernel/workqueue.c                |   8
 21 files changed, 356 insertions(+), 174 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2a7a0fe1973b..21f5bdfc5c6c 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1319,7 +1319,7 @@ static int cgroup_show_options(struct seq_file *seq,
for_each_subsys(ss, ssid)
if (root->subsys_mask & (1 << ssid))
- seq_printf(seq, ",%s", ss->name);
+ seq_show_option(seq, ss->name, NULL);
if (root->flags & CGRP_ROOT_NOPREFIX)
seq_puts(seq, ",noprefix");
if (root->flags & CGRP_ROOT_XATTR)
@@ -1327,13 +1327,14 @@ static int cgroup_show_options(struct seq_file *seq,
spin_lock(&release_agent_path_lock);
if (strlen(root->release_agent_path))
- seq_printf(seq, ",release_agent=%s", root->release_agent_path);
+ seq_show_option(seq, "release_agent",
+ root->release_agent_path);
spin_unlock(&release_agent_path_lock);
if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags))
seq_puts(seq, ",clone_children");
if (strlen(root->name))
- seq_printf(seq, ",name=%s", root->name);
+ seq_show_option(seq, "name", root->name);
return 0;
}
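
The cgroup hunks above switch from raw seq_printf() to seq_show_option(), which escapes the emitted value so a crafted release_agent path or hierarchy name cannot inject fake fields into /proc/mounts. A minimal sketch of the same helper in a hypothetical filesystem's ->show_options() callback (the examplefs_* names and fields are invented for illustration):

#include <linux/fs.h>
#include <linux/seq_file.h>

struct examplefs_info {                 /* hypothetical per-sb options */
        char *uid_opt;
};

static int examplefs_show_options(struct seq_file *m, struct dentry *root)
{
        struct examplefs_info *info = root->d_sb->s_fs_info;

        /* Emits ",uid=<value>" with the value escaped. */
        if (info->uid_opt)
                seq_show_option(m, "uid", info->uid_opt);

        /* A NULL value emits just ",ro_snapshots" (flag-style option). */
        seq_show_option(m, "ro_snapshots", NULL);

        return 0;
}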
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index ee14e3a35a29..f0acff0f66c9 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1223,7 +1223,7 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
spin_unlock_irq(&callback_lock);
/* use trialcs->mems_allowed as a temp variable */
- update_nodemasks_hier(cs, &cs->mems_allowed);
+ update_nodemasks_hier(cs, &trialcs->mems_allowed);
done:
return retval;
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e05d1dd7db8f..1e7392d7b29a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1886,8 +1886,6 @@ event_sched_in(struct perf_event *event,
perf_pmu_disable(event->pmu);
- event->tstamp_running += tstamp - event->tstamp_stopped;
-
perf_set_shadow_time(event, ctx, tstamp);
perf_log_itrace_start(event);
@@ -1899,6 +1897,8 @@ event_sched_in(struct perf_event *event,
goto out;
}
+ event->tstamp_running += tstamp - event->tstamp_stopped;
+
if (!is_software_event(event))
cpuctx->active_oncpu++;
if (!ctx->nr_active++)
@@ -3976,28 +3976,21 @@ static void perf_event_for_each(struct perf_event *event,
perf_event_for_each_child(sibling, func);
}
-static int perf_event_period(struct perf_event *event, u64 __user *arg)
-{
- struct perf_event_context *ctx = event->ctx;
- int ret = 0, active;
+struct period_event {
+ struct perf_event *event;
u64 value;
+};
- if (!is_sampling_event(event))
- return -EINVAL;
-
- if (copy_from_user(&value, arg, sizeof(value)))
- return -EFAULT;
-
- if (!value)
- return -EINVAL;
+static int __perf_event_period(void *info)
+{
+ struct period_event *pe = info;
+ struct perf_event *event = pe->event;
+ struct perf_event_context *ctx = event->ctx;
+ u64 value = pe->value;
+ bool active;
- raw_spin_lock_irq(&ctx->lock);
+ raw_spin_lock(&ctx->lock);
if (event->attr.freq) {
- if (value > sysctl_perf_event_sample_rate) {
- ret = -EINVAL;
- goto unlock;
- }
-
event->attr.sample_freq = value;
} else {
event->attr.sample_period = value;
@@ -4016,11 +4009,53 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
event->pmu->start(event, PERF_EF_RELOAD);
perf_pmu_enable(ctx->pmu);
}
+ raw_spin_unlock(&ctx->lock);
-unlock:
+ return 0;
+}
+
+static int perf_event_period(struct perf_event *event, u64 __user *arg)
+{
+ struct period_event pe = { .event = event, };
+ struct perf_event_context *ctx = event->ctx;
+ struct task_struct *task;
+ u64 value;
+
+ if (!is_sampling_event(event))
+ return -EINVAL;
+
+ if (copy_from_user(&value, arg, sizeof(value)))
+ return -EFAULT;
+
+ if (!value)
+ return -EINVAL;
+
+ if (event->attr.freq && value > sysctl_perf_event_sample_rate)
+ return -EINVAL;
+
+ task = ctx->task;
+ pe.value = value;
+
+ if (!task) {
+ cpu_function_call(event->cpu, __perf_event_period, &pe);
+ return 0;
+ }
+
+retry:
+ if (!task_function_call(task, __perf_event_period, &pe))
+ return 0;
+
+ raw_spin_lock_irq(&ctx->lock);
+ if (ctx->is_active) {
+ raw_spin_unlock_irq(&ctx->lock);
+ task = ctx->task;
+ goto retry;
+ }
+
+ __perf_event_period(&pe);
raw_spin_unlock_irq(&ctx->lock);
- return ret;
+ return 0;
}
static const struct file_operations perf_fops;
@@ -4376,14 +4411,6 @@ static void ring_buffer_wakeup(struct perf_event *event)
rcu_read_unlock();
}
-static void rb_free_rcu(struct rcu_head *rcu_head)
-{
- struct ring_buffer *rb;
-
- rb = container_of(rcu_head, struct ring_buffer, rcu_head);
- rb_free(rb);
-}
-
struct ring_buffer *ring_buffer_get(struct perf_event *event)
{
struct ring_buffer *rb;
@@ -4766,12 +4793,20 @@ static const struct file_operations perf_fops = {
* to user-space before waking everybody up.
*/
+static inline struct fasync_struct **perf_event_fasync(struct perf_event *event)
+{
+ /* only the parent has fasync state */
+ if (event->parent)
+ event = event->parent;
+ return &event->fasync;
+}
+
void perf_event_wakeup(struct perf_event *event)
{
ring_buffer_wakeup(event);
if (event->pending_kill) {
- kill_fasync(&event->fasync, SIGIO, event->pending_kill);
+ kill_fasync(perf_event_fasync(event), SIGIO, event->pending_kill);
event->pending_kill = 0;
}
}
@@ -6117,7 +6152,7 @@ static int __perf_event_overflow(struct perf_event *event,
else
perf_event_output(event, data, regs);
- if (event->fasync && event->pending_kill) {
+ if (*perf_event_fasync(event) && event->pending_kill) {
event->pending_wakeup = 1;
irq_work_queue(&event->pending);
}
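
The PERF_EVENT_IOC_PERIOD rework above moves the actual update into __perf_event_period(), executed via cpu_function_call()/task_function_call() on whichever CPU currently owns the event context, with a locked fallback when the context is inactive. The user-space interface is unchanged; a hedged sketch of the ioctl this path serves (helper name invented, error handling trimmed):

#include <linux/perf_event.h>
#include <stdint.h>
#include <sys/ioctl.h>

/* Update the sample period (or frequency, if the event uses attr.freq) of an
 * already-open perf event fd.  The kernel copies the u64 pointed to by the
 * argument, as in copy_from_user() in perf_event_period() above.
 */
static int set_sample_period(int perf_fd, uint64_t period)
{
        return ioctl(perf_fd, PERF_EVENT_IOC_PERIOD, &period);
}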
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 9f6ce9ba4a04..a6adc36a3732 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -11,6 +11,7 @@
struct ring_buffer {
atomic_t refcount;
struct rcu_head rcu_head;
+ struct irq_work irq_work;
#ifdef CONFIG_PERF_USE_VMALLOC
struct work_struct work;
int page_order; /* allocation order */
@@ -55,6 +56,15 @@ struct ring_buffer {
};
extern void rb_free(struct ring_buffer *rb);
+
+static inline void rb_free_rcu(struct rcu_head *rcu_head)
+{
+ struct ring_buffer *rb;
+
+ rb = container_of(rcu_head, struct ring_buffer, rcu_head);
+ rb_free(rb);
+}
+
extern struct ring_buffer *
rb_alloc(int nr_pages, long watermark, int cpu, int flags);
extern void perf_event_wakeup(struct perf_event *event);
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 725c416085e3..7f63ad978cb8 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -221,6 +221,8 @@ void perf_output_end(struct perf_output_handle *handle)
rcu_read_unlock();
}
+static void rb_irq_work(struct irq_work *work);
+
static void
ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
{
@@ -241,6 +243,16 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
INIT_LIST_HEAD(&rb->event_list);
spin_lock_init(&rb->event_lock);
+ init_irq_work(&rb->irq_work, rb_irq_work);
+}
+
+static void ring_buffer_put_async(struct ring_buffer *rb)
+{
+ if (!atomic_dec_and_test(&rb->refcount))
+ return;
+
+ rb->rcu_head.next = (void *)rb;
+ irq_work_queue(&rb->irq_work);
}
/*
@@ -319,7 +331,7 @@ err_put:
rb_free_aux(rb);
err:
- ring_buffer_put(rb);
+ ring_buffer_put_async(rb);
handle->event = NULL;
return NULL;
@@ -370,7 +382,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
local_set(&rb->aux_nest, 0);
rb_free_aux(rb);
- ring_buffer_put(rb);
+ ring_buffer_put_async(rb);
}
/*
@@ -547,17 +559,30 @@ static void __rb_free_aux(struct ring_buffer *rb)
rb->aux_priv = NULL;
}
- for (pg = 0; pg < rb->aux_nr_pages; pg++)
- rb_free_aux_page(rb, pg);
+ if (rb->aux_nr_pages) {
+ for (pg = 0; pg < rb->aux_nr_pages; pg++)
+ rb_free_aux_page(rb, pg);
- kfree(rb->aux_pages);
- rb->aux_nr_pages = 0;
+ kfree(rb->aux_pages);
+ rb->aux_nr_pages = 0;
+ }
}
void rb_free_aux(struct ring_buffer *rb)
{
if (atomic_dec_and_test(&rb->aux_refcount))
+ irq_work_queue(&rb->irq_work);
+}
+
+static void rb_irq_work(struct irq_work *work)
+{
+ struct ring_buffer *rb = container_of(work, struct ring_buffer, irq_work);
+
+ if (!atomic_read(&rb->aux_refcount))
__rb_free_aux(rb);
+
+ if (rb->rcu_head.next == (void *)rb)
+ call_rcu(&rb->rcu_head, rb_free_rcu);
}
#ifndef CONFIG_PERF_USE_VMALLOC
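
The ring-buffer changes defer freeing out of NMI context: the NMI-safe paths only drop a reference and queue rb->irq_work, and the real teardown (AUX pages, RCU-deferred rb_free()) happens later in rb_irq_work(). The underlying pattern, sketched with an invented object type rather than struct ring_buffer:

#include <linux/atomic.h>
#include <linux/irq_work.h>
#include <linux/kernel.h>
#include <linux/slab.h>

struct deferred_obj {                   /* hypothetical */
        atomic_t refcount;
        struct irq_work irq_work;
};

static void deferred_obj_work(struct irq_work *work)
{
        struct deferred_obj *obj = container_of(work, struct deferred_obj, irq_work);

        kfree(obj);                     /* runs from IRQ context, never from NMI */
}

static void deferred_obj_init(struct deferred_obj *obj)
{
        atomic_set(&obj->refcount, 1);
        init_irq_work(&obj->irq_work, deferred_obj_work);
}

/* Safe to call from NMI context: the final put only queues work. */
static void deferred_obj_put(struct deferred_obj *obj)
{
        if (atomic_dec_and_test(&obj->refcount))
                irq_work_queue(&obj->irq_work);
}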
diff --git a/kernel/fork.c b/kernel/fork.c
index dd407cea059a..1b0e656f60e8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1888,13 +1888,21 @@ static int check_unshare_flags(unsigned long unshare_flags)
CLONE_NEWUSER|CLONE_NEWPID))
return -EINVAL;
/*
- * Not implemented, but pretend it works if there is nothing to
- * unshare. Note that unsharing CLONE_THREAD or CLONE_SIGHAND
- * needs to unshare vm.
+ * Not implemented, but pretend it works if there is nothing
+ * to unshare. Note that unsharing the address space or the
+ * signal handlers also need to unshare the signal queues (aka
+ * CLONE_THREAD).
*/
if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) {
- /* FIXME: get_task_mm() increments ->mm_users */
- if (atomic_read(&current->mm->mm_users) > 1)
+ if (!thread_group_empty(current))
+ return -EINVAL;
+ }
+ if (unshare_flags & (CLONE_SIGHAND | CLONE_VM)) {
+ if (atomic_read(&current->sighand->count) > 1)
+ return -EINVAL;
+ }
+ if (unshare_flags & CLONE_VM) {
+ if (!current_is_single_threaded())
return -EINVAL;
}
@@ -1963,16 +1971,16 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
if (unshare_flags & CLONE_NEWUSER)
unshare_flags |= CLONE_THREAD | CLONE_FS;
/*
- * If unsharing a thread from a thread group, must also unshare vm.
- */
- if (unshare_flags & CLONE_THREAD)
- unshare_flags |= CLONE_VM;
- /*
* If unsharing vm, must also unshare signal handlers.
*/
if (unshare_flags & CLONE_VM)
unshare_flags |= CLONE_SIGHAND;
/*
+ * If unsharing signal handlers, must also unshare the signal queues.
+ */
+ if (unshare_flags & CLONE_SIGHAND)
+ unshare_flags |= CLONE_THREAD;
+ /*
* If unsharing namespace, must also unshare filesystem information.
*/
if (unshare_flags & CLONE_NEWNS)
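
The unshare() rework above changes the implication chain: CLONE_VM now implies CLONE_SIGHAND and CLONE_SIGHAND implies CLONE_THREAD, and check_unshare_flags() verifies that the corresponding sharing really is absent instead of inspecting mm_users. A hedged user-space sketch of what the syscall now demands (behaviour as implied by the hunks above):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
        /* Implicitly widened by the kernel to CLONE_VM | CLONE_SIGHAND |
         * CLONE_THREAD, so this succeeds only for a single-threaded caller
         * whose sighand and signal queues are not shared; otherwise -EINVAL.
         */
        if (unshare(CLONE_VM) != 0)
                perror("unshare(CLONE_VM)");
        return 0;
}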
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index eb9a4ea394ab..94bbd8fee90d 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -934,6 +934,23 @@ int irq_chip_set_affinity_parent(struct irq_data *data,
}
/**
+ * irq_chip_set_type_parent - Set IRQ type on the parent interrupt
+ * @data: Pointer to interrupt specific data
+ * @type: IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h
+ *
+ * Conditional, as the underlying parent chip might not implement it.
+ */
+int irq_chip_set_type_parent(struct irq_data *data, unsigned int type)
+{
+ data = data->parent_data;
+
+ if (data->chip->irq_set_type)
+ return data->chip->irq_set_type(data, type);
+
+ return -ENOSYS;
+}
+
+/**
* irq_chip_retrigger_hierarchy - Retrigger an interrupt in hardware
* @data: Pointer to interrupt specific data
*
@@ -946,7 +963,7 @@ int irq_chip_retrigger_hierarchy(struct irq_data *data)
if (data->chip && data->chip->irq_retrigger)
return data->chip->irq_retrigger(data);
- return -ENOSYS;
+ return 0;
}
/**
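
irq_chip_set_type_parent() is intended for stacked (hierarchical) irqchips whose trigger configuration lives in the parent domain, and the retrigger change makes irq_chip_retrigger_hierarchy() return 0 when no parent implements ->irq_retrigger, so the core falls back to software resend instead of treating -ENOSYS as a successful hardware retrigger. A hedged sketch of how a child chip might wire the new callback (examplechip itself is invented):

#include <linux/irq.h>

static struct irq_chip examplechip = {
        .name           = "example",
        .irq_mask       = irq_chip_mask_parent,
        .irq_unmask     = irq_chip_unmask_parent,
        .irq_eoi        = irq_chip_eoi_parent,
        .irq_retrigger  = irq_chip_retrigger_hierarchy,
        /* new helper: forward IRQ_TYPE_* configuration to the parent chip */
        .irq_set_type   = irq_chip_set_type_parent,
};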
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 1b5c50a68e23..79c55c26eaee 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -772,6 +772,12 @@ static irqreturn_t irq_nested_primary_handler(int irq, void *dev_id)
return IRQ_NONE;
}
+static irqreturn_t irq_forced_secondary_handler(int irq, void *dev_id)
+{
+ WARN(1, "Secondary action handler called for irq %d\n", irq);
+ return IRQ_NONE;
+}
+
static int irq_wait_for_interrupt(struct irqaction *action)
{
set_current_state(TASK_INTERRUPTIBLE);
@@ -798,7 +804,8 @@ static int irq_wait_for_interrupt(struct irqaction *action)
static void irq_finalize_oneshot(struct irq_desc *desc,
struct irqaction *action)
{
- if (!(desc->istate & IRQS_ONESHOT))
+ if (!(desc->istate & IRQS_ONESHOT) ||
+ action->handler == irq_forced_secondary_handler)
return;
again:
chip_bus_lock(desc);
@@ -960,6 +967,18 @@ static void irq_thread_dtor(struct callback_head *unused)
irq_finalize_oneshot(desc, action);
}
+static void irq_wake_secondary(struct irq_desc *desc, struct irqaction *action)
+{
+ struct irqaction *secondary = action->secondary;
+
+ if (WARN_ON_ONCE(!secondary))
+ return;
+
+ raw_spin_lock_irq(&desc->lock);
+ __irq_wake_thread(desc, secondary);
+ raw_spin_unlock_irq(&desc->lock);
+}
+
/*
* Interrupt handler thread
*/
@@ -990,6 +1009,8 @@ static int irq_thread(void *data)
action_ret = handler_fn(desc, action);
if (action_ret == IRQ_HANDLED)
atomic_inc(&desc->threads_handled);
+ if (action_ret == IRQ_WAKE_THREAD)
+ irq_wake_secondary(desc, action);
#ifdef CONFIG_PREEMPT_RT_FULL
migrate_disable();
@@ -1040,20 +1061,36 @@ void irq_wake_thread(unsigned int irq, void *dev_id)
}
EXPORT_SYMBOL_GPL(irq_wake_thread);
-static void irq_setup_forced_threading(struct irqaction *new)
+static int irq_setup_forced_threading(struct irqaction *new)
{
if (!force_irqthreads)
- return;
+ return 0;
if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT))
- return;
+ return 0;
new->flags |= IRQF_ONESHOT;
- if (!new->thread_fn) {
- set_bit(IRQTF_FORCED_THREAD, &new->thread_flags);
- new->thread_fn = new->handler;
- new->handler = irq_default_primary_handler;
+ /*
+ * Handle the case where we have a real primary handler and a
+ * thread handler. We force thread them as well by creating a
+ * secondary action.
+ */
+ if (new->handler != irq_default_primary_handler && new->thread_fn) {
+ /* Allocate the secondary action */
+ new->secondary = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
+ if (!new->secondary)
+ return -ENOMEM;
+ new->secondary->handler = irq_forced_secondary_handler;
+ new->secondary->thread_fn = new->thread_fn;
+ new->secondary->dev_id = new->dev_id;
+ new->secondary->irq = new->irq;
+ new->secondary->name = new->name;
}
+ /* Deal with the primary handler */
+ set_bit(IRQTF_FORCED_THREAD, &new->thread_flags);
+ new->thread_fn = new->handler;
+ new->handler = irq_default_primary_handler;
+ return 0;
}
static int irq_request_resources(struct irq_desc *desc)
@@ -1073,6 +1110,48 @@ static void irq_release_resources(struct irq_desc *desc)
c->irq_release_resources(d);
}
+static int
+setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary)
+{
+ struct task_struct *t;
+ struct sched_param param = {
+ .sched_priority = MAX_USER_RT_PRIO/2,
+ };
+
+ if (!secondary) {
+ t = kthread_create(irq_thread, new, "irq/%d-%s", irq,
+ new->name);
+ } else {
+ t = kthread_create(irq_thread, new, "irq/%d-s-%s", irq,
+ new->name);
+ param.sched_priority += 1;
+ }
+
+ if (IS_ERR(t))
+ return PTR_ERR(t);
+
+ sched_setscheduler_nocheck(t, SCHED_FIFO, &param);
+
+ /*
+ * We keep the reference to the task struct even if
+ * the thread dies to avoid that the interrupt code
+ * references an already freed task_struct.
+ */
+ get_task_struct(t);
+ new->thread = t;
+ /*
+ * Tell the thread to set its affinity. This is
+ * important for shared interrupt handlers as we do
+ * not invoke setup_affinity() for the secondary
+ * handlers as everything is already set up. Even for
+ * interrupts marked with IRQF_NO_BALANCE this is
+ * correct as we want the thread to move to the cpu(s)
+ * on which the requesting code placed the interrupt.
+ */
+ set_bit(IRQTF_AFFINITY, &new->thread_flags);
+ return 0;
+}
+
/*
* Internal function to register an irqaction - typically used to
* allocate special interrupts that are part of the architecture.
@@ -1093,6 +1172,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
if (!try_module_get(desc->owner))
return -ENODEV;
+ new->irq = irq;
+
/*
* Check whether the interrupt nests into another interrupt
* thread.
@@ -1110,8 +1191,11 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
*/
new->handler = irq_nested_primary_handler;
} else {
- if (irq_settings_can_thread(desc))
- irq_setup_forced_threading(new);
+ if (irq_settings_can_thread(desc)) {
+ ret = irq_setup_forced_threading(new);
+ if (ret)
+ goto out_mput;
+ }
}
/*
@@ -1120,37 +1204,14 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
* thread.
*/
if (new->thread_fn && !nested) {
- struct task_struct *t;
- static const struct sched_param param = {
- .sched_priority = MAX_USER_RT_PRIO/2,
- };
-
- t = kthread_create(irq_thread, new, "irq/%d-%s", irq,
- new->name);
- if (IS_ERR(t)) {
- ret = PTR_ERR(t);
+ ret = setup_irq_thread(new, irq, false);
+ if (ret)
goto out_mput;
+ if (new->secondary) {
+ ret = setup_irq_thread(new->secondary, irq, true);
+ if (ret)
+ goto out_thread;
}
-
- sched_setscheduler_nocheck(t, SCHED_FIFO, &param);
-
- /*
- * We keep the reference to the task struct even if
- * the thread dies to avoid that the interrupt code
- * references an already freed task_struct.
- */
- get_task_struct(t);
- new->thread = t;
- /*
- * Tell the thread to set its affinity. This is
- * important for shared interrupt handlers as we do
- * not invoke setup_affinity() for the secondary
- * handlers as everything is already set up. Even for
- * interrupts marked with IRQF_NO_BALANCE this is
- * correct as we want the thread to move to the cpu(s)
- * on which the requesting code placed the interrupt.
- */
- set_bit(IRQTF_AFFINITY, &new->thread_flags);
}
if (!alloc_cpumask_var(&mask, GFP_KERNEL)) {
@@ -1326,7 +1387,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
irq, nmsk, omsk);
}
- new->irq = irq;
*old_ptr = new;
irq_pm_install_action(desc, new);
@@ -1352,6 +1412,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
*/
if (new->thread)
wake_up_process(new->thread);
+ if (new->secondary)
+ wake_up_process(new->secondary->thread);
register_irq_proc(irq, desc);
new->dir = NULL;
@@ -1382,6 +1444,13 @@ out_thread:
kthread_stop(t);
put_task_struct(t);
}
+ if (new->secondary && new->secondary->thread) {
+ struct task_struct *t = new->secondary->thread;
+
+ new->secondary->thread = NULL;
+ kthread_stop(t);
+ put_task_struct(t);
+ }
out_mput:
module_put(desc->owner);
return ret;
@@ -1489,9 +1558,14 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
if (action->thread) {
kthread_stop(action->thread);
put_task_struct(action->thread);
+ if (action->secondary && action->secondary->thread) {
+ kthread_stop(action->secondary->thread);
+ put_task_struct(action->secondary->thread);
+ }
}
module_put(desc->owner);
+ kfree(action->secondary);
return action;
}
@@ -1635,8 +1709,10 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
retval = __setup_irq(irq, desc, action);
chip_bus_sync_unlock(desc);
- if (retval)
+ if (retval) {
+ kfree(action->secondary);
kfree(action);
+ }
#ifdef CONFIG_DEBUG_SHIRQ_FIXME
if (!retval && (irqflags & IRQF_SHARED)) {
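
With the manage.c changes, forced interrupt threading (the threadirqs option, or PREEMPT_RT) no longer skips actions that already supply both a hard-IRQ handler and a thread function: the primary handler is moved into the main IRQ thread, and a kzalloc'd secondary irqaction carries the original thread_fn, woken when the threaded primary returns IRQ_WAKE_THREAD. A hedged sketch of a driver shape that now gets two threads under forced threading (the example_* names are invented):

#include <linux/interrupt.h>

static irqreturn_t example_hardirq(int irq, void *dev_id)
{
        /* Normally runs in hard-IRQ context: ack the device, decide whether
         * the long-running work below is needed.
         */
        return IRQ_WAKE_THREAD;
}

static irqreturn_t example_thread_fn(int irq, void *dev_id)
{
        /* May sleep; under forced threading this runs in the secondary thread. */
        return IRQ_HANDLED;
}

static int example_setup(unsigned int irq, void *dev)
{
        return request_threaded_irq(irq, example_hardirq, example_thread_fn,
                                    IRQF_SHARED, "example", dev);
}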
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index df2f4642d1e7..5c38f59741e2 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -12,6 +12,7 @@
#include <linux/seq_file.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
+#include <linux/mutex.h>
#include "internals.h"
@@ -323,18 +324,29 @@ void register_handler_proc(unsigned int irq, struct irqaction *action)
void register_irq_proc(unsigned int irq, struct irq_desc *desc)
{
+ static DEFINE_MUTEX(register_lock);
char name [MAX_NAMELEN];
- if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip) || desc->dir)
+ if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip))
return;
+ /*
+ * irq directories are registered only when a handler is
+ * added, not when the descriptor is created, so multiple
+ * tasks might try to register at the same time.
+ */
+ mutex_lock(&register_lock);
+
+ if (desc->dir)
+ goto out_unlock;
+
memset(name, 0, MAX_NAMELEN);
sprintf(name, "%d", irq);
/* create /proc/irq/1234 */
desc->dir = proc_mkdir(name, root_irq_dir);
if (!desc->dir)
- return;
+ goto out_unlock;
#ifdef CONFIG_SMP
/* create /proc/irq/<irq>/smp_affinity */
@@ -355,6 +367,9 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
proc_create_data("spurious", 0444, desc->dir,
&irq_spurious_proc_fops, (void *)(long)irq);
+
+out_unlock:
+ mutex_unlock(&register_lock);
}
void unregister_irq_proc(unsigned int irq, struct irq_desc *desc)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 392aad4ec3d6..d21091f2fd1f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1558,9 +1558,9 @@ static void
ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
{
check_preempt_curr(rq, p, wake_flags);
- trace_sched_wakeup(p, true);
-
p->state = TASK_RUNNING;
+ trace_sched_wakeup(p);
+
#ifdef CONFIG_SMP
if (p->sched_class->task_woken)
p->sched_class->task_woken(rq, p);
@@ -1784,6 +1784,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
if (!(wake_flags & WF_LOCK_SLEEPER))
p->saved_state = TASK_RUNNING;
+ trace_sched_waking(p);
+
success = 1; /* we're going to change ->state */
cpu = task_cpu(p);
@@ -2188,7 +2190,7 @@ void wake_up_new_task(struct task_struct *p)
rq = __task_rq_lock(p);
activate_task(rq, p, 0);
p->on_rq = TASK_ON_RQ_QUEUED;
- trace_sched_wakeup_new(p, true);
+ trace_sched_wakeup_new(p);
check_preempt_curr(rq, p, WF_FORK);
#ifdef CONFIG_SMP
if (p->sched_class->task_woken)
@@ -2311,11 +2313,11 @@ static struct rq *finish_task_switch(struct task_struct *prev)
* If a task dies, then it sets TASK_DEAD in tsk->state and calls
* schedule one last time. The schedule call will never return, and
* the scheduled task must drop that reference.
- * The test for TASK_DEAD must occur while the runqueue locks are
- * still held, otherwise prev could be scheduled on another cpu, die
- * there before we look at prev->state, and then the reference would
- * be dropped twice.
- * Manfred Spraul <manfred@colorfullife.com>
+ *
+ * We must observe prev->state before clearing prev->on_cpu (in
+ * finish_lock_switch), otherwise a concurrent wakeup can get prev
+ * running on another CPU and we could race with its RUNNING -> DEAD
+ * transition, resulting in a double drop.
*/
prev_state = prev->state;
vtime_task_switch(prev);
@@ -2456,13 +2458,20 @@ unsigned long nr_running(void)
/*
* Check if only the current task is running on the cpu.
+ *
+ * Caution: this function does not check that the caller has disabled
+ * preemption, thus the result might have a time-of-check-to-time-of-use
+ * race. The caller is responsible to use it correctly, for example:
+ *
+ * - from a non-preemptable section (of course)
+ *
+ * - from a thread that is bound to a single CPU
+ *
+ * - in a loop with very short iterations (e.g. a polling loop)
*/
bool single_task_running(void)
{
- if (cpu_rq(smp_processor_id())->nr_running == 1)
- return true;
- else
- return false;
+ return raw_rq()->nr_running == 1;
}
EXPORT_SYMBOL(single_task_running);
@@ -4475,7 +4484,7 @@ SYSCALL_DEFINE0(sched_yield)
int __sched _cond_resched(void)
{
- if (should_resched()) {
+ if (should_resched(0)) {
preempt_schedule_common();
return 1;
}
@@ -4493,7 +4502,7 @@ EXPORT_SYMBOL(_cond_resched);
*/
int __cond_resched_lock(spinlock_t *lock)
{
- int resched = should_resched();
+ int resched = should_resched(PREEMPT_LOCK_OFFSET);
int ret = 0;
lockdep_assert_held(lock);
@@ -4516,7 +4525,7 @@ int __sched __cond_resched_softirq(void)
{
BUG_ON(!in_softirq());
- if (should_resched()) {
+ if (should_resched(SOFTIRQ_DISABLE_OFFSET)) {
local_bh_enable();
preempt_schedule_common();
local_bh_disable();
@@ -5672,6 +5681,14 @@ static int sched_cpu_active(struct notifier_block *nfb,
case CPU_STARTING:
set_cpu_rq_start_time();
return NOTIFY_OK;
+ case CPU_ONLINE:
+ /*
+ * At this point a starting CPU has marked itself as online via
+ * set_cpu_online(). But it might not yet have marked itself
+ * as active, which is essential from here on.
+ *
+ * Thus, fall-through and help the starting CPU along.
+ */
case CPU_DOWN_FAILED:
set_cpu_active((long)hcpu, true);
return NOTIFY_OK;
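
single_task_running() now reads raw_rq(), so it no longer trips the smp_processor_id() preemption check, but as the new comment stresses the answer may already be stale by the time the caller uses it unless preemption or CPU placement is constrained. A minimal sketch of the non-preemptible usage the comment describes (helper name invented):

#include <linux/preempt.h>
#include <linux/sched.h>

static bool example_cpu_quiet(void)
{
        bool only_me;

        preempt_disable();
        only_me = single_task_running();        /* valid for this CPU, right now */
        preempt_enable();

        return only_me;
}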
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 92c53d074f41..7aae8d27611e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5126,18 +5126,21 @@ again:
* entity, update_curr() will update its vruntime, otherwise
* forget we've ever seen it.
*/
- if (curr && curr->on_rq)
- update_curr(cfs_rq);
- else
- curr = NULL;
+ if (curr) {
+ if (curr->on_rq)
+ update_curr(cfs_rq);
+ else
+ curr = NULL;
- /*
- * This call to check_cfs_rq_runtime() will do the throttle and
- * dequeue its entity in the parent(s). Therefore the 'simple'
- * nr_running test will indeed be correct.
- */
- if (unlikely(check_cfs_rq_runtime(cfs_rq)))
- goto simple;
+ /*
+ * This call to check_cfs_rq_runtime() will do the
+ * throttle and dequeue its entity in the parent(s).
+ * Therefore the 'simple' nr_running test will indeed
+ * be correct.
+ */
+ if (unlikely(check_cfs_rq_runtime(cfs_rq)))
+ goto simple;
+ }
se = pick_next_entity(cfs_rq, curr);
cfs_rq = group_cfs_rq(se);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 308f664541a5..1bdd1e5f056d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1068,9 +1068,10 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
* After ->on_cpu is cleared, the task can be moved to a different CPU.
* We must ensure this doesn't happen until the switch is completely
* finished.
+ *
+ * Pairs with the control dependency and rmb in try_to_wake_up().
*/
- smp_wmb();
- prev->on_cpu = 0;
+ smp_store_release(&prev->on_cpu, 0);
#endif
#ifdef CONFIG_DEBUG_SPINLOCK
/* this is a valid case when another task releases the spinlock */
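
Replacing smp_wmb() plus a plain store with smp_store_release() gives the ->on_cpu clear release semantics, pairing with the control dependency and rmb on the try_to_wake_up() side as the new comment notes. The generic shape of such a pairing, sketched with an invented structure rather than task_struct:

#include <asm/barrier.h>        /* smp_store_release(), smp_load_acquire() */
#include <asm/processor.h>      /* cpu_relax() */

struct example_owner {          /* hypothetical stand-in for task_struct */
        int state;
        int on_cpu;
};

/* Writer: every store before the release, e.g. ->state, is visible to a
 * reader that observes on_cpu == 0 with acquire semantics.
 */
static void example_publish_switch_out(struct example_owner *prev, int new_state)
{
        prev->state = new_state;
        smp_store_release(&prev->on_cpu, 0);
}

/* Reader: once the acquire load returns 0, the writer's earlier stores are
 * guaranteed to be visible here.
 */
static void example_wait_until_off_cpu(struct example_owner *p)
{
        while (smp_load_acquire(&p->on_cpu))
                cpu_relax();
}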
diff --git a/kernel/sched/work-simple.c b/kernel/sched/work-simple.c
index c996f755dba6..e57a0522573f 100644
--- a/kernel/sched/work-simple.c
+++ b/kernel/sched/work-simple.c
@@ -10,6 +10,7 @@
#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/export.h>
#define SWORK_EVENT_PENDING (1 << 0)
diff --git a/kernel/signal.c b/kernel/signal.c
index 7dc2ebee3c68..1336e4c016ba 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2854,12 +2854,15 @@ int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from)
* Other callers might not initialize the si_lsb field,
* so check explicitly for the right codes here.
*/
- if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO)
+ if (from->si_signo == SIGBUS &&
+ (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO))
err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb);
#endif
#ifdef SEGV_BNDERR
- err |= __put_user(from->si_lower, &to->si_lower);
- err |= __put_user(from->si_upper, &to->si_upper);
+ if (from->si_signo == SIGSEGV && from->si_code == SEGV_BNDERR) {
+ err |= __put_user(from->si_lower, &to->si_lower);
+ err |= __put_user(from->si_upper, &to->si_upper);
+ }
#endif
break;
case __SI_CHLD:
@@ -3123,7 +3126,7 @@ COMPAT_SYSCALL_DEFINE3(rt_sigqueueinfo,
int, sig,
struct compat_siginfo __user *, uinfo)
{
- siginfo_t info;
+ siginfo_t info = {};
int ret = copy_siginfo_from_user32(&info, uinfo);
if (unlikely(ret))
return ret;
@@ -3167,7 +3170,7 @@ COMPAT_SYSCALL_DEFINE4(rt_tgsigqueueinfo,
int, sig,
struct compat_siginfo __user *, uinfo)
{
- siginfo_t info;
+ siginfo_t info = {};
if (copy_siginfo_from_user32(&info, uinfo))
return -EFAULT;
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 49baf81848d6..0fd93311536f 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -554,26 +554,14 @@ static void do_current_softirqs(void)
}
}
-static void __local_bh_disable(void)
+void __local_bh_disable(void)
{
if (++current->softirq_nestcnt == 1)
migrate_disable();
}
+EXPORT_SYMBOL(__local_bh_disable);
-void local_bh_disable(void)
-{
- __local_bh_disable();
-}
-EXPORT_SYMBOL(local_bh_disable);
-
-void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
-{
- __local_bh_disable();
- if (cnt & PREEMPT_CHECK_OFFSET)
- preempt_disable();
-}
-
-static void __local_bh_enable(void)
+void __local_bh_enable(void)
{
if (WARN_ON(current->softirq_nestcnt == 0))
return;
@@ -586,25 +574,7 @@ static void __local_bh_enable(void)
if (--current->softirq_nestcnt == 0)
migrate_enable();
}
-
-void local_bh_enable(void)
-{
- __local_bh_enable();
-}
-EXPORT_SYMBOL(local_bh_enable);
-
-extern void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
-{
- __local_bh_enable();
- if (cnt & PREEMPT_CHECK_OFFSET)
- preempt_enable();
-}
-
-void local_bh_enable_ip(unsigned long ip)
-{
- local_bh_enable();
-}
-EXPORT_SYMBOL(local_bh_enable_ip);
+EXPORT_SYMBOL(__local_bh_enable);
void _local_bh_enable(void)
{
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 17d4a8933536..7df1e0028312 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1615,7 +1615,7 @@ static __always_inline void timekeeping_freqadjust(struct timekeeper *tk,
negative = (tick_error < 0);
/* Sort out the magnitude of the correction */
- tick_error = abs(tick_error);
+ tick_error = abs64(tick_error);
for (adj = 0; tick_error > interval; adj++)
tick_error >>= 1;
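
The timekeeping fix matters on 32-bit builds: tick_error is s64, and in this kernel abs() evaluates its argument at int/long width, so a large error loses its high bits before the magnitude is taken, while abs64() keeps the full 64-bit value. A small hedged illustration (the value is invented; the cast mimics the truncation the old code suffered on 32-bit):

#include <linux/kernel.h>       /* abs(), abs64() */

static void example_abs64(void)
{
        s64 tick_error = -0x100000001LL;        /* hypothetical large error */

        /* Truncated view, as the old abs() effectively saw on 32-bit:
         * low word 0xffffffff == -1, so the "magnitude" collapses to 1.
         */
        long lossy = abs((int)tick_error);

        /* abs64() preserves the full value: 0x100000001. */
        s64 exact = abs64(tick_error);

        (void)lossy;
        (void)exact;
}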
diff --git a/kernel/trace/latency_hist.c b/kernel/trace/latency_hist.c
index 66a69eb5329c..b6c1d14b71c4 100644
--- a/kernel/trace/latency_hist.c
+++ b/kernel/trace/latency_hist.c
@@ -115,7 +115,7 @@ static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist_sharedprio);
static char *wakeup_latency_hist_dir = "wakeup";
static char *wakeup_latency_hist_dir_sharedprio = "sharedprio";
static notrace void probe_wakeup_latency_hist_start(void *v,
- struct task_struct *p, int success);
+ struct task_struct *p);
static notrace void probe_wakeup_latency_hist_stop(void *v,
struct task_struct *prev, struct task_struct *next);
static notrace void probe_sched_migrate_task(void *,
@@ -869,7 +869,7 @@ static notrace void probe_sched_migrate_task(void *v, struct task_struct *task,
}
static notrace void probe_wakeup_latency_hist_start(void *v,
- struct task_struct *p, int success)
+ struct task_struct *p)
{
unsigned long flags;
struct task_struct *curr = current;
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index aaade2efc560..d0e1d0e48640 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -450,7 +450,7 @@ EXPORT_SYMBOL_GPL(stop_critical_timings);
#ifdef CONFIG_PROVE_LOCKING
void time_hardirqs_on(unsigned long a0, unsigned long a1)
{
- trace_preemptirqsoff_hist(IRQS_ON, 0);
+ trace_preemptirqsoff_hist_rcuidle(IRQS_ON, 0);
if (!preempt_trace() && irq_trace())
stop_critical_timing(a0, a1);
}
@@ -459,7 +459,7 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1)
{
if (!preempt_trace() && irq_trace())
start_critical_timing(a0, a1);
- trace_preemptirqsoff_hist(IRQS_OFF, 1);
+ trace_preemptirqsoff_hist_rcuidle(IRQS_OFF, 1);
}
#else /* !CONFIG_PROVE_LOCKING */
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 419ca37e72c9..f270088e9929 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -26,7 +26,7 @@ probe_sched_switch(void *ignore, struct task_struct *prev, struct task_struct *n
}
static void
-probe_sched_wakeup(void *ignore, struct task_struct *wakee, int success)
+probe_sched_wakeup(void *ignore, struct task_struct *wakee)
{
if (unlikely(!sched_ref))
return;
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index d6e1003724e9..79a2a5f7fc82 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -514,7 +514,7 @@ static void wakeup_reset(struct trace_array *tr)
}
static void
-probe_wakeup(void *ignore, struct task_struct *p, int success)
+probe_wakeup(void *ignore, struct task_struct *p)
{
struct trace_array_cpu *data;
int cpu = smp_processor_id();
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 21daecdfd86d..a523ca75df24 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1480,13 +1480,13 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
timer_stats_timer_set_start_info(&dwork->timer);
dwork->wq = wq;
+ /* timer isn't guaranteed to run in this cpu, record earlier */
+ if (cpu == WORK_CPU_UNBOUND)
+ cpu = raw_smp_processor_id();
dwork->cpu = cpu;
timer->expires = jiffies + delay;
- if (unlikely(cpu != WORK_CPU_UNBOUND))
- add_timer_on(timer, cpu);
- else
- add_timer(timer);
+ add_timer_on(timer, cpu);
}
/**
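
__queue_delayed_work() now resolves WORK_CPU_UNBOUND to the queueing CPU before arming the timer and always uses add_timer_on(), so dwork->cpu is a concrete CPU even if the timer itself fires elsewhere. Callers are unaffected; a minimal sketch of queueing delayed work explicitly on the local CPU (names invented, INIT_DELAYED_WORK() assumed to have run elsewhere):

#include <linux/jiffies.h>
#include <linux/smp.h>
#include <linux/workqueue.h>

static struct delayed_work example_dwork;       /* hypothetical */

static void example_arm(struct workqueue_struct *wq)
{
        /* Runs example_dwork's handler on this CPU's pool ~10ms from now. */
        queue_delayed_work_on(raw_smp_processor_id(), wq, &example_dwork,
                              msecs_to_jiffies(10));
}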