aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2010-04-02 19:37:50 +0200
committerIngo Molnar <mingo@elte.hu>2010-04-02 19:38:10 +0200
commitec5e61aabeac58670691bd0613388d16697d0d81 (patch)
tree59838509358f27334874b90756505785cde29b02 /kernel
parent75ec5a245c7763c397f31ec8964d0a46c54a7386 (diff)
parent8bb39f9aa068262732fe44b965d7a6eb5a5a7d67 (diff)
Merge branch 'perf/urgent' into perf/core
Conflicts: arch/x86/kernel/cpu/perf_event.c Merge reason: Resolve the conflict, pick up fixes Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup.c1
-rw-r--r--kernel/cpuset.c106
-rw-r--r--kernel/cred.c6
-rw-r--r--kernel/early_res.c6
-rw-r--r--kernel/irq/chip.c35
-rw-r--r--kernel/irq/manage.c22
-rw-r--r--kernel/kthread.c2
-rw-r--r--kernel/perf_event.c19
-rw-r--r--kernel/posix-cpu-timers.c10
-rw-r--r--kernel/rcupdate.c23
-rw-r--r--kernel/resource.c44
-rw-r--r--kernel/sched.c12
-rw-r--r--kernel/slow-work.c2
-rw-r--r--kernel/slow-work.h8
-rw-r--r--kernel/softlockup.c4
-rw-r--r--kernel/time/tick-oneshot.c52
-rw-r--r--kernel/time/timekeeping.c3
-rw-r--r--kernel/time/timer_list.c3
-rw-r--r--kernel/timer.c1
-rw-r--r--kernel/trace/ring_buffer.c14
-rw-r--r--kernel/trace/trace_event_perf.c11
21 files changed, 292 insertions, 92 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index ef909a32975..e2769e13980 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -27,7 +27,6 @@
*/
#include <linux/cgroup.h>
-#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/errno.h>
#include <linux/fs.h>
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index ba401fab459..d10946748ec 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -920,9 +920,6 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
* call to guarantee_online_mems(), as we know no one is changing
* our task's cpuset.
*
- * Hold callback_mutex around the two modifications of our tasks
- * mems_allowed to synchronize with cpuset_mems_allowed().
- *
* While the mm_struct we are migrating is typically from some
* other task, the task_struct mems_allowed that we are hacking
* is for our current task, which must allocate new pages for that
@@ -973,15 +970,20 @@ static void cpuset_change_nodemask(struct task_struct *p,
struct cpuset *cs;
int migrate;
const nodemask_t *oldmem = scan->data;
- nodemask_t newmems;
+ NODEMASK_ALLOC(nodemask_t, newmems, GFP_KERNEL);
+
+ if (!newmems)
+ return;
cs = cgroup_cs(scan->cg);
- guarantee_online_mems(cs, &newmems);
+ guarantee_online_mems(cs, newmems);
task_lock(p);
- cpuset_change_task_nodemask(p, &newmems);
+ cpuset_change_task_nodemask(p, newmems);
task_unlock(p);
+ NODEMASK_FREE(newmems);
+
mm = get_task_mm(p);
if (!mm)
return;
@@ -1051,16 +1053,21 @@ static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem,
static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
const char *buf)
{
- nodemask_t oldmem;
+ NODEMASK_ALLOC(nodemask_t, oldmem, GFP_KERNEL);
int retval;
struct ptr_heap heap;
+ if (!oldmem)
+ return -ENOMEM;
+
/*
* top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY];
* it's read-only
*/
- if (cs == &top_cpuset)
- return -EACCES;
+ if (cs == &top_cpuset) {
+ retval = -EACCES;
+ goto done;
+ }
/*
* An empty mems_allowed is ok iff there are no tasks in the cpuset.
@@ -1076,11 +1083,13 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
goto done;
if (!nodes_subset(trialcs->mems_allowed,
- node_states[N_HIGH_MEMORY]))
- return -EINVAL;
+ node_states[N_HIGH_MEMORY])) {
+ retval = -EINVAL;
+ goto done;
+ }
}
- oldmem = cs->mems_allowed;
- if (nodes_equal(oldmem, trialcs->mems_allowed)) {
+ *oldmem = cs->mems_allowed;
+ if (nodes_equal(*oldmem, trialcs->mems_allowed)) {
retval = 0; /* Too easy - nothing to do */
goto done;
}
@@ -1096,10 +1105,11 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
cs->mems_allowed = trialcs->mems_allowed;
mutex_unlock(&callback_mutex);
- update_tasks_nodemask(cs, &oldmem, &heap);
+ update_tasks_nodemask(cs, oldmem, &heap);
heap_free(&heap);
done:
+ NODEMASK_FREE(oldmem);
return retval;
}
@@ -1384,40 +1394,47 @@ static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
struct cgroup *oldcont, struct task_struct *tsk,
bool threadgroup)
{
- nodemask_t from, to;
struct mm_struct *mm;
struct cpuset *cs = cgroup_cs(cont);
struct cpuset *oldcs = cgroup_cs(oldcont);
+ NODEMASK_ALLOC(nodemask_t, from, GFP_KERNEL);
+ NODEMASK_ALLOC(nodemask_t, to, GFP_KERNEL);
+
+ if (from == NULL || to == NULL)
+ goto alloc_fail;
if (cs == &top_cpuset) {
cpumask_copy(cpus_attach, cpu_possible_mask);
- to = node_possible_map;
} else {
guarantee_online_cpus(cs, cpus_attach);
- guarantee_online_mems(cs, &to);
}
+ guarantee_online_mems(cs, to);
/* do per-task migration stuff possibly for each in the threadgroup */
- cpuset_attach_task(tsk, &to, cs);
+ cpuset_attach_task(tsk, to, cs);
if (threadgroup) {
struct task_struct *c;
rcu_read_lock();
list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
- cpuset_attach_task(c, &to, cs);
+ cpuset_attach_task(c, to, cs);
}
rcu_read_unlock();
}
/* change mm; only needs to be done once even if threadgroup */
- from = oldcs->mems_allowed;
- to = cs->mems_allowed;
+ *from = oldcs->mems_allowed;
+ *to = cs->mems_allowed;
mm = get_task_mm(tsk);
if (mm) {
- mpol_rebind_mm(mm, &to);
+ mpol_rebind_mm(mm, to);
if (is_memory_migrate(cs))
- cpuset_migrate_mm(mm, &from, &to);
+ cpuset_migrate_mm(mm, from, to);
mmput(mm);
}
+
+alloc_fail:
+ NODEMASK_FREE(from);
+ NODEMASK_FREE(to);
}
/* The various types of files and directories in a cpuset file system */
@@ -1562,13 +1579,21 @@ static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
{
- nodemask_t mask;
+ NODEMASK_ALLOC(nodemask_t, mask, GFP_KERNEL);
+ int retval;
+
+ if (mask == NULL)
+ return -ENOMEM;
mutex_lock(&callback_mutex);
- mask = cs->mems_allowed;
+ *mask = cs->mems_allowed;
mutex_unlock(&callback_mutex);
- return nodelist_scnprintf(page, PAGE_SIZE, mask);
+ retval = nodelist_scnprintf(page, PAGE_SIZE, *mask);
+
+ NODEMASK_FREE(mask);
+
+ return retval;
}
static ssize_t cpuset_common_file_read(struct cgroup *cont,
@@ -1997,7 +2022,10 @@ static void scan_for_empty_cpusets(struct cpuset *root)
struct cpuset *cp; /* scans cpusets being updated */
struct cpuset *child; /* scans child cpusets of cp */
struct cgroup *cont;
- nodemask_t oldmems;
+ NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL);
+
+ if (oldmems == NULL)
+ return;
list_add_tail((struct list_head *)&root->stack_list, &queue);
@@ -2014,7 +2042,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
continue;
- oldmems = cp->mems_allowed;
+ *oldmems = cp->mems_allowed;
/* Remove offline cpus and mems from this cpuset. */
mutex_lock(&callback_mutex);
@@ -2030,9 +2058,10 @@ static void scan_for_empty_cpusets(struct cpuset *root)
remove_tasks_in_empty_cpuset(cp);
else {
update_tasks_cpumask(cp, NULL);
- update_tasks_nodemask(cp, &oldmems, NULL);
+ update_tasks_nodemask(cp, oldmems, NULL);
}
}
+ NODEMASK_FREE(oldmems);
}
/*
@@ -2090,20 +2119,33 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
static int cpuset_track_online_nodes(struct notifier_block *self,
unsigned long action, void *arg)
{
+ NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL);
+
+ if (oldmems == NULL)
+ return NOTIFY_DONE;
+
cgroup_lock();
switch (action) {
case MEM_ONLINE:
- case MEM_OFFLINE:
+ *oldmems = top_cpuset.mems_allowed;
mutex_lock(&callback_mutex);
top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
mutex_unlock(&callback_mutex);
- if (action == MEM_OFFLINE)
- scan_for_empty_cpusets(&top_cpuset);
+ update_tasks_nodemask(&top_cpuset, oldmems, NULL);
+ break;
+ case MEM_OFFLINE:
+ /*
+ * needn't update top_cpuset.mems_allowed explicitly because
+ * scan_for_empty_cpusets() will update it.
+ */
+ scan_for_empty_cpusets(&top_cpuset);
break;
default:
break;
}
cgroup_unlock();
+
+ NODEMASK_FREE(oldmems);
return NOTIFY_OK;
}
#endif
diff --git a/kernel/cred.c b/kernel/cred.c
index 1ed8ca18790..1b1129d0cce 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -364,7 +364,7 @@ struct cred *prepare_usermodehelper_creds(void)
new = kmem_cache_alloc(cred_jar, GFP_ATOMIC);
if (!new)
- return NULL;
+ goto free_tgcred;
kdebug("prepare_usermodehelper_creds() alloc %p", new);
@@ -397,6 +397,10 @@ struct cred *prepare_usermodehelper_creds(void)
error:
put_cred(new);
+free_tgcred:
+#ifdef CONFIG_KEYS
+ kfree(tgcred);
+#endif
return NULL;
}
diff --git a/kernel/early_res.c b/kernel/early_res.c
index 3cb2c661bb7..31aa9332ef3 100644
--- a/kernel/early_res.c
+++ b/kernel/early_res.c
@@ -333,6 +333,12 @@ void __init free_early_partial(u64 start, u64 end)
struct early_res *r;
int i;
+ if (start == end)
+ return;
+
+ if (WARN_ONCE(start > end, " wrong range [%#llx, %#llx]\n", start, end))
+ return;
+
try_next:
i = find_overlapped_early(start, end);
if (i >= max_early_res)
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 42ec11b2af8..b7091d5ca2f 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -359,6 +359,23 @@ static inline void mask_ack_irq(struct irq_desc *desc, int irq)
if (desc->chip->ack)
desc->chip->ack(irq);
}
+ desc->status |= IRQ_MASKED;
+}
+
+static inline void mask_irq(struct irq_desc *desc, int irq)
+{
+ if (desc->chip->mask) {
+ desc->chip->mask(irq);
+ desc->status |= IRQ_MASKED;
+ }
+}
+
+static inline void unmask_irq(struct irq_desc *desc, int irq)
+{
+ if (desc->chip->unmask) {
+ desc->chip->unmask(irq);
+ desc->status &= ~IRQ_MASKED;
+ }
}
/*
@@ -484,10 +501,8 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
raw_spin_lock(&desc->lock);
desc->status &= ~IRQ_INPROGRESS;
- if (unlikely(desc->status & IRQ_ONESHOT))
- desc->status |= IRQ_MASKED;
- else if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask)
- desc->chip->unmask(irq);
+ if (!(desc->status & (IRQ_DISABLED | IRQ_ONESHOT)))
+ unmask_irq(desc, irq);
out_unlock:
raw_spin_unlock(&desc->lock);
}
@@ -524,8 +539,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
action = desc->action;
if (unlikely(!action || (desc->status & IRQ_DISABLED))) {
desc->status |= IRQ_PENDING;
- if (desc->chip->mask)
- desc->chip->mask(irq);
+ mask_irq(desc, irq);
goto out;
}
@@ -593,7 +607,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
irqreturn_t action_ret;
if (unlikely(!action)) {
- desc->chip->mask(irq);
+ mask_irq(desc, irq);
goto out_unlock;
}
@@ -605,8 +619,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
if (unlikely((desc->status &
(IRQ_PENDING | IRQ_MASKED | IRQ_DISABLED)) ==
(IRQ_PENDING | IRQ_MASKED))) {
- desc->chip->unmask(irq);
- desc->status &= ~IRQ_MASKED;
+ unmask_irq(desc, irq);
}
desc->status &= ~IRQ_PENDING;
@@ -716,7 +729,7 @@ set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
__set_irq_handler(irq, handle, 0, name);
}
-void __init set_irq_noprobe(unsigned int irq)
+void set_irq_noprobe(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
@@ -731,7 +744,7 @@ void __init set_irq_noprobe(unsigned int irq)
raw_spin_unlock_irqrestore(&desc->lock, flags);
}
-void __init set_irq_probe(unsigned int irq)
+void set_irq_probe(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index eb6078ca60c..398fda155f6 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -382,6 +382,7 @@ int can_request_irq(unsigned int irq, unsigned long irqflags)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irqaction *action;
+ unsigned long flags;
if (!desc)
return 0;
@@ -389,11 +390,14 @@ int can_request_irq(unsigned int irq, unsigned long irqflags)
if (desc->status & IRQ_NOREQUEST)
return 0;
+ raw_spin_lock_irqsave(&desc->lock, flags);
action = desc->action;
if (action)
if (irqflags & action->flags & IRQF_SHARED)
action = NULL;
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+
return !action;
}
@@ -483,8 +487,26 @@ static int irq_wait_for_interrupt(struct irqaction *action)
*/
static void irq_finalize_oneshot(unsigned int irq, struct irq_desc *desc)
{
+again:
chip_bus_lock(irq, desc);
raw_spin_lock_irq(&desc->lock);
+
+ /*
+ * Implausible though it may be we need to protect us against
+ * the following scenario:
+ *
+ * The thread is faster done than the hard interrupt handler
+ * on the other CPU. If we unmask the irq line then the
+ * interrupt can come in again and masks the line, leaves due
+ * to IRQ_INPROGRESS and the irq line is masked forever.
+ */
+ if (unlikely(desc->status & IRQ_INPROGRESS)) {
+ raw_spin_unlock_irq(&desc->lock);
+ chip_bus_sync_unlock(irq, desc);
+ cpu_relax();
+ goto again;
+ }
+
if (!(desc->status & IRQ_DISABLED) && (desc->status & IRQ_MASKED)) {
desc->status &= ~IRQ_MASKED;
desc->chip->unmask(irq);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 82ed0ea1519..83911c78017 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -219,7 +219,7 @@ int kthreadd(void *unused)
set_task_comm(tsk, "kthreadd");
ignore_signals(tsk);
set_cpus_allowed_ptr(tsk, cpu_all_mask);
- set_mems_allowed(node_possible_map);
+ set_mems_allowed(node_states[N_HIGH_MEMORY]);
current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 455393e71ca..074ba2e036c 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1164,11 +1164,9 @@ void perf_event_task_sched_out(struct task_struct *task,
struct perf_event_context *ctx = task->perf_event_ctxp;
struct perf_event_context *next_ctx;
struct perf_event_context *parent;
- struct pt_regs *regs;
int do_switch = 1;
- regs = task_pt_regs(task);
- perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, regs, 0);
+ perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
if (likely(!ctx || !cpuctx->task_ctx))
return;
@@ -3382,15 +3380,23 @@ static void perf_event_task_output(struct perf_event *event,
struct perf_task_event *task_event)
{
struct perf_output_handle handle;
- int size;
struct task_struct *task = task_event->task;
- int ret;
+ unsigned long flags;
+ int size, ret;
+
+ /*
+ * If this CPU attempts to acquire an rq lock held by a CPU spinning
+ * in perf_output_lock() from interrupt context, it's game over.
+ */
+ local_irq_save(flags);
size = task_event->event_id.header.size;
ret = perf_output_begin(&handle, event, size, 0, 0);
- if (ret)
+ if (ret) {
+ local_irq_restore(flags);
return;
+ }
task_event->event_id.pid = perf_event_pid(event, task);
task_event->event_id.ppid = perf_event_pid(event, current);
@@ -3401,6 +3407,7 @@ static void perf_event_task_output(struct perf_event *event,
perf_output_put(&handle, task_event->event_id);
perf_output_end(&handle);
+ local_irq_restore(flags);
}
static int perf_event_task_match(struct perf_event *event)
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 1a22dfd42df..bc7704b3a44 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -1061,9 +1061,9 @@ static void check_thread_timers(struct task_struct *tsk,
}
}
-static void stop_process_timers(struct task_struct *tsk)
+static void stop_process_timers(struct signal_struct *sig)
{
- struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
+ struct thread_group_cputimer *cputimer = &sig->cputimer;
unsigned long flags;
if (!cputimer->running)
@@ -1072,6 +1072,10 @@ static void stop_process_timers(struct task_struct *tsk)
spin_lock_irqsave(&cputimer->lock, flags);
cputimer->running = 0;
spin_unlock_irqrestore(&cputimer->lock, flags);
+
+ sig->cputime_expires.prof_exp = cputime_zero;
+ sig->cputime_expires.virt_exp = cputime_zero;
+ sig->cputime_expires.sched_exp = 0;
}
static u32 onecputick;
@@ -1133,7 +1137,7 @@ static void check_process_timers(struct task_struct *tsk,
list_empty(&timers[CPUCLOCK_VIRT]) &&
cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) &&
list_empty(&timers[CPUCLOCK_SCHED])) {
- stop_process_timers(tsk);
+ stop_process_timers(sig);
return;
}
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index f1125c1a632..63fe2543398 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -45,6 +45,7 @@
#include <linux/mutex.h>
#include <linux/module.h>
#include <linux/kernel_stat.h>
+#include <linux/hardirq.h>
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key rcu_lock_key;
@@ -66,6 +67,28 @@ EXPORT_SYMBOL_GPL(rcu_sched_lock_map);
int rcu_scheduler_active __read_mostly;
EXPORT_SYMBOL_GPL(rcu_scheduler_active);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+/**
+ * rcu_read_lock_bh_held - might we be in RCU-bh read-side critical section?
+ *
+ * Check for bottom half being disabled, which covers both the
+ * CONFIG_PROVE_RCU and not cases. Note that if someone uses
+ * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled)
+ * will show the situation.
+ *
+ * Check debug_lockdep_rcu_enabled() to prevent false positives during boot.
+ */
+int rcu_read_lock_bh_held(void)
+{
+ if (!debug_lockdep_rcu_enabled())
+ return 1;
+ return in_softirq();
+}
+EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
+
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
/*
* This function is invoked towards the end of the scheduler's initialization
* process. Before this is called, the idle task might contain
diff --git a/kernel/resource.c b/kernel/resource.c
index 2d5be5d9bf5..9c358e26353 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -219,19 +219,34 @@ void release_child_resources(struct resource *r)
}
/**
- * request_resource - request and reserve an I/O or memory resource
+ * request_resource_conflict - request and reserve an I/O or memory resource
* @root: root resource descriptor
* @new: resource descriptor desired by caller
*
- * Returns 0 for success, negative error code on error.
+ * Returns 0 for success, conflict resource on error.
*/
-int request_resource(struct resource *root, struct resource *new)
+struct resource *request_resource_conflict(struct resource *root, struct resource *new)
{
struct resource *conflict;
write_lock(&resource_lock);
conflict = __request_resource(root, new);
write_unlock(&resource_lock);
+ return conflict;
+}
+
+/**
+ * request_resource - request and reserve an I/O or memory resource
+ * @root: root resource descriptor
+ * @new: resource descriptor desired by caller
+ *
+ * Returns 0 for success, negative error code on error.
+ */
+int request_resource(struct resource *root, struct resource *new)
+{
+ struct resource *conflict;
+
+ conflict = request_resource_conflict(root, new);
return conflict ? -EBUSY : 0;
}
@@ -474,25 +489,40 @@ static struct resource * __insert_resource(struct resource *parent, struct resou
}
/**
- * insert_resource - Inserts a resource in the resource tree
+ * insert_resource_conflict - Inserts resource in the resource tree
* @parent: parent of the new resource
* @new: new resource to insert
*
- * Returns 0 on success, -EBUSY if the resource can't be inserted.
+ * Returns 0 on success, conflict resource if the resource can't be inserted.
*
- * This function is equivalent to request_resource when no conflict
+ * This function is equivalent to request_resource_conflict when no conflict
* happens. If a conflict happens, and the conflicting resources
* entirely fit within the range of the new resource, then the new
* resource is inserted and the conflicting resources become children of
* the new resource.
*/
-int insert_resource(struct resource *parent, struct resource *new)
+struct resource *insert_resource_conflict(struct resource *parent, struct resource *new)
{
struct resource *conflict;
write_lock(&resource_lock);
conflict = __insert_resource(parent, new);
write_unlock(&resource_lock);
+ return conflict;
+}
+
+/**
+ * insert_resource - Inserts a resource in the resource tree
+ * @parent: parent of the new resource
+ * @new: new resource to insert
+ *
+ * Returns 0 on success, -EBUSY if the resource can't be inserted.
+ */
+int insert_resource(struct resource *parent, struct resource *new)
+{
+ struct resource *conflict;
+
+ conflict = insert_resource_conflict(parent, new);
return conflict ? -EBUSY : 0;
}
diff --git a/kernel/sched.c b/kernel/sched.c
index 117b7cad31b..1038ca16389 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2607,7 +2607,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
{
unsigned long flags;
struct rq *rq;
- int cpu = get_cpu();
+ int cpu __maybe_unused = get_cpu();
#ifdef CONFIG_SMP
/*
@@ -4859,7 +4859,9 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
int ret;
cpumask_var_t mask;
- if (len < cpumask_size())
+ if (len < nr_cpu_ids)
+ return -EINVAL;
+ if (len & (sizeof(unsigned long)-1))
return -EINVAL;
if (!alloc_cpumask_var(&mask, GFP_KERNEL))
@@ -4867,10 +4869,12 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
ret = sched_getaffinity(pid, mask);
if (ret == 0) {
- if (copy_to_user(user_mask_ptr, mask, cpumask_size()))
+ size_t retlen = min_t(size_t, len, cpumask_size());
+
+ if (copy_to_user(user_mask_ptr, mask, retlen))
ret = -EFAULT;
else
- ret = cpumask_size();
+ ret = retlen;
}
free_cpumask_var(mask);
diff --git a/kernel/slow-work.c b/kernel/slow-work.c
index 7494bbf5a27..7d3f4fa9ef4 100644
--- a/kernel/slow-work.c
+++ b/kernel/slow-work.c
@@ -637,7 +637,7 @@ int delayed_slow_work_enqueue(struct delayed_slow_work *dwork,
goto cancelled;
/* the timer holds a reference whilst it is pending */
- ret = work->ops->get_ref(work);
+ ret = slow_work_get_ref(work);
if (ret < 0)
goto cant_get_ref;
diff --git a/kernel/slow-work.h b/kernel/slow-work.h
index 321f3c59d73..a29ebd1ef41 100644
--- a/kernel/slow-work.h
+++ b/kernel/slow-work.h
@@ -43,28 +43,28 @@ extern void slow_work_new_thread_desc(struct slow_work *, struct seq_file *);
*/
static inline void slow_work_set_thread_pid(int id, pid_t pid)
{
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
slow_work_pids[id] = pid;
#endif
}
static inline void slow_work_mark_time(struct slow_work *work)
{
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
work->mark = CURRENT_TIME;
#endif
}
static inline void slow_work_begin_exec(int id, struct slow_work *work)
{
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
slow_work_execs[id] = work;
#endif
}
static inline void slow_work_end_exec(int id, struct slow_work *work)
{
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
write_lock(&slow_work_execs_lock);
slow_work_execs[id] = NULL;
write_unlock(&slow_work_execs_lock);
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 0d4c7898ab8..4b493f67dcb 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -155,11 +155,11 @@ void softlockup_tick(void)
* Wake up the high-prio watchdog task twice per
* threshold timespan.
*/
- if (now > touch_ts + softlockup_thresh/2)
+ if (time_after(now - softlockup_thresh/2, touch_ts))
wake_up_process(per_cpu(softlockup_watchdog, this_cpu));
/* Warn about unreasonable delays: */
- if (now <= (touch_ts + softlockup_thresh))
+ if (time_before_eq(now - softlockup_thresh, touch_ts))
return;
per_cpu(softlockup_print_ts, this_cpu) = touch_ts;
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index 0a8a213016f..aada0e52680 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -22,6 +22,29 @@
#include "tick-internal.h"
+/* Limit min_delta to a jiffie */
+#define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ)
+
+static int tick_increase_min_delta(struct clock_event_device *dev)
+{
+ /* Nothing to do if we already reached the limit */
+ if (dev->min_delta_ns >= MIN_DELTA_LIMIT)
+ return -ETIME;
+
+ if (dev->min_delta_ns < 5000)
+ dev->min_delta_ns = 5000;
+ else
+ dev->min_delta_ns += dev->min_delta_ns >> 1;
+
+ if (dev->min_delta_ns > MIN_DELTA_LIMIT)
+ dev->min_delta_ns = MIN_DELTA_LIMIT;
+
+ printk(KERN_WARNING "CE: %s increased min_delta_ns to %llu nsec\n",
+ dev->name ? dev->name : "?",
+ (unsigned long long) dev->min_delta_ns);
+ return 0;
+}
+
/**
* tick_program_event internal worker function
*/
@@ -37,23 +60,28 @@ int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires,
if (!ret || !force)
return ret;
+ dev->retries++;
/*
- * We tried 2 times to program the device with the given
- * min_delta_ns. If that's not working then we double it
+ * We tried 3 times to program the device with the given
+ * min_delta_ns. If that's not working then we increase it
* and emit a warning.
*/
if (++i > 2) {
/* Increase the min. delta and try again */
- if (!dev->min_delta_ns)
- dev->min_delta_ns = 5000;
- else
- dev->min_delta_ns += dev->min_delta_ns >> 1;
-
- printk(KERN_WARNING
- "CE: %s increasing min_delta_ns to %llu nsec\n",
- dev->name ? dev->name : "?",
- (unsigned long long) dev->min_delta_ns << 1);
-
+ if (tick_increase_min_delta(dev)) {
+ /*
+ * Get out of the loop if min_delta_ns
+ * hit the limit already. That's
+ * better than staying here forever.
+ *
+ * We clear next_event so we have a
+ * chance that the box survives.
+ */
+ printk(KERN_WARNING
+ "CE: Reprogramming failure. Giving up\n");
+ dev->next_event.tv64 = KTIME_MAX;
+ return -ETIME;
+ }
i = 0;
}
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 16736379a9c..39f6177fafa 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -818,7 +818,8 @@ void update_wall_time(void)
shift = min(shift, maxshift);
while (offset >= timekeeper.cycle_interval) {
offset = logarithmic_accumulation(offset, shift);
- shift--;
+ if(offset < timekeeper.cycle_interval<<shift)
+ shift--;
}
/* correct the clock when NTP error is too big */
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index bdfb8dd1050..1a4a7dd7877 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -228,6 +228,7 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
SEQ_printf(m, " event_handler: ");
print_name_offset(m, dev->event_handler);
SEQ_printf(m, "\n");
+ SEQ_printf(m, " retries: %lu\n", dev->retries);
}
static void timer_list_show_tickdevices(struct seq_file *m)
@@ -257,7 +258,7 @@ static int timer_list_show(struct seq_file *m, void *v)
u64 now = ktime_to_ns(ktime_get());
int cpu;
- SEQ_printf(m, "Timer List Version: v0.5\n");
+ SEQ_printf(m, "Timer List Version: v0.6\n");
SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
diff --git a/kernel/timer.c b/kernel/timer.c
index c61a7949387..fc965eae0e8 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -880,6 +880,7 @@ int try_to_del_timer_sync(struct timer_list *timer)
if (base->running_timer == timer)
goto out;
+ timer_stats_timer_clear_start_info(timer);
ret = 0;
if (timer_pending(timer)) {
detach_timer(timer, 1);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 05a9f83b881..d1187ef20ca 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -207,6 +207,14 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */
+#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+# define RB_FORCE_8BYTE_ALIGNMENT 0
+# define RB_ARCH_ALIGNMENT RB_ALIGNMENT
+#else
+# define RB_FORCE_8BYTE_ALIGNMENT 1
+# define RB_ARCH_ALIGNMENT 8U
+#endif
+
/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
@@ -1547,7 +1555,7 @@ rb_update_event(struct ring_buffer_event *event,
case 0:
length -= RB_EVNT_HDR_SIZE;
- if (length > RB_MAX_SMALL_DATA)
+ if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
event->array[0] = length;
else
event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
@@ -1722,11 +1730,11 @@ static unsigned rb_calculate_event_length(unsigned length)
if (!length)
length = 1;
- if (length > RB_MAX_SMALL_DATA)
+ if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
length += sizeof(event.array[0]);
length += RB_EVNT_HDR_SIZE;
- length = ALIGN(length, RB_ALIGNMENT);
+ length = ALIGN(length, RB_ARCH_ALIGNMENT);
return length;
}
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 81f691eb3a3..0565bb42566 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -17,7 +17,12 @@ EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);
static char *perf_trace_buf;
static char *perf_trace_buf_nmi;
-typedef typeof(char [PERF_MAX_TRACE_SIZE]) perf_trace_t ;
+/*
+ * Force it to be aligned to unsigned long to avoid misaligned accesses
+ * suprises
+ */
+typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
+ perf_trace_t;
/* Count the events in use (per event id, not per instance) */
static int total_ref_count;
@@ -130,6 +135,8 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
char *trace_buf, *raw_data;
int pc, cpu;
+ BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
+
pc = preempt_count();
/* Protect the per cpu buffer, begin the rcu read side */
@@ -152,7 +159,7 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
raw_data = per_cpu_ptr(trace_buf, cpu);
/* zero the dead bytes from align to not leak stack to user */
- *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+ memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
entry = (struct trace_entry *)raw_data;
tracing_generic_entry_update(entry, *irq_flags, pc);