From ac3d0da8f3290b3d394cdb7f50604424a7cd6092 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 26 Aug 2012 21:12:09 +0200 Subject: task_work: Make task_work_add() lockless Change task_work's to use llist-like code to avoid pi_lock in task_work_add(), this makes it useable under rq->lock. task_work_cancel() and task_work_run() still use pi_lock to synchronize with each other. (This is in preparation for a deadlock fix.) Suggested-by: Peter Zijlstra Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra Cc: Al Viro Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20120826191209.GA4221@redhat.com Signed-off-by: Ingo Molnar --- kernel/task_work.c | 95 +++++++++++++++++++++++++++--------------------------- 1 file changed, 48 insertions(+), 47 deletions(-) (limited to 'kernel') diff --git a/kernel/task_work.c b/kernel/task_work.c index d320d44903bd..f13ec0bda1d5 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -3,25 +3,18 @@ #include int -task_work_add(struct task_struct *task, struct callback_head *twork, bool notify) +task_work_add(struct task_struct *task, struct callback_head *work, bool notify) { - struct callback_head *last, *first; - unsigned long flags; - + struct callback_head *head; /* * Not inserting the new work if the task has already passed * exit_task_work() is the responisbility of callers. */ - raw_spin_lock_irqsave(&task->pi_lock, flags); - last = task->task_works; - first = last ? last->next : twork; - twork->next = first; - if (last) - last->next = twork; - task->task_works = twork; - raw_spin_unlock_irqrestore(&task->pi_lock, flags); + do { + head = ACCESS_ONCE(task->task_works); + work->next = head; + } while (cmpxchg(&task->task_works, head, work) != head); - /* test_and_set_bit() implies mb(), see tracehook_notify_resume(). 
*/ if (notify) set_notify_resume(task); return 0; @@ -30,52 +23,60 @@ task_work_add(struct task_struct *task, struct callback_head *twork, bool notify struct callback_head * task_work_cancel(struct task_struct *task, task_work_func_t func) { + struct callback_head **pprev = &task->task_works; + struct callback_head *work = NULL; unsigned long flags; - struct callback_head *last, *res = NULL; - + /* + * If cmpxchg() fails we continue without updating pprev. + * Either we raced with task_work_add() which added the + * new entry before this work, we will find it again. Or + * we raced with task_work_run(), *pprev == NULL. + */ raw_spin_lock_irqsave(&task->pi_lock, flags); - last = task->task_works; - if (last) { - struct callback_head *q = last, *p = q->next; - while (1) { - if (p->func == func) { - q->next = p->next; - if (p == last) - task->task_works = q == p ? NULL : q; - res = p; - break; - } - if (p == last) - break; - q = p; - p = q->next; - } + while ((work = ACCESS_ONCE(*pprev))) { + read_barrier_depends(); + if (work->func != func) + pprev = &work->next; + else if (cmpxchg(pprev, work, work->next) == work) + break; } raw_spin_unlock_irqrestore(&task->pi_lock, flags); - return res; + + return work; } void task_work_run(void) { struct task_struct *task = current; - struct callback_head *p, *q; + struct callback_head *work, *head, *next; - while (1) { - raw_spin_lock_irq(&task->pi_lock); - p = task->task_works; - task->task_works = NULL; - raw_spin_unlock_irq(&task->pi_lock); + for (;;) { + work = xchg(&task->task_works, NULL); + if (!work) + break; + /* + * Synchronize with task_work_cancel(). It can't remove + * the first entry == work, cmpxchg(task_works) should + * fail, but it can play with *work and other entries. 
+ */ + raw_spin_unlock_wait(&task->pi_lock); + smp_mb(); - if (unlikely(!p)) - return; + /* Reverse the list to run the works in fifo order */ + head = NULL; + do { + next = work->next; + work->next = head; + head = work; + work = next; + } while (work); - q = p->next; /* head */ - p->next = NULL; /* cut it */ - while (q) { - p = q->next; - q->func(q); - q = p; + work = head; + do { + next = work->next; + work->func(work); + work = next; cond_resched(); - } + } while (work); } } -- cgit v1.2.3 From 9da33de62431c7839f98156720862262272a8380 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 26 Aug 2012 21:12:11 +0200 Subject: task_work: task_work_add() should not succeed after exit_task_work() ed3e694d "move exit_task_work() past exit_files() et.al" destroyed the add/exit synchronization we had, the caller itself should ensure task_work_add() can't race with the exiting task. However, this is not convenient/simple, and the only user which tries to do this is buggy (see the next patch). Unless the task is current, there is simply no way to do this in general. Change exit_task_work()->task_work_run() to use the dummy "work_exited" entry to let task_work_add() know it should fail. 
Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra Cc: Al Viro Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20120826191211.GA4228@redhat.com Signed-off-by: Ingo Molnar --- kernel/task_work.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/task_work.c b/kernel/task_work.c index f13ec0bda1d5..65bd3c92d6f3 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -2,16 +2,17 @@ #include #include +static struct callback_head work_exited; /* all we need is ->next == NULL */ + int task_work_add(struct task_struct *task, struct callback_head *work, bool notify) { struct callback_head *head; - /* - * Not inserting the new work if the task has already passed - * exit_task_work() is the responisbility of callers. - */ + do { head = ACCESS_ONCE(task->task_works); + if (unlikely(head == &work_exited)) + return -ESRCH; work->next = head; } while (cmpxchg(&task->task_works, head, work) != head); @@ -30,7 +31,7 @@ task_work_cancel(struct task_struct *task, task_work_func_t func) * If cmpxchg() fails we continue without updating pprev. * Either we raced with task_work_add() which added the * new entry before this work, we will find it again. Or - * we raced with task_work_run(), *pprev == NULL. + * we raced with task_work_run(), *pprev == NULL/exited. */ raw_spin_lock_irqsave(&task->pi_lock, flags); while ((work = ACCESS_ONCE(*pprev))) { @@ -51,7 +52,16 @@ void task_work_run(void) struct callback_head *work, *head, *next; for (;;) { - work = xchg(&task->task_works, NULL); + /* + * work->func() can do task_work_add(), do not set + * work_exited unless the list is empty. + */ + do { + work = ACCESS_ONCE(task->task_works); + head = !work && (task->flags & PF_EXITING) ? 
+ &work_exited : NULL; + } while (cmpxchg(&task->task_works, work, head) != work); + if (!work) break; /* -- cgit v1.2.3 From f784e8a7989c0da3062d04bfea3db90f41e8f738 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 26 Aug 2012 21:12:17 +0200 Subject: task_work: Simplify the usage in ptrace_notify() and get_signal_to_deliver() ptrace_notify() and get_signal_to_deliver() do unnecessary things before task_work_run(): 1. smp_mb__after_clear_bit() is not needed, test_and_clear_bit() implies mb(). 2. And we do not need the barrier at all, in this case we only care about the "synchronous" works added by the task itself. 3. No need to clear TIF_NOTIFY_RESUME, and we should not assume task_works is the only user of this flag. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra Cc: Al Viro Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20120826191217.GA4238@redhat.com Signed-off-by: Ingo Molnar --- kernel/signal.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index be4f856d52f8..2c681f11b7d2 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1971,13 +1971,8 @@ static void ptrace_do_notify(int signr, int exit_code, int why) void ptrace_notify(int exit_code) { BUG_ON((exit_code & (0x7f | ~0xffff)) != SIGTRAP); - if (unlikely(current->task_works)) { - if (test_and_clear_ti_thread_flag(current_thread_info(), - TIF_NOTIFY_RESUME)) { - smp_mb__after_clear_bit(); - task_work_run(); - } - } + if (unlikely(current->task_works)) + task_work_run(); spin_lock_irq(&current->sighand->siglock); ptrace_do_notify(SIGTRAP, exit_code, CLD_TRAPPED); @@ -2198,13 +2193,8 @@ int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct signal_struct *signal = current->signal; int signr; - if (unlikely(current->task_works)) { - if (test_and_clear_ti_thread_flag(current_thread_info(), - TIF_NOTIFY_RESUME)) { - 
smp_mb__after_clear_bit(); - task_work_run(); - } - } + if (unlikely(current->task_works)) + task_work_run(); if (unlikely(uprobe_deny_signal())) return 0; -- cgit v1.2.3