From e2d57f782c8a906f80d5b5f54da803c1638929d7 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Tue, 7 May 2013 06:45:49 -0700 Subject: rwsem: make the waiter type an enumeration rather than a bitmask We are not planning to add any new waiter flags, so we can convert the waiter type into an enumeration. Background: David Howells suggested I do this back when I tried adding a new waiter type for unfair readers. However, I believe the cleanup applies regardless of that use case. Signed-off-by: Michel Lespinasse Reviewed-by: Rik van Riel Reviewed-by: Peter Hurley Acked-by: Davidlohr Bueso Signed-off-by: Linus Torvalds --- lib/rwsem-spinlock.c | 19 +++++++++++-------- lib/rwsem.c | 23 +++++++++++++---------- 2 files changed, 24 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c index 7542afbb22b..5f117f37ac0 100644 --- a/lib/rwsem-spinlock.c +++ b/lib/rwsem-spinlock.c @@ -9,12 +9,15 @@ #include #include +enum rwsem_waiter_type { + RWSEM_WAITING_FOR_WRITE, + RWSEM_WAITING_FOR_READ +}; + struct rwsem_waiter { struct list_head list; struct task_struct *task; - unsigned int flags; -#define RWSEM_WAITING_FOR_READ 0x00000001 -#define RWSEM_WAITING_FOR_WRITE 0x00000002 + enum rwsem_waiter_type type; }; int rwsem_is_locked(struct rw_semaphore *sem) @@ -68,7 +71,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); if (!wakewrite) { - if (waiter->flags & RWSEM_WAITING_FOR_WRITE) + if (waiter->type == RWSEM_WAITING_FOR_WRITE) goto out; goto dont_wake_writers; } @@ -78,7 +81,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) * to -1 here to indicate we get the lock. Instead, we wake it up * to let it go get it again. */ - if (waiter->flags & RWSEM_WAITING_FOR_WRITE) { + if (waiter->type == RWSEM_WAITING_FOR_WRITE) { wake_up_process(waiter->task); goto out; } @@ -86,7 +89,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) /* grant an infinite number of read locks to the front of the queue */ dont_wake_writers: woken = 0; - while (waiter->flags & RWSEM_WAITING_FOR_READ) { + while (waiter->type == RWSEM_WAITING_FOR_READ) { struct list_head *next = waiter->list.next; list_del(&waiter->list); @@ -144,7 +147,7 @@ void __sched __down_read(struct rw_semaphore *sem) /* set up my own style of waitqueue */ waiter.task = tsk; - waiter.flags = RWSEM_WAITING_FOR_READ; + waiter.type = RWSEM_WAITING_FOR_READ; get_task_struct(tsk); list_add_tail(&waiter.list, &sem->wait_list); @@ -201,7 +204,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) /* set up my own style of waitqueue */ tsk = current; waiter.task = tsk; - waiter.flags = RWSEM_WAITING_FOR_WRITE; + waiter.type = RWSEM_WAITING_FOR_WRITE; list_add_tail(&waiter.list, &sem->wait_list); /* wait for someone to release the lock */ diff --git a/lib/rwsem.c b/lib/rwsem.c index ad5e0df16ab..672eb33218a 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -30,12 +30,15 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, EXPORT_SYMBOL(__init_rwsem); +enum rwsem_waiter_type { + RWSEM_WAITING_FOR_WRITE, + RWSEM_WAITING_FOR_READ +}; + struct rwsem_waiter { struct list_head list; struct task_struct *task; - unsigned int flags; -#define RWSEM_WAITING_FOR_READ 0x00000001 -#define RWSEM_WAITING_FOR_WRITE 0x00000002 + enum rwsem_waiter_type type; }; /* Wake types for __rwsem_do_wake().
Note that RWSEM_WAKE_NO_ACTIVE and @@ -65,7 +68,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) signed long woken, loop, adjustment; waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE)) + if (waiter->type != RWSEM_WAITING_FOR_WRITE) goto readers_only; if (wake_type == RWSEM_WAKE_READ_OWNED) @@ -112,10 +115,10 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) waiter = list_entry(waiter->list.next, struct rwsem_waiter, list); - } while (waiter->flags & RWSEM_WAITING_FOR_READ); + } while (waiter->type != RWSEM_WAITING_FOR_WRITE); adjustment = woken * RWSEM_ACTIVE_READ_BIAS; - if (waiter->flags & RWSEM_WAITING_FOR_READ) + if (waiter->type != RWSEM_WAITING_FOR_WRITE) /* hit end of list above */ adjustment -= RWSEM_WAITING_BIAS; @@ -148,7 +151,7 @@ static int try_get_writer_sem(struct rw_semaphore *sem, /* only steal when first waiter is writing */ fwaiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - if (!(fwaiter->flags & RWSEM_WAITING_FOR_WRITE)) + if (fwaiter->type != RWSEM_WAITING_FOR_WRITE) return 0; adjustment = RWSEM_ACTIVE_WRITE_BIAS; @@ -179,7 +182,7 @@ try_again_write: */ static struct rw_semaphore __sched * rwsem_down_failed_common(struct rw_semaphore *sem, - unsigned int flags, signed long adjustment) + enum rwsem_waiter_type type, signed long adjustment) { struct rwsem_waiter waiter; struct task_struct *tsk = current; @@ -190,7 +193,7 @@ rwsem_down_failed_common(struct rw_semaphore *sem, /* set up my own style of waitqueue */ raw_spin_lock_irq(&sem->wait_lock); waiter.task = tsk; - waiter.flags = flags; + waiter.type = type; get_task_struct(tsk); if (list_empty(&sem->wait_list)) @@ -221,7 +224,7 @@ rwsem_down_failed_common(struct rw_semaphore *sem, raw_spin_lock_irq(&sem->wait_lock); /* Try to get the writer sem, may steal from the head writer: */ - if (flags == RWSEM_WAITING_FOR_WRITE) + if (type == RWSEM_WAITING_FOR_WRITE) if (try_get_writer_sem(sem, &waiter)) { raw_spin_unlock_irq(&sem->wait_lock); return sem; -- cgit v1.2.3 From f7dd1cee9a4e2b1450e4a3732636dfbf28562ee4 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Tue, 7 May 2013 06:45:50 -0700 Subject: rwsem: shorter spinlocked section in rwsem_down_failed_common() This change reduces the size of the spinlocked and TASK_UNINTERRUPTIBLE sections in rwsem_down_failed_common(): - We only need the sem->wait_lock to insert ourselves on the wait_list; the waiter node can be prepared outside of the wait_lock. - The task state only needs to be set to TASK_UNINTERRUPTIBLE immediately before checking if we actually need to sleep; it doesn't need to protect the entire function. 
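To illustrate the first point, here is a minimal user-space sketch of the narrowed critical section, with a pthread mutex standing in for sem->wait_lock and a bare singly-linked list standing in for the kernel's list_head machinery (names here are illustrative, not kernel API):

#include <pthread.h>

struct waiter {
	struct waiter *next;
	void *task;
	int type;
};

static pthread_mutex_t wait_lock = PTHREAD_MUTEX_INITIALIZER;
static struct waiter *wait_list;

static void enqueue_waiter(struct waiter *w, void *task, int type)
{
	w->task = task;			/* prepared outside the lock */
	w->type = type;

	pthread_mutex_lock(&wait_lock);
	w->next = wait_list;		/* only the insertion itself */
	wait_list = w;			/* needs wait_lock */
	pthread_mutex_unlock(&wait_lock);
}

The same reasoning applies to the task state: marking ourselves TASK_UNINTERRUPTIBLE is only needed once we are about to test whether we actually have to sleep.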
Signed-off-by: Michel Lespinasse Reviewed-by: Rik van Riel Reviewed-by: Peter Hurley Acked-by: Davidlohr Bueso Signed-off-by: Linus Torvalds --- lib/rwsem.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/rwsem.c b/lib/rwsem.c index 672eb33218a..40636454cf3 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -188,14 +188,12 @@ rwsem_down_failed_common(struct rw_semaphore *sem, struct task_struct *tsk = current; signed long count; - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - /* set up my own style of waitqueue */ - raw_spin_lock_irq(&sem->wait_lock); waiter.task = tsk; waiter.type = type; get_task_struct(tsk); + raw_spin_lock_irq(&sem->wait_lock); if (list_empty(&sem->wait_list)) adjustment += RWSEM_WAITING_BIAS; list_add_tail(&waiter.list, &sem->wait_list); @@ -218,7 +216,8 @@ rwsem_down_failed_common(struct rw_semaphore *sem, raw_spin_unlock_irq(&sem->wait_lock); /* wait to be given the lock */ - for (;;) { + while (true) { + set_task_state(tsk, TASK_UNINTERRUPTIBLE); if (!waiter.task) break; @@ -231,7 +230,6 @@ rwsem_down_failed_common(struct rw_semaphore *sem, } raw_spin_unlock_irq(&sem->wait_lock); schedule(); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); } tsk->state = TASK_RUNNING; -- cgit v1.2.3 From 1e78277ccbbb48af32a618d1ef0e8534e0b648d7 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Tue, 7 May 2013 06:45:51 -0700 Subject: rwsem: move rwsem_down_failed_common code into rwsem_down_{read,write}_failed Remove the rwsem_down_failed_common function and replace it with two identical copies of its code in rwsem_down_{read,write}_failed. This is because we want to make different optimizations in rwsem_down_{read,write}_failed; we are adding this pure-duplication step as a separate commit in order to make it easier to check the following steps. 
Signed-off-by: Michel Lespinasse Reviewed-by: Rik van Riel Reviewed-by: Peter Hurley Acked-by: Davidlohr Bueso Signed-off-by: Linus Torvalds --- lib/rwsem.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 57 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/rwsem.c b/lib/rwsem.c index 40636454cf3..fb658af1c12 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -178,12 +178,12 @@ try_again_write: } /* - * wait for a lock to be granted + * wait for the read lock to be granted */ -static struct rw_semaphore __sched * -rwsem_down_failed_common(struct rw_semaphore *sem, - enum rwsem_waiter_type type, signed long adjustment) +struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) { + enum rwsem_waiter_type type = RWSEM_WAITING_FOR_READ; + signed long adjustment = -RWSEM_ACTIVE_READ_BIAS; struct rwsem_waiter waiter; struct task_struct *tsk = current; signed long count; @@ -237,22 +237,64 @@ rwsem_down_failed_common(struct rw_semaphore *sem, return sem; } -/* - * wait for the read lock to be granted - */ -struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) -{ - return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ, - -RWSEM_ACTIVE_READ_BIAS); -} - /* * wait for the write lock to be granted */ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) { - return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE, - -RWSEM_ACTIVE_WRITE_BIAS); + enum rwsem_waiter_type type = RWSEM_WAITING_FOR_WRITE; + signed long adjustment = -RWSEM_ACTIVE_WRITE_BIAS; + struct rwsem_waiter waiter; + struct task_struct *tsk = current; + signed long count; + + /* set up my own style of waitqueue */ + waiter.task = tsk; + waiter.type = type; + get_task_struct(tsk); + + raw_spin_lock_irq(&sem->wait_lock); + if (list_empty(&sem->wait_list)) + adjustment += RWSEM_WAITING_BIAS; + list_add_tail(&waiter.list, &sem->wait_list); + + /* we're now waiting on the lock, but no longer actively locking */ + count = rwsem_atomic_update(adjustment, sem); + + /* If there are no active locks, wake the front queued process(es) up. + * + * Alternatively, if we're called from a failed down_write(), there + * were already threads queued before us and there are no active + * writers, the lock must be read owned; so we try to wake any read + * locks that were queued ahead of us. 
*/ + if (count == RWSEM_WAITING_BIAS) + sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE); + else if (count > RWSEM_WAITING_BIAS && + adjustment == -RWSEM_ACTIVE_WRITE_BIAS) + sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); + + raw_spin_unlock_irq(&sem->wait_lock); + + /* wait to be given the lock */ + while (true) { + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + if (!waiter.task) + break; + + raw_spin_lock_irq(&sem->wait_lock); + /* Try to get the writer sem, may steal from the head writer: */ + if (type == RWSEM_WAITING_FOR_WRITE) + if (try_get_writer_sem(sem, &waiter)) { + raw_spin_unlock_irq(&sem->wait_lock); + return sem; + } + raw_spin_unlock_irq(&sem->wait_lock); + schedule(); + } + + tsk->state = TASK_RUNNING; + + return sem; } /* -- cgit v1.2.3 From da16922cc031b9c0221c836994276ab193b31de8 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Tue, 7 May 2013 06:45:52 -0700 Subject: rwsem: simplify rwsem_down_read_failed When trying to acquire a read lock, the RWSEM_ACTIVE_READ_BIAS adjustment doesn't cause other readers to block, so we never have to worry about waking them back after canceling this adjustment in rwsem_down_read_failed(). We also never want to steal the lock in rwsem_down_read_failed(), so we don't have to grab the wait_lock either. Signed-off-by: Michel Lespinasse Reviewed-by: Rik van Riel Reviewed-by: Peter Hurley Acked-by: Davidlohr Bueso Signed-off-by: Linus Torvalds --- lib/rwsem.c | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) (limited to 'lib') diff --git a/lib/rwsem.c b/lib/rwsem.c index fb658af1c12..66f307e9076 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -182,7 +182,6 @@ try_again_write: */ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) { - enum rwsem_waiter_type type = RWSEM_WAITING_FOR_READ; signed long adjustment = -RWSEM_ACTIVE_READ_BIAS; struct rwsem_waiter waiter; struct task_struct *tsk = current; @@ -190,7 +189,7 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) /* set up my own style of waitqueue */ waiter.task = tsk; - waiter.type = type; + waiter.type = RWSEM_WAITING_FOR_READ; get_task_struct(tsk); raw_spin_lock_irq(&sem->wait_lock); @@ -201,17 +200,9 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) /* we're now waiting on the lock, but no longer actively locking */ count = rwsem_atomic_update(adjustment, sem); - /* If there are no active locks, wake the front queued process(es) up. - * - * Alternatively, if we're called from a failed down_write(), there - * were already threads queued before us and there are no active - * writers, the lock must be read owned; so we try to wake any read - * locks that were queued ahead of us. */ + /* If there are no active locks, wake the front queued process(es). 
*/ if (count == RWSEM_WAITING_BIAS) sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE); raw_spin_unlock_irq(&sem->wait_lock); @@ -220,15 +211,6 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) set_task_state(tsk, TASK_UNINTERRUPTIBLE); if (!waiter.task) break; - - raw_spin_lock_irq(&sem->wait_lock); - /* Try to get the writer sem, may steal from the head writer: */ - if (type == RWSEM_WAITING_FOR_WRITE) - if (try_get_writer_sem(sem, &waiter)) { - raw_spin_unlock_irq(&sem->wait_lock); - return sem; - } - raw_spin_unlock_irq(&sem->wait_lock); schedule(); } -- cgit v1.2.3 From 023fe4f712028d25b42d31984abae1f3d3f0e3e2 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Tue, 7 May 2013 06:45:53 -0700 Subject: rwsem: simplify rwsem_down_write_failed When waking writers, we never grant them the lock - instead, they have to acquire it themselves when they run, and remove themselves from the wait_list when they succeed. As a result, we can do a few simplifications in rwsem_down_write_failed(): - We don't need to check for !waiter.task since __rwsem_do_wake() doesn't remove writers from the wait_list. - There is no point releasing the wait_lock before entering the wait loop, as we will need to reacquire it immediately. We can change the loop so that the lock is always held at the start of each loop iteration. - We don't need to get a reference on the task structure, since the task is responsible for removing itself from the wait_list. There is no risk, like in the rwsem_down_read_failed() case, that a task would wake up and exit (thus destroying its task structure) while __rwsem_do_wake() is still running - wait_lock protects against that.
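The second bullet deserves a sketch: the reworked loop keeps wait_lock held at the top of every iteration and only drops it around the actual sleep. A user-space analogue (a pthread mutex stands in for sem->wait_lock, a boolean for the rwsem count, and sched_yield() for schedule(); illustrative only, not kernel code):

#include <pthread.h>
#include <stdbool.h>
#include <sched.h>

static pthread_mutex_t wait_lock = PTHREAD_MUTEX_INITIALIZER;
static bool lock_available = true;	/* free at start, for the sketch */

/* must be called with wait_lock held, like try_get_writer_sem() */
static bool try_acquire(void)
{
	if (!lock_available)
		return false;
	lock_available = false;
	return true;
}

static void writer_wait(void)
{
	pthread_mutex_lock(&wait_lock);
	/* invariant: wait_lock is held at the top of each iteration */
	while (!try_acquire()) {
		pthread_mutex_unlock(&wait_lock);
		sched_yield();		/* stands in for schedule() */
		pthread_mutex_lock(&wait_lock);
	}
	/* still holding wait_lock: safe to unlink our waiter here */
	pthread_mutex_unlock(&wait_lock);
}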
Signed-off-by: Michel Lespinasse Reviewed-by: Rik van Riel Reviewed-by: Peter Hurley Acked-by: Davidlohr Bueso Signed-off-by: Linus Torvalds --- lib/rwsem.c | 33 +++++++++------------------------ 1 file changed, 9 insertions(+), 24 deletions(-) (limited to 'lib') diff --git a/lib/rwsem.c b/lib/rwsem.c index 66f307e9076..c73bd96dc30 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -161,16 +161,8 @@ static int try_get_writer_sem(struct rw_semaphore *sem, try_again_write: oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; - if (!(oldcount & RWSEM_ACTIVE_MASK)) { - /* No active lock: */ - struct task_struct *tsk = waiter->task; - - list_del(&waiter->list); - smp_mb(); - put_task_struct(tsk); - tsk->state = TASK_RUNNING; + if (!(oldcount & RWSEM_ACTIVE_MASK)) return 1; - } /* some one grabbed the sem already */ if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK) return 0; @@ -220,11 +212,10 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) } /* - * wait for the write lock to be granted + * wait until we successfully acquire the write lock */ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) { - enum rwsem_waiter_type type = RWSEM_WAITING_FOR_WRITE; signed long adjustment = -RWSEM_ACTIVE_WRITE_BIAS; struct rwsem_waiter waiter; struct task_struct *tsk = current; @@ -232,8 +223,7 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) /* set up my own style of waitqueue */ waiter.task = tsk; - waiter.type = type; - get_task_struct(tsk); + waiter.type = RWSEM_WAITING_FOR_WRITE; raw_spin_lock_irq(&sem->wait_lock); if (list_empty(&sem->wait_list)) @@ -255,25 +245,20 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) adjustment == -RWSEM_ACTIVE_WRITE_BIAS) sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); - raw_spin_unlock_irq(&sem->wait_lock); - - /* wait to be given the lock */ + /* wait until we successfully acquire the lock */ while (true) { set_task_state(tsk, TASK_UNINTERRUPTIBLE); - if (!waiter.task) + + if (try_get_writer_sem(sem, &waiter)) break; - raw_spin_lock_irq(&sem->wait_lock); - /* Try to get the writer sem, may steal from the head writer: */ - if (type == RWSEM_WAITING_FOR_WRITE) - if (try_get_writer_sem(sem, &waiter)) { - raw_spin_unlock_irq(&sem->wait_lock); - return sem; - } raw_spin_unlock_irq(&sem->wait_lock); schedule(); + raw_spin_lock_irq(&sem->wait_lock); } + list_del(&waiter.list); + raw_spin_unlock_irq(&sem->wait_lock); tsk->state = TASK_RUNNING; return sem; -- cgit v1.2.3 From ed00f64346631dff035adfb9b0240daaa8b46c4e Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Tue, 7 May 2013 06:45:54 -0700 Subject: rwsem: more aggressive lock stealing in rwsem_down_write_failed Some small code simplifications can be achieved by doing more aggressive lock stealing: - When rwsem_down_write_failed() notices that there are no active locks (and thus no thread to wake us if we decided to sleep), it used to wake the first queued process. However, stealing the lock is also sufficient to deal with this case, so we don't need this check anymore. - In try_get_writer_sem(), we can steal the lock even when the first waiter is a reader. This is correct because the code path that wakes readers is protected by the wait_lock.
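For reference, the "no active locks" tests in this and the following patches rely on the rwsem count layout from the architecture headers. The 64-bit variant (as in arch/x86's rwsem.h; 32-bit arches use a 16/16-bit split) behaves as in this small standalone demo - the constants match the kernel headers, the demo itself is illustrative:

#include <stdio.h>

#define RWSEM_ACTIVE_BIAS	0x00000001L
#define RWSEM_ACTIVE_MASK	0xffffffffL
#define RWSEM_WAITING_BIAS	(-RWSEM_ACTIVE_MASK - 1)
#define RWSEM_ACTIVE_WRITE_BIAS	(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)

int main(void)
{
	long count = RWSEM_WAITING_BIAS; /* waiters queued, no active lockers */
	printf("active lockers: %ld\n", count & RWSEM_ACTIVE_MASK); /* 0 */

	count = RWSEM_ACTIVE_WRITE_BIAS; /* a sole queued writer stole it */
	printf("active lockers: %ld\n", count & RWSEM_ACTIVE_MASK); /* 1 */
	return 0;
}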
As to the performance effects of this change, they are expected to be minimal: readers are still granted the lock (rather than having to acquire it themselves) when they reach the front of the wait queue, so we have essentially the same behavior as in rwsem-spinlock. Signed-off-by: Michel Lespinasse Reviewed-by: Rik van Riel Reviewed-by: Peter Hurley Acked-by: Davidlohr Bueso Signed-off-by: Linus Torvalds --- lib/rwsem.c | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) (limited to 'lib') diff --git a/lib/rwsem.c b/lib/rwsem.c index c73bd96dc30..2360bf20409 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -143,20 +143,12 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) } /* Try to get write sem, caller holds sem->wait_lock: */ -static int try_get_writer_sem(struct rw_semaphore *sem, - struct rwsem_waiter *waiter) +static int try_get_writer_sem(struct rw_semaphore *sem) { - struct rwsem_waiter *fwaiter; long oldcount, adjustment; - /* only steal when first waiter is writing */ - fwaiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - if (fwaiter->type != RWSEM_WAITING_FOR_WRITE) - return 0; - adjustment = RWSEM_ACTIVE_WRITE_BIAS; - /* Only one waiter in the queue: */ - if (fwaiter == waiter && waiter->list.next == &sem->wait_list) + if (list_is_singular(&sem->wait_list)) adjustment -= RWSEM_WAITING_BIAS; try_again_write: @@ -233,23 +225,18 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) /* we're now waiting on the lock, but no longer actively locking */ count = rwsem_atomic_update(adjustment, sem); - /* If there are no active locks, wake the front queued process(es) up. - * - * Alternatively, if we're called from a failed down_write(), there - * were already threads queued before us and there are no active - * writers, the lock must be read owned; so we try to wake any read - * locks that were queued ahead of us. */ - if (count == RWSEM_WAITING_BIAS) - sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE); - else if (count > RWSEM_WAITING_BIAS && - adjustment == -RWSEM_ACTIVE_WRITE_BIAS) + /* If there were already threads queued before us and there are no + * active writers, the lock must be read owned; so we try to wake + * any read locks that were queued ahead of us. */ + if (count > RWSEM_WAITING_BIAS && + adjustment == -RWSEM_ACTIVE_WRITE_BIAS) sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); /* wait until we successfully acquire the lock */ while (true) { set_task_state(tsk, TASK_UNINTERRUPTIBLE); - if (try_get_writer_sem(sem, &waiter)) + if (try_get_writer_sem(sem)) break; raw_spin_unlock_irq(&sem->wait_lock); -- cgit v1.2.3 From 5ede972df1cd9294c82e9515949fd2103be81d7b Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Tue, 7 May 2013 06:45:55 -0700 Subject: rwsem: use cmpxchg for trying to steal write lock Using rwsem_atomic_update to try stealing the write lock forced us to undo the adjustment in the failure path. We can have simpler and faster code by using cmpxchg instead. 
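The shape of that change, transliterated to user space with C11 atomics (a sketch only; note that rwsem_atomic_update returns the new count while atomic_fetch_add returns the old one, hence the small bookkeeping differences):

#include <stdatomic.h>
#include <stdbool.h>

#define RWSEM_ACTIVE_MASK 0xffffffffL	/* 64-bit layout */

/* Before: speculatively add the write bias, undo it if the sem turned
 * out to be actively locked, and retry if the last active locker left
 * while we were undoing - mirrors the try_get_writer_sem() above. */
static bool steal_with_update(_Atomic long *count, long adjustment)
{
	for (;;) {
		long old = atomic_fetch_add(count, adjustment);
		if (!(old & RWSEM_ACTIVE_MASK))
			return true;	/* no active lockers: stolen */
		long now = atomic_fetch_add(count, -adjustment) - adjustment;
		if (now & RWSEM_ACTIVE_MASK)
			return false;	/* someone still holds the sem */
	}
}

/* After: one compare-and-exchange from the known "waiters queued, no
 * active lockers" value; nothing to undo when it fails. */
static bool steal_with_cmpxchg(_Atomic long *count, long free_value,
			       long locked_value)
{
	long expected = free_value;
	return atomic_compare_exchange_strong(count, &expected, locked_value);
}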
Signed-off-by: Michel Lespinasse Reviewed-by: Peter Hurley Acked-by: Davidlohr Bueso Acked-by: Rik van Riel Signed-off-by: Linus Torvalds --- lib/rwsem.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) (limited to 'lib') diff --git a/lib/rwsem.c b/lib/rwsem.c index 2360bf20409..64c2dc007be 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -142,25 +142,6 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) return sem; } -/* Try to get write sem, caller holds sem->wait_lock: */ -static int try_get_writer_sem(struct rw_semaphore *sem) -{ - long oldcount, adjustment; - - adjustment = RWSEM_ACTIVE_WRITE_BIAS; - if (list_is_singular(&sem->wait_list)) - adjustment -= RWSEM_WAITING_BIAS; - -try_again_write: - oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; - if (!(oldcount & RWSEM_ACTIVE_MASK)) - return 1; - /* some one grabbed the sem already */ - if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK) - return 0; - goto try_again_write; -} - /* * wait for the read lock to be granted */ @@ -236,7 +217,12 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) while (true) { set_task_state(tsk, TASK_UNINTERRUPTIBLE); - if (try_get_writer_sem(sem)) + /* Try acquiring the write lock. */ + count = RWSEM_ACTIVE_WRITE_BIAS; + if (!list_is_singular(&sem->wait_list)) + count += RWSEM_WAITING_BIAS; + if (cmpxchg(&sem->count, RWSEM_WAITING_BIAS, count) == + RWSEM_WAITING_BIAS) break; raw_spin_unlock_irq(&sem->wait_lock); -- cgit v1.2.3 From a7d2c573ae7fad1b2c877d1a1342fa5bb0d6478c Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Tue, 7 May 2013 06:45:56 -0700 Subject: rwsem: avoid taking wait_lock in rwsem_down_write_failed In rwsem_down_write_failed(), if there are active locks after we wake up (i.e. the lock got stolen from us), skip taking the wait_lock and go back to sleep immediately. Signed-off-by: Michel Lespinasse Reviewed-by: Peter Hurley Acked-by: Davidlohr Bueso Acked-by: Rik van Riel Signed-off-by: Linus Torvalds --- lib/rwsem.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/rwsem.c b/lib/rwsem.c index 64c2dc007be..edf3d9ca670 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -214,8 +214,8 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); /* wait until we successfully acquire the lock */ + set_task_state(tsk, TASK_UNINTERRUPTIBLE); while (true) { - set_task_state(tsk, TASK_UNINTERRUPTIBLE); /* Try acquiring the write lock. */ count = RWSEM_ACTIVE_WRITE_BIAS; @@ -226,7 +226,13 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) break; raw_spin_unlock_irq(&sem->wait_lock); - schedule(); + + /* Block until there are no active lockers. */ + do { + schedule(); + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + } while (sem->count & RWSEM_ACTIVE_MASK); + raw_spin_lock_irq(&sem->wait_lock); } -- cgit v1.2.3 From 9b0fc9c09f1b262b7fe697eba6b05095d78850e5 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Tue, 7 May 2013 06:45:57 -0700 Subject: rwsem: skip initial trylock in rwsem_down_write_failed We can skip the initial trylock in rwsem_down_write_failed() if there are known active lockers already, thus saving one likely-to-fail cmpxchg. 
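Together with the previous patch, the slowpath now follows a common lock-free pattern: test the last observed value before attempting the cmpxchg, since a cmpxchg that is bound to fail still has to pull the cache line in exclusive state. A user-space sketch of the shape of the loop (C11 atomics; not the kernel's exact count protocol):

#include <stdatomic.h>
#include <sched.h>

#define RWSEM_ACTIVE_MASK 0xffffffffL	/* 64-bit layout */

static _Atomic long sem_count;

static void write_lock_slowpath(long locked_value)
{
	long count = atomic_load(&sem_count);

	for (;;) {
		/* only try the cmpxchg when no active lockers are seen */
		if (!(count & RWSEM_ACTIVE_MASK)) {
			long expected = count;
			if (atomic_compare_exchange_strong(&sem_count,
					&expected, locked_value))
				return;	/* acquired */
		}
		/* block until there are no active lockers */
		do {
			sched_yield();	/* stands in for schedule() */
		} while ((count = atomic_load(&sem_count))
			 & RWSEM_ACTIVE_MASK);
	}
}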
Signed-off-by: Michel Lespinasse Reviewed-by: Peter Hurley Acked-by: Davidlohr Bueso Acked-by: Rik van Riel Signed-off-by: Linus Torvalds --- lib/rwsem.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/rwsem.c b/lib/rwsem.c index edf3d9ca670..0d50e46d5b0 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -216,14 +216,15 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) /* wait until we successfully acquire the lock */ set_task_state(tsk, TASK_UNINTERRUPTIBLE); while (true) { - - /* Try acquiring the write lock. */ - count = RWSEM_ACTIVE_WRITE_BIAS; - if (!list_is_singular(&sem->wait_list)) - count += RWSEM_WAITING_BIAS; - if (cmpxchg(&sem->count, RWSEM_WAITING_BIAS, count) == + if (!(count & RWSEM_ACTIVE_MASK)) { + /* Try acquiring the write lock. */ + count = RWSEM_ACTIVE_WRITE_BIAS; + if (!list_is_singular(&sem->wait_list)) + count += RWSEM_WAITING_BIAS; + if (cmpxchg(&sem->count, RWSEM_WAITING_BIAS, count) == RWSEM_WAITING_BIAS) - break; + break; + } raw_spin_unlock_irq(&sem->wait_lock); @@ -231,7 +232,7 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) do { schedule(); set_task_state(tsk, TASK_UNINTERRUPTIBLE); - } while (sem->count & RWSEM_ACTIVE_MASK); + } while ((count = sem->count) & RWSEM_ACTIVE_MASK); raw_spin_lock_irq(&sem->wait_lock); } -- cgit v1.2.3 From 8cf5322ce69afea1fab6a6270db24d057d664798 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Tue, 7 May 2013 06:45:58 -0700 Subject: rwsem: simplify __rwsem_do_wake This is mostly for cleanup value: - We don't need several gotos to handle the case where the first waiter is a writer. Two simple tests will do (and generate very similar code). - In the remainder of the function, we know the first waiter is a reader, so we don't have to double check that. We can use do..while loops to iterate over the readers to wake (generates slightly better code). Signed-off-by: Michel Lespinasse Reviewed-by: Peter Hurley Acked-by: Davidlohr Bueso Signed-off-by: Linus Torvalds --- lib/rwsem-spinlock.c | 23 +++++++---------------- lib/rwsem.c | 26 ++++++++++++-------------- 2 files changed, 19 insertions(+), 30 deletions(-) (limited to 'lib') diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c index 5f117f37ac0..9be8a914497 100644 --- a/lib/rwsem-spinlock.c +++ b/lib/rwsem-spinlock.c @@ -70,26 +70,17 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - if (!wakewrite) { - if (waiter->type == RWSEM_WAITING_FOR_WRITE) - goto out; - goto dont_wake_writers; - } - - /* - * as we support write lock stealing, we can't set sem->activity - * to -1 here to indicate we get the lock. Instead, we wake it up - * to let it go get it again. - */ if (waiter->type == RWSEM_WAITING_FOR_WRITE) { - wake_up_process(waiter->task); + if (wakewrite) + /* Wake up a writer. Note that we do not grant it the + * lock - it will have to acquire it when it runs. 
*/ + wake_up_process(waiter->task); goto out; } /* grant an infinite number of read locks to the front of the queue */ - dont_wake_writers: woken = 0; - while (waiter->type == RWSEM_WAITING_FOR_READ) { + do { struct list_head *next = waiter->list.next; list_del(&waiter->list); @@ -99,10 +90,10 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) wake_up_process(tsk); put_task_struct(tsk); woken++; - if (list_empty(&sem->wait_list)) + if (next == &sem->wait_list) break; waiter = list_entry(next, struct rwsem_waiter, list); - } + } while (waiter->type != RWSEM_WAITING_FOR_WRITE); sem->activity += woken; diff --git a/lib/rwsem.c b/lib/rwsem.c index 0d50e46d5b0..9a675fa9d78 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -68,20 +68,17 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) signed long woken, loop, adjustment; waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - if (waiter->type != RWSEM_WAITING_FOR_WRITE) - goto readers_only; - - if (wake_type == RWSEM_WAKE_READ_OWNED) - /* Another active reader was observed, so wakeup is not - * likely to succeed. Save the atomic op. - */ + if (waiter->type == RWSEM_WAITING_FOR_WRITE) { + if (wake_type != RWSEM_WAKE_READ_OWNED) + /* Wake writer at the front of the queue, but do not + * grant it the lock yet as we want other writers + * to be able to steal it. Readers, on the other hand, + * will block as they will notice the queued writer. + */ + wake_up_process(waiter->task); goto out; + } - /* Wake up the writing waiter and let the task grab the sem: */ - wake_up_process(waiter->task); - goto out; - - readers_only: /* If we come here from up_xxxx(), another thread might have reached * rwsem_down_failed_common() before we acquired the spinlock and * woken up a waiter, making it now active. We prefer to check for @@ -125,7 +122,8 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) rwsem_atomic_add(adjustment, sem); next = sem->wait_list.next; - for (loop = woken; loop > 0; loop--) { + loop = woken; + do { waiter = list_entry(next, struct rwsem_waiter, list); next = waiter->list.next; tsk = waiter->task; @@ -133,7 +131,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) waiter->task = NULL; wake_up_process(tsk); put_task_struct(tsk); - } + } while (--loop); sem->wait_list.next = next; next->prev = &sem->wait_list; -- cgit v1.2.3 From fe6e674c6187d4f452a679ced7e95262bd517936 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Tue, 7 May 2013 06:45:59 -0700 Subject: rwsem: implement support for write lock stealing on the fastpath When we decide to wake up readers, we must first grant them as many read locks as necessary, and then actually wake up all these readers. But in order to know how many read shares to grant, we must first count the readers at the head of the queue. This might take a while if there are many readers, and we want to be protected against a writer stealing the lock while we're counting. To that end, we grant the first reader lock before counting how many more readers are queued. We also require some adjustments to the wake_type semantics. RWSEM_WAKE_NO_ACTIVE used to mean that we had found the count to be RWSEM_WAITING_BIAS, in which case the rwsem was known to be free as nobody could steal it while we hold the wait_lock. This doesn't make sense once we implement fastpath write lock stealing, so we now use RWSEM_WAKE_ANY in that case. 
Similarly, when rwsem_down_write_failed found that a read lock was active, it would use RWSEM_WAKE_READ_OWNED which signalled that new readers could be woken without checking first that the rwsem was available. We can't do that anymore since the existing readers might release their read locks, and a writer could steal the lock before we wake up additional readers. So, we have to use a new RWSEM_WAKE_READERS value to indicate we only want to wake readers, but we don't currently hold any read lock. Signed-off-by: Michel Lespinasse Reviewed-by: Peter Hurley Acked-by: Davidlohr Bueso Signed-off-by: Linus Torvalds --- lib/rwsem.c | 64 ++++++++++++++++++++++++++++++------------------------------- 1 file changed, 32 insertions(+), 32 deletions(-) (limited to 'lib') diff --git a/lib/rwsem.c b/lib/rwsem.c index 9a675fa9d78..bbe48c04f36 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -4,6 +4,7 @@ * Derived from arch/i386/kernel/semaphore.c * * Writer lock-stealing by Alex Shi + * and Michel Lespinasse */ #include #include @@ -41,13 +42,11 @@ struct rwsem_waiter { enum rwsem_waiter_type type; }; -/* Wake types for __rwsem_do_wake(). Note that RWSEM_WAKE_NO_ACTIVE and - * RWSEM_WAKE_READ_OWNED imply that the spinlock must have been kept held - * since the rwsem value was observed. - */ -#define RWSEM_WAKE_ANY 0 /* Wake whatever's at head of wait list */ -#define RWSEM_WAKE_NO_ACTIVE 1 /* rwsem was observed with no active thread */ -#define RWSEM_WAKE_READ_OWNED 2 /* rwsem was observed to be read owned */ +enum rwsem_wake_type { + RWSEM_WAKE_ANY, /* Wake whatever's at head of wait list */ + RWSEM_WAKE_READERS, /* Wake readers only */ + RWSEM_WAKE_READ_OWNED /* Waker thread holds the read lock */ +}; /* * handle the lock release when processes blocked on it that can now run @@ -60,16 +59,16 @@ struct rwsem_waiter { * - writers are only woken if downgrading is false */ static struct rw_semaphore * -__rwsem_do_wake(struct rw_semaphore *sem, int wake_type) +__rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type) { struct rwsem_waiter *waiter; struct task_struct *tsk; struct list_head *next; - signed long woken, loop, adjustment; + signed long oldcount, woken, loop, adjustment; waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); if (waiter->type == RWSEM_WAITING_FOR_WRITE) { - if (wake_type != RWSEM_WAKE_READ_OWNED) + if (wake_type == RWSEM_WAKE_ANY) /* Wake writer at the front of the queue, but do not * grant it the lock yet as we want other writers * to be able to steal it. Readers, on the other hand, @@ -79,24 +78,24 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) goto out; } - /* If we come here from up_xxxx(), another thread might have reached - * rwsem_down_failed_common() before we acquired the spinlock and - * woken up a waiter, making it now active. We prefer to check for - * this first in order to not spend too much time with the spinlock - * held if we're not going to be able to wake up readers in the end. - * - * Note that we do not need to update the rwsem count: any writer - * trying to acquire rwsem will run rwsem_down_write_failed() due - * to the waiting threads and block trying to acquire the spinlock. - * - * We use a dummy atomic update in order to acquire the cache line - * exclusively since we expect to succeed and run the final rwsem - * count adjustment pretty soon. + /* Writers might steal the lock before we grant it to the next reader. 
+ * We prefer to do the first reader grant before counting readers + * so we can bail out early if a writer stole the lock. */ - if (wake_type == RWSEM_WAKE_ANY && - rwsem_atomic_update(0, sem) < RWSEM_WAITING_BIAS) - /* Someone grabbed the sem for write already */ - goto out; + adjustment = 0; + if (wake_type != RWSEM_WAKE_READ_OWNED) { + adjustment = RWSEM_ACTIVE_READ_BIAS; + try_reader_grant: + oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; + if (unlikely(oldcount < RWSEM_WAITING_BIAS)) { + /* A writer stole the lock. Undo our reader grant. */ + if (rwsem_atomic_update(-adjustment, sem) & + RWSEM_ACTIVE_MASK) + goto out; + /* Last active locker left. Retry waking readers. */ + goto try_reader_grant; + } + } /* Grant an infinite number of read locks to the readers at the front * of the queue. Note we increment the 'active part' of the count by @@ -114,12 +113,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) } while (waiter->type != RWSEM_WAITING_FOR_WRITE); - adjustment = woken * RWSEM_ACTIVE_READ_BIAS; + adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; if (waiter->type != RWSEM_WAITING_FOR_WRITE) /* hit end of list above */ adjustment -= RWSEM_WAITING_BIAS; - rwsem_atomic_add(adjustment, sem); + if (adjustment) + rwsem_atomic_add(adjustment, sem); next = sem->wait_list.next; loop = woken; @@ -164,8 +164,8 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) count = rwsem_atomic_update(adjustment, sem); /* If there are no active locks, wake the front queued process(es). */ - if (count == RWSEM_WAITING_BIAS) - sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE); + if (!(count & RWSEM_ACTIVE_MASK)) + sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY); raw_spin_unlock_irq(&sem->wait_lock); @@ -209,7 +209,7 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) * any read locks that were queued ahead of us. */ if (count > RWSEM_WAITING_BIAS && adjustment == -RWSEM_ACTIVE_WRITE_BIAS) - sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); + sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS); /* wait until we successfully acquire the lock */ set_task_state(tsk, TASK_UNINTERRUPTIBLE); -- cgit v1.2.3 From 25c39325968bbcebe6cd2a1991228c9dfb48d655 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Tue, 7 May 2013 06:46:00 -0700 Subject: rwsem: do not block readers at head of queue if other readers are active This change fixes a race condition where a reader might determine it needs to block, but by the time it acquires the wait_lock the rwsem has active readers and no queued waiters. In this situation the reader can run in parallel with the existing active readers; it does not need to block until the active readers complete. Thanks to Peter Hurley for noticing this possible race. Signed-off-by: Michel Lespinasse Reviewed-by: Peter Hurley Acked-by: Davidlohr Bueso Signed-off-by: Linus Torvalds --- lib/rwsem.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/rwsem.c b/lib/rwsem.c index bbe48c04f36..61f91ca75e4 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -163,8 +163,14 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) /* we're now waiting on the lock, but no longer actively locking */ count = rwsem_atomic_update(adjustment, sem); - /* If there are no active locks, wake the front queued process(es). */ - if (!(count & RWSEM_ACTIVE_MASK)) + /* If there are no active locks, wake the front queued process(es). 
+ * + * If there are no writers and we are first in the queue, + * wake our own waiter to join the existing active readers ! + */ + if (count == RWSEM_WAITING_BIAS || + (count > RWSEM_WAITING_BIAS && + adjustment != -RWSEM_ACTIVE_READ_BIAS)) sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY); raw_spin_unlock_irq(&sem->wait_lock); -- cgit v1.2.3 From b5f541810ea9fb98d93c0ee0e00e07a22874856f Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 7 May 2013 06:46:02 -0700 Subject: rwsem: no need for explicit signed longs Change explicit "signed long" declarations into plain "long" as suggested by Peter Hurley. Signed-off-by: Davidlohr Bueso Reviewed-by: Michel Lespinasse Signed-off-by: Michel Lespinasse Signed-off-by: Linus Torvalds --- lib/rwsem.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/rwsem.c b/lib/rwsem.c index 61f91ca75e4..cf0ad2ad19f 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -64,7 +64,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type) struct rwsem_waiter *waiter; struct task_struct *tsk; struct list_head *next; - signed long oldcount, woken, loop, adjustment; + long oldcount, woken, loop, adjustment; waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); if (waiter->type == RWSEM_WAITING_FOR_WRITE) { @@ -145,10 +145,9 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type) */ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) { - signed long adjustment = -RWSEM_ACTIVE_READ_BIAS; + long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; struct rwsem_waiter waiter; struct task_struct *tsk = current; - signed long count; /* set up my own style of waitqueue */ waiter.task = tsk; @@ -193,10 +192,9 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) */ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) { - signed long adjustment = -RWSEM_ACTIVE_WRITE_BIAS; + long count, adjustment = -RWSEM_ACTIVE_WRITE_BIAS; struct rwsem_waiter waiter; struct task_struct *tsk = current; - signed long count; /* set up my own style of waitqueue */ waiter.task = tsk; -- cgit v1.2.3
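A closing aside on this last cleanup: in C, "long" and "signed long" name the same type, so the change is purely one of spelling. A one-line check (illustrative):

#include <stdio.h>

int main(void)
{
	/* (long)0 matches the signed long association because the two
	 * type names denote the same type */
	printf("%d\n", _Generic((long)0, signed long: 1, default: 0));
	return 0;
}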