about | summary | refs | log | tree | commit | diff
path: root/kernel/futex.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/futex.c')
-rw-r--r-- kernel/futex.c 232
1 file changed, 102 insertions, 130 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index c47d1015d759..e68db7745039 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -763,6 +763,29 @@ static struct futex_pi_state *alloc_pi_state(void)
return pi_state;
}
+static void pi_state_update_owner(struct futex_pi_state *pi_state,
+ struct task_struct *new_owner)
+{
+ struct task_struct *old_owner = pi_state->owner;
+
+ lockdep_assert_held(&pi_state->pi_mutex.wait_lock);
+
+ if (old_owner) {
+ raw_spin_lock(&old_owner->pi_lock);
+ WARN_ON(list_empty(&pi_state->list));
+ list_del_init(&pi_state->list);
+ raw_spin_unlock(&old_owner->pi_lock);
+ }
+
+ if (new_owner) {
+ raw_spin_lock(&new_owner->pi_lock);
+ WARN_ON(!list_empty(&pi_state->list));
+ list_add(&pi_state->list, &new_owner->pi_state_list);
+ pi_state->owner = new_owner;
+ raw_spin_unlock(&new_owner->pi_lock);
+ }
+}
+
static void get_pi_state(struct futex_pi_state *pi_state)
{
WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount));
@@ -785,17 +808,11 @@ static void put_pi_state(struct futex_pi_state *pi_state)
* and has cleaned up the pi_state already
*/
if (pi_state->owner) {
- struct task_struct *owner;
unsigned long flags;
raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
- owner = pi_state->owner;
- if (owner) {
- raw_spin_lock(&owner->pi_lock);
- list_del_init(&pi_state->list);
- raw_spin_unlock(&owner->pi_lock);
- }
- rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner);
+ pi_state_update_owner(pi_state, NULL);
+ rt_mutex_proxy_unlock(&pi_state->pi_mutex);
raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
}
@@ -941,7 +958,8 @@ static inline void exit_pi_state_list(struct task_struct *curr) { }
* FUTEX_OWNER_DIED bit. See [4]
*
* [10] There is no transient state which leaves owner and user space
- * TID out of sync.
+ * TID out of sync. Except one error case where the kernel is denied
+ * write access to the user address, see fixup_pi_state_owner().
*
*
* Serialization and lifetime rules:
@@ -1521,26 +1539,15 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_
ret = -EINVAL;
}
- if (ret)
- goto out_unlock;
-
- /*
- * This is a point of no return; once we modify the uval there is no
- * going back and subsequent operations must not fail.
- */
-
- raw_spin_lock(&pi_state->owner->pi_lock);
- WARN_ON(list_empty(&pi_state->list));
- list_del_init(&pi_state->list);
- raw_spin_unlock(&pi_state->owner->pi_lock);
-
- raw_spin_lock(&new_owner->pi_lock);
- WARN_ON(!list_empty(&pi_state->list));
- list_add(&pi_state->list, &new_owner->pi_state_list);
- pi_state->owner = new_owner;
- raw_spin_unlock(&new_owner->pi_lock);
-
- postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
+ if (!ret) {
+ /*
+ * This is a point of no return; once we modified the uval
+ * there is no going back and subsequent operations must
+ * not fail.
+ */
+ pi_state_update_owner(pi_state, new_owner);
+ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
+ }
out_unlock:
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
@@ -2323,18 +2330,13 @@ static void unqueue_me_pi(struct futex_q *q)
spin_unlock(q->lock_ptr);
}
-static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
- struct task_struct *argowner)
+static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
+ struct task_struct *argowner)
{
struct futex_pi_state *pi_state = q->pi_state;
- u32 uval, curval, newval;
struct task_struct *oldowner, *newowner;
- u32 newtid;
- int ret, err = 0;
-
- lockdep_assert_held(q->lock_ptr);
-
- raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+ u32 uval, curval, newval, newtid;
+ int err = 0;
oldowner = pi_state->owner;
@@ -2368,14 +2370,12 @@ retry:
* We raced against a concurrent self; things are
* already fixed up. Nothing to do.
*/
- ret = 0;
- goto out_unlock;
+ return 0;
}
if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
- /* We got the lock after all, nothing to fix. */
- ret = 0;
- goto out_unlock;
+ /* We got the lock. pi_state is correct. Tell caller. */
+ return 1;
}
/*
@@ -2402,8 +2402,7 @@ retry:
* We raced against a concurrent self; things are
* already fixed up. Nothing to do.
*/
- ret = 0;
- goto out_unlock;
+ return 1;
}
newowner = argowner;
}
@@ -2433,22 +2432,9 @@ retry:
* We fixed up user space. Now we need to fix the pi_state
* itself.
*/
- if (pi_state->owner != NULL) {
- raw_spin_lock(&pi_state->owner->pi_lock);
- WARN_ON(list_empty(&pi_state->list));
- list_del_init(&pi_state->list);
- raw_spin_unlock(&pi_state->owner->pi_lock);
- }
-
- pi_state->owner = newowner;
+ pi_state_update_owner(pi_state, newowner);
- raw_spin_lock(&newowner->pi_lock);
- WARN_ON(!list_empty(&pi_state->list));
- list_add(&pi_state->list, &newowner->pi_state_list);
- raw_spin_unlock(&newowner->pi_lock);
- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
-
- return 0;
+ return argowner == current;
/*
* In order to reschedule or handle a page fault, we need to drop the
@@ -2469,17 +2455,16 @@ handle_err:
switch (err) {
case -EFAULT:
- ret = fault_in_user_writeable(uaddr);
+ err = fault_in_user_writeable(uaddr);
break;
case -EAGAIN:
cond_resched();
- ret = 0;
+ err = 0;
break;
default:
WARN_ON_ONCE(1);
- ret = err;
break;
}
@@ -2489,17 +2474,44 @@ handle_err:
/*
* Check if someone else fixed it for us:
*/
- if (pi_state->owner != oldowner) {
- ret = 0;
- goto out_unlock;
- }
+ if (pi_state->owner != oldowner)
+ return argowner == current;
- if (ret)
- goto out_unlock;
+ /* Retry if err was -EAGAIN or the fault in succeeded */
+ if (!err)
+ goto retry;
- goto retry;
+ /*
+ * fault_in_user_writeable() failed so user state is immutable. At
+ * best we can make the kernel state consistent but user state will
+ * be most likely hosed and any subsequent unlock operation will be
+ * rejected due to PI futex rule [10].
+ *
+ * Ensure that the rtmutex owner is also the pi_state owner despite
+ * the user space value claiming something different. There is no
+ * point in unlocking the rtmutex if current is the owner as it
+ * would need to wait until the next waiter has taken the rtmutex
+ * to guarantee consistent state. Keep it simple. Userspace asked
+ * for this wreckaged state.
+ *
+ * The rtmutex has an owner - either current or some other
+ * task. See the EAGAIN loop above.
+ */
+ pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex));
-out_unlock:
+ return err;
+}
+
+static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
+ struct task_struct *argowner)
+{
+ struct futex_pi_state *pi_state = q->pi_state;
+ int ret;
+
+ lockdep_assert_held(q->lock_ptr);
+
+ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+ ret = __fixup_pi_state_owner(uaddr, q, argowner);
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
return ret;
}
@@ -2523,8 +2535,6 @@ static long futex_wait_restart(struct restart_block *restart);
*/
static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
{
- int ret = 0;
-
if (locked) {
/*
* Got the lock. We might not be the anticipated owner if we
@@ -2535,8 +2545,8 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
* stable state, anything else needs more attention.
*/
if (q->pi_state->owner != current)
- ret = fixup_pi_state_owner(uaddr, q, current);
- return ret ? ret : locked;
+ return fixup_pi_state_owner(uaddr, q, current);
+ return 1;
}
/*
@@ -2547,23 +2557,17 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
* Another speculative read; pi_state->owner == current is unstable
* but needs our attention.
*/
- if (q->pi_state->owner == current) {
- ret = fixup_pi_state_owner(uaddr, q, NULL);
- return ret;
- }
+ if (q->pi_state->owner == current)
+ return fixup_pi_state_owner(uaddr, q, NULL);
/*
* Paranoia check. If we did not take the lock, then we should not be
- * the owner of the rt_mutex.
+ * the owner of the rt_mutex. Warn and establish consistent state.
*/
- if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) {
- printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
- "pi-state %p\n", ret,
- q->pi_state->pi_mutex.owner,
- q->pi_state->owner);
- }
+ if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current))
+ return fixup_pi_state_owner(uaddr, q, current);
- return ret;
+ return 0;
}
/**
@@ -2771,7 +2775,6 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
ktime_t *time, int trylock)
{
struct hrtimer_sleeper timeout, *to;
- struct futex_pi_state *pi_state = NULL;
struct task_struct *exiting = NULL;
struct rt_mutex_waiter rt_waiter;
struct futex_hash_bucket *hb;
@@ -2907,23 +2910,8 @@ no_block:
if (res)
ret = (res < 0) ? res : 0;
- /*
- * If fixup_owner() faulted and was unable to handle the fault, unlock
- * it and return the fault to userspace.
- */
- if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) {
- pi_state = q.pi_state;
- get_pi_state(pi_state);
- }
-
/* Unqueue and drop the lock */
unqueue_me_pi(&q);
-
- if (pi_state) {
- rt_mutex_futex_unlock(&pi_state->pi_mutex);
- put_pi_state(pi_state);
- }
-
goto out;
out_unlock_put_key:
@@ -3024,7 +3012,7 @@ retry:
* Success, we're done! No tricky corner cases.
*/
if (!ret)
- goto out_putkey;
+ return ret;
/*
* The atomic access to the futex value generated a
* pagefault, so retry the user-access and the wakeup:
@@ -3041,7 +3029,7 @@ retry:
* wake_futex_pi has detected invalid state. Tell user
* space.
*/
- goto out_putkey;
+ return ret;
}
/*
@@ -3062,7 +3050,7 @@ retry:
default:
WARN_ON_ONCE(1);
- goto out_putkey;
+ return ret;
}
}
@@ -3073,7 +3061,6 @@ retry:
out_unlock:
spin_unlock(&hb->lock);
-out_putkey:
return ret;
pi_retry:
@@ -3183,7 +3170,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
u32 __user *uaddr2)
{
struct hrtimer_sleeper timeout, *to;
- struct futex_pi_state *pi_state = NULL;
struct rt_mutex_waiter rt_waiter;
struct futex_hash_bucket *hb;
union futex_key key2 = FUTEX_KEY_INIT;
@@ -3261,16 +3247,17 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
if (q.pi_state && (q.pi_state->owner != current)) {
spin_lock(q.lock_ptr);
ret = fixup_pi_state_owner(uaddr2, &q, current);
- if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
- pi_state = q.pi_state;
- get_pi_state(pi_state);
- }
/*
* Drop the reference to the pi state which
* the requeue_pi() code acquired for us.
*/
put_pi_state(q.pi_state);
spin_unlock(q.lock_ptr);
+ /*
+ * Adjust the return value. It's either -EFAULT or
+ * success (1) but the caller expects 0 for success.
+ */
+ ret = ret < 0 ? ret : 0;
}
} else {
struct rt_mutex *pi_mutex;
@@ -3301,25 +3288,10 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
if (res)
ret = (res < 0) ? res : 0;
- /*
- * If fixup_pi_state_owner() faulted and was unable to handle
- * the fault, unlock the rt_mutex and return the fault to
- * userspace.
- */
- if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
- pi_state = q.pi_state;
- get_pi_state(pi_state);
- }
-
/* Unqueue and drop the lock. */
unqueue_me_pi(&q);
}
- if (pi_state) {
- rt_mutex_futex_unlock(&pi_state->pi_mutex);
- put_pi_state(pi_state);
- }
-
if (ret == -EINTR) {
/*
* We've already been requeued, but cannot restart by calling
@@ -3790,8 +3762,8 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
- struct __kernel_timespec __user *, utime, u32 __user *, uaddr2,
- u32, val3)
+ const struct __kernel_timespec __user *, utime,
+ u32 __user *, uaddr2, u32, val3)
{
struct timespec64 ts;
ktime_t t, *tp = NULL;
@@ -3986,7 +3958,7 @@ err_unlock:
#ifdef CONFIG_COMPAT_32BIT_TIME
SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
- struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
+ const struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
u32, val3)
{
struct timespec64 ts;