From 6d53d39270ccd29938dcbd611a44870c63032521 Mon Sep 17 00:00:00 2001 From: Tomoki Sekiyama Date: Tue, 15 Oct 2013 16:42:16 -0600 Subject: elevator: Fix a race in elevator switching and md device initialization commit eb1c160b22655fd4ec44be732d6594fd1b1e44f4 upstream. The soft lockup below happens at the boot time of the system using dm multipath and the udev rules to switch scheduler. [ 356.127001] BUG: soft lockup - CPU#3 stuck for 22s! [sh:483] [ 356.127001] RIP: 0010:[] [] lock_timer_base.isra.35+0x1d/0x50 ... [ 356.127001] Call Trace: [ 356.127001] [] try_to_del_timer_sync+0x20/0x70 [ 356.127001] [] ? kmem_cache_alloc_node_trace+0x20a/0x230 [ 356.127001] [] del_timer_sync+0x52/0x60 [ 356.127001] [] cfq_exit_queue+0x32/0xf0 [ 356.127001] [] elevator_exit+0x2f/0x50 [ 356.127001] [] elevator_change+0xf1/0x1c0 [ 356.127001] [] elv_iosched_store+0x20/0x50 [ 356.127001] [] queue_attr_store+0x59/0xb0 [ 356.127001] [] sysfs_write_file+0xc6/0x140 [ 356.127001] [] vfs_write+0xbd/0x1e0 [ 356.127001] [] SyS_write+0x49/0xa0 [ 356.127001] [] system_call_fastpath+0x16/0x1b This is caused by a race between md device initialization by multipathd and shell script to switch the scheduler using sysfs. - multipathd: SyS_ioctl -> do_vfs_ioctl -> dm_ctl_ioctl -> ctl_ioctl -> table_load -> dm_setup_md_queue -> blk_init_allocated_queue -> elevator_init q->elevator = elevator_alloc(q, e); // not yet initialized - sh -c 'echo deadline > /sys/$DEVPATH/queue/scheduler': elevator_switch (in the call trace above) struct elevator_queue *old = q->elevator; q->elevator = elevator_alloc(q, new_e); elevator_exit(old); // lockup! (*) - multipathd: (cont.) err = e->ops.elevator_init_fn(q); // init fails; q->elevator is modified (*) When del_timer_sync() is called, lock_timer_base() will loop infinitely while timer->base == NULL. In this case, as timer will never initialized, it results in lockup. This patch introduces acquisition of q->sysfs_lock around elevator_init() into blk_init_allocated_queue(), to provide mutual exclusion between initialization of the q->scheduler and switching of the scheduler. This should fix this bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=902012 Signed-off-by: Tomoki Sekiyama Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-core.c | 10 +++++++++- block/elevator.c | 6 ++++++ 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index acf3bf6a44d..2c66daba44d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -741,9 +741,17 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, q->sg_reserved_size = INT_MAX; + /* Protect q->elevator from elevator_change */ + mutex_lock(&q->sysfs_lock); + /* init elevator */ - if (elevator_init(q, NULL)) + if (elevator_init(q, NULL)) { + mutex_unlock(&q->sysfs_lock); return NULL; + } + + mutex_unlock(&q->sysfs_lock); + return q; } EXPORT_SYMBOL(blk_init_allocated_queue); diff --git a/block/elevator.c b/block/elevator.c index 668394d1858..02d4390afee 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -186,6 +186,12 @@ int elevator_init(struct request_queue *q, char *name) struct elevator_type *e = NULL; int err; + /* + * q->sysfs_lock must be held to provide mutual exclusion between + * elevator_switch() and here. + */ + lockdep_assert_held(&q->sysfs_lock); + if (unlikely(q->elevator)) return 0; -- cgit v1.2.3 From 72b9401c2f5ac465d97969ef7933753642bde8bf Mon Sep 17 00:00:00 2001 From: Tomoki Sekiyama Date: Tue, 15 Oct 2013 16:42:19 -0600 Subject: elevator: acquire q->sysfs_lock in elevator_change() commit 7c8a3679e3d8e9d92d58f282161760a0e247df97 upstream. Add locking of q->sysfs_lock into elevator_change() (an exported function) to ensure it is held to protect q->elevator from elevator_init(), even if elevator_change() is called from non-sysfs paths. sysfs path (elv_iosched_store) uses __elevator_change(), non-locking version, as the lock is already taken by elv_iosched_store(). Signed-off-by: Tomoki Sekiyama Signed-off-by: Jens Axboe Cc: Josh Boyer Signed-off-by: Greg Kroah-Hartman --- block/elevator.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/elevator.c b/block/elevator.c index 02d4390afee..6d765f7e2b2 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -965,7 +965,7 @@ fail_init: /* * Switch this queue to the given IO scheduler. */ -int elevator_change(struct request_queue *q, const char *name) +static int __elevator_change(struct request_queue *q, const char *name) { char elevator_name[ELV_NAME_MAX]; struct elevator_type *e; @@ -987,6 +987,18 @@ int elevator_change(struct request_queue *q, const char *name) return elevator_switch(q, e); } + +int elevator_change(struct request_queue *q, const char *name) +{ + int ret; + + /* Protect q->elevator from elevator_init() */ + mutex_lock(&q->sysfs_lock); + ret = __elevator_change(q, name); + mutex_unlock(&q->sysfs_lock); + + return ret; +} EXPORT_SYMBOL(elevator_change); ssize_t elv_iosched_store(struct request_queue *q, const char *name, @@ -997,7 +1009,7 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name, if (!q->elevator) return count; - ret = elevator_change(q, name); + ret = __elevator_change(q, name); if (!ret) return count; -- cgit v1.2.3