From 3d106fba2e7eb6967b1e2cc147a6894ec4307cef Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@kernel.org>
Date: Tue, 6 Nov 2012 12:39:51 +0100
Subject: block CFQ: avoid moving request to different queue

request is queued in cfqq->fifo list. Looks it's possible we are moving a
request from one cfqq to another in request merge case. In such case, adjusting
the fifo list order doesn't make sense and is impossible if we don't iterate
the whole fifo list.

My test does hit one case the two cfqq are different, but didn't cause kernel
crash, maybe it's because fifo list isn't used frequently. Anyway, from the
code logic, this is buggy.

I thought we can re-enable the recusive merge logic after this is fixed.

Signed-off-by: Shaohua Li <shli@fusionio.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/cfq-iosched.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'block')

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index fb52df9744f..e62e9205b80 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1973,7 +1973,8 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
 	 * reposition in fifo if next is older than rq
 	 */
 	if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
-	    time_before(rq_fifo_time(next), rq_fifo_time(rq))) {
+	    time_before(rq_fifo_time(next), rq_fifo_time(rq)) &&
+	    cfqq == RQ_CFQQ(next)) {
 		list_move(&rq->queuelist, &next->queuelist);
 		rq_set_fifo_time(rq, rq_fifo_time(next));
 	}
-- 
cgit v1.2.3


From bee0393cc12b6d8f10e884e555a095e050e0b2b9 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fusionio.com>
Date: Fri, 9 Nov 2012 08:44:27 +0100
Subject: block: recursive merge requests

In a workload, thread 1 accesses a, a+2, ..., thread 2 accesses a+1, a+3,....
When the requests are flushed to queue, a and a+1 are merged to (a, a+1), a+2
and a+3 too to (a+2, a+3), but (a, a+1) and (a+2, a+3) aren't merged.

If we do recursive merge for such interleave access, some workloads throughput
get improvement. A recent worload I'm checking on is swap, below change
boostes the throughput around 5% ~ 10%.

Signed-off-by: Shaohua Li <shli@fusionio.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/elevator.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'block')

diff --git a/block/elevator.c b/block/elevator.c
index 9b1d42b62f2..9edba1b8323 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -458,6 +458,7 @@ static bool elv_attempt_insert_merge(struct request_queue *q,
 				     struct request *rq)
 {
 	struct request *__rq;
+	bool ret;
 
 	if (blk_queue_nomerges(q))
 		return false;
@@ -471,14 +472,21 @@ static bool elv_attempt_insert_merge(struct request_queue *q,
 	if (blk_queue_noxmerges(q))
 		return false;
 
+	ret = false;
 	/*
 	 * See if our hash lookup can find a potential backmerge.
 	 */
-	__rq = elv_rqhash_find(q, blk_rq_pos(rq));
-	if (__rq && blk_attempt_req_merge(q, __rq, rq))
-		return true;
+	while (1) {
+		__rq = elv_rqhash_find(q, blk_rq_pos(rq));
+		if (!__rq || !blk_attempt_req_merge(q, __rq, rq))
+			break;
 
-	return false;
+		/* The merged request could be merged with others, try again */
+		ret = true;
+		rq = __rq;
+	}
+
+	return ret;
 }
 
 void elv_merged_request(struct request_queue *q, struct request *rq, int type)
-- 
cgit v1.2.3


From c304a51bf42a7ce48f430a371e1213472c89e13d Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <elezegarcia@gmail.com>
Date: Sat, 10 Nov 2012 10:39:44 +0100
Subject: block: use NUMA_NO_NODE instead of -1

Signed-off-by: Ezequiel Garcia <elezegarcia@gmail.com>

Modified by me to cover blk_init_queue() as well.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c | 4 ++--
 block/genhd.c    | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'block')

diff --git a/block/blk-core.c b/block/blk-core.c
index 3c95c4d6e31..ee0e5cafa85 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -549,7 +549,7 @@ void blk_exit_rl(struct request_list *rl)
 
 struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
 {
-	return blk_alloc_queue_node(gfp_mask, -1);
+	return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE);
 }
 EXPORT_SYMBOL(blk_alloc_queue);
 
@@ -660,7 +660,7 @@ EXPORT_SYMBOL(blk_alloc_queue_node);
 
 struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
 {
-	return blk_init_queue_node(rfn, lock, -1);
+	return blk_init_queue_node(rfn, lock, NUMA_NO_NODE);
 }
 EXPORT_SYMBOL(blk_init_queue);
 
diff --git a/block/genhd.c b/block/genhd.c
index 6cace663a80..2a6fdf539a6 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1245,7 +1245,7 @@ EXPORT_SYMBOL(blk_lookup_devt);
 
 struct gendisk *alloc_disk(int minors)
 {
-	return alloc_disk_node(minors, -1);
+	return alloc_disk_node(minors, NUMA_NO_NODE);
 }
 EXPORT_SYMBOL(alloc_disk);
 
-- 
cgit v1.2.3


From 3f3299d5c0268d6cc3f47b446e8aca436e4a5651 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Wed, 28 Nov 2012 13:42:38 +0100
Subject: block: Rename queue dead flag

QUEUE_FLAG_DEAD is used to indicate that queuing new requests must
stop. After this flag has been set queue draining starts. However,
during the queue draining phase it is still safe to invoke the
queue's request_fn, so QUEUE_FLAG_DYING is a better name for this
flag.

This patch has been generated by running the following command
over the kernel source tree:

git grep -lEw 'blk_queue_dead|QUEUE_FLAG_DEAD' |
    xargs sed -i.tmp -e 's/blk_queue_dead/blk_queue_dying/g'      \
        -e 's/QUEUE_FLAG_DEAD/QUEUE_FLAG_DYING/g';                \
sed -i.tmp -e "s/QUEUE_FLAG_DYING$(printf \\t)*5/QUEUE_FLAG_DYING$(printf \\t)5/g" \
    include/linux/blkdev.h;                                       \
sed -i.tmp -e 's/ DEAD/ DYING/g' -e 's/dead queue/a dying queue/' \
    -e 's/Dead queue/A dying queue/' block/blk-core.c

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: James Bottomley <JBottomley@Parallels.com>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Chanho Min <chanho.min@lge.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c   |  2 +-
 block/blk-core.c     | 26 +++++++++++++-------------
 block/blk-exec.c     |  2 +-
 block/blk-sysfs.c    |  4 ++--
 block/blk-throttle.c |  2 +-
 block/blk.h          |  2 +-
 6 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'block')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index d0b770391ad..5dea4e8dbc5 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -231,7 +231,7 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
 	 * we shouldn't allow anything to go through for a bypassing queue.
 	 */
 	if (unlikely(blk_queue_bypass(q)))
-		return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
+		return ERR_PTR(blk_queue_dying(q) ? -EINVAL : -EBUSY);
 	return __blkg_lookup_create(blkcg, q, NULL);
 }
 EXPORT_SYMBOL_GPL(blkg_lookup_create);
diff --git a/block/blk-core.c b/block/blk-core.c
index ee0e5cafa85..1a95272cca5 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -473,20 +473,20 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
  * blk_cleanup_queue - shutdown a request queue
  * @q: request queue to shutdown
  *
- * Mark @q DEAD, drain all pending requests, destroy and put it.  All
+ * Mark @q DYING, drain all pending requests, destroy and put it.  All
  * future requests will be failed immediately with -ENODEV.
  */
 void blk_cleanup_queue(struct request_queue *q)
 {
 	spinlock_t *lock = q->queue_lock;
 
-	/* mark @q DEAD, no new request or merges will be allowed afterwards */
+	/* mark @q DYING, no new request or merges will be allowed afterwards */
 	mutex_lock(&q->sysfs_lock);
-	queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
+	queue_flag_set_unlocked(QUEUE_FLAG_DYING, q);
 	spin_lock_irq(lock);
 
 	/*
-	 * Dead queue is permanently in bypass mode till released.  Note
+	 * A dying queue is permanently in bypass mode till released.  Note
 	 * that, unlike blk_queue_bypass_start(), we aren't performing
 	 * synchronize_rcu() after entering bypass mode to avoid the delay
 	 * as some drivers create and destroy a lot of queues while
@@ -499,11 +499,11 @@ void blk_cleanup_queue(struct request_queue *q)
 
 	queue_flag_set(QUEUE_FLAG_NOMERGES, q);
 	queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
-	queue_flag_set(QUEUE_FLAG_DEAD, q);
+	queue_flag_set(QUEUE_FLAG_DYING, q);
 	spin_unlock_irq(lock);
 	mutex_unlock(&q->sysfs_lock);
 
-	/* drain all requests queued before DEAD marking */
+	/* drain all requests queued before DYING marking */
 	blk_drain_queue(q, true);
 
 	/* @q won't process any more request, flush async actions */
@@ -716,7 +716,7 @@ EXPORT_SYMBOL(blk_init_allocated_queue);
 
 bool blk_get_queue(struct request_queue *q)
 {
-	if (likely(!blk_queue_dead(q))) {
+	if (likely(!blk_queue_dying(q))) {
 		__blk_get_queue(q);
 		return true;
 	}
@@ -870,7 +870,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
 	const bool is_sync = rw_is_sync(rw_flags) != 0;
 	int may_queue;
 
-	if (unlikely(blk_queue_dead(q)))
+	if (unlikely(blk_queue_dying(q)))
 		return NULL;
 
 	may_queue = elv_may_queue(q, rw_flags);
@@ -1050,7 +1050,7 @@ retry:
 	if (rq)
 		return rq;
 
-	if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dead(q))) {
+	if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dying(q))) {
 		blk_put_rl(rl);
 		return NULL;
 	}
@@ -1910,7 +1910,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 		return -EIO;
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	if (unlikely(blk_queue_dead(q))) {
+	if (unlikely(blk_queue_dying(q))) {
 		spin_unlock_irqrestore(q->queue_lock, flags);
 		return -ENODEV;
 	}
@@ -2885,9 +2885,9 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
 	trace_block_unplug(q, depth, !from_schedule);
 
 	/*
-	 * Don't mess with dead queue.
+	 * Don't mess with a dying queue.
 	 */
-	if (unlikely(blk_queue_dead(q))) {
+	if (unlikely(blk_queue_dying(q))) {
 		spin_unlock(q->queue_lock);
 		return;
 	}
@@ -2996,7 +2996,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 		/*
 		 * Short-circuit if @q is dead
 		 */
-		if (unlikely(blk_queue_dead(q))) {
+		if (unlikely(blk_queue_dying(q))) {
 			__blk_end_request_all(rq, -ENODEV);
 			continue;
 		}
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 8b6dc5bd4dd..4aec98df7ba 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -60,7 +60,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 
 	spin_lock_irq(q->queue_lock);
 
-	if (unlikely(blk_queue_dead(q))) {
+	if (unlikely(blk_queue_dying(q))) {
 		rq->errors = -ENXIO;
 		if (rq->end_io)
 			rq->end_io(rq, rq->errors);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index ce620460882..788147797a7 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -466,7 +466,7 @@ queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
 	if (!entry->show)
 		return -EIO;
 	mutex_lock(&q->sysfs_lock);
-	if (blk_queue_dead(q)) {
+	if (blk_queue_dying(q)) {
 		mutex_unlock(&q->sysfs_lock);
 		return -ENOENT;
 	}
@@ -488,7 +488,7 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
 
 	q = container_of(kobj, struct request_queue, kobj);
 	mutex_lock(&q->sysfs_lock);
-	if (blk_queue_dead(q)) {
+	if (blk_queue_dying(q)) {
 		mutex_unlock(&q->sysfs_lock);
 		return -ENOENT;
 	}
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index a9664fa0b60..31146225f3d 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -302,7 +302,7 @@ static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td,
 		/* if %NULL and @q is alive, fall back to root_tg */
 		if (!IS_ERR(blkg))
 			tg = blkg_to_tg(blkg);
-		else if (!blk_queue_dead(q))
+		else if (!blk_queue_dying(q))
 			tg = td_root_tg(td);
 	}
 
diff --git a/block/blk.h b/block/blk.h
index ca51543b248..2218a8a7829 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -96,7 +96,7 @@ static inline struct request *__elv_next_request(struct request_queue *q)
 			q->flush_queue_delayed = 1;
 			return NULL;
 		}
-		if (unlikely(blk_queue_dead(q)) ||
+		if (unlikely(blk_queue_dying(q)) ||
 		    !q->elevator->type->ops.elevator_dispatch_fn(q, 0))
 			return NULL;
 	}
-- 
cgit v1.2.3


From 807592a4fafba1fea6e98b9cf1fb02b7c38fb24c Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Wed, 28 Nov 2012 13:43:38 +0100
Subject: block: Let blk_drain_queue() caller obtain the queue lock

Let the caller of blk_drain_queue() obtain the queue lock to improve
readability of the patch called "Avoid that request_fn is invoked on
a dead queue".

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: James Bottomley <JBottomley@Parallels.com>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Chanho Min <chanho.min@lge.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

(limited to 'block')

diff --git a/block/blk-core.c b/block/blk-core.c
index 1a95272cca5..a182b586b06 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -349,7 +349,7 @@ void blk_put_queue(struct request_queue *q)
 EXPORT_SYMBOL(blk_put_queue);
 
 /**
- * blk_drain_queue - drain requests from request_queue
+ * __blk_drain_queue - drain requests from request_queue
  * @q: queue to drain
  * @drain_all: whether to drain all requests or only the ones w/ ELVPRIV
  *
@@ -357,15 +357,17 @@ EXPORT_SYMBOL(blk_put_queue);
  * If not, only ELVPRIV requests are drained.  The caller is responsible
  * for ensuring that no new requests which need to be drained are queued.
  */
-void blk_drain_queue(struct request_queue *q, bool drain_all)
+static void __blk_drain_queue(struct request_queue *q, bool drain_all)
+	__releases(q->queue_lock)
+	__acquires(q->queue_lock)
 {
 	int i;
 
+	lockdep_assert_held(q->queue_lock);
+
 	while (true) {
 		bool drain = false;
 
-		spin_lock_irq(q->queue_lock);
-
 		/*
 		 * The caller might be trying to drain @q before its
 		 * elevator is initialized.
@@ -401,11 +403,14 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
 			}
 		}
 
-		spin_unlock_irq(q->queue_lock);
-
 		if (!drain)
 			break;
+
+		spin_unlock_irq(q->queue_lock);
+
 		msleep(10);
+
+		spin_lock_irq(q->queue_lock);
 	}
 
 	/*
@@ -416,13 +421,9 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
 	if (q->request_fn) {
 		struct request_list *rl;
 
-		spin_lock_irq(q->queue_lock);
-
 		blk_queue_for_each_rl(rl, q)
 			for (i = 0; i < ARRAY_SIZE(rl->wait); i++)
 				wake_up_all(&rl->wait[i]);
-
-		spin_unlock_irq(q->queue_lock);
 	}
 }
 
@@ -446,7 +447,10 @@ void blk_queue_bypass_start(struct request_queue *q)
 	spin_unlock_irq(q->queue_lock);
 
 	if (drain) {
-		blk_drain_queue(q, false);
+		spin_lock_irq(q->queue_lock);
+		__blk_drain_queue(q, false);
+		spin_unlock_irq(q->queue_lock);
+
 		/* ensure blk_queue_bypass() is %true inside RCU read lock */
 		synchronize_rcu();
 	}
@@ -504,7 +508,9 @@ void blk_cleanup_queue(struct request_queue *q)
 	mutex_unlock(&q->sysfs_lock);
 
 	/* drain all requests queued before DYING marking */
-	blk_drain_queue(q, true);
+	spin_lock_irq(lock);
+	__blk_drain_queue(q, true);
+	spin_unlock_irq(lock);
 
 	/* @q won't process any more request, flush async actions */
 	del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
-- 
cgit v1.2.3


From c246e80d86736312933646896c4157daf511dadc Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 6 Dec 2012 14:32:01 +0100
Subject: block: Avoid that request_fn is invoked on a dead queue

A block driver may start cleaning up resources needed by its
request_fn as soon as blk_cleanup_queue() finished, so request_fn
must not be invoked after draining finished. This is important
when blk_run_queue() is invoked without any requests in progress.
As an example, if blk_drain_queue() and scsi_run_queue() run in
parallel, blk_drain_queue() may have finished all requests after
scsi_run_queue() has taken a SCSI device off the starved list but
before that last function has had a chance to run the queue.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Cc: James Bottomley <JBottomley@Parallels.com>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Chanho Min <chanho.min@lge.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c | 31 +++++++++++++++++++++++++++----
 block/blk-exec.c |  2 +-
 block/blk.h      |  2 ++
 3 files changed, 30 insertions(+), 5 deletions(-)

(limited to 'block')

diff --git a/block/blk-core.c b/block/blk-core.c
index a182b586b06..f52d05ff5d2 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -292,6 +292,25 @@ void blk_sync_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_sync_queue);
 
+/**
+ * __blk_run_queue_uncond - run a queue whether or not it has been stopped
+ * @q:	The queue to run
+ *
+ * Description:
+ *    Invoke request handling on a queue if there are any pending requests.
+ *    May be used to restart request handling after a request has completed.
+ *    This variant runs the queue whether or not the queue has been
+ *    stopped. Must be called with the queue lock held and interrupts
+ *    disabled. See also @blk_run_queue.
+ */
+inline void __blk_run_queue_uncond(struct request_queue *q)
+{
+	if (unlikely(blk_queue_dead(q)))
+		return;
+
+	q->request_fn(q);
+}
+
 /**
  * __blk_run_queue - run a single device queue
  * @q:	The queue to run
@@ -305,7 +324,7 @@ void __blk_run_queue(struct request_queue *q)
 	if (unlikely(blk_queue_stopped(q)))
 		return;
 
-	q->request_fn(q);
+	__blk_run_queue_uncond(q);
 }
 EXPORT_SYMBOL(__blk_run_queue);
 
@@ -477,8 +496,8 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
  * blk_cleanup_queue - shutdown a request queue
  * @q: request queue to shutdown
  *
- * Mark @q DYING, drain all pending requests, destroy and put it.  All
- * future requests will be failed immediately with -ENODEV.
+ * Mark @q DYING, drain all pending requests, mark @q DEAD, destroy and
+ * put it.  All future requests will be failed immediately with -ENODEV.
  */
 void blk_cleanup_queue(struct request_queue *q)
 {
@@ -507,9 +526,13 @@ void blk_cleanup_queue(struct request_queue *q)
 	spin_unlock_irq(lock);
 	mutex_unlock(&q->sysfs_lock);
 
-	/* drain all requests queued before DYING marking */
+	/*
+	 * Drain all requests queued before DYING marking. Set DEAD flag to
+	 * prevent that q->request_fn() gets invoked after draining finished.
+	 */
 	spin_lock_irq(lock);
 	__blk_drain_queue(q, true);
+	queue_flag_set(QUEUE_FLAG_DEAD, q);
 	spin_unlock_irq(lock);
 
 	/* @q won't process any more request, flush async actions */
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 4aec98df7ba..1320e74d79b 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -72,7 +72,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 	__blk_run_queue(q);
 	/* the queue is stopped so it won't be run */
 	if (rq->cmd_type == REQ_TYPE_PM_RESUME)
-		q->request_fn(q);
+		__blk_run_queue_uncond(q);
 	spin_unlock_irq(q->queue_lock);
 }
 EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
diff --git a/block/blk.h b/block/blk.h
index 2218a8a7829..47fdfdd4152 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -145,6 +145,8 @@ int blk_try_merge(struct request *rq, struct bio *bio);
 
 void blk_queue_congestion_threshold(struct request_queue *q);
 
+void __blk_run_queue_uncond(struct request_queue *q);
+
 int blk_dev_init(void);
 
 
-- 
cgit v1.2.3


From 704605711ef048a7c6ad2ec599f15d2e0baf86b2 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Wed, 28 Nov 2012 13:45:56 +0100
Subject: block: Avoid scheduling delayed work on a dead queue

Running a queue must continue after it has been marked dying until
it has been marked dead. So the function blk_run_queue_async() must
not schedule delayed work after blk_cleanup_queue() has marked a queue
dead. Hence add a test for that queue state in blk_run_queue_async()
and make sure that queue_unplugged() invokes that function with the
queue lock held. This avoids that the queue state can change after
it has been tested and before mod_delayed_work() is invoked. Drop
the queue dying test in queue_unplugged() since it is now
superfluous: __blk_run_queue() already tests whether or not the
queue is dead.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c | 33 +++++++++------------------------
 1 file changed, 9 insertions(+), 24 deletions(-)

(limited to 'block')

diff --git a/block/blk-core.c b/block/blk-core.c
index f52d05ff5d2..9fb23537c7a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -219,12 +219,13 @@ static void blk_delay_work(struct work_struct *work)
  * Description:
  *   Sometimes queueing needs to be postponed for a little while, to allow
  *   resources to come back. This function will make sure that queueing is
- *   restarted around the specified time.
+ *   restarted around the specified time. Queue lock must be held.
  */
 void blk_delay_queue(struct request_queue *q, unsigned long msecs)
 {
-	queue_delayed_work(kblockd_workqueue, &q->delay_work,
-				msecs_to_jiffies(msecs));
+	if (likely(!blk_queue_dead(q)))
+		queue_delayed_work(kblockd_workqueue, &q->delay_work,
+				   msecs_to_jiffies(msecs));
 }
 EXPORT_SYMBOL(blk_delay_queue);
 
@@ -334,11 +335,11 @@ EXPORT_SYMBOL(__blk_run_queue);
  *
  * Description:
  *    Tells kblockd to perform the equivalent of @blk_run_queue on behalf
- *    of us.
+ *    of us. The caller must hold the queue lock.
  */
 void blk_run_queue_async(struct request_queue *q)
 {
-	if (likely(!blk_queue_stopped(q)))
+	if (likely(!blk_queue_stopped(q) && !blk_queue_dead(q)))
 		mod_delayed_work(kblockd_workqueue, &q->delay_work, 0);
 }
 EXPORT_SYMBOL(blk_run_queue_async);
@@ -2913,27 +2914,11 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
 {
 	trace_block_unplug(q, depth, !from_schedule);
 
-	/*
-	 * Don't mess with a dying queue.
-	 */
-	if (unlikely(blk_queue_dying(q))) {
-		spin_unlock(q->queue_lock);
-		return;
-	}
-
-	/*
-	 * If we are punting this to kblockd, then we can safely drop
-	 * the queue_lock before waking kblockd (which needs to take
-	 * this lock).
-	 */
-	if (from_schedule) {
-		spin_unlock(q->queue_lock);
+	if (from_schedule)
 		blk_run_queue_async(q);
-	} else {
+	else
 		__blk_run_queue(q);
-		spin_unlock(q->queue_lock);
-	}
-
+	spin_unlock(q->queue_lock);
 }
 
 static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
-- 
cgit v1.2.3


From 24faf6f604efe18236bded4303009fc252913bf0 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Wed, 28 Nov 2012 13:46:45 +0100
Subject: block: Make blk_cleanup_queue() wait until request_fn finished

Some request_fn implementations, e.g. scsi_request_fn(), unlock
the queue lock internally. This may result in multiple threads
executing request_fn for the same queue simultaneously. Keep
track of the number of active request_fn calls and make sure that
blk_cleanup_queue() waits until all active request_fn invocations
have finished. A block driver may start cleaning up resources
needed by its request_fn as soon as blk_cleanup_queue() finished,
so blk_cleanup_queue() must wait for all outstanding request_fn
invocations to finish.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Reported-by: Chanho Min <chanho.min@lge.com>
Cc: James Bottomley <JBottomley@Parallels.com>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'block')

diff --git a/block/blk-core.c b/block/blk-core.c
index 9fb23537c7a..473015ee28a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -309,7 +309,16 @@ inline void __blk_run_queue_uncond(struct request_queue *q)
 	if (unlikely(blk_queue_dead(q)))
 		return;
 
+	/*
+	 * Some request_fn implementations, e.g. scsi_request_fn(), unlock
+	 * the queue lock internally. As a result multiple threads may be
+	 * running such a request function concurrently. Keep track of the
+	 * number of active request_fn invocations such that blk_drain_queue()
+	 * can wait until all these request_fn calls have finished.
+	 */
+	q->request_fn_active++;
 	q->request_fn(q);
+	q->request_fn_active--;
 }
 
 /**
@@ -408,6 +417,7 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all)
 			__blk_run_queue(q);
 
 		drain |= q->nr_rqs_elvpriv;
+		drain |= q->request_fn_active;
 
 		/*
 		 * Unfortunately, requests are queued at and tracked from
-- 
cgit v1.2.3


From 80729beb3326fd682543f4f4ea534df47ab48967 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Wed, 28 Nov 2012 13:47:36 +0100
Subject: bsg: Remove unused function bsg_goose_queue()

The function bsg_goose_queue() does not have any in-tree callers,
so let's remove it.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bsg-lib.c | 13 -------------
 1 file changed, 13 deletions(-)

(limited to 'block')

diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index deee61fbb74..650f427d915 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -151,19 +151,6 @@ failjob_rls_job:
 	return -ENOMEM;
 }
 
-/*
- * bsg_goose_queue - restart queue in case it was stopped
- * @q: request q to be restarted
- */
-void bsg_goose_queue(struct request_queue *q)
-{
-	if (!q)
-		return;
-
-	blk_run_queue_async(q);
-}
-EXPORT_SYMBOL_GPL(bsg_goose_queue);
-
 /**
  * bsg_request_fn - generic handler for bsg requests
  * @q: request queue to manage
-- 
cgit v1.2.3


From 5f6f38dbb0fc8518b19542b0ad0739a8a95f05a1 Mon Sep 17 00:00:00 2001
From: Diego Calleja <diegocg@gmail.com>
Date: Mon, 3 Dec 2012 16:04:21 +0100
Subject: partitions: enable EFI/GPT support by default

The Kconfig currently enables MSDOS partitions by default because they
are assumed to be essential, but it's necessary to enable "advanced
partition selection" in order to get GPT support. IMO GPT partitions
are becoming common enought to deserve the same treatment MSDOS
partitions get.

(Side note: I got bit by a disk that had MSDOS and GPT partition
tables, but for some reason the MSDOS table was different from the
GPT one. I was stupid enought to disable "advanced partition
selection" in my .config, which disabled GPT partitioning and made
my btrfs pool unbootable because it couldn't find the partitions)

Signed-off-by: Diego Calleja <diegocg@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/partitions/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'block')

diff --git a/block/partitions/Kconfig b/block/partitions/Kconfig
index cb5f0a3f1b0..75a54e1adbb 100644
--- a/block/partitions/Kconfig
+++ b/block/partitions/Kconfig
@@ -234,8 +234,8 @@ config KARMA_PARTITION
 	  uses a proprietary partition table.
 
 config EFI_PARTITION
-	bool "EFI GUID Partition support"
-	depends on PARTITION_ADVANCED
+	bool "EFI GUID Partition support" if PARTITION_ADVANCED
+	default y
 	select CRC32
 	help
 	  Say Y here if you would like to use hard disks under Linux which
-- 
cgit v1.2.3


From 75274551c81796b636c5acb0c2597dec7ec2e6c4 Mon Sep 17 00:00:00 2001
From: xiaobing tu <xiaobing.tu@intel.com>
Date: Sun, 9 Dec 2012 19:19:23 +0100
Subject: deadline: Allow 0ms deadline latency, increase the read speed

Change a timer compare from after to after-equals, thus allowing
0 timeout and making deadline schedule FIFO.

Signed-off-by: xiaobing tu <xiaobing.tu@intel.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/deadline-iosched.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'block')

diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 599b12e5380..90037b5eb17 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -230,7 +230,7 @@ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
 	/*
 	 * rq is expired!
 	 */
-	if (time_after(jiffies, rq_fifo_time(rq)))
+	if (time_after_eq(jiffies, rq_fifo_time(rq)))
 		return 1;
 
 	return 0;
-- 
cgit v1.2.3


From 8dd2cb7e880d2f77fba53b523c99133ad5054cfd Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@kernel.org>
Date: Fri, 14 Dec 2012 11:15:36 +0800
Subject: block: discard granularity might not be power of 2

In MD raid case, discard granularity might not be power of 2, for example, a
4-disk raid5 has 3*chunk_size discard granularity. Correct the calculation for
such cases.

Reported-by: Neil Brown <neilb@suse.de>
Signed-off-by: Shaohua Li <shli@fusionio.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-lib.c      | 23 +++++++++++++----------
 block/blk-settings.c |  6 +++---
 2 files changed, 16 insertions(+), 13 deletions(-)

(limited to 'block')

diff --git a/block/blk-lib.c b/block/blk-lib.c
index 9373b58dfab..5677fd33d7d 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -43,8 +43,8 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 	DECLARE_COMPLETION_ONSTACK(wait);
 	struct request_queue *q = bdev_get_queue(bdev);
 	int type = REQ_WRITE | REQ_DISCARD;
-	unsigned int max_discard_sectors;
-	unsigned int granularity, alignment, mask;
+	sector_t max_discard_sectors;
+	sector_t granularity, alignment;
 	struct bio_batch bb;
 	struct bio *bio;
 	int ret = 0;
@@ -57,15 +57,16 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 
 	/* Zero-sector (unknown) and one-sector granularities are the same.  */
 	granularity = max(q->limits.discard_granularity >> 9, 1U);
-	mask = granularity - 1;
-	alignment = (bdev_discard_alignment(bdev) >> 9) & mask;
+	alignment = bdev_discard_alignment(bdev) >> 9;
+	alignment = sector_div(alignment, granularity);
 
 	/*
 	 * Ensure that max_discard_sectors is of the proper
 	 * granularity, so that requests stay aligned after a split.
 	 */
 	max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
-	max_discard_sectors = round_down(max_discard_sectors, granularity);
+	sector_div(max_discard_sectors, granularity);
+	max_discard_sectors *= granularity;
 	if (unlikely(!max_discard_sectors)) {
 		/* Avoid infinite loop below. Being cautious never hurts. */
 		return -EOPNOTSUPP;
@@ -83,7 +84,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 
 	while (nr_sects) {
 		unsigned int req_sects;
-		sector_t end_sect;
+		sector_t end_sect, tmp;
 
 		bio = bio_alloc(gfp_mask, 1);
 		if (!bio) {
@@ -98,10 +99,12 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		 * misaligned, stop the discard at the previous aligned sector.
 		 */
 		end_sect = sector + req_sects;
-		if (req_sects < nr_sects && (end_sect & mask) != alignment) {
-			end_sect =
-				round_down(end_sect - alignment, granularity)
-				+ alignment;
+		tmp = end_sect;
+		if (req_sects < nr_sects &&
+		    sector_div(tmp, granularity) != alignment) {
+			end_sect = end_sect - alignment;
+			sector_div(end_sect, granularity);
+			end_sect = end_sect * granularity + alignment;
 			req_sects = end_sect - sector;
 		}
 
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 779bb7646bc..c50ecf0ea3b 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -611,7 +611,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 			bottom = b->discard_granularity + alignment;
 
 			/* Verify that top and bottom intervals line up */
-			if (max(top, bottom) & (min(top, bottom) - 1))
+			if ((max(top, bottom) % min(top, bottom)) != 0)
 				t->discard_misaligned = 1;
 		}
 
@@ -619,8 +619,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 						      b->max_discard_sectors);
 		t->discard_granularity = max(t->discard_granularity,
 					     b->discard_granularity);
-		t->discard_alignment = lcm(t->discard_alignment, alignment) &
-			(t->discard_granularity - 1);
+		t->discard_alignment = lcm(t->discard_alignment, alignment) %
+			t->discard_granularity;
 	}
 
 	return ret;
-- 
cgit v1.2.3


From 0cfbcafcae8b7364b5fa96c2b26ccde7a3a296a9 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@kernel.org>
Date: Fri, 14 Dec 2012 11:15:51 +0800
Subject: block: add plug for blkdev_issue_discard

Last post of this patch appears lost, so I resend this.

Now discard merge works, add plug for blkdev_issue_discard. This will help
discard request merge especially for raid0 case. In raid0, a big discard
request is split to small requests, and if correct plug is added, such small
requests can be merged in low layer.

Signed-off-by: Shaohua Li <shli@fusionio.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-lib.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'block')

diff --git a/block/blk-lib.c b/block/blk-lib.c
index 5677fd33d7d..b3a1f2b70b3 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -48,6 +48,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 	struct bio_batch bb;
 	struct bio *bio;
 	int ret = 0;
+	struct blk_plug plug;
 
 	if (!q)
 		return -ENXIO;
@@ -82,6 +83,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 	bb.flags = 1 << BIO_UPTODATE;
 	bb.wait = &wait;
 
+	blk_start_plug(&plug);
 	while (nr_sects) {
 		unsigned int req_sects;
 		sector_t end_sect, tmp;
@@ -120,6 +122,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		atomic_inc(&bb.done);
 		submit_bio(type, bio);
 	}
+	blk_finish_plug(&plug);
 
 	/* Wait for bios in-flight */
 	if (!atomic_dec_and_test(&bb.done))
-- 
cgit v1.2.3


From cbae8d45d61f3a8c155caf267d01e5e0f0b2f4b7 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 14 Dec 2012 20:49:27 +0100
Subject: block: export block_unplug tracepoint

This allows stacked devices (like md/raid5) to provide blktrace
tracing, including unplug events.

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'block')

diff --git a/block/blk-core.c b/block/blk-core.c
index 473015ee28a..c973249d68c 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -40,6 +40,7 @@
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug);
 
 DEFINE_IDA(blk_queue_ida);
 
-- 
cgit v1.2.3