From 00380a404fc4235e9b8b39598138bd3223a27b8a Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 23 Mar 2012 09:58:54 +0100
Subject: block: blk_alloc_queue_node(): use caller's GFP flags instead of
 GFP_KERNEL

We should use the GFP flags that the caller specified instead of picking
our own.  All the callers specify GFP_KERNEL so this doesn't make a
difference to how the kernel runs, it's just a cleanup.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'block')

diff --git a/block/blk-core.c b/block/blk-core.c
index 3a78b00edd71..414e8224588f 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -483,7 +483,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	if (!q)
 		return NULL;
 
-	q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL);
+	q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
 	if (q->id < 0)
 		goto fail_q;
 
-- 
cgit v1.2.3


From 8bcb6c7d48eb341b1f49f814cdcbe05eb6f15680 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 30 Mar 2012 12:33:28 +0200
Subject: block: use lockdep_assert_held for queue locking

Instead of an ugly open coded variant.

Cc: axboe@kernel.dk
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-throttle.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'block')

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 5eed6a76721d..f2ddb94626bd 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1218,7 +1218,7 @@ void blk_throtl_drain(struct request_queue *q)
 	struct bio_list bl;
 	struct bio *bio;
 
-	WARN_ON_ONCE(!queue_is_locked(q));
+	queue_lockdep_assert_held(q);
 
 	bio_list_init(&bl);
 
-- 
cgit v1.2.3


From 5bf14c0727a07ded1bd9fa6d77923d7e6dc32833 Mon Sep 17 00:00:00 2001
From: Tao Ma <boyu.mt@taobao.com>
Date: Sun, 1 Apr 2012 14:33:39 -0700
Subject: block: Make cfq_target_latency tunable through sysfs.

In cfq, when we calculate a time slice for a process(or a cfqq to
be precise), we have to consider the cfq_target_latency so that all the
sync request have an estimated latency(300ms) and it is controlled by
cfq_target_latency. But in some hadoop test, we have found that if
there are many processes doing sequential read(24 for example), the
throughput is bad because every process can only work for about 25ms
and the cfqq is switched. That leads to a higher disk seek. We can
achive the good throughput by setting low_latency=0, but then some
read's latency is too much for the application.

So this patch makes cfq_target_latency tunable through sysfs so that
we can tune it and find some magic number which is not bad for both
the throughput and the read latency.

Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Tao Ma <boyu.mt@taobao.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/cfq-iosched.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'block')

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 457295253566..3c38536bd52c 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -295,6 +295,7 @@ struct cfq_data {
 	unsigned int cfq_slice_idle;
 	unsigned int cfq_group_idle;
 	unsigned int cfq_latency;
+	unsigned int cfq_target_latency;
 
 	/*
 	 * Fallback dummy cfqq for extreme OOM conditions
@@ -604,7 +605,7 @@ cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg)
 {
 	struct cfq_rb_root *st = &cfqd->grp_service_tree;
 
-	return cfq_target_latency * cfqg->weight / st->total_weight;
+	return cfqd->cfq_target_latency * cfqg->weight / st->total_weight;
 }
 
 static inline unsigned
@@ -2271,7 +2272,8 @@ new_workload:
 		 * to have higher weight. A more accurate thing would be to
 		 * calculate system wide asnc/sync ratio.
 		 */
-		tmp = cfq_target_latency * cfqg_busy_async_queues(cfqd, cfqg);
+		tmp = cfqd->cfq_target_latency *
+			cfqg_busy_async_queues(cfqd, cfqg);
 		tmp = tmp/cfqd->busy_queues;
 		slice = min_t(unsigned, slice, tmp);
 
@@ -3737,6 +3739,7 @@ static void *cfq_init_queue(struct request_queue *q)
 	cfqd->cfq_back_penalty = cfq_back_penalty;
 	cfqd->cfq_slice[0] = cfq_slice_async;
 	cfqd->cfq_slice[1] = cfq_slice_sync;
+	cfqd->cfq_target_latency = cfq_target_latency;
 	cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
 	cfqd->cfq_slice_idle = cfq_slice_idle;
 	cfqd->cfq_group_idle = cfq_group_idle;
@@ -3788,6 +3791,7 @@ SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
 SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
 SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
 SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0);
+SHOW_FUNCTION(cfq_target_latency_show, cfqd->cfq_target_latency, 1);
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
@@ -3821,6 +3825,7 @@ STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
 STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1,
 		UINT_MAX, 0);
 STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0);
+STORE_FUNCTION(cfq_target_latency_store, &cfqd->cfq_target_latency, 1, UINT_MAX, 1);
 #undef STORE_FUNCTION
 
 #define CFQ_ATTR(name) \
@@ -3838,6 +3843,7 @@ static struct elv_fs_entry cfq_attrs[] = {
 	CFQ_ATTR(slice_idle),
 	CFQ_ATTR(group_idle),
 	CFQ_ATTR(low_latency),
+	CFQ_ATTR(target_latency),
 	__ATTR_NULL
 };
 
-- 
cgit v1.2.3


From 1b2e19f17ed327af6add02978efdf354e4f8e4df Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fusionio.com>
Date: Fri, 6 Apr 2012 11:37:47 -0600
Subject: block: make auto block plug flush threshold per-disk based

We do auto block plug flush to reduce latency, the threshold is 16
requests. This works well if the task is accessing one or two drives.
The problem is if the task is accessing a raid 0 device and the raid
disk number is big, say 8 or 16, 16/8 = 2 or 16/16=1, we will have
heavy lock contention.

This patch makes the threshold per-disk based. The latency should be
still ok accessing one or two drives. The setup with application
accessing a lot of drives in the meantime uaually is big machine,
avoiding lock contention is more important, because any contention
will actually increase latency.

Signed-off-by: Shaohua Li <shli@fusionio.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'block')

diff --git a/block/blk-core.c b/block/blk-core.c
index 414e8224588f..1f61b74867e4 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1277,7 +1277,8 @@ static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,
 	list_for_each_entry_reverse(rq, &plug->list, queuelist) {
 		int el_ret;
 
-		(*request_count)++;
+		if (rq->q == q)
+			(*request_count)++;
 
 		if (rq->q != q || !blk_rq_merge_ok(rq, bio))
 			continue;
-- 
cgit v1.2.3