Blame - fs/bio.c - kernel/linux-linaro-stable.git

blob: 1628917e262a43e4ac99d3cea88bccbd4f9ccf22 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
Jens Axboe	0fe2347	2006-09-04 15:41:16 +0200	[diff] [blame]	2	* Copyright (C) 2001 Jens Axboe <axboe@kernel.dk>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3	*
				4	* This program is free software; you can redistribute it and/or modify
				5	* it under the terms of the GNU General Public License version 2 as
				6	* published by the Free Software Foundation.
				7	*
				8	* This program is distributed in the hope that it will be useful,
				9	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				10	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				11	* GNU General Public License for more details.
				12	*
				13	* You should have received a copy of the GNU General Public License
				14	* along with this program; if not, write to the Free Software
				15	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
				16	*
				17	*/
				18	#include <linux/mm.h>
				19	#include <linux/swap.h>
				20	#include <linux/bio.h>
				21	#include <linux/blkdev.h>
Kent Overstreet	a27bb33	2013-05-07 16:19:08 -0700	[diff] [blame]	22	#include <linux/uio.h>
Tejun Heo	852c788	2012-03-05 13:15:27 -0800	[diff] [blame]	23	#include <linux/iocontext.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	24	#include <linux/slab.h>
				25	#include <linux/init.h>
				26	#include <linux/kernel.h>
Paul Gortmaker	630d9c4	2011-11-16 23:57:37 -0500	[diff] [blame]	27	#include <linux/export.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	28	#include <linux/mempool.h>
				29	#include <linux/workqueue.h>
Tejun Heo	852c788	2012-03-05 13:15:27 -0800	[diff] [blame]	30	#include <linux/cgroup.h>
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	31	#include <scsi/sg.h> /* for struct sg_iovec */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	32
Li Zefan	5578213	2009-06-09 13:43:05 +0800	[diff] [blame]	33	#include <trace/events/block.h>
Ingo Molnar	0bfc245	2008-11-26 11:59:56 +0100	[diff] [blame]	34
Jens Axboe	392ddc3	2008-12-23 12:42:54 +0100	[diff] [blame]	35	/*
				36	* Test patch to inline a certain number of bi_io_vec's inside the bio
				37	* itself, to shrink a bio data allocation from two mempool calls to one
				38	*/
				39	#define BIO_INLINE_VECS 4
				40
Denis ChengRq	6feef53	2008-10-09 08:57:05 +0200	[diff] [blame]	41	static mempool_t *bio_split_pool __read_mostly;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	42
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	43	/*
				44	* if you change this list, also change bvec_alloc or things will
				45	* break badly! cannot be bigger than what you can fit into an
				46	* unsigned short
				47	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	48	#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
Martin K. Petersen	df67714	2011-03-08 08:28:01 +0100	[diff] [blame]	49	static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	50	BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
				51	};
				52	#undef BV
				53
				54	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	55	* fs_bio_set is the bio_set containing bio and iovec memory pools used by
				56	* IO code that does not need private memory pools.
				57	*/
Martin K. Petersen	51d654e	2008-06-17 18:59:56 +0200	[diff] [blame]	58	struct bio_set *fs_bio_set;
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	59	EXPORT_SYMBOL(fs_bio_set);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	60
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	61	/*
				62	* Our slab pool management
				63	*/
				64	struct bio_slab {
				65	struct kmem_cache *slab;
				66	unsigned int slab_ref;
				67	unsigned int slab_size;
				68	char name[8];
				69	};
				70	static DEFINE_MUTEX(bio_slab_lock);
				71	static struct bio_slab *bio_slabs;
				72	static unsigned int bio_slab_nr, bio_slab_max;
				73
				74	static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
				75	{
				76	unsigned int sz = sizeof(struct bio) + extra_size;
				77	struct kmem_cache *slab = NULL;
Alexey Khoroshilov	389d7b2	2012-08-09 15:19:25 +0200	[diff] [blame]	78	struct bio_slab bslab, new_bio_slabs;
Anna Leuschner	386bc35	2012-10-22 21:53:36 +0200	[diff] [blame]	79	unsigned int new_bio_slab_max;
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	80	unsigned int i, entry = -1;
				81
				82	mutex_lock(&bio_slab_lock);
				83
				84	i = 0;
				85	while (i < bio_slab_nr) {
Thiago Farina	f06f135	2010-01-19 14:07:09 +0100	[diff] [blame]	86	bslab = &bio_slabs[i];
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	87
				88	if (!bslab->slab && entry == -1)
				89	entry = i;
				90	else if (bslab->slab_size == sz) {
				91	slab = bslab->slab;
				92	bslab->slab_ref++;
				93	break;
				94	}
				95	i++;
				96	}
				97
				98	if (slab)
				99	goto out_unlock;
				100
				101	if (bio_slab_nr == bio_slab_max && entry == -1) {
Anna Leuschner	386bc35	2012-10-22 21:53:36 +0200	[diff] [blame]	102	new_bio_slab_max = bio_slab_max << 1;
Alexey Khoroshilov	389d7b2	2012-08-09 15:19:25 +0200	[diff] [blame]	103	new_bio_slabs = krealloc(bio_slabs,
Anna Leuschner	386bc35	2012-10-22 21:53:36 +0200	[diff] [blame]	104	new_bio_slab_max * sizeof(struct bio_slab),
Alexey Khoroshilov	389d7b2	2012-08-09 15:19:25 +0200	[diff] [blame]	105	GFP_KERNEL);
				106	if (!new_bio_slabs)
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	107	goto out_unlock;
Anna Leuschner	386bc35	2012-10-22 21:53:36 +0200	[diff] [blame]	108	bio_slab_max = new_bio_slab_max;
Alexey Khoroshilov	389d7b2	2012-08-09 15:19:25 +0200	[diff] [blame]	109	bio_slabs = new_bio_slabs;
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	110	}
				111	if (entry == -1)
				112	entry = bio_slab_nr++;
				113
				114	bslab = &bio_slabs[entry];
				115
				116	snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
				117	slab = kmem_cache_create(bslab->name, sz, 0, SLAB_HWCACHE_ALIGN, NULL);
				118	if (!slab)
				119	goto out_unlock;
				120
Mandeep Singh Baines	80cdc6d	2011-03-22 16:33:54 -0700	[diff] [blame]	121	printk(KERN_INFO "bio: create slab <%s> at %d\n", bslab->name, entry);
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	122	bslab->slab = slab;
				123	bslab->slab_ref = 1;
				124	bslab->slab_size = sz;
				125	out_unlock:
				126	mutex_unlock(&bio_slab_lock);
				127	return slab;
				128	}
				129
				130	static void bio_put_slab(struct bio_set *bs)
				131	{
				132	struct bio_slab *bslab = NULL;
				133	unsigned int i;
				134
				135	mutex_lock(&bio_slab_lock);
				136
				137	for (i = 0; i < bio_slab_nr; i++) {
				138	if (bs->bio_slab == bio_slabs[i].slab) {
				139	bslab = &bio_slabs[i];
				140	break;
				141	}
				142	}
				143
				144	if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
				145	goto out;
				146
				147	WARN_ON(!bslab->slab_ref);
				148
				149	if (--bslab->slab_ref)
				150	goto out;
				151
				152	kmem_cache_destroy(bslab->slab);
				153	bslab->slab = NULL;
				154
				155	out:
				156	mutex_unlock(&bio_slab_lock);
				157	}
				158
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	159	unsigned int bvec_nr_vecs(unsigned short idx)
				160	{
				161	return bvec_slabs[idx].nr_vecs;
				162	}
				163
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	164	void bvec_free(mempool_t pool, struct bio_vec bv, unsigned int idx)
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	165	{
				166	BIO_BUG_ON(idx >= BIOVEC_NR_POOLS);
				167
				168	if (idx == BIOVEC_MAX_IDX)
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	169	mempool_free(bv, pool);
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	170	else {
				171	struct biovec_slab *bvs = bvec_slabs + idx;
				172
				173	kmem_cache_free(bvs->slab, bv);
				174	}
				175	}
				176
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	177	struct bio_vec bvec_alloc(gfp_t gfp_mask, int nr, unsigned long idx,
				178	mempool_t *pool)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	179	{
				180	struct bio_vec *bvl;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	181
				182	/*
Jens Axboe	7ff9345	2008-12-11 11:53:43 +0100	[diff] [blame]	183	* see comment near the bvec_slabs definition!
				184	*/
				185	switch (nr) {
				186	case 1:
				187	*idx = 0;
				188	break;
				189	case 2 ... 4:
				190	*idx = 1;
				191	break;
				192	case 5 ... 16:
				193	*idx = 2;
				194	break;
				195	case 17 ... 64:
				196	*idx = 3;
				197	break;
				198	case 65 ... 128:
				199	*idx = 4;
				200	break;
				201	case 129 ... BIO_MAX_PAGES:
				202	*idx = 5;
				203	break;
				204	default:
				205	return NULL;
				206	}
				207
				208	/*
				209	* idx now points to the pool we want to allocate from. only the
				210	* BIOVEC_MAX_IDX pool is mempool backed.
				211	*/
				212	if (*idx == BIOVEC_MAX_IDX) {
				213	fallback:
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	214	bvl = mempool_alloc(pool, gfp_mask);
Jens Axboe	7ff9345	2008-12-11 11:53:43 +0100	[diff] [blame]	215	} else {
				216	struct biovec_slab bvs = bvec_slabs + idx;
				217	gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT \| __GFP_IO);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	218
Jens Axboe	0a0d96b	2008-09-11 13:17:37 +0200	[diff] [blame]	219	/*
Jens Axboe	7ff9345	2008-12-11 11:53:43 +0100	[diff] [blame]	220	* Make this allocation restricted and don't dump info on
				221	* allocation failures, since we'll fallback to the mempool
				222	* in case of failure.
Jens Axboe	0a0d96b	2008-09-11 13:17:37 +0200	[diff] [blame]	223	*/
Jens Axboe	7ff9345	2008-12-11 11:53:43 +0100	[diff] [blame]	224	__gfp_mask \|= __GFP_NOMEMALLOC \| __GFP_NORETRY \| __GFP_NOWARN;
				225
				226	/*
				227	* Try a slab allocation. If this fails and __GFP_WAIT
				228	* is set, retry with the 1-entry mempool
				229	*/
				230	bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
				231	if (unlikely(!bvl && (gfp_mask & __GFP_WAIT))) {
				232	*idx = BIOVEC_MAX_IDX;
				233	goto fallback;
				234	}
				235	}
				236
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	237	return bvl;
				238	}
				239
Kent Overstreet	4254bba	2012-09-06 15:35:00 -0700	[diff] [blame]	240	static void __bio_free(struct bio *bio)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	241	{
Kent Overstreet	4254bba	2012-09-06 15:35:00 -0700	[diff] [blame]	242	bio_disassociate_task(bio);
Jens Axboe	992c5dd	2007-07-18 13:18:08 +0200	[diff] [blame]	243
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	244	if (bio_integrity(bio))
Kent Overstreet	1e2a410f	2012-09-06 15:34:56 -0700	[diff] [blame]	245	bio_integrity_free(bio);
Kent Overstreet	4254bba	2012-09-06 15:35:00 -0700	[diff] [blame]	246	}
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	247
Kent Overstreet	4254bba	2012-09-06 15:35:00 -0700	[diff] [blame]	248	static void bio_free(struct bio *bio)
				249	{
				250	struct bio_set *bs = bio->bi_pool;
				251	void *p;
				252
				253	__bio_free(bio);
				254
				255	if (bs) {
Kent Overstreet	a38352e	2012-05-25 13:03:11 -0700	[diff] [blame]	256	if (bio_flagged(bio, BIO_OWNS_VEC))
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	257	bvec_free(bs->bvec_pool, bio->bi_io_vec, BIO_POOL_IDX(bio));
Kent Overstreet	4254bba	2012-09-06 15:35:00 -0700	[diff] [blame]	258
				259	/*
				260	* If we have front padding, adjust the bio pointer before freeing
				261	*/
				262	p = bio;
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	263	p -= bs->front_pad;
				264
Kent Overstreet	4254bba	2012-09-06 15:35:00 -0700	[diff] [blame]	265	mempool_free(p, bs->bio_pool);
				266	} else {
				267	/* Bio was allocated by bio_kmalloc() */
				268	kfree(bio);
				269	}
Peter Osterlund	3676347	2005-09-06 15:16:42 -0700	[diff] [blame]	270	}
				271
Arjan van de Ven	858119e	2006-01-14 13:20:43 -0800	[diff] [blame]	272	void bio_init(struct bio *bio)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	273	{
Jens Axboe	2b94de5	2007-07-18 13:14:03 +0200	[diff] [blame]	274	memset(bio, 0, sizeof(*bio));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	275	bio->bi_flags = 1 << BIO_UPTODATE;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	276	atomic_set(&bio->bi_cnt, 1);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	277	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	278	EXPORT_SYMBOL(bio_init);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	279
				280	/**
Kent Overstreet	f44b48c	2012-09-06 15:34:58 -0700	[diff] [blame]	281	* bio_reset - reinitialize a bio
				282	* @bio: bio to reset
				283	*
				284	* Description:
				285	* After calling bio_reset(), @bio will be in the same state as a freshly
				286	* allocated bio returned by bio_alloc_bioset() - the only fields that are
				287	* preserved are the ones that are initialized by bio_alloc_bioset(). See
				288	* comment in struct bio.
				289	*/
				290	void bio_reset(struct bio *bio)
				291	{
				292	unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS);
				293
Kent Overstreet	4254bba	2012-09-06 15:35:00 -0700	[diff] [blame]	294	__bio_free(bio);
Kent Overstreet	f44b48c	2012-09-06 15:34:58 -0700	[diff] [blame]	295
				296	memset(bio, 0, BIO_RESET_BYTES);
				297	bio->bi_flags = flags\|(1 << BIO_UPTODATE);
				298	}
				299	EXPORT_SYMBOL(bio_reset);
				300
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	301	static void bio_alloc_rescue(struct work_struct *work)
				302	{
				303	struct bio_set *bs = container_of(work, struct bio_set, rescue_work);
				304	struct bio *bio;
				305
				306	while (1) {
				307	spin_lock(&bs->rescue_lock);
				308	bio = bio_list_pop(&bs->rescue_list);
				309	spin_unlock(&bs->rescue_lock);
				310
				311	if (!bio)
				312	break;
				313
				314	generic_make_request(bio);
				315	}
				316	}
				317
				318	static void punt_bios_to_rescuer(struct bio_set *bs)
				319	{
				320	struct bio_list punt, nopunt;
				321	struct bio *bio;
				322
				323	/*
				324	* In order to guarantee forward progress we must punt only bios that
				325	* were allocated from this bio_set; otherwise, if there was a bio on
				326	* there for a stacking driver higher up in the stack, processing it
				327	* could require allocating bios from this bio_set, and doing that from
				328	* our own rescuer would be bad.
				329	*
				330	* Since bio lists are singly linked, pop them all instead of trying to
				331	* remove from the middle of the list:
				332	*/
				333
				334	bio_list_init(&punt);
				335	bio_list_init(&nopunt);
				336
				337	while ((bio = bio_list_pop(current->bio_list)))
				338	bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
				339
				340	*current->bio_list = nopunt;
				341
				342	spin_lock(&bs->rescue_lock);
				343	bio_list_merge(&bs->rescue_list, &punt);
				344	spin_unlock(&bs->rescue_lock);
				345
				346	queue_work(bs->rescue_workqueue, &bs->rescue_work);
				347	}
				348
Kent Overstreet	f44b48c	2012-09-06 15:34:58 -0700	[diff] [blame]	349	/**
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	350	* bio_alloc_bioset - allocate a bio for I/O
				351	* @gfp_mask: the GFP_ mask given to the slab allocator
				352	* @nr_iovecs: number of iovecs to pre-allocate
Jaak Ristioja	db18efa	2010-01-15 12:05:07 +0200	[diff] [blame]	353	* @bs: the bio_set to allocate from.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	354	*
				355	* Description:
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	356	* If @bs is NULL, uses kmalloc() to allocate the bio; else the allocation is
				357	* backed by the @bs's mempool.
				358	*
				359	* When @bs is not NULL, if %__GFP_WAIT is set then bio_alloc will always be
				360	* able to allocate a bio. This is due to the mempool guarantees. To make this
				361	* work, callers must never allocate more than 1 bio at a time from this pool.
				362	* Callers that need to allocate more than 1 bio must always submit the
				363	* previously allocated bio for IO before attempting to allocate a new one.
				364	* Failure to do so can cause deadlocks under memory pressure.
				365	*
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	366	* Note that when running under generic_make_request() (i.e. any block
				367	* driver), bios are not submitted until after you return - see the code in
				368	* generic_make_request() that converts recursion into iteration, to prevent
				369	* stack overflows.
				370	*
				371	* This would normally mean allocating multiple bios under
				372	* generic_make_request() would be susceptible to deadlocks, but we have
				373	* deadlock avoidance code that resubmits any blocked bios from a rescuer
				374	* thread.
				375	*
				376	* However, we do not guarantee forward progress for allocations from other
				377	* mempools. Doing multiple allocations from the same mempool under
				378	* generic_make_request() should be avoided - instead, use bio_set's front_pad
				379	* for per bio allocations.
				380	*
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	381	* RETURNS:
				382	* Pointer to new bio on success, NULL on failure.
				383	*/
Al Viro	dd0fc66	2005-10-07 07:46:04 +0100	[diff] [blame]	384	struct bio bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set bs)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	385	{
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	386	gfp_t saved_gfp = gfp_mask;
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	387	unsigned front_pad;
				388	unsigned inline_vecs;
Tejun Heo	451a9eb	2009-04-15 19:50:51 +0200	[diff] [blame]	389	unsigned long idx = BIO_POOL_NONE;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	390	struct bio_vec *bvl = NULL;
Tejun Heo	451a9eb	2009-04-15 19:50:51 +0200	[diff] [blame]	391	struct bio *bio;
				392	void *p;
Jens Axboe	0a0d96b	2008-09-11 13:17:37 +0200	[diff] [blame]	393
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	394	if (!bs) {
				395	if (nr_iovecs > UIO_MAXIOV)
				396	return NULL;
				397
				398	p = kmalloc(sizeof(struct bio) +
				399	nr_iovecs * sizeof(struct bio_vec),
				400	gfp_mask);
				401	front_pad = 0;
				402	inline_vecs = nr_iovecs;
				403	} else {
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	404	/*
				405	* generic_make_request() converts recursion to iteration; this
				406	* means if we're running beneath it, any bios we allocate and
				407	* submit will not be submitted (and thus freed) until after we
				408	* return.
				409	*
				410	* This exposes us to a potential deadlock if we allocate
				411	* multiple bios from the same bio_set() while running
				412	* underneath generic_make_request(). If we were to allocate
				413	* multiple bios (say a stacking block driver that was splitting
				414	* bios), we would deadlock if we exhausted the mempool's
				415	* reserve.
				416	*
				417	* We solve this, and guarantee forward progress, with a rescuer
				418	* workqueue per bio_set. If we go to allocate and there are
				419	* bios on current->bio_list, we first try the allocation
				420	* without __GFP_WAIT; if that fails, we punt those bios we
				421	* would be blocking to the rescuer workqueue before we retry
				422	* with the original gfp_flags.
				423	*/
				424
				425	if (current->bio_list && !bio_list_empty(current->bio_list))
				426	gfp_mask &= ~__GFP_WAIT;
				427
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	428	p = mempool_alloc(bs->bio_pool, gfp_mask);
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	429	if (!p && gfp_mask != saved_gfp) {
				430	punt_bios_to_rescuer(bs);
				431	gfp_mask = saved_gfp;
				432	p = mempool_alloc(bs->bio_pool, gfp_mask);
				433	}
				434
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	435	front_pad = bs->front_pad;
				436	inline_vecs = BIO_INLINE_VECS;
				437	}
				438
Tejun Heo	451a9eb	2009-04-15 19:50:51 +0200	[diff] [blame]	439	if (unlikely(!p))
				440	return NULL;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	441
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	442	bio = p + front_pad;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	443	bio_init(bio);
				444
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	445	if (nr_iovecs > inline_vecs) {
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	446	bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	447	if (!bvl && gfp_mask != saved_gfp) {
				448	punt_bios_to_rescuer(bs);
				449	gfp_mask = saved_gfp;
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	450	bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	451	}
				452
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	453	if (unlikely(!bvl))
				454	goto err_free;
Kent Overstreet	a38352e	2012-05-25 13:03:11 -0700	[diff] [blame]	455
				456	bio->bi_flags \|= 1 << BIO_OWNS_VEC;
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	457	} else if (nr_iovecs) {
				458	bvl = bio->bi_inline_vecs;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	459	}
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	460
				461	bio->bi_pool = bs;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	462	bio->bi_flags \|= idx << BIO_POOL_OFFSET;
				463	bio->bi_max_vecs = nr_iovecs;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	464	bio->bi_io_vec = bvl;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	465	return bio;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	466
				467	err_free:
Tejun Heo	451a9eb	2009-04-15 19:50:51 +0200	[diff] [blame]	468	mempool_free(p, bs->bio_pool);
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	469	return NULL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	470	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	471	EXPORT_SYMBOL(bio_alloc_bioset);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	472
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	473	void zero_fill_bio(struct bio *bio)
				474	{
				475	unsigned long flags;
Kent Overstreet	7988613	2013-11-23 17:19:00 -0800	[diff] [blame]	476	struct bio_vec bv;
				477	struct bvec_iter iter;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	478
Kent Overstreet	7988613	2013-11-23 17:19:00 -0800	[diff] [blame]	479	bio_for_each_segment(bv, bio, iter) {
				480	char *data = bvec_kmap_irq(&bv, &flags);
				481	memset(data, 0, bv.bv_len);
				482	flush_dcache_page(bv.bv_page);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	483	bvec_kunmap_irq(data, &flags);
				484	}
				485	}
				486	EXPORT_SYMBOL(zero_fill_bio);
				487
				488	/**
				489	* bio_put - release a reference to a bio
				490	* @bio: bio to release reference to
				491	*
				492	* Description:
				493	* Put a reference to a &struct bio, either one you have gotten with
Alberto Bertogli	ad0bf11	2009-11-02 11:39:22 +0100	[diff] [blame]	494	* bio_alloc, bio_get or bio_clone. The last put of a bio will free it.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	495	**/
				496	void bio_put(struct bio *bio)
				497	{
				498	BIO_BUG_ON(!atomic_read(&bio->bi_cnt));
				499
				500	/*
				501	* last put frees it
				502	*/
Kent Overstreet	4254bba	2012-09-06 15:35:00 -0700	[diff] [blame]	503	if (atomic_dec_and_test(&bio->bi_cnt))
				504	bio_free(bio);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	505	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	506	EXPORT_SYMBOL(bio_put);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	507
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	508	inline int bio_phys_segments(struct request_queue q, struct bio bio)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	509	{
				510	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
				511	blk_recount_segments(q, bio);
				512
				513	return bio->bi_phys_segments;
				514	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	515	EXPORT_SYMBOL(bio_phys_segments);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	516
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	517	/**
				518	* __bio_clone - clone a bio
				519	* @bio: destination bio
				520	* @bio_src: bio to clone
				521	*
				522	* Clone a &bio. Caller will own the returned bio, but not
				523	* the actual data it points to. Reference count of returned
				524	* bio will be one.
				525	*/
Arjan van de Ven	858119e	2006-01-14 13:20:43 -0800	[diff] [blame]	526	void __bio_clone(struct bio bio, struct bio bio_src)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	527	{
Kent Overstreet	003b5c5	2013-10-11 15:45:43 -0700	[diff] [blame]	528	if (bio_is_rw(bio_src)) {
				529	struct bio_vec bv;
				530	struct bvec_iter iter;
				531
				532	bio_for_each_segment(bv, bio_src, iter)
				533	bio->bi_io_vec[bio->bi_vcnt++] = bv;
				534	} else if (bio_has_data(bio_src)) {
				535	memcpy(bio->bi_io_vec, bio_src->bi_io_vec,
				536	bio_src->bi_max_vecs * sizeof(struct bio_vec));
				537	bio->bi_vcnt = bio_src->bi_vcnt;
				538	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	539
Jens Axboe	5d84070	2008-01-25 12:44:44 +0100	[diff] [blame]	540	/*
				541	* most users will be overriding ->bi_bdev with a new target,
				542	* so we don't set nor calculate new physical/hw segment counts here
				543	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	544	bio->bi_bdev = bio_src->bi_bdev;
				545	bio->bi_flags \|= 1 << BIO_CLONED;
				546	bio->bi_rw = bio_src->bi_rw;
Kent Overstreet	4550dd6	2013-08-07 14:26:21 -0700	[diff] [blame]	547	bio->bi_iter = bio_src->bi_iter;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	548	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	549	EXPORT_SYMBOL(__bio_clone);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	550
				551	/**
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame^]	552	* bio_clone_bioset - clone a bio
				553	* @bio_src: bio to clone
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	554	* @gfp_mask: allocation priority
Kent Overstreet	bf800ef	2012-09-06 15:35:02 -0700	[diff] [blame]	555	* @bs: bio_set to allocate from
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	556	*
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame^]	557	* Clone bio. Caller will own the returned bio, but not the actual data it
				558	* points to. Reference count of returned bio will be one.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	559	*/
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame^]	560	struct bio bio_clone_bioset(struct bio bio_src, gfp_t gfp_mask,
Kent Overstreet	bf800ef	2012-09-06 15:35:02 -0700	[diff] [blame]	561	struct bio_set *bs)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	562	{
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame^]	563	unsigned nr_iovecs = 0;
				564	struct bvec_iter iter;
				565	struct bio_vec bv;
				566	struct bio *bio;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	567
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame^]	568	/*
				569	* Pre immutable biovecs, __bio_clone() used to just do a memcpy from
				570	* bio_src->bi_io_vec to bio->bi_io_vec.
				571	*
				572	* We can't do that anymore, because:
				573	*
				574	* - The point of cloning the biovec is to produce a bio with a biovec
				575	* the caller can modify: bi_idx and bi_bvec_done should be 0.
				576	*
				577	* - The original bio could've had more than BIO_MAX_PAGES biovecs; if
				578	* we tried to clone the whole thing bio_alloc_bioset() would fail.
				579	* But the clone should succeed as long as the number of biovecs we
				580	* actually need to allocate is fewer than BIO_MAX_PAGES.
				581	*
				582	* - Lastly, bi_vcnt should not be looked at or relied upon by code
				583	* that does not own the bio - reason being drivers don't use it for
				584	* iterating over the biovec anymore, so expecting it to be kept up
				585	* to date (i.e. for clones that share the parent biovec) is just
				586	* asking for trouble and would force extra work on
				587	* __bio_clone_fast() anyways.
				588	*/
				589
				590	bio_for_each_segment(bv, bio_src, iter)
				591	nr_iovecs++;
				592
				593	bio = bio_alloc_bioset(gfp_mask, nr_iovecs, bs);
				594	if (!bio)
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	595	return NULL;
				596
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame^]	597	bio->bi_bdev = bio_src->bi_bdev;
				598	bio->bi_rw = bio_src->bi_rw;
				599	bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
				600	bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	601
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame^]	602	bio_for_each_segment(bv, bio_src, iter)
				603	bio->bi_io_vec[bio->bi_vcnt++] = bv;
				604
				605	if (bio_integrity(bio_src)) {
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	606	int ret;
				607
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame^]	608	ret = bio_integrity_clone(bio, bio_src, gfp_mask);
Li Zefan	059ea33	2009-03-09 10:42:45 +0100	[diff] [blame]	609	if (ret < 0) {
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame^]	610	bio_put(bio);
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	611	return NULL;
Li Zefan	059ea33	2009-03-09 10:42:45 +0100	[diff] [blame]	612	}
Peter Osterlund	3676347	2005-09-06 15:16:42 -0700	[diff] [blame]	613	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	614
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame^]	615	return bio;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	616	}
Kent Overstreet	bf800ef	2012-09-06 15:35:02 -0700	[diff] [blame]	617	EXPORT_SYMBOL(bio_clone_bioset);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	618
				619	/**
				620	* bio_get_nr_vecs - return approx number of vecs
				621	* @bdev: I/O target
				622	*
				623	* Return the approximate number of pages we can send to this target.
				624	* There's no guarantee that you will be able to fit this number of pages
				625	* into a bio, it does not account for dynamic restrictions that vary
				626	* on offset.
				627	*/
				628	int bio_get_nr_vecs(struct block_device *bdev)
				629	{
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	630	struct request_queue *q = bdev_get_queue(bdev);
Bernd Schubert	f908ee9	2012-05-11 16:36:44 +0200	[diff] [blame]	631	int nr_pages;
				632
				633	nr_pages = min_t(unsigned,
Kent Overstreet	5abebfd	2012-02-08 22:07:18 +0100	[diff] [blame]	634	queue_max_segments(q),
				635	queue_max_sectors(q) / (PAGE_SIZE >> 9) + 1);
Bernd Schubert	f908ee9	2012-05-11 16:36:44 +0200	[diff] [blame]	636
				637	return min_t(unsigned, nr_pages, BIO_MAX_PAGES);
				638
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	639	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	640	EXPORT_SYMBOL(bio_get_nr_vecs);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	641
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	642	static int __bio_add_page(struct request_queue q, struct bio bio, struct page
Mike Christie	defd94b	2005-12-05 02:37:06 -0600	[diff] [blame]	643	*page, unsigned int len, unsigned int offset,
Akinobu Mita	34f2fd8	2013-11-18 22:11:42 +0900	[diff] [blame]	644	unsigned int max_sectors)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	645	{
				646	int retried_segments = 0;
				647	struct bio_vec *bvec;
				648
				649	/*
				650	* cloned bio must not modify vec list
				651	*/
				652	if (unlikely(bio_flagged(bio, BIO_CLONED)))
				653	return 0;
				654
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	655	if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	656	return 0;
				657
Jens Axboe	80cfd54	2006-01-06 09:43:28 +0100	[diff] [blame]	658	/*
				659	* For filesystems with a blocksize smaller than the pagesize
				660	* we will often be called with the same page as last time and
				661	* a consecutive offset. Optimize this special case.
				662	*/
				663	if (bio->bi_vcnt > 0) {
				664	struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
				665
				666	if (page == prev->bv_page &&
				667	offset == prev->bv_offset + prev->bv_len) {
Dmitry Monakhov	1d61658	2010-01-27 22:44:36 +0300	[diff] [blame]	668	unsigned int prev_bv_len = prev->bv_len;
Jens Axboe	80cfd54	2006-01-06 09:43:28 +0100	[diff] [blame]	669	prev->bv_len += len;
Alasdair G Kergon	cc371e6	2008-07-03 09:53:43 +0200	[diff] [blame]	670
				671	if (q->merge_bvec_fn) {
				672	struct bvec_merge_data bvm = {
Dmitry Monakhov	1d61658	2010-01-27 22:44:36 +0300	[diff] [blame]	673	/* prev_bvec is already charged in
				674	bi_size, discharge it in order to
				675	simulate merging updated prev_bvec
				676	as new bvec. */
Alasdair G Kergon	cc371e6	2008-07-03 09:53:43 +0200	[diff] [blame]	677	.bi_bdev = bio->bi_bdev,
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	678	.bi_sector = bio->bi_iter.bi_sector,
				679	.bi_size = bio->bi_iter.bi_size -
				680	prev_bv_len,
Alasdair G Kergon	cc371e6	2008-07-03 09:53:43 +0200	[diff] [blame]	681	.bi_rw = bio->bi_rw,
				682	};
				683
Dmitry Monakhov	8bf8c37	2010-03-03 06:28:06 +0300	[diff] [blame]	684	if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) {
Alasdair G Kergon	cc371e6	2008-07-03 09:53:43 +0200	[diff] [blame]	685	prev->bv_len -= len;
				686	return 0;
				687	}
Jens Axboe	80cfd54	2006-01-06 09:43:28 +0100	[diff] [blame]	688	}
				689
				690	goto done;
				691	}
				692	}
				693
				694	if (bio->bi_vcnt >= bio->bi_max_vecs)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	695	return 0;
				696
				697	/*
				698	* we might lose a segment or two here, but rather that than
				699	* make this too complex.
				700	*/
				701
Martin K. Petersen	8a78362	2010-02-26 00:20:39 -0500	[diff] [blame]	702	while (bio->bi_phys_segments >= queue_max_segments(q)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	703
				704	if (retried_segments)
				705	return 0;
				706
				707	retried_segments = 1;
				708	blk_recount_segments(q, bio);
				709	}
				710
				711	/*
				712	* setup the new entry, we might clear it again later if we
				713	* cannot add the page
				714	*/
				715	bvec = &bio->bi_io_vec[bio->bi_vcnt];
				716	bvec->bv_page = page;
				717	bvec->bv_len = len;
				718	bvec->bv_offset = offset;
				719
				720	/*
				721	* if queue has other restrictions (eg varying max sector size
				722	* depending on offset), it can specify a merge_bvec_fn in the
				723	* queue to get further control
				724	*/
				725	if (q->merge_bvec_fn) {
Alasdair G Kergon	cc371e6	2008-07-03 09:53:43 +0200	[diff] [blame]	726	struct bvec_merge_data bvm = {
				727	.bi_bdev = bio->bi_bdev,
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	728	.bi_sector = bio->bi_iter.bi_sector,
				729	.bi_size = bio->bi_iter.bi_size,
Alasdair G Kergon	cc371e6	2008-07-03 09:53:43 +0200	[diff] [blame]	730	.bi_rw = bio->bi_rw,
				731	};
				732
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	733	/*
				734	* merge_bvec_fn() returns number of bytes it can accept
				735	* at this offset
				736	*/
Dmitry Monakhov	8bf8c37	2010-03-03 06:28:06 +0300	[diff] [blame]	737	if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	738	bvec->bv_page = NULL;
				739	bvec->bv_len = 0;
				740	bvec->bv_offset = 0;
				741	return 0;
				742	}
				743	}
				744
				745	/* If we may be able to merge these biovecs, force a recount */
Mikulas Patocka	b8b3e16	2008-08-15 10:15:19 +0200	[diff] [blame]	746	if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	747	bio->bi_flags &= ~(1 << BIO_SEG_VALID);
				748
				749	bio->bi_vcnt++;
				750	bio->bi_phys_segments++;
Jens Axboe	80cfd54	2006-01-06 09:43:28 +0100	[diff] [blame]	751	done:
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	752	bio->bi_iter.bi_size += len;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	753	return len;
				754	}
				755
				756	/**
Mike Christie	6e68af6	2005-11-11 05:30:27 -0600	[diff] [blame]	757	* bio_add_pc_page - attempt to add page to bio
Jens Axboe	fddfdea	2006-01-31 15:24:34 +0100	[diff] [blame]	758	* @q: the target queue
Mike Christie	6e68af6	2005-11-11 05:30:27 -0600	[diff] [blame]	759	* @bio: destination bio
				760	* @page: page to add
				761	* @len: vec entry length
				762	* @offset: vec entry offset
				763	*
				764	* Attempt to add a page to the bio_vec maplist. This can fail for a
Andreas Gruenbacher	c642808	2011-05-27 14:52:09 +0200	[diff] [blame]	765	* number of reasons, such as the bio being full or target block device
				766	* limitations. The target block device must allow bio's up to PAGE_SIZE,
				767	* so it is always possible to add a single page to an empty bio.
				768	*
				769	* This should only be used by REQ_PC bios.
Mike Christie	6e68af6	2005-11-11 05:30:27 -0600	[diff] [blame]	770	*/
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	771	int bio_add_pc_page(struct request_queue q, struct bio bio, struct page *page,
Mike Christie	6e68af6	2005-11-11 05:30:27 -0600	[diff] [blame]	772	unsigned int len, unsigned int offset)
				773	{
Martin K. Petersen	ae03bf6	2009-05-22 17:17:50 -0400	[diff] [blame]	774	return __bio_add_page(q, bio, page, len, offset,
				775	queue_max_hw_sectors(q));
Mike Christie	6e68af6	2005-11-11 05:30:27 -0600	[diff] [blame]	776	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	777	EXPORT_SYMBOL(bio_add_pc_page);
Mike Christie	6e68af6	2005-11-11 05:30:27 -0600	[diff] [blame]	778
				779	/**
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	780	* bio_add_page - attempt to add page to bio
				781	* @bio: destination bio
				782	* @page: page to add
				783	* @len: vec entry length
				784	* @offset: vec entry offset
				785	*
				786	* Attempt to add a page to the bio_vec maplist. This can fail for a
Andreas Gruenbacher	c642808	2011-05-27 14:52:09 +0200	[diff] [blame]	787	* number of reasons, such as the bio being full or target block device
				788	* limitations. The target block device must allow bio's up to PAGE_SIZE,
				789	* so it is always possible to add a single page to an empty bio.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	790	*/
				791	int bio_add_page(struct bio bio, struct page page, unsigned int len,
				792	unsigned int offset)
				793	{
Mike Christie	defd94b	2005-12-05 02:37:06 -0600	[diff] [blame]	794	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
Martin K. Petersen	ae03bf6	2009-05-22 17:17:50 -0400	[diff] [blame]	795	return __bio_add_page(q, bio, page, len, offset, queue_max_sectors(q));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	796	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	797	EXPORT_SYMBOL(bio_add_page);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	798
Kent Overstreet	9e88224	2012-09-10 14:41:12 -0700	[diff] [blame]	799	struct submit_bio_ret {
				800	struct completion event;
				801	int error;
				802	};
				803
				804	static void submit_bio_wait_endio(struct bio *bio, int error)
				805	{
				806	struct submit_bio_ret *ret = bio->bi_private;
				807
				808	ret->error = error;
				809	complete(&ret->event);
				810	}
				811
				812	/**
				813	* submit_bio_wait - submit a bio, and wait until it completes
				814	* @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
				815	* @bio: The &struct bio which describes the I/O
				816	*
				817	* Simple wrapper around submit_bio(). Returns 0 on success, or the error from
				818	* bio_endio() on failure.
				819	*/
				820	int submit_bio_wait(int rw, struct bio *bio)
				821	{
				822	struct submit_bio_ret ret;
				823
				824	rw \|= REQ_SYNC;
				825	init_completion(&ret.event);
				826	bio->bi_private = &ret;
				827	bio->bi_end_io = submit_bio_wait_endio;
				828	submit_bio(rw, bio);
				829	wait_for_completion(&ret.event);
				830
				831	return ret.error;
				832	}
				833	EXPORT_SYMBOL(submit_bio_wait);
				834
Kent Overstreet	054bdf6	2012-09-28 13:17:55 -0700	[diff] [blame]	835	/**
				836	* bio_advance - increment/complete a bio by some number of bytes
				837	* @bio: bio to advance
				838	* @bytes: number of bytes to complete
				839	*
				840	* This updates bi_sector, bi_size and bi_idx; if the number of bytes to
				841	* complete doesn't align with a bvec boundary, then bv_len and bv_offset will
				842	* be updated on the last bvec as well.
				843	*
				844	* @bio will then represent the remaining, uncompleted portion of the io.
				845	*/
				846	void bio_advance(struct bio *bio, unsigned bytes)
				847	{
				848	if (bio_integrity(bio))
				849	bio_integrity_advance(bio, bytes);
				850
Kent Overstreet	4550dd6	2013-08-07 14:26:21 -0700	[diff] [blame]	851	bio_advance_iter(bio, &bio->bi_iter, bytes);
Kent Overstreet	054bdf6	2012-09-28 13:17:55 -0700	[diff] [blame]	852	}
				853	EXPORT_SYMBOL(bio_advance);
				854
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	855	/**
Kent Overstreet	a078760	2012-09-10 14:03:28 -0700	[diff] [blame]	856	* bio_alloc_pages - allocates a single page for each bvec in a bio
				857	* @bio: bio to allocate pages for
				858	* @gfp_mask: flags for allocation
				859	*
				860	* Allocates pages up to @bio->bi_vcnt.
				861	*
				862	* Returns 0 on success, -ENOMEM on failure. On failure, any allocated pages are
				863	* freed.
				864	*/
				865	int bio_alloc_pages(struct bio *bio, gfp_t gfp_mask)
				866	{
				867	int i;
				868	struct bio_vec *bv;
				869
				870	bio_for_each_segment_all(bv, bio, i) {
				871	bv->bv_page = alloc_page(gfp_mask);
				872	if (!bv->bv_page) {
				873	while (--bv >= bio->bi_io_vec)
				874	__free_page(bv->bv_page);
				875	return -ENOMEM;
				876	}
				877	}
				878
				879	return 0;
				880	}
				881	EXPORT_SYMBOL(bio_alloc_pages);
				882
				883	/**
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	884	* bio_copy_data - copy contents of data buffers from one chain of bios to
				885	* another
				886	* @src: source bio list
				887	* @dst: destination bio list
				888	*
				889	* If @src and @dst are single bios, bi_next must be NULL - otherwise, treats
				890	* @src and @dst as linked lists of bios.
				891	*
				892	* Stops when it reaches the end of either @src or @dst - that is, copies
				893	* min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios).
				894	*/
				895	void bio_copy_data(struct bio dst, struct bio src)
				896	{
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	897	struct bvec_iter src_iter, dst_iter;
				898	struct bio_vec src_bv, dst_bv;
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	899	void src_p, dst_p;
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	900	unsigned bytes;
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	901
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	902	src_iter = src->bi_iter;
				903	dst_iter = dst->bi_iter;
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	904
				905	while (1) {
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	906	if (!src_iter.bi_size) {
				907	src = src->bi_next;
				908	if (!src)
				909	break;
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	910
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	911	src_iter = src->bi_iter;
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	912	}
				913
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	914	if (!dst_iter.bi_size) {
				915	dst = dst->bi_next;
				916	if (!dst)
				917	break;
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	918
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	919	dst_iter = dst->bi_iter;
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	920	}
				921
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	922	src_bv = bio_iter_iovec(src, src_iter);
				923	dst_bv = bio_iter_iovec(dst, dst_iter);
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	924
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	925	bytes = min(src_bv.bv_len, dst_bv.bv_len);
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	926
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	927	src_p = kmap_atomic(src_bv.bv_page);
				928	dst_p = kmap_atomic(dst_bv.bv_page);
				929
				930	memcpy(dst_p + dst_bv.bv_offset,
				931	src_p + src_bv.bv_offset,
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	932	bytes);
				933
				934	kunmap_atomic(dst_p);
				935	kunmap_atomic(src_p);
				936
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	937	bio_advance_iter(src, &src_iter, bytes);
				938	bio_advance_iter(dst, &dst_iter, bytes);
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	939	}
				940	}
				941	EXPORT_SYMBOL(bio_copy_data);
				942
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	943	struct bio_map_data {
				944	struct bio_vec *iovecs;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	945	struct sg_iovec *sgvecs;
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	946	int nr_sgvecs;
				947	int is_our_pages;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	948	};
				949
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	950	static void bio_set_map_data(struct bio_map_data bmd, struct bio bio,
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	951	struct sg_iovec *iov, int iov_count,
				952	int is_our_pages)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	953	{
				954	memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt);
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	955	memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
				956	bmd->nr_sgvecs = iov_count;
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	957	bmd->is_our_pages = is_our_pages;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	958	bio->bi_private = bmd;
				959	}
				960
				961	static void bio_free_map_data(struct bio_map_data *bmd)
				962	{
				963	kfree(bmd->iovecs);
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	964	kfree(bmd->sgvecs);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	965	kfree(bmd);
				966	}
				967
Dan Carpenter	121f099	2011-11-16 09:21:50 +0100	[diff] [blame]	968	static struct bio_map_data *bio_alloc_map_data(int nr_segs,
				969	unsigned int iov_count,
FUJITA Tomonori	76029ff	2008-08-25 20:36:08 +0200	[diff] [blame]	970	gfp_t gfp_mask)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	971	{
Jens Axboe	f3f63c1	2010-10-29 11:46:56 -0600	[diff] [blame]	972	struct bio_map_data *bmd;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	973
Jens Axboe	f3f63c1	2010-10-29 11:46:56 -0600	[diff] [blame]	974	if (iov_count > UIO_MAXIOV)
				975	return NULL;
				976
				977	bmd = kmalloc(sizeof(*bmd), gfp_mask);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	978	if (!bmd)
				979	return NULL;
				980
FUJITA Tomonori	76029ff	2008-08-25 20:36:08 +0200	[diff] [blame]	981	bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, gfp_mask);
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	982	if (!bmd->iovecs) {
				983	kfree(bmd);
				984	return NULL;
				985	}
				986
FUJITA Tomonori	76029ff	2008-08-25 20:36:08 +0200	[diff] [blame]	987	bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, gfp_mask);
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	988	if (bmd->sgvecs)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	989	return bmd;
				990
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	991	kfree(bmd->iovecs);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	992	kfree(bmd);
				993	return NULL;
				994	}
				995
FUJITA Tomonori	aefcc28	2008-08-25 20:36:08 +0200	[diff] [blame]	996	static int __bio_copy_iov(struct bio bio, struct bio_vec iovecs,
FUJITA Tomonori	ecb554a	2009-07-09 14:46:53 +0200	[diff] [blame]	997	struct sg_iovec *iov, int iov_count,
				998	int to_user, int from_user, int do_free_page)
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	999	{
				1000	int ret = 0, i;
				1001	struct bio_vec *bvec;
				1002	int iov_idx = 0;
				1003	unsigned int iov_off = 0;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1004
Kent Overstreet	d74c6d5	2013-02-06 12:23:11 -0800	[diff] [blame]	1005	bio_for_each_segment_all(bvec, bio, i) {
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1006	char *bv_addr = page_address(bvec->bv_page);
FUJITA Tomonori	aefcc28	2008-08-25 20:36:08 +0200	[diff] [blame]	1007	unsigned int bv_len = iovecs[i].bv_len;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1008
				1009	while (bv_len && iov_idx < iov_count) {
				1010	unsigned int bytes;
Michal Simek	0e0c621	2009-06-10 12:57:07 -0700	[diff] [blame]	1011	char __user *iov_addr;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1012
				1013	bytes = min_t(unsigned int,
				1014	iov[iov_idx].iov_len - iov_off, bv_len);
				1015	iov_addr = iov[iov_idx].iov_base + iov_off;
				1016
				1017	if (!ret) {
FUJITA Tomonori	ecb554a	2009-07-09 14:46:53 +0200	[diff] [blame]	1018	if (to_user)
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1019	ret = copy_to_user(iov_addr, bv_addr,
				1020	bytes);
				1021
FUJITA Tomonori	ecb554a	2009-07-09 14:46:53 +0200	[diff] [blame]	1022	if (from_user)
				1023	ret = copy_from_user(bv_addr, iov_addr,
				1024	bytes);
				1025
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1026	if (ret)
				1027	ret = -EFAULT;
				1028	}
				1029
				1030	bv_len -= bytes;
				1031	bv_addr += bytes;
				1032	iov_addr += bytes;
				1033	iov_off += bytes;
				1034
				1035	if (iov[iov_idx].iov_len == iov_off) {
				1036	iov_idx++;
				1037	iov_off = 0;
				1038	}
				1039	}
				1040
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1041	if (do_free_page)
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1042	__free_page(bvec->bv_page);
				1043	}
				1044
				1045	return ret;
				1046	}
				1047
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1048	/**
				1049	* bio_uncopy_user - finish previously mapped bio
				1050	* @bio: bio being terminated
				1051	*
				1052	* Free pages allocated from bio_copy_user() and write back data
				1053	* to user space in case of a read.
				1054	*/
				1055	int bio_uncopy_user(struct bio *bio)
				1056	{
				1057	struct bio_map_data *bmd = bio->bi_private;
Roland Dreier	35dc248	2013-08-05 17:55:01 -0700	[diff] [blame]	1058	struct bio_vec *bvec;
				1059	int ret = 0, i;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1060
Roland Dreier	35dc248	2013-08-05 17:55:01 -0700	[diff] [blame]	1061	if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
				1062	/*
				1063	* if we're in a workqueue, the request is orphaned, so
				1064	* don't copy into a random user address space, just free.
				1065	*/
				1066	if (current->mm)
				1067	ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs,
				1068	bmd->nr_sgvecs, bio_data_dir(bio) == READ,
				1069	0, bmd->is_our_pages);
				1070	else if (bmd->is_our_pages)
				1071	bio_for_each_segment_all(bvec, bio, i)
				1072	__free_page(bvec->bv_page);
				1073	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1074	bio_free_map_data(bmd);
				1075	bio_put(bio);
				1076	return ret;
				1077	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1078	EXPORT_SYMBOL(bio_uncopy_user);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1079
				1080	/**
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1081	* bio_copy_user_iov - copy user data to bio
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1082	* @q: destination block queue
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1083	* @map_data: pointer to the rq_map_data holding pages (if necessary)
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1084	* @iov: the iovec.
				1085	* @iov_count: number of elements in the iovec
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1086	* @write_to_vm: bool indicating writing to pages or not
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1087	* @gfp_mask: memory allocation flags
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1088	*
				1089	* Prepares and returns a bio for indirect user io, bouncing data
				1090	* to/from kernel pages as necessary. Must be paired with
				1091	* call bio_uncopy_user() on io completion.
				1092	*/
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1093	struct bio bio_copy_user_iov(struct request_queue q,
				1094	struct rq_map_data *map_data,
				1095	struct sg_iovec *iov, int iov_count,
				1096	int write_to_vm, gfp_t gfp_mask)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1097	{
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1098	struct bio_map_data *bmd;
				1099	struct bio_vec *bvec;
				1100	struct page *page;
				1101	struct bio *bio;
				1102	int i, ret;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1103	int nr_pages = 0;
				1104	unsigned int len = 0;
FUJITA Tomonori	56c451f	2008-12-18 14:49:37 +0900	[diff] [blame]	1105	unsigned int offset = map_data ? map_data->offset & ~PAGE_MASK : 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1106
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1107	for (i = 0; i < iov_count; i++) {
				1108	unsigned long uaddr;
				1109	unsigned long end;
				1110	unsigned long start;
				1111
				1112	uaddr = (unsigned long)iov[i].iov_base;
				1113	end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
				1114	start = uaddr >> PAGE_SHIFT;
				1115
Jens Axboe	cb4644c	2010-11-10 14:36:25 +0100	[diff] [blame]	1116	/*
				1117	* Overflow, abort
				1118	*/
				1119	if (end < start)
				1120	return ERR_PTR(-EINVAL);
				1121
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1122	nr_pages += end - start;
				1123	len += iov[i].iov_len;
				1124	}
				1125
FUJITA Tomonori	6983872	2009-04-28 20:24:29 +0200	[diff] [blame]	1126	if (offset)
				1127	nr_pages++;
				1128
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1129	bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1130	if (!bmd)
				1131	return ERR_PTR(-ENOMEM);
				1132
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1133	ret = -ENOMEM;
Tejun Heo	a9e9dc2	2009-04-15 22:10:27 +0900	[diff] [blame]	1134	bio = bio_kmalloc(gfp_mask, nr_pages);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1135	if (!bio)
				1136	goto out_bmd;
				1137
Christoph Hellwig	7b6d91d	2010-08-07 18:20:39 +0200	[diff] [blame]	1138	if (!write_to_vm)
				1139	bio->bi_rw \|= REQ_WRITE;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1140
				1141	ret = 0;
FUJITA Tomonori	56c451f	2008-12-18 14:49:37 +0900	[diff] [blame]	1142
				1143	if (map_data) {
FUJITA Tomonori	e623ddb	2008-12-18 14:49:36 +0900	[diff] [blame]	1144	nr_pages = 1 << map_data->page_order;
FUJITA Tomonori	56c451f	2008-12-18 14:49:37 +0900	[diff] [blame]	1145	i = map_data->offset / PAGE_SIZE;
				1146	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1147	while (len) {
FUJITA Tomonori	e623ddb	2008-12-18 14:49:36 +0900	[diff] [blame]	1148	unsigned int bytes = PAGE_SIZE;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1149
FUJITA Tomonori	56c451f	2008-12-18 14:49:37 +0900	[diff] [blame]	1150	bytes -= offset;
				1151
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1152	if (bytes > len)
				1153	bytes = len;
				1154
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1155	if (map_data) {
FUJITA Tomonori	e623ddb	2008-12-18 14:49:36 +0900	[diff] [blame]	1156	if (i == map_data->nr_entries * nr_pages) {
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1157	ret = -ENOMEM;
				1158	break;
				1159	}
FUJITA Tomonori	e623ddb	2008-12-18 14:49:36 +0900	[diff] [blame]	1160
				1161	page = map_data->pages[i / nr_pages];
				1162	page += (i % nr_pages);
				1163
				1164	i++;
				1165	} else {
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1166	page = alloc_page(q->bounce_gfp \| gfp_mask);
FUJITA Tomonori	e623ddb	2008-12-18 14:49:36 +0900	[diff] [blame]	1167	if (!page) {
				1168	ret = -ENOMEM;
				1169	break;
				1170	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1171	}
				1172
FUJITA Tomonori	56c451f	2008-12-18 14:49:37 +0900	[diff] [blame]	1173	if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1174	break;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1175
				1176	len -= bytes;
FUJITA Tomonori	56c451f	2008-12-18 14:49:37 +0900	[diff] [blame]	1177	offset = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1178	}
				1179
				1180	if (ret)
				1181	goto cleanup;
				1182
				1183	/*
				1184	* success
				1185	*/
FUJITA Tomonori	ecb554a	2009-07-09 14:46:53 +0200	[diff] [blame]	1186	if ((!write_to_vm && (!map_data \|\| !map_data->null_mapped)) \|\|
				1187	(map_data && map_data->from_user)) {
				1188	ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 1, 0);
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1189	if (ret)
				1190	goto cleanup;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1191	}
				1192
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1193	bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1194	return bio;
				1195	cleanup:
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1196	if (!map_data)
Kent Overstreet	d74c6d5	2013-02-06 12:23:11 -0800	[diff] [blame]	1197	bio_for_each_segment_all(bvec, bio, i)
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1198	__free_page(bvec->bv_page);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1199
				1200	bio_put(bio);
				1201	out_bmd:
				1202	bio_free_map_data(bmd);
				1203	return ERR_PTR(ret);
				1204	}
				1205
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1206	/**
				1207	* bio_copy_user - copy user data to bio
				1208	* @q: destination block queue
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1209	* @map_data: pointer to the rq_map_data holding pages (if necessary)
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1210	* @uaddr: start of user address
				1211	* @len: length in bytes
				1212	* @write_to_vm: bool indicating writing to pages or not
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1213	* @gfp_mask: memory allocation flags
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1214	*
				1215	* Prepares and returns a bio for indirect user io, bouncing data
				1216	* to/from kernel pages as necessary. Must be paired with
				1217	* call bio_uncopy_user() on io completion.
				1218	*/
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1219	struct bio bio_copy_user(struct request_queue q, struct rq_map_data *map_data,
				1220	unsigned long uaddr, unsigned int len,
				1221	int write_to_vm, gfp_t gfp_mask)
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1222	{
				1223	struct sg_iovec iov;
				1224
				1225	iov.iov_base = (void __user *)uaddr;
				1226	iov.iov_len = len;
				1227
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1228	return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask);
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1229	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1230	EXPORT_SYMBOL(bio_copy_user);
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1231
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	1232	static struct bio __bio_map_user_iov(struct request_queue q,
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1233	struct block_device *bdev,
				1234	struct sg_iovec *iov, int iov_count,
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1235	int write_to_vm, gfp_t gfp_mask)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1236	{
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1237	int i, j;
				1238	int nr_pages = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1239	struct page **pages;
				1240	struct bio *bio;
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1241	int cur_page = 0;
				1242	int ret, offset;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1243
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1244	for (i = 0; i < iov_count; i++) {
				1245	unsigned long uaddr = (unsigned long)iov[i].iov_base;
				1246	unsigned long len = iov[i].iov_len;
				1247	unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
				1248	unsigned long start = uaddr >> PAGE_SHIFT;
				1249
Jens Axboe	cb4644c	2010-11-10 14:36:25 +0100	[diff] [blame]	1250	/*
				1251	* Overflow, abort
				1252	*/
				1253	if (end < start)
				1254	return ERR_PTR(-EINVAL);
				1255
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1256	nr_pages += end - start;
				1257	/*
Mike Christie	ad2d722	2006-12-01 10:40:20 +0100	[diff] [blame]	1258	* buffer must be aligned to at least hardsector size for now
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1259	*/
Mike Christie	ad2d722	2006-12-01 10:40:20 +0100	[diff] [blame]	1260	if (uaddr & queue_dma_alignment(q))
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1261	return ERR_PTR(-EINVAL);
				1262	}
				1263
				1264	if (!nr_pages)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1265	return ERR_PTR(-EINVAL);
				1266
Tejun Heo	a9e9dc2	2009-04-15 22:10:27 +0900	[diff] [blame]	1267	bio = bio_kmalloc(gfp_mask, nr_pages);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1268	if (!bio)
				1269	return ERR_PTR(-ENOMEM);
				1270
				1271	ret = -ENOMEM;
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1272	pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1273	if (!pages)
				1274	goto out;
				1275
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1276	for (i = 0; i < iov_count; i++) {
				1277	unsigned long uaddr = (unsigned long)iov[i].iov_base;
				1278	unsigned long len = iov[i].iov_len;
				1279	unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
				1280	unsigned long start = uaddr >> PAGE_SHIFT;
				1281	const int local_nr_pages = end - start;
				1282	const int page_limit = cur_page + local_nr_pages;
Jens Axboe	cb4644c	2010-11-10 14:36:25 +0100	[diff] [blame]	1283
Nick Piggin	f5dd33c	2008-07-25 19:45:25 -0700	[diff] [blame]	1284	ret = get_user_pages_fast(uaddr, local_nr_pages,
				1285	write_to_vm, &pages[cur_page]);
Jens Axboe	9917215	2006-06-16 13:02:29 +0200	[diff] [blame]	1286	if (ret < local_nr_pages) {
				1287	ret = -EFAULT;
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1288	goto out_unmap;
Jens Axboe	9917215	2006-06-16 13:02:29 +0200	[diff] [blame]	1289	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1290
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1291	offset = uaddr & ~PAGE_MASK;
				1292	for (j = cur_page; j < page_limit; j++) {
				1293	unsigned int bytes = PAGE_SIZE - offset;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1294
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1295	if (len <= 0)
				1296	break;
				1297
				1298	if (bytes > len)
				1299	bytes = len;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1300
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1301	/*
				1302	* sorry...
				1303	*/
Mike Christie	defd94b	2005-12-05 02:37:06 -0600	[diff] [blame]	1304	if (bio_add_pc_page(q, bio, pages[j], bytes, offset) <
				1305	bytes)
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1306	break;
				1307
				1308	len -= bytes;
				1309	offset = 0;
				1310	}
				1311
				1312	cur_page = j;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1313	/*
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1314	* release the pages we didn't map into the bio, if any
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1315	*/
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1316	while (j < page_limit)
				1317	page_cache_release(pages[j++]);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1318	}
				1319
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1320	kfree(pages);
				1321
				1322	/*
				1323	* set data direction, and check if mapped pages need bouncing
				1324	*/
				1325	if (!write_to_vm)
Christoph Hellwig	7b6d91d	2010-08-07 18:20:39 +0200	[diff] [blame]	1326	bio->bi_rw \|= REQ_WRITE;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1327
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1328	bio->bi_bdev = bdev;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1329	bio->bi_flags \|= (1 << BIO_USER_MAPPED);
				1330	return bio;
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1331
				1332	out_unmap:
				1333	for (i = 0; i < nr_pages; i++) {
				1334	if(!pages[i])
				1335	break;
				1336	page_cache_release(pages[i]);
				1337	}
				1338	out:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1339	kfree(pages);
				1340	bio_put(bio);
				1341	return ERR_PTR(ret);
				1342	}
				1343
				1344	/**
				1345	* bio_map_user - map user address into bio
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	1346	* @q: the struct request_queue for the bio
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1347	* @bdev: destination block device
				1348	* @uaddr: start of user address
				1349	* @len: length in bytes
				1350	* @write_to_vm: bool indicating writing to pages or not
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1351	* @gfp_mask: memory allocation flags
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1352	*
				1353	* Map the user space address into a bio suitable for io to a block
				1354	* device. Returns an error pointer in case of error.
				1355	*/
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	1356	struct bio bio_map_user(struct request_queue q, struct block_device *bdev,
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1357	unsigned long uaddr, unsigned int len, int write_to_vm,
				1358	gfp_t gfp_mask)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1359	{
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1360	struct sg_iovec iov;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1361
viro@ZenIV.linux.org.uk	3f70353	2005-09-09 16:53:56 +0100	[diff] [blame]	1362	iov.iov_base = (void __user *)uaddr;
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1363	iov.iov_len = len;
				1364
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1365	return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask);
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1366	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1367	EXPORT_SYMBOL(bio_map_user);
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1368
				1369	/**
				1370	* bio_map_user_iov - map user sg_iovec table into bio
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	1371	* @q: the struct request_queue for the bio
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1372	* @bdev: destination block device
				1373	* @iov: the iovec.
				1374	* @iov_count: number of elements in the iovec
				1375	* @write_to_vm: bool indicating writing to pages or not
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1376	* @gfp_mask: memory allocation flags
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1377	*
				1378	* Map the user space address into a bio suitable for io to a block
				1379	* device. Returns an error pointer in case of error.
				1380	*/
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	1381	struct bio bio_map_user_iov(struct request_queue q, struct block_device *bdev,
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1382	struct sg_iovec *iov, int iov_count,
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1383	int write_to_vm, gfp_t gfp_mask)
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1384	{
				1385	struct bio *bio;
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1386
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1387	bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm,
				1388	gfp_mask);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1389	if (IS_ERR(bio))
				1390	return bio;
				1391
				1392	/*
				1393	* subtle -- if __bio_map_user() ended up bouncing a bio,
				1394	* it would normally disappear when its bi_end_io is run.
				1395	* however, we need it for the unmap, so grab an extra
				1396	* reference to it
				1397	*/
				1398	bio_get(bio);
				1399
Mike Christie	0e75f90	2006-12-01 10:40:55 +0100	[diff] [blame]	1400	return bio;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1401	}
				1402
				1403	static void __bio_unmap_user(struct bio *bio)
				1404	{
				1405	struct bio_vec *bvec;
				1406	int i;
				1407
				1408	/*
				1409	* make sure we dirty pages we wrote to
				1410	*/
Kent Overstreet	d74c6d5	2013-02-06 12:23:11 -0800	[diff] [blame]	1411	bio_for_each_segment_all(bvec, bio, i) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1412	if (bio_data_dir(bio) == READ)
				1413	set_page_dirty_lock(bvec->bv_page);
				1414
				1415	page_cache_release(bvec->bv_page);
				1416	}
				1417
				1418	bio_put(bio);
				1419	}
				1420
				1421	/**
				1422	* bio_unmap_user - unmap a bio
				1423	* @bio: the bio being unmapped
				1424	*
				1425	* Unmap a bio previously mapped by bio_map_user(). Must be called with
				1426	* a process context.
				1427	*
				1428	* bio_unmap_user() may sleep.
				1429	*/
				1430	void bio_unmap_user(struct bio *bio)
				1431	{
				1432	__bio_unmap_user(bio);
				1433	bio_put(bio);
				1434	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1435	EXPORT_SYMBOL(bio_unmap_user);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1436
/*
 * Completion callback installed by __bio_map_kern(): drops one reference
 * on @bio. @err is ignored.
 */
static void bio_map_kern_endio(struct bio *bio, int err)
{
	bio_put(bio);
}
				1441
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	1442	static struct bio __bio_map_kern(struct request_queue q, void *data,
Al Viro	27496a8	2005-10-21 03:20:48 -0400	[diff] [blame]	1443	unsigned int len, gfp_t gfp_mask)
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1444	{
				1445	unsigned long kaddr = (unsigned long)data;
				1446	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
				1447	unsigned long start = kaddr >> PAGE_SHIFT;
				1448	const int nr_pages = end - start;
				1449	int offset, i;
				1450	struct bio *bio;
				1451
Tejun Heo	a9e9dc2	2009-04-15 22:10:27 +0900	[diff] [blame]	1452	bio = bio_kmalloc(gfp_mask, nr_pages);
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1453	if (!bio)
				1454	return ERR_PTR(-ENOMEM);
				1455
				1456	offset = offset_in_page(kaddr);
				1457	for (i = 0; i < nr_pages; i++) {
				1458	unsigned int bytes = PAGE_SIZE - offset;
				1459
				1460	if (len <= 0)
				1461	break;
				1462
				1463	if (bytes > len)
				1464	bytes = len;
				1465
Mike Christie	defd94b	2005-12-05 02:37:06 -0600	[diff] [blame]	1466	if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
				1467	offset) < bytes)
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1468	break;
				1469
				1470	data += bytes;
				1471	len -= bytes;
				1472	offset = 0;
				1473	}
				1474
Jens Axboe	b823825	2005-06-20 14:05:27 +0200	[diff] [blame]	1475	bio->bi_end_io = bio_map_kern_endio;
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1476	return bio;
				1477	}
				1478
				1479	/**
				1480	* bio_map_kern - map kernel address into bio
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	1481	* @q: the struct request_queue for the bio
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1482	* @data: pointer to buffer to map
				1483	* @len: length in bytes
				1484	* @gfp_mask: allocation flags for bio allocation
				1485	*
				1486	* Map the kernel address into a bio suitable for io to a block
				1487	* device. Returns an error pointer in case of error.
				1488	*/
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	1489	struct bio bio_map_kern(struct request_queue q, void *data, unsigned int len,
Al Viro	27496a8	2005-10-21 03:20:48 -0400	[diff] [blame]	1490	gfp_t gfp_mask)
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1491	{
				1492	struct bio *bio;
				1493
				1494	bio = __bio_map_kern(q, data, len, gfp_mask);
				1495	if (IS_ERR(bio))
				1496	return bio;
				1497
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1498	if (bio->bi_iter.bi_size == len)
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1499	return bio;
				1500
				1501	/*
				1502	* Don't support partial mappings.
				1503	*/
				1504	bio_put(bio);
				1505	return ERR_PTR(-EINVAL);
				1506	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1507	EXPORT_SYMBOL(bio_map_kern);
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1508
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1509	static void bio_copy_kern_endio(struct bio *bio, int err)
				1510	{
				1511	struct bio_vec *bvec;
				1512	const int read = bio_data_dir(bio) == READ;
FUJITA Tomonori	76029ff	2008-08-25 20:36:08 +0200	[diff] [blame]	1513	struct bio_map_data *bmd = bio->bi_private;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1514	int i;
FUJITA Tomonori	76029ff	2008-08-25 20:36:08 +0200	[diff] [blame]	1515	char *p = bmd->sgvecs[0].iov_base;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1516
Kent Overstreet	d74c6d5	2013-02-06 12:23:11 -0800	[diff] [blame]	1517	bio_for_each_segment_all(bvec, bio, i) {
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1518	char *addr = page_address(bvec->bv_page);
FUJITA Tomonori	76029ff	2008-08-25 20:36:08 +0200	[diff] [blame]	1519	int len = bmd->iovecs[i].bv_len;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1520
Tejun Heo	4fc981e	2009-05-19 18:33:06 +0900	[diff] [blame]	1521	if (read)
FUJITA Tomonori	76029ff	2008-08-25 20:36:08 +0200	[diff] [blame]	1522	memcpy(p, addr, len);
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1523
				1524	__free_page(bvec->bv_page);
FUJITA Tomonori	76029ff	2008-08-25 20:36:08 +0200	[diff] [blame]	1525	p += len;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1526	}
				1527
FUJITA Tomonori	76029ff	2008-08-25 20:36:08 +0200	[diff] [blame]	1528	bio_free_map_data(bmd);
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1529	bio_put(bio);
				1530	}
				1531
				1532	/**
				1533	* bio_copy_kern - copy kernel address into bio
				1534	* @q: the struct request_queue for the bio
				1535	* @data: pointer to buffer to copy
				1536	* @len: length in bytes
				1537	* @gfp_mask: allocation flags for bio and page allocation
Randy Dunlap	ffee025	2008-04-30 09:08:54 +0200	[diff] [blame]	1538	* @reading: data direction is READ
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1539	*
				1540	* copy the kernel address into a bio suitable for io to a block
				1541	* device. Returns an error pointer in case of error.
				1542	*/
				1543	struct bio bio_copy_kern(struct request_queue q, void *data, unsigned int len,
				1544	gfp_t gfp_mask, int reading)
				1545	{
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1546	struct bio *bio;
				1547	struct bio_vec *bvec;
FUJITA Tomonori	4d8ab62	2008-08-28 15:05:57 +0900	[diff] [blame]	1548	int i;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1549
FUJITA Tomonori	4d8ab62	2008-08-28 15:05:57 +0900	[diff] [blame]	1550	bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask);
				1551	if (IS_ERR(bio))
				1552	return bio;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1553
				1554	if (!reading) {
				1555	void *p = data;
				1556
Kent Overstreet	d74c6d5	2013-02-06 12:23:11 -0800	[diff] [blame]	1557	bio_for_each_segment_all(bvec, bio, i) {
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1558	char *addr = page_address(bvec->bv_page);
				1559
				1560	memcpy(addr, p, bvec->bv_len);
				1561	p += bvec->bv_len;
				1562	}
				1563	}
				1564
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1565	bio->bi_end_io = bio_copy_kern_endio;
FUJITA Tomonori	76029ff	2008-08-25 20:36:08 +0200	[diff] [blame]	1566
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1567	return bio;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1568	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1569	EXPORT_SYMBOL(bio_copy_kern);
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1570
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1571	/*
				1572	* bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
				1573	* for performing direct-IO in BIOs.
				1574	*
				1575	* The problem is that we cannot run set_page_dirty() from interrupt context
				1576	* because the required locks are not interrupt-safe. So what we can do is to
				1577	* mark the pages dirty _before_ performing IO. And in interrupt context,
				1578	* check that the pages are still dirty. If so, fine. If not, redirty them
				1579	* in process context.
				1580	*
				1581	* We special-case compound pages here: normally this means reads into hugetlb
				1582	* pages. The logic in here doesn't really work right for compound pages
				1583	* because the VM does not uniformly chase down the head page in all cases.
				1584	* But dirtiness of compound pages is pretty meaningless anyway: the VM doesn't
				1585	* handle them at all. So we skip compound pages here at an early stage.
				1586	*
				1587	* Note that this code is very hard to test under normal circumstances because
				1588	* direct-io pins the pages with get_user_pages(). This makes
				1589	* is_page_cache_freeable return false, and the VM will not clean the pages.
Artem Bityutskiy	0d5c3eb	2012-07-25 18:12:08 +0300	[diff] [blame]	1590	* But other code (eg, flusher threads) could clean the pages if they are mapped
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1591	* pagecache.
				1592	*
				1593	* Simply disabling the call to bio_set_pages_dirty() is a good way to test the
				1594	* deferred bio dirtying paths.
				1595	*/
				1596
				1597	/*
				1598	* bio_set_pages_dirty() will mark all the bio's pages as dirty.
				1599	*/
				1600	void bio_set_pages_dirty(struct bio *bio)
				1601	{
Kent Overstreet	cb34e05	2012-09-05 15:22:02 -0700	[diff] [blame]	1602	struct bio_vec *bvec;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1603	int i;
				1604
Kent Overstreet	cb34e05	2012-09-05 15:22:02 -0700	[diff] [blame]	1605	bio_for_each_segment_all(bvec, bio, i) {
				1606	struct page *page = bvec->bv_page;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1607
				1608	if (page && !PageCompound(page))
				1609	set_page_dirty_lock(page);
				1610	}
				1611	}
				1612
Adrian Bunk	86b6c7a	2008-02-18 13:48:32 +0100	[diff] [blame]	1613	static void bio_release_pages(struct bio *bio)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1614	{
Kent Overstreet	cb34e05	2012-09-05 15:22:02 -0700	[diff] [blame]	1615	struct bio_vec *bvec;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1616	int i;
				1617
Kent Overstreet	cb34e05	2012-09-05 15:22:02 -0700	[diff] [blame]	1618	bio_for_each_segment_all(bvec, bio, i) {
				1619	struct page *page = bvec->bv_page;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1620
				1621	if (page)
				1622	put_page(page);
				1623	}
				1624	}
				1625
				1626	/*
				1627	* bio_check_pages_dirty() will check that all the BIO's pages are still dirty.
				1628	* If they are, then fine. If, however, some pages are clean then they must
				1629	* have been written out during the direct-IO read. So we take another ref on
				1630	* the BIO and the offending pages and re-dirty the pages in process context.
				1631	*
				1632	* It is expected that bio_check_pages_dirty() will wholly own the BIO from
				1633	* here on. It will run one page_cache_release() against each page and will
				1634	* run one bio_put() against the BIO.
				1635	*/
				1636
David Howells	65f27f3	2006-11-22 14:55:48 +0000	[diff] [blame]	1637	static void bio_dirty_fn(struct work_struct *work);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1638
David Howells	65f27f3	2006-11-22 14:55:48 +0000	[diff] [blame]	1639	static DECLARE_WORK(bio_dirty_work, bio_dirty_fn);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1640	static DEFINE_SPINLOCK(bio_dirty_lock);
				1641	static struct bio *bio_dirty_list;
				1642
				1643	/*
				1644	* This runs in process context
				1645	*/
David Howells	65f27f3	2006-11-22 14:55:48 +0000	[diff] [blame]	1646	static void bio_dirty_fn(struct work_struct *work)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1647	{
				1648	unsigned long flags;
				1649	struct bio *bio;
				1650
				1651	spin_lock_irqsave(&bio_dirty_lock, flags);
				1652	bio = bio_dirty_list;
				1653	bio_dirty_list = NULL;
				1654	spin_unlock_irqrestore(&bio_dirty_lock, flags);
				1655
				1656	while (bio) {
				1657	struct bio *next = bio->bi_private;
				1658
				1659	bio_set_pages_dirty(bio);
				1660	bio_release_pages(bio);
				1661	bio_put(bio);
				1662	bio = next;
				1663	}
				1664	}
				1665
				1666	void bio_check_pages_dirty(struct bio *bio)
				1667	{
Kent Overstreet	cb34e05	2012-09-05 15:22:02 -0700	[diff] [blame]	1668	struct bio_vec *bvec;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1669	int nr_clean_pages = 0;
				1670	int i;
				1671
Kent Overstreet	cb34e05	2012-09-05 15:22:02 -0700	[diff] [blame]	1672	bio_for_each_segment_all(bvec, bio, i) {
				1673	struct page *page = bvec->bv_page;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1674
				1675	if (PageDirty(page) \|\| PageCompound(page)) {
				1676	page_cache_release(page);
Kent Overstreet	cb34e05	2012-09-05 15:22:02 -0700	[diff] [blame]	1677	bvec->bv_page = NULL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1678	} else {
				1679	nr_clean_pages++;
				1680	}
				1681	}
				1682
				1683	if (nr_clean_pages) {
				1684	unsigned long flags;
				1685
				1686	spin_lock_irqsave(&bio_dirty_lock, flags);
				1687	bio->bi_private = bio_dirty_list;
				1688	bio_dirty_list = bio;
				1689	spin_unlock_irqrestore(&bio_dirty_lock, flags);
				1690	schedule_work(&bio_dirty_work);
				1691	} else {
				1692	bio_put(bio);
				1693	}
				1694	}
				1695
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
/*
 * Call flush_dcache_page() on the page of every segment that
 * bio_for_each_segment() yields for @bi.
 */
void bio_flush_dcache_pages(struct bio *bi)
{
	struct bvec_iter iter;
	struct bio_vec bvec;

	bio_for_each_segment(bvec, bi, iter) {
		flush_dcache_page(bvec.bv_page);
	}
}
EXPORT_SYMBOL(bio_flush_dcache_pages);
#endif
				1707
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1708	/**
				1709	* bio_endio - end I/O on a bio
				1710	* @bio: bio
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1711	* @error: error, if any
				1712	*
				1713	* Description:
NeilBrown	6712ecf	2007-09-27 12:47:43 +0200	[diff] [blame]	1714	* bio_endio() will end I/O on the whole bio. bio_endio() is the
NeilBrown	5bb23a6	2007-09-27 12:46:13 +0200	[diff] [blame]	1715	* preferred way to end I/O on a bio, it takes care of clearing
				1716	* BIO_UPTODATE on error. @error is 0 on success, and and one of the
				1717	* established -Exxxx (-EIO, for instance) error values in case
Lucas De Marchi	25985ed	2011-03-30 22:57:33 -0300	[diff] [blame]	1718	* something went wrong. No one should call bi_end_io() directly on a
NeilBrown	5bb23a6	2007-09-27 12:46:13 +0200	[diff] [blame]	1719	* bio unless they own it and thus know that it has an end_io
				1720	* function.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1721	**/
NeilBrown	6712ecf	2007-09-27 12:47:43 +0200	[diff] [blame]	1722	void bio_endio(struct bio *bio, int error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1723	{
				1724	if (error)
				1725	clear_bit(BIO_UPTODATE, &bio->bi_flags);
NeilBrown	9cc54d4	2007-09-27 12:46:12 +0200	[diff] [blame]	1726	else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
				1727	error = -EIO;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1728
NeilBrown	5bb23a6	2007-09-27 12:46:13 +0200	[diff] [blame]	1729	if (bio->bi_end_io)
NeilBrown	6712ecf	2007-09-27 12:47:43 +0200	[diff] [blame]	1730	bio->bi_end_io(bio, error);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1731	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1732	EXPORT_SYMBOL(bio_endio);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1733
				1734	void bio_pair_release(struct bio_pair *bp)
				1735	{
				1736	if (atomic_dec_and_test(&bp->cnt)) {
				1737	struct bio *master = bp->bio1.bi_private;
				1738
NeilBrown	6712ecf	2007-09-27 12:47:43 +0200	[diff] [blame]	1739	bio_endio(master, bp->error);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1740	mempool_free(bp, bp->bio2.bi_private);
				1741	}
				1742	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1743	EXPORT_SYMBOL(bio_pair_release);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1744
NeilBrown	6712ecf	2007-09-27 12:47:43 +0200	[diff] [blame]	1745	static void bio_pair_end_1(struct bio *bi, int err)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1746	{
				1747	struct bio_pair *bp = container_of(bi, struct bio_pair, bio1);
				1748
				1749	if (err)
				1750	bp->error = err;
				1751
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1752	bio_pair_release(bp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1753	}
				1754
NeilBrown	6712ecf	2007-09-27 12:47:43 +0200	[diff] [blame]	1755	static void bio_pair_end_2(struct bio *bi, int err)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1756	{
				1757	struct bio_pair *bp = container_of(bi, struct bio_pair, bio2);
				1758
				1759	if (err)
				1760	bp->error = err;
				1761
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1762	bio_pair_release(bp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1763	}
				1764
				1765	/*
Alberto Bertogli	c7eee1b	2009-01-25 23:36:14 -0200	[diff] [blame]	1766	* split a bio - only worry about a bio with a single page in its iovec
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1767	*/
Denis ChengRq	6feef53	2008-10-09 08:57:05 +0200	[diff] [blame]	1768	struct bio_pair bio_split(struct bio bi, int first_sectors)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1769	{
Denis ChengRq	6feef53	2008-10-09 08:57:05 +0200	[diff] [blame]	1770	struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1771
				1772	if (!bp)
				1773	return bp;
				1774
Arnaldo Carvalho de Melo	5f3ea37	2008-10-30 08:34:33 +0100	[diff] [blame]	1775	trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1776	bi->bi_iter.bi_sector + first_sectors);
Jens Axboe	2056a78	2006-03-23 20:00:26 +0100	[diff] [blame]	1777
Kent Overstreet	458b76e	2013-09-24 16:26:05 -0700	[diff] [blame]	1778	BUG_ON(bio_multiple_segments(bi));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1779	atomic_set(&bp->cnt, 3);
				1780	bp->error = 0;
				1781	bp->bio1 = *bi;
				1782	bp->bio2 = *bi;
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1783	bp->bio2.bi_iter.bi_sector += first_sectors;
				1784	bp->bio2.bi_iter.bi_size -= first_sectors << 9;
				1785	bp->bio1.bi_iter.bi_size = first_sectors << 9;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1786
Shaohua Li	02f3939	2012-09-28 10:38:48 +0200	[diff] [blame]	1787	if (bi->bi_vcnt != 0) {
Kent Overstreet	a4ad39b1	2013-08-07 14:24:32 -0700	[diff] [blame]	1788	bp->bv1 = bio_iovec(bi);
				1789	bp->bv2 = bio_iovec(bi);
Martin K. Petersen	4363ac7	2012-09-18 12:19:27 -0400	[diff] [blame]	1790
Shaohua Li	02f3939	2012-09-28 10:38:48 +0200	[diff] [blame]	1791	if (bio_is_rw(bi)) {
				1792	bp->bv2.bv_offset += first_sectors << 9;
				1793	bp->bv2.bv_len -= first_sectors << 9;
				1794	bp->bv1.bv_len = first_sectors << 9;
				1795	}
				1796
				1797	bp->bio1.bi_io_vec = &bp->bv1;
				1798	bp->bio2.bi_io_vec = &bp->bv2;
				1799
				1800	bp->bio1.bi_max_vecs = 1;
				1801	bp->bio2.bi_max_vecs = 1;
Martin K. Petersen	4363ac7	2012-09-18 12:19:27 -0400	[diff] [blame]	1802	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1803
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1804	bp->bio1.bi_end_io = bio_pair_end_1;
				1805	bp->bio2.bi_end_io = bio_pair_end_2;
				1806
				1807	bp->bio1.bi_private = bi;
Denis ChengRq	6feef53	2008-10-09 08:57:05 +0200	[diff] [blame]	1808	bp->bio2.bi_private = bio_split_pool;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1809
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	1810	if (bio_integrity(bi))
				1811	bio_integrity_split(bi, bp, first_sectors);
				1812
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1813	return bp;
				1814	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1815	EXPORT_SYMBOL(bio_split);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1816
Martin K. Petersen	ad3316b	2008-10-01 22:42:53 -0400	[diff] [blame]	1817	/**
Kent Overstreet	6678d83	2013-08-07 11:14:32 -0700	[diff] [blame]	1818	* bio_trim - trim a bio
				1819	* @bio: bio to trim
				1820	* @offset: number of sectors to trim from the front of @bio
				1821	* @size: size we want to trim @bio to, in sectors
				1822	*/
				1823	void bio_trim(struct bio *bio, int offset, int size)
				1824	{
				1825	/* 'bio' is a cloned bio which we need to trim to match
				1826	* the given offset and size.
				1827	* This requires adjusting bi_sector, bi_size, and bi_io_vec
				1828	*/
				1829	int i;
				1830	struct bio_vec *bvec;
				1831	int sofar = 0;
				1832
				1833	size <<= 9;
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1834	if (offset == 0 && size == bio->bi_iter.bi_size)
Kent Overstreet	6678d83	2013-08-07 11:14:32 -0700	[diff] [blame]	1835	return;
				1836
				1837	clear_bit(BIO_SEG_VALID, &bio->bi_flags);
				1838
				1839	bio_advance(bio, offset << 9);
				1840
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1841	bio->bi_iter.bi_size = size;
Kent Overstreet	6678d83	2013-08-07 11:14:32 -0700	[diff] [blame]	1842
				1843	/* avoid any complications with bi_idx being non-zero*/
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1844	if (bio->bi_iter.bi_idx) {
				1845	memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_iter.bi_idx,
				1846	(bio->bi_vcnt - bio->bi_iter.bi_idx) *
				1847	sizeof(struct bio_vec));
				1848	bio->bi_vcnt -= bio->bi_iter.bi_idx;
				1849	bio->bi_iter.bi_idx = 0;
Kent Overstreet	6678d83	2013-08-07 11:14:32 -0700	[diff] [blame]	1850	}
				1851	/* Make sure vcnt and last bv are not too big */
Kent Overstreet	7988613	2013-11-23 17:19:00 -0800	[diff] [blame]	1852	bio_for_each_segment_all(bvec, bio, i) {
Kent Overstreet	6678d83	2013-08-07 11:14:32 -0700	[diff] [blame]	1853	if (sofar + bvec->bv_len > size)
				1854	bvec->bv_len = size - sofar;
				1855	if (bvec->bv_len == 0) {
				1856	bio->bi_vcnt = i;
				1857	break;
				1858	}
				1859	sofar += bvec->bv_len;
				1860	}
				1861	}
				1862	EXPORT_SYMBOL_GPL(bio_trim);
				1863
				1864	/**
Martin K. Petersen	ad3316b	2008-10-01 22:42:53 -0400	[diff] [blame]	1865	* bio_sector_offset - Find hardware sector offset in bio
				1866	* @bio: bio to inspect
				1867	* @index: bio_vec index
				1868	* @offset: offset in bv_page
				1869	*
				1870	* Return the number of hardware sectors between beginning of bio
				1871	* and an end point indicated by a bio_vec index and an offset
				1872	* within that vector's page.
				1873	*/
				1874	sector_t bio_sector_offset(struct bio *bio, unsigned short index,
				1875	unsigned int offset)
				1876	{
Martin K. Petersen	e1defc4	2009-05-22 17:17:49 -0400	[diff] [blame]	1877	unsigned int sector_sz;
Martin K. Petersen	ad3316b	2008-10-01 22:42:53 -0400	[diff] [blame]	1878	struct bio_vec *bv;
				1879	sector_t sectors;
				1880	int i;
				1881
Martin K. Petersen	e1defc4	2009-05-22 17:17:49 -0400	[diff] [blame]	1882	sector_sz = queue_logical_block_size(bio->bi_bdev->bd_disk->queue);
Martin K. Petersen	ad3316b	2008-10-01 22:42:53 -0400	[diff] [blame]	1883	sectors = 0;
				1884
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1885	if (index >= bio->bi_iter.bi_idx)
Martin K. Petersen	ad3316b	2008-10-01 22:42:53 -0400	[diff] [blame]	1886	index = bio->bi_vcnt - 1;
				1887
Kent Overstreet	d74c6d5	2013-02-06 12:23:11 -0800	[diff] [blame]	1888	bio_for_each_segment_all(bv, bio, i) {
Martin K. Petersen	ad3316b	2008-10-01 22:42:53 -0400	[diff] [blame]	1889	if (i == index) {
				1890	if (offset > bv->bv_offset)
				1891	sectors += (offset - bv->bv_offset) / sector_sz;
				1892	break;
				1893	}
				1894
				1895	sectors += bv->bv_len / sector_sz;
				1896	}
				1897
				1898	return sectors;
				1899	}
				1900	EXPORT_SYMBOL(bio_sector_offset);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1901
				1902	/*
				1903	* create memory pools for biovec's in a bio_set.
				1904	* use the global biovec slabs created for general use.
				1905	*/
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	1906	mempool_t biovec_create_pool(struct bio_set bs, int pool_entries)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1907	{
Jens Axboe	7ff9345	2008-12-11 11:53:43 +0100	[diff] [blame]	1908	struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1909
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	1910	return mempool_create_slab_pool(pool_entries, bp->slab);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1911	}
				1912
				1913	void bioset_free(struct bio_set *bs)
				1914	{
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	1915	if (bs->rescue_workqueue)
				1916	destroy_workqueue(bs->rescue_workqueue);
				1917
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1918	if (bs->bio_pool)
				1919	mempool_destroy(bs->bio_pool);
				1920
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	1921	if (bs->bvec_pool)
				1922	mempool_destroy(bs->bvec_pool);
				1923
Martin K. Petersen	7878cba	2009-06-26 15:37:49 +0200	[diff] [blame]	1924	bioset_integrity_free(bs);
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	1925	bio_put_slab(bs);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1926
				1927	kfree(bs);
				1928	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1929	EXPORT_SYMBOL(bioset_free);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1930
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	1931	/**
				1932	* bioset_create - Create a bio_set
				1933	* @pool_size: Number of bio and bio_vecs to cache in the mempool
				1934	* @front_pad: Number of bytes to allocate in front of the returned bio
				1935	*
				1936	* Description:
				1937	* Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
				1938	* to ask for a number of bytes to be allocated in front of the bio.
				1939	* Front pad allocation is useful for embedding the bio inside
				1940	* another structure, to avoid allocating extra data to go with the bio.
				1941	* Note that the bio must be embedded at the END of that structure always,
				1942	* or things will break badly.
				1943	*/
				1944	struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1945	{
Jens Axboe	392ddc3	2008-12-23 12:42:54 +0100	[diff] [blame]	1946	unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
Jens Axboe	1b43449	2008-10-22 20:32:58 +0200	[diff] [blame]	1947	struct bio_set *bs;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1948
Jens Axboe	1b43449	2008-10-22 20:32:58 +0200	[diff] [blame]	1949	bs = kzalloc(sizeof(*bs), GFP_KERNEL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1950	if (!bs)
				1951	return NULL;
				1952
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	1953	bs->front_pad = front_pad;
Jens Axboe	1b43449	2008-10-22 20:32:58 +0200	[diff] [blame]	1954
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	1955	spin_lock_init(&bs->rescue_lock);
				1956	bio_list_init(&bs->rescue_list);
				1957	INIT_WORK(&bs->rescue_work, bio_alloc_rescue);
				1958
Jens Axboe	392ddc3	2008-12-23 12:42:54 +0100	[diff] [blame]	1959	bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	1960	if (!bs->bio_slab) {
				1961	kfree(bs);
				1962	return NULL;
				1963	}
				1964
				1965	bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1966	if (!bs->bio_pool)
				1967	goto bad;
				1968
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	1969	bs->bvec_pool = biovec_create_pool(bs, pool_size);
				1970	if (!bs->bvec_pool)
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	1971	goto bad;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1972
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	1973	bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
				1974	if (!bs->rescue_workqueue)
				1975	goto bad;
				1976
				1977	return bs;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1978	bad:
				1979	bioset_free(bs);
				1980	return NULL;
				1981	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1982	EXPORT_SYMBOL(bioset_create);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1983
Tejun Heo	852c788	2012-03-05 13:15:27 -0800	[diff] [blame]	1984	#ifdef CONFIG_BLK_CGROUP
				1985	/**
				1986	* bio_associate_current - associate a bio with %current
				1987	* @bio: target bio
				1988	*
				1989	* Associate @bio with %current if it hasn't been associated yet. Block
				1990	* layer will treat @bio as if it were issued by %current no matter which
				1991	* task actually issues it.
				1992	*
				1993	* This function takes an extra reference of @task's io_context and blkcg
				1994	* which will be put when @bio is released. The caller must own @bio,
				1995	* ensure %current->io_context exists, and is responsible for synchronizing
				1996	* calls to this function.
				1997	*/
				1998	int bio_associate_current(struct bio *bio)
				1999	{
				2000	struct io_context *ioc;
				2001	struct cgroup_subsys_state *css;
				2002
				2003	if (bio->bi_ioc)
				2004	return -EBUSY;
				2005
				2006	ioc = current->io_context;
				2007	if (!ioc)
				2008	return -ENOENT;
				2009
				2010	/* acquire active ref on @ioc and associate */
				2011	get_io_context_active(ioc);
				2012	bio->bi_ioc = ioc;
				2013
				2014	/* associate blkcg if exists */
				2015	rcu_read_lock();
Tejun Heo	8af01f5	2013-08-08 20:11:22 -0400	[diff] [blame]	2016	css = task_css(current, blkio_subsys_id);
Tejun Heo	852c788	2012-03-05 13:15:27 -0800	[diff] [blame]	2017	if (css && css_tryget(css))
				2018	bio->bi_css = css;
				2019	rcu_read_unlock();
				2020
				2021	return 0;
				2022	}
				2023
				2024	/**
				2025	* bio_disassociate_task - undo bio_associate_current()
				2026	* @bio: target bio
				2027	*/
				2028	void bio_disassociate_task(struct bio *bio)
				2029	{
				2030	if (bio->bi_ioc) {
				2031	put_io_context(bio->bi_ioc);
				2032	bio->bi_ioc = NULL;
				2033	}
				2034	if (bio->bi_css) {
				2035	css_put(bio->bi_css);
				2036	bio->bi_css = NULL;
				2037	}
				2038	}
				2039
				2040	#endif /* CONFIG_BLK_CGROUP */
				2041
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2042	static void __init biovec_init_slabs(void)
				2043	{
				2044	int i;
				2045
				2046	for (i = 0; i < BIOVEC_NR_POOLS; i++) {
				2047	int size;
				2048	struct biovec_slab *bvs = bvec_slabs + i;
				2049
Jens Axboe	a7fcd37	2008-12-05 16:10:29 +0100	[diff] [blame]	2050	if (bvs->nr_vecs <= BIO_INLINE_VECS) {
				2051	bvs->slab = NULL;
				2052	continue;
				2053	}
Jens Axboe	a7fcd37	2008-12-05 16:10:29 +0100	[diff] [blame]	2054
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2055	size = bvs->nr_vecs * sizeof(struct bio_vec);
				2056	bvs->slab = kmem_cache_create(bvs->name, size, 0,
Paul Mundt	20c2df8	2007-07-20 10:11:58 +0900	[diff] [blame]	2057	SLAB_HWCACHE_ALIGN\|SLAB_PANIC, NULL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2058	}
				2059	}
				2060
				2061	static int __init init_bio(void)
				2062	{
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	2063	bio_slab_max = 2;
				2064	bio_slab_nr = 0;
				2065	bio_slabs = kzalloc(bio_slab_max * sizeof(struct bio_slab), GFP_KERNEL);
				2066	if (!bio_slabs)
				2067	panic("bio: can't allocate bios\n");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2068
Martin K. Petersen	7878cba	2009-06-26 15:37:49 +0200	[diff] [blame]	2069	bio_integrity_init();
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2070	biovec_init_slabs();
				2071
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	2072	fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2073	if (!fs_bio_set)
				2074	panic("bio: can't allocate bios\n");
				2075
Martin K. Petersen	a91a278	2011-03-17 11:11:05 +0100	[diff] [blame]	2076	if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
				2077	panic("bio: can't create integrity pool\n");
				2078
Matthew Dobson	0eaae62a	2006-03-26 01:37:47 -0800	[diff] [blame]	2079	bio_split_pool = mempool_create_kmalloc_pool(BIO_SPLIT_ENTRIES,
				2080	sizeof(struct bio_pair));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2081	if (!bio_split_pool)
				2082	panic("bio: can't create split pool\n");
				2083
				2084	return 0;
				2085	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2086	subsys_initcall(init_bio);