From 30ce6e1cc5a0f781d60227e9096c86e188d2c2bd Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 23 Nov 2015 16:24:45 -0500 Subject: dm btree: fix leak of bufio-backed block in btree_split_sibling error path The block allocated at the start of btree_split_sibling() is never released if later insert_at() fails. Fix this by releasing the previously allocated bufio block using unlock_block(). Reported-by: Mikulas Patocka Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org --- drivers/md/persistent-data/dm-btree.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index c573402033b2..0918a7c5f809 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -473,8 +473,10 @@ static int btree_split_sibling(struct shadow_spine *s, unsigned parent_index, r = insert_at(sizeof(__le64), pn, parent_index + 1, le64_to_cpu(rn->keys[0]), &location); - if (r) + if (r) { + unlock_block(s->info, right); return r; + } if (key < le64_to_cpu(rn->keys[0])) { unlock_block(s->info, right); -- cgit v1.2.3 From 993ceab91986e2e737ce9a3e23bebc8cce649240 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 2 Dec 2015 12:24:39 +0000 Subject: dm thin metadata: fix bug in dm_thin_remove_range() dm_btree_remove_leaves() only unmaps a contiguous region so we need a loop, in __remove_range(), to handle ranges that contain multiple regions. A new btree function, dm_btree_lookup_next(), is introduced which is more efficiently able to skip over regions of the thin device which aren't mapped. __remove_range() uses dm_btree_lookup_next() for each iteration of __remove_range()'s loop. Also, improve description of dm_btree_remove_leaves(). Fixes: 6550f075 ("dm thin metadata: add dm_thin_remove_range()") Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org # 4.1+ --- drivers/md/dm-thin-metadata.c | 28 +++++++++--- drivers/md/persistent-data/dm-btree.c | 81 +++++++++++++++++++++++++++++++++++ drivers/md/persistent-data/dm-btree.h | 14 ++++-- 3 files changed, 115 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 1fa45695b68a..67871e74c82b 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -1538,7 +1538,7 @@ static int __remove(struct dm_thin_device *td, dm_block_t block) static int __remove_range(struct dm_thin_device *td, dm_block_t begin, dm_block_t end) { int r; - unsigned count; + unsigned count, total_count = 0; struct dm_pool_metadata *pmd = td->pmd; dm_block_t keys[1] = { td->id }; __le64 value; @@ -1561,11 +1561,29 @@ static int __remove_range(struct dm_thin_device *td, dm_block_t begin, dm_block_ if (r) return r; - r = dm_btree_remove_leaves(&pmd->bl_info, mapping_root, &begin, end, &mapping_root, &count); - if (r) - return r; + /* + * Remove leaves stops at the first unmapped entry, so we have to + * loop round finding mapped ranges. + */ + while (begin < end) { + r = dm_btree_lookup_next(&pmd->bl_info, mapping_root, &begin, &begin, &value); + if (r == -ENODATA) + break; + + if (r) + return r; + + if (begin >= end) + break; + + r = dm_btree_remove_leaves(&pmd->bl_info, mapping_root, &begin, end, &mapping_root, &count); + if (r) + return r; + + total_count += count; + } - td->mapped_blocks -= count; + td->mapped_blocks -= total_count; td->changed = 1; /* diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index 0918a7c5f809..7e5b7f12958a 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -63,6 +63,11 @@ int lower_bound(struct btree_node *n, uint64_t key) return bsearch(n, key, 0); } +static int upper_bound(struct btree_node *n, uint64_t key) +{ + return bsearch(n, key, 1); +} + void inc_children(struct dm_transaction_manager *tm, struct btree_node *n, struct dm_btree_value_type *vt) { @@ -392,6 +397,82 @@ int dm_btree_lookup(struct dm_btree_info *info, dm_block_t root, } EXPORT_SYMBOL_GPL(dm_btree_lookup); +static int dm_btree_lookup_next_single(struct dm_btree_info *info, dm_block_t root, + uint64_t key, uint64_t *rkey, void *value_le) +{ + int r, i; + uint32_t flags, nr_entries; + struct dm_block *node; + struct btree_node *n; + + r = bn_read_lock(info, root, &node); + if (r) + return r; + + n = dm_block_data(node); + flags = le32_to_cpu(n->header.flags); + nr_entries = le32_to_cpu(n->header.nr_entries); + + if (flags & INTERNAL_NODE) { + i = lower_bound(n, key); + if (i < 0 || i >= nr_entries) { + r = -ENODATA; + goto out; + } + + r = dm_btree_lookup_next_single(info, value64(n, i), key, rkey, value_le); + if (r == -ENODATA && i < (nr_entries - 1)) { + i++; + r = dm_btree_lookup_next_single(info, value64(n, i), key, rkey, value_le); + } + + } else { + i = upper_bound(n, key); + if (i < 0 || i >= nr_entries) { + r = -ENODATA; + goto out; + } + + *rkey = le64_to_cpu(n->keys[i]); + memcpy(value_le, value_ptr(n, i), info->value_type.size); + } +out: + dm_tm_unlock(info->tm, node); + return r; +} + +int dm_btree_lookup_next(struct dm_btree_info *info, dm_block_t root, + uint64_t *keys, uint64_t *rkey, void *value_le) +{ + unsigned level; + int r = -ENODATA; + __le64 internal_value_le; + struct ro_spine spine; + + init_ro_spine(&spine, info); + for (level = 0; level < info->levels - 1u; level++) { + r = btree_lookup_raw(&spine, root, keys[level], + lower_bound, rkey, + &internal_value_le, sizeof(uint64_t)); + if (r) + goto out; + + if (*rkey != keys[level]) { + r = -ENODATA; + goto out; + } + + root = le64_to_cpu(internal_value_le); + } + + r = dm_btree_lookup_next_single(info, root, keys[level], rkey, value_le); +out: + exit_ro_spine(&spine); + return r; +} + +EXPORT_SYMBOL_GPL(dm_btree_lookup_next); + /* * Splits a node by creating a sibling node and shifting half the nodes * contents across. Assumes there is a parent node, and it has room for diff --git a/drivers/md/persistent-data/dm-btree.h b/drivers/md/persistent-data/dm-btree.h index 11d8cf78621d..c74301fa5a37 100644 --- a/drivers/md/persistent-data/dm-btree.h +++ b/drivers/md/persistent-data/dm-btree.h @@ -109,6 +109,13 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root); int dm_btree_lookup(struct dm_btree_info *info, dm_block_t root, uint64_t *keys, void *value_le); +/* + * Tries to find the first key where the bottom level key is >= to that + * given. Useful for skipping empty sections of the btree. + */ +int dm_btree_lookup_next(struct dm_btree_info *info, dm_block_t root, + uint64_t *keys, uint64_t *rkey, void *value_le); + /* * Insertion (or overwrite an existing value). O(ln(n)) */ @@ -135,9 +142,10 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, uint64_t *keys, dm_block_t *new_root); /* - * Removes values between 'keys' and keys2, where keys2 is keys with the - * final key replaced with 'end_key'. 'end_key' is the one-past-the-end - * value. 'keys' may be altered. + * Removes a _contiguous_ run of values starting from 'keys' and not + * reaching keys2 (where keys2 is keys with the final key replaced with + * 'end_key'). 'end_key' is the one-past-the-end value. 'keys' may be + * altered. */ int dm_btree_remove_leaves(struct dm_btree_info *info, dm_block_t root, uint64_t *keys, uint64_t end_key, -- cgit v1.2.3 From 49e99fc717f624aa75ca755d6e7bc029efd3f0e9 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 9 Dec 2015 16:23:24 +0000 Subject: dm thin metadata: fix bug when taking a metadata snapshot When you take a metadata snapshot the btree roots for the mapping and details tree need to have their reference counts incremented so they persist for the lifetime of the metadata snap. The roots being incremented were those currently written in the superblock, which could possibly be out of date if concurrent IO is triggering new mappings, breaking of sharing, etc. Fix this by performing a commit with the metadata lock held while taking a metadata snapshot. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org --- drivers/md/dm-thin-metadata.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 67871e74c82b..c219a053c7f6 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -1206,6 +1206,12 @@ static int __reserve_metadata_snap(struct dm_pool_metadata *pmd) struct dm_block *copy, *sblock; dm_block_t held_root; + /* + * We commit to ensure the btree roots which we increment in a + * moment are up to date. + */ + __commit_transaction(pmd); + /* * Copy the superblock. */ -- cgit v1.2.3 From 50dd842ad83b43bed71790efb31cfb2f6c05c9c1 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 9 Dec 2015 16:38:12 +0000 Subject: dm space map metadata: fix ref counting bug when bootstrapping a new space map When applying block operations (BOPs) do not remove them from the uncommitted BOP ring-buffer until after they've been applied -- in case we recurse. Also, perform BOP_INC operation, in dm_sm_metadata_create() and sm_metadata_extend(), in terms of the uncommitted BOP ring-buffer rather than using direct calls to sm_ll_inc(). Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org --- drivers/md/persistent-data/dm-space-map-metadata.c | 32 +++++++++++++++------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index 53091295fce9..fca6dbcf9a47 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -136,7 +136,7 @@ static int brb_push(struct bop_ring_buffer *brb, return 0; } -static int brb_pop(struct bop_ring_buffer *brb, struct block_op *result) +static int brb_peek(struct bop_ring_buffer *brb, struct block_op *result) { struct block_op *bop; @@ -147,6 +147,17 @@ static int brb_pop(struct bop_ring_buffer *brb, struct block_op *result) result->type = bop->type; result->block = bop->block; + return 0; +} + +static int brb_pop(struct bop_ring_buffer *brb) +{ + struct block_op *bop; + + if (brb_empty(brb)) + return -ENODATA; + + bop = brb->bops + brb->begin; brb->begin = brb_next(brb, brb->begin); return 0; @@ -211,7 +222,7 @@ static int apply_bops(struct sm_metadata *smm) while (!brb_empty(&smm->uncommitted)) { struct block_op bop; - r = brb_pop(&smm->uncommitted, &bop); + r = brb_peek(&smm->uncommitted, &bop); if (r) { DMERR("bug in bop ring buffer"); break; @@ -220,6 +231,8 @@ static int apply_bops(struct sm_metadata *smm) r = commit_bop(smm, &bop); if (r) break; + + brb_pop(&smm->uncommitted); } return r; @@ -683,7 +696,6 @@ static struct dm_space_map bootstrap_ops = { static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks) { int r, i; - enum allocation_event ev; struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); dm_block_t old_len = smm->ll.nr_blocks; @@ -705,11 +717,12 @@ static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks) * allocate any new blocks. */ do { - for (i = old_len; !r && i < smm->begin; i++) { - r = sm_ll_inc(&smm->ll, i, &ev); - if (r) - goto out; - } + for (i = old_len; !r && i < smm->begin; i++) + r = add_bop(smm, BOP_INC, i); + + if (r) + goto out; + old_len = smm->begin; r = apply_bops(smm); @@ -754,7 +767,6 @@ int dm_sm_metadata_create(struct dm_space_map *sm, { int r; dm_block_t i; - enum allocation_event ev; struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); smm->begin = superblock + 1; @@ -782,7 +794,7 @@ int dm_sm_metadata_create(struct dm_space_map *sm, * allocated blocks that they were built from. */ for (i = superblock; !r && i < smm->begin; i++) - r = sm_ll_inc(&smm->ll, i, &ev); + r = add_bop(smm, BOP_INC, i); if (r) return r; -- cgit v1.2.3 From ed8b45a3679eb49069b094c0711b30833f27c734 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Thu, 10 Dec 2015 14:37:53 +0000 Subject: dm btree: fix bufio buffer leaks in dm_btree_del() error path If dm_btree_del()'s call to push_frame() fails, e.g. due to btree_node_validator finding invalid metadata, the dm_btree_del() error path must unlock all frames (which have active dm-bufio buffers) that were pushed onto the del_stack. Otherwise, dm_bufio_client_destroy() will BUG_ON() because dm-bufio buffers have leaked, e.g.: device-mapper: bufio: leaked buffer 3, hold count 1, list 0 Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org --- drivers/md/persistent-data/dm-btree.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index 7e5b7f12958a..b1ced58eb5e1 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -257,6 +257,16 @@ static void pop_frame(struct del_stack *s) dm_tm_unlock(s->tm, f->b); } +static void unlock_all_frames(struct del_stack *s) +{ + struct frame *f; + + while (unprocessed_frames(s)) { + f = s->spine + s->top--; + dm_tm_unlock(s->tm, f->b); + } +} + int dm_btree_del(struct dm_btree_info *info, dm_block_t root) { int r; @@ -313,9 +323,13 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root) pop_frame(s); } } - out: + if (r) { + /* cleanup all frames of del_stack */ + unlock_all_frames(s); + } kfree(s); + return r; } EXPORT_SYMBOL_GPL(dm_btree_del); -- cgit v1.2.3