diff options
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 200 |
1 file changed, 169 insertions, 31 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 260f94b019c9..df2bb4b61a00 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2342,7 +2342,13 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, ins.type = BTRFS_EXTENT_ITEM_KEY; } - BUG_ON(node->ref_mod != 1); + if (node->ref_mod != 1) { + btrfs_err(root->fs_info, + "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu", + node->bytenr, node->ref_mod, node->action, ref_root, + parent); + return -EIO; + } if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { BUG_ON(!extent_op || !extent_op->update_flags); ret = alloc_reserved_tree_block(trans, root, @@ -4128,7 +4134,7 @@ commit_trans: data_sinfo->flags, bytes, 1); spin_unlock(&data_sinfo->lock); - return ret; + return 0; } /* @@ -4392,6 +4398,7 @@ again: if (wait_for_alloc) { mutex_unlock(&fs_info->chunk_mutex); wait_for_alloc = 0; + cond_resched(); goto again; } @@ -7834,6 +7841,20 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, buf = btrfs_find_create_tree_block(root, bytenr); if (!buf) return ERR_PTR(-ENOMEM); + + /* + * Extra safety check in case the extent tree is corrupted and extent + * allocator chooses to use a tree block which is already used and + * locked. 
+ */ + if (buf->lock_owner == current->pid) { + btrfs_err_rl(root->fs_info, +"tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected", + buf->start, btrfs_header_owner(buf), current->pid); + free_extent_buffer(buf); + return ERR_PTR(-EUCLEAN); + } + btrfs_set_header_generation(buf, trans->transid); btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); btrfs_tree_lock(buf); @@ -8703,15 +8724,14 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, if (eb == root->node) { if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) parent = eb->start; - else - BUG_ON(root->root_key.objectid != - btrfs_header_owner(eb)); + else if (root->root_key.objectid != btrfs_header_owner(eb)) + goto owner_mismatch; } else { if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF) parent = path->nodes[level + 1]->start; - else - BUG_ON(root->root_key.objectid != - btrfs_header_owner(path->nodes[level + 1])); + else if (root->root_key.objectid != + btrfs_header_owner(path->nodes[level + 1])) + goto owner_mismatch; } btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); @@ -8719,6 +8739,11 @@ out: wc->refs[level] = 0; wc->flags[level] = 0; return 0; + +owner_mismatch: + btrfs_err_rl(root->fs_info, "unexpected tree owner, have %llu expect %llu", + btrfs_header_owner(eb), root->root_key.objectid); + return -EUCLEAN; } static noinline int walk_down_tree(struct btrfs_trans_handle *trans, @@ -8772,6 +8797,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, ret = walk_up_proc(trans, root, path, wc); if (ret > 0) return 0; + if (ret < 0) + return ret; if (path->locks[level]) { btrfs_tree_unlock_rw(path->nodes[level], @@ -9460,6 +9487,8 @@ static int find_first_block_group(struct btrfs_root *root, int ret = 0; struct btrfs_key found_key; struct extent_buffer *leaf; + struct btrfs_block_group_item bg; + u64 flags; int slot; ret = btrfs_search_slot(NULL, root, key, path, 0, 0); @@ -9481,7 +9510,47 @@ 
static int find_first_block_group(struct btrfs_root *root, if (found_key.objectid >= key->objectid && found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { - ret = 0; + struct extent_map_tree *em_tree; + struct extent_map *em; + + em_tree = &root->fs_info->mapping_tree.map_tree; + read_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, found_key.objectid, + found_key.offset); + read_unlock(&em_tree->lock); + if (!em) { + btrfs_err(root->fs_info, + "logical %llu len %llu found bg but no related chunk", + found_key.objectid, found_key.offset); + ret = -ENOENT; + } else if (em->start != found_key.objectid || + em->len != found_key.offset) { + btrfs_err(root->fs_info, + "block group %llu len %llu mismatch with chunk %llu len %llu", + found_key.objectid, found_key.offset, + em->start, em->len); + ret = -EUCLEAN; + } else { + read_extent_buffer(leaf, &bg, + btrfs_item_ptr_offset(leaf, slot), + sizeof(bg)); + flags = btrfs_block_group_flags(&bg) & + BTRFS_BLOCK_GROUP_TYPE_MASK; + + if (flags != (em->map_lookup->type & + BTRFS_BLOCK_GROUP_TYPE_MASK)) { + btrfs_err(root->fs_info, +"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx", + found_key.objectid, + found_key.offset, flags, + (BTRFS_BLOCK_GROUP_TYPE_MASK & + em->map_lookup->type)); + ret = -EUCLEAN; + } else { + ret = 0; + } + } + free_extent_map(em); goto out; } path->slots[0]++; @@ -9500,6 +9569,7 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info) block_group = btrfs_lookup_first_block_group(info, last); while (block_group) { + wait_block_group_cache_done(block_group); spin_lock(&block_group->lock); if (block_group->iref) break; @@ -9695,6 +9765,62 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size) return cache; } + +/* + * Iterate all chunks and verify that each of them has the corresponding block + * group + */ +static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info) +{ + struct btrfs_mapping_tree *map_tree = 
&fs_info->mapping_tree; + struct extent_map *em; + struct btrfs_block_group_cache *bg; + u64 start = 0; + int ret = 0; + + while (1) { + read_lock(&map_tree->map_tree.lock); + /* + * lookup_extent_mapping will return the first extent map + * intersecting the range, so setting @len to 1 is enough to + * get the first chunk. + */ + em = lookup_extent_mapping(&map_tree->map_tree, start, 1); + read_unlock(&map_tree->map_tree.lock); + if (!em) + break; + + bg = btrfs_lookup_block_group(fs_info, em->start); + if (!bg) { + btrfs_err(fs_info, + "chunk start=%llu len=%llu doesn't have corresponding block group", + em->start, em->len); + ret = -EUCLEAN; + free_extent_map(em); + break; + } + if (bg->key.objectid != em->start || + bg->key.offset != em->len || + (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) != + (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) { + btrfs_err(fs_info, +"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx", + em->start, em->len, + em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK, + bg->key.objectid, bg->key.offset, + bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK); + ret = -EUCLEAN; + free_extent_map(em); + btrfs_put_block_group(bg); + break; + } + start = em->start + em->len; + free_extent_map(em); + btrfs_put_block_group(bg); + } + return ret; +} + int btrfs_read_block_groups(struct btrfs_root *root) { struct btrfs_path *path; @@ -9881,7 +10007,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) } init_global_block_rsv(info); - ret = 0; + ret = check_chunk_block_group_mappings(info); error: btrfs_free_path(path); return ret; @@ -9890,7 +10016,7 @@ error: void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - struct btrfs_block_group_cache *block_group, *tmp; + struct btrfs_block_group_cache *block_group; struct btrfs_root *extent_root = root->fs_info->extent_root; struct btrfs_block_group_item item; struct btrfs_key key; @@ -9898,7 +10024,10 @@ void 
btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, bool can_flush_pending_bgs = trans->can_flush_pending_bgs; trans->can_flush_pending_bgs = false; - list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) { + while (!list_empty(&trans->new_bgs)) { + block_group = list_first_entry(&trans->new_bgs, + struct btrfs_block_group_cache, + bg_list); if (ret) goto next; @@ -10363,7 +10492,7 @@ btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info, * more device items and remove one chunk item), but this is done at * btrfs_remove_chunk() through a call to check_system_chunk(). */ - map = (struct map_lookup *)em->bdev; + map = em->map_lookup; num_items = 3 + map->num_stripes; free_extent_map(em); @@ -10409,7 +10538,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) /* Don't want to race with allocators so take the groups_sem */ down_write(&space_info->groups_sem); spin_lock(&block_group->lock); - if (block_group->reserved || + if (block_group->reserved || block_group->pinned || btrfs_block_group_used(&block_group->item) || block_group->ro || list_is_singular(&block_group->list)) { @@ -10601,13 +10730,17 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) * transaction. */ static int btrfs_trim_free_extents(struct btrfs_device *device, - u64 minlen, u64 *trimmed) + struct fstrim_range *range, u64 *trimmed) { - u64 start = 0, len = 0; + u64 start = range->start, len = 0; int ret; *trimmed = 0; + /* Discard not supported = nothing to do. */ + if (!blk_queue_discard(bdev_get_queue(device->bdev))) + return 0; + /* Not writeable = nothing to do. 
*/ if (!device->writeable) return 0; @@ -10635,8 +10768,8 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, atomic_inc(&trans->use_count); spin_unlock(&fs_info->trans_lock); - ret = find_free_dev_extent_start(trans, device, minlen, start, - &start, &len); + ret = find_free_dev_extent_start(trans, device, range->minlen, + start, &start, &len); if (trans) btrfs_put_transaction(trans); @@ -10648,6 +10781,16 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, break; } + /* If we are out of the passed range break */ + if (start > range->start + range->len - 1) { + mutex_unlock(&fs_info->chunk_mutex); + ret = 0; + break; + } + + start = max(range->start, start); + len = min(range->len, len); + ret = btrfs_issue_discard(device->bdev, start, len, &bytes); up_read(&fs_info->commit_root_sem); mutex_unlock(&fs_info->chunk_mutex); @@ -10658,6 +10801,10 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, start += len; *trimmed += bytes; + /* We've trimmed enough */ + if (*trimmed >= range->len) + break; + if (fatal_signal_pending(current)) { ret = -ERESTARTSYS; break; @@ -10679,17 +10826,9 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) u64 start; u64 end; u64 trimmed = 0; - u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy); int ret = 0; - /* - * try to trim all FS space, our block group may start from non-zero. 
- */ - if (range->len == total_bytes) - cache = btrfs_lookup_first_block_group(fs_info, range->start); - else - cache = btrfs_lookup_block_group(fs_info, range->start); - + cache = btrfs_lookup_first_block_group(fs_info, range->start); while (cache) { if (cache->key.objectid >= (range->start + range->len)) { btrfs_put_block_group(cache); @@ -10730,10 +10869,9 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) } mutex_lock(&root->fs_info->fs_devices->device_list_mutex); - devices = &root->fs_info->fs_devices->alloc_list; - list_for_each_entry(device, devices, dev_alloc_list) { - ret = btrfs_trim_free_extents(device, range->minlen, - &group_trimmed); + devices = &root->fs_info->fs_devices->devices; + list_for_each_entry(device, devices, dev_list) { + ret = btrfs_trim_free_extents(device, range, &group_trimmed); if (ret) break; |