Btrfs: Dynamic chunk and block group allocation

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 2cd957d..15082b10 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -36,6 +36,10 @@
 				 btrfs_root *extent_root);
 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
 			       btrfs_root *extent_root);
+int btrfs_make_block_group(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root, u64 bytes_used,
+			   u64 type, u64 chunk_tree, u64 chunk_objectid,
+			   u64 size);
 
 
 static int cache_block_group(struct btrfs_root *root,
@@ -168,16 +172,7 @@
 
 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
 {
-	if ((bits & BLOCK_GROUP_DATA) &&
-	    (cache->flags & BTRFS_BLOCK_GROUP_DATA))
-		return 1;
-	if ((bits & BLOCK_GROUP_METADATA) &&
-	     (cache->flags & BTRFS_BLOCK_GROUP_METADATA))
-		return 1;
-	if ((bits & BLOCK_GROUP_SYSTEM) &&
-	     (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM))
-		return 1;
-	return 0;
+	return (cache->flags & bits) != 0;	/* 0/1, no u64 truncation */
 }
 
 static int noinline find_search_start(struct btrfs_root *root,
@@ -276,6 +271,18 @@
 	return num;
 }
 
+/* Map BTRFS_BLOCK_GROUP_* flags to the matching BLOCK_GROUP_* bits. */
+static int block_group_state_bits(u64 flags)
+{
+	int state = 0;
+
+	state |= (flags & BTRFS_BLOCK_GROUP_DATA) ? BLOCK_GROUP_DATA : 0;
+	state |= (flags & BTRFS_BLOCK_GROUP_METADATA) ?
+		 BLOCK_GROUP_METADATA : 0;
+	state |= (flags & BTRFS_BLOCK_GROUP_SYSTEM) ? BLOCK_GROUP_SYSTEM : 0;
+	return state;
+}
+
 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
 						 struct btrfs_block_group_cache
 						 *hint, u64 search_start,
@@ -304,7 +311,7 @@
 	if (!owner)
 		factor = 8;
 
-	bit = data;
+	bit = block_group_state_bits(data);
 
 	if (search_start && search_start < total_fs_bytes) {
 		struct btrfs_block_group_cache *shint;
@@ -358,10 +365,15 @@
 			free_check = cache->key.offset;
 		else
 			free_check = div_factor(cache->key.offset, factor);
+
 		if (used + cache->pinned < free_check) {
 			found_group = cache;
 			goto found;
 		}
+		if (full_search) {
+			printk("failed on cache %Lu used %Lu total %Lu\n",
+			       cache->key.objectid, used, cache->key.offset);
+		}
 		cond_resched();
 	}
 	if (!full_search) {
@@ -983,6 +995,58 @@
 	return werr;
 }
 
+/* Return the space_info whose flags equal @flags exactly, or NULL. */
+static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
+						  u64 flags)
+{
+	struct btrfs_space_info *entry;
+
+	/* walk every entry linked on info->space_info */
+	list_for_each_entry(entry, &info->space_info, list) {
+		if (entry->flags != flags)
+			continue;
+		return entry;
+	}
+	return NULL;
+}
+
+/*
+ * Add a chunk + block group for @flags once used + pinned + @alloc_bytes
+ * reaches div_factor(total_bytes, 7); -ENOSPC only marks the space full.
+ */
+static int do_chunk_alloc(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *extent_root, u64 alloc_bytes,
+			  u64 flags)
+{
+	struct btrfs_space_info *space_info;
+	u64 thresh;
+	u64 start, num_bytes;
+	int ret;
+
+	space_info = __find_space_info(extent_root->fs_info, flags);
+	BUG_ON(!space_info);
+	if (space_info->full)
+		return 0;
+
+	thresh = div_factor(space_info->total_bytes, 7);
+	if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
+	    thresh)
+		return 0;
+
+	ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
+	if (ret == -ENOSPC) {
+		printk("space info full %Lu\n", (unsigned long long)flags);
+		space_info->full = 1;
+		return 0;
+	}
+	BUG_ON(ret);
+	ret = btrfs_make_block_group(trans, extent_root, 0, flags,
+		     extent_root->fs_info->chunk_root->root_key.objectid,
+		     start, num_bytes);
+	BUG_ON(ret);
+	return 0;
+}
+
 static int update_block_group(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root,
 			      u64 bytenr, u64 num_bytes, int alloc,
@@ -1012,8 +1076,10 @@
 		num_bytes = min(total, cache->key.offset - byte_in_group);
 		if (alloc) {
 			old_val += num_bytes;
+			cache->space_info->bytes_used += num_bytes;
 		} else {
 			old_val -= num_bytes;
+			cache->space_info->bytes_used -= num_bytes;
 			if (mark_free) {
 				set_extent_dirty(&info->free_space_cache,
 						 bytenr, bytenr + num_bytes - 1,
@@ -1026,6 +1092,7 @@
 	}
 	return 0;
 }
+
 static int update_pinned_extents(struct btrfs_root *root,
 				u64 bytenr, u64 num, int pin)
 {
@@ -1047,9 +1114,11 @@
 			  (bytenr - cache->key.objectid));
 		if (pin) {
 			cache->pinned += len;
+			cache->space_info->bytes_pinned += len;
 			fs_info->total_pinned += len;
 		} else {
 			cache->pinned -= len;
+			cache->space_info->bytes_pinned -= len;
 			fs_info->total_pinned -= len;
 		}
 		bytenr += len;
@@ -1472,7 +1541,7 @@
 		goto new_group;
 	}
 
-	if (!(data & BLOCK_GROUP_DATA)) {
+	if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
 		block_group = btrfs_lookup_block_group(info, ins->objectid);
 		if (block_group)
 			trans->block_group = block_group;
@@ -1532,12 +1601,25 @@
 	struct btrfs_path *path;
 	struct btrfs_key keys[2];
 
-	if (data)
-		data = BLOCK_GROUP_DATA;
-	else if (root == root->fs_info->chunk_root)
-		data = BLOCK_GROUP_SYSTEM;
-	else
-		data = BLOCK_GROUP_METADATA;
+	if (data) {
+		data = BTRFS_BLOCK_GROUP_DATA;
+	} else if (root == root->fs_info->chunk_root) {
+		data = BTRFS_BLOCK_GROUP_SYSTEM;
+	} else {
+		data = BTRFS_BLOCK_GROUP_METADATA;
+	}
+
+	if (root->ref_cows) {
+		if (data != BTRFS_BLOCK_GROUP_METADATA) {
+			ret = do_chunk_alloc(trans, root->fs_info->extent_root,
+					     num_bytes,
+					     BTRFS_BLOCK_GROUP_METADATA);
+			BUG_ON(ret);
+		}
+		ret = do_chunk_alloc(trans, root->fs_info->extent_root,
+				     num_bytes, data);
+		BUG_ON(ret);
+	}
 
 	new_hint = max(hint_byte, root->fs_info->alloc_start);
 	if (new_hint < btrfs_super_total_bytes(&info->super_copy))
@@ -2490,6 +2572,34 @@
 	return ret;
 }
 
+/* Add the byte counts to @flags' space_info, creating it when missing. */
+static int update_space_info(struct btrfs_fs_info *info, u64 flags,
+			     u64 total_bytes, u64 bytes_used,
+			     struct btrfs_space_info **space_info)
+{
+	struct btrfs_space_info *found;
+
+	found = __find_space_info(info, flags);
+	if (found) {
+		found->total_bytes += total_bytes;
+		found->bytes_used += bytes_used;
+		WARN_ON(found->total_bytes < found->bytes_used);
+		*space_info = found;
+		return 0;
+	}
+	found = kmalloc(sizeof(*found), GFP_NOFS);
+	if (!found)
+		return -ENOMEM;
+	found->flags = flags;
+	found->total_bytes = total_bytes;
+	found->bytes_used = bytes_used;
+	found->bytes_pinned = 0;
+	found->full = 0;
+	list_add(&found->list, &info->space_info);	/* publish last */
+	*space_info = found;
+	return 0;
+}
+
 int btrfs_read_block_groups(struct btrfs_root *root)
 {
 	struct btrfs_path *path;
@@ -2497,6 +2607,7 @@
 	int bit;
 	struct btrfs_block_group_cache *cache;
 	struct btrfs_fs_info *info = root->fs_info;
+	struct btrfs_space_info *space_info;
 	struct extent_io_tree *block_group_cache;
 	struct btrfs_key key;
 	struct btrfs_key found_key;
@@ -2547,6 +2658,12 @@
 			bit = BLOCK_GROUP_METADATA;
 		}
 
+		ret = update_space_info(info, cache->flags, found_key.offset,
+					btrfs_block_group_used(&cache->item),
+					&space_info);
+		BUG_ON(ret);
+		cache->space_info = space_info;
+
 		/* use EXTENT_LOCKED to prevent merging */
 		set_extent_bits(block_group_cache, found_key.objectid,
 				found_key.objectid + found_key.offset - 1,
@@ -2563,3 +2680,58 @@
 	btrfs_free_path(path);
 	return ret;
 }
+
+int btrfs_make_block_group(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root, u64 bytes_used,
+			   u64 type, u64 chunk_tree, u64 chunk_objectid,
+			   u64 size)
+{
+	int ret;
+	int bit = 0;
+	struct btrfs_root *extent_root;
+	struct btrfs_block_group_cache *cache;
+	struct extent_io_tree *block_group_cache;
+
+	extent_root = root->fs_info->extent_root;
+	block_group_cache = &root->fs_info->block_group_cache;
+	/* allocate the cache entry and fill in its key and item */
+	cache = kmalloc(sizeof(*cache), GFP_NOFS);
+	BUG_ON(!cache);
+	cache->key.objectid = chunk_objectid;
+	cache->key.offset = size;
+	cache->cached = 0;
+	cache->pinned = 0;
+	btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
+	memset(&cache->item, 0, sizeof(cache->item));
+	btrfs_set_block_group_used(&cache->item, bytes_used);
+	btrfs_set_block_group_chunk_tree(&cache->item, chunk_tree);
+	btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
+	cache->flags = type;
+	btrfs_set_block_group_flags(&cache->item, type);
+	/* add the new group's size and usage to the matching space_info */
+	ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
+				&cache->space_info);
+	BUG_ON(ret);
+	/* choose one state bit; DATA wins over SYSTEM, SYSTEM over METADATA */
+	if (type & BTRFS_BLOCK_GROUP_DATA) {
+		bit = BLOCK_GROUP_DATA;
+	} else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
+		bit = BLOCK_GROUP_SYSTEM;
+	} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
+		bit = BLOCK_GROUP_METADATA;
+	}
+	set_extent_bits(block_group_cache, chunk_objectid,
+			chunk_objectid + size - 1,
+			bit | EXTENT_LOCKED, GFP_NOFS);
+	set_state_private(block_group_cache, chunk_objectid,
+			  (unsigned long)cache);
+	/* insert the block group item into the extent root under cache->key */
+	ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
+				sizeof(cache->item));
+	BUG_ON(ret);
+	/* NOTE(review): presumably flushes queued extent updates */
+	finish_current_insert(trans, extent_root);
+	ret = del_pending_extents(trans, extent_root);
+	BUG_ON(ret);
+	return 0;
+}