author    Linus Torvalds <torvalds@linux-foundation.org>  2015-06-30 20:07:45 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2015-06-30 20:07:45 -0700
commit    043cd04950431f206f784d1ed9b3fcc5993045f2
tree      63c3f3c620c6b8a70030185963f2fea7f74e5e86  /fs/btrfs/qgroup.c
parent    43baed34bc6b22fdd433b63ec6738f2d28925b1a
parent    5a5003df98d5a7f6834227885b7c9728f767cc27
Merge branch 'for-linus-4.2' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason:
 "Outside of our usual batch of fixes, this integrates the subvolume
  quota updates that Qu Wenruo from Fujitsu has been working on for a
  few releases now. He gets an extra gold star for making btrfs smaller
  this time, and fixing a number of quota corners in the process.

  Dave Sterba tested and integrated Anand Jain's sysfs improvements.
  Outside of exporting a symbol (ack'd by Greg) these are all internal
  to btrfs and it's mostly cleanups and fixes. Anand also attached some
  of our sysfs objects to our internal device management structs instead
  of an object off the super block. It will make device management
  easier overall and it's a better fit for how the sysfs files are used.
  None of the existing sysfs files are moved around.

  Thanks for all the fixes everyone"

* 'for-linus-4.2' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (87 commits)
  btrfs: delayed-ref: double free in btrfs_add_delayed_tree_ref()
  Btrfs: Check if kobject is initialized before put
  lib: export symbol kobject_move()
  Btrfs: sysfs: add support to show replacing target in the sysfs
  Btrfs: free the stale device
  Btrfs: use received_uuid of parent during send
  Btrfs: fix use-after-free in btrfs_replay_log
  btrfs: wait for delayed iputs on no space
  btrfs: qgroup: Make snapshot accounting work with new extent-oriented qgroup.
  btrfs: qgroup: Add the ability to skip given qgroup for old/new_roots.
  btrfs: ulist: Add ulist_del() function.
  btrfs: qgroup: Cleanup the old ref_node-oriented mechanism.
  btrfs: qgroup: Switch self test to extent-oriented qgroup mechanism.
  btrfs: qgroup: Switch to new extent-oriented qgroup mechanism.
  btrfs: qgroup: Switch rescan to new mechanism.
  btrfs: qgroup: Add new qgroup calculation function btrfs_qgroup_account_extents().
  btrfs: backref: Add special time_seq == (u64)-1 case for btrfs_find_all_roots().
  btrfs: qgroup: Add new function to record old_roots.
  btrfs: qgroup: Record possible quota-related extent for qgroup.
  btrfs: qgroup: Add function qgroup_update_counters().
  ...
Diffstat (limited to 'fs/btrfs/qgroup.c')
-rw-r--r--  fs/btrfs/qgroup.c | 1052
1 file changed, 268 insertions(+), 784 deletions(-)
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 3d6546581bb9..d5f1f033b7a0 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -34,6 +34,7 @@
#include "extent_io.h"
#include "qgroup.h"
+
/* TODO XXX FIXME
* - subvol delete -> delete when ref goes to 0? delete limits also?
* - reorganize keys
@@ -84,11 +85,42 @@ struct btrfs_qgroup {
/*
* temp variables for accounting operations
+ * Refer to qgroup_shared_accounting() for details.
*/
u64 old_refcnt;
u64 new_refcnt;
};
+static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq,
+ int mod)
+{
+ if (qg->old_refcnt < seq)
+ qg->old_refcnt = seq;
+ qg->old_refcnt += mod;
+}
+
+static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq,
+ int mod)
+{
+ if (qg->new_refcnt < seq)
+ qg->new_refcnt = seq;
+ qg->new_refcnt += mod;
+}
+
+static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq)
+{
+ if (qg->old_refcnt < seq)
+ return 0;
+ return qg->old_refcnt - seq;
+}
+
+static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq)
+{
+ if (qg->new_refcnt < seq)
+ return 0;
+ return qg->new_refcnt - seq;
+}
+
/*
* glue structure to represent the relations between qgroups.
*/
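
The refcnt helpers added above lean on fs_info->qgroup_seq as a moving zero point: any counter smaller than the current seq counts as zero, so nothing has to be walked and reset between accounting rounds. Below is a minimal user-space sketch of that trick, using a hypothetical qg_counter struct in place of the real struct btrfs_qgroup:

#include <stdio.h>
#include <stdint.h>

/* Minimal stand-in for the refcnt fields of struct btrfs_qgroup. */
struct qg_counter {
	uint64_t old_refcnt;
	uint64_t new_refcnt;
};

/* Anything below @seq means "untouched this round", i.e. zero. */
static void update_old(struct qg_counter *qg, uint64_t seq, int mod)
{
	if (qg->old_refcnt < seq)
		qg->old_refcnt = seq;
	qg->old_refcnt += mod;
}

static uint64_t get_old(const struct qg_counter *qg, uint64_t seq)
{
	return qg->old_refcnt < seq ? 0 : qg->old_refcnt - seq;
}

int main(void)
{
	struct qg_counter qg = { 0, 0 };
	uint64_t seq = 100;		/* current fs_info->qgroup_seq */

	update_old(&qg, seq, 1);	/* first old root seen: 100 -> 101 */
	update_old(&qg, seq, 1);	/* second old root seen: 101 -> 102 */
	printf("count this round: %llu\n",
	       (unsigned long long)get_old(&qg, seq));	/* 2 */

	seq += 3;			/* bump seq past any possible count */
	printf("count next round: %llu\n",
	       (unsigned long long)get_old(&qg, seq));	/* 0 again, no reset */
	return 0;
}

Bumping seq past the largest possible count, as the accounting code does with max(nr_old_roots, nr_new_roots) + 1, implicitly zeroes every qgroup's counter for the next round.
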
@@ -1115,14 +1147,14 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
struct ulist *tmp;
int ret = 0;
- tmp = ulist_alloc(GFP_NOFS);
- if (!tmp)
- return -ENOMEM;
-
/* Check the level of src and dst first */
if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
return -EINVAL;
+ tmp = ulist_alloc(GFP_NOFS);
+ if (!tmp)
+ return -ENOMEM;
+
mutex_lock(&fs_info->qgroup_ioctl_lock);
quota_root = fs_info->quota_root;
if (!quota_root) {
@@ -1356,239 +1388,86 @@ out:
return ret;
}
-static int comp_oper_exist(struct btrfs_qgroup_operation *oper1,
- struct btrfs_qgroup_operation *oper2)
+int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info)
{
- /*
- * Ignore seq and type here, we're looking for any operation
- * at all related to this extent on that root.
- */
- if (oper1->bytenr < oper2->bytenr)
- return -1;
- if (oper1->bytenr > oper2->bytenr)
- return 1;
- if (oper1->ref_root < oper2->ref_root)
- return -1;
- if (oper1->ref_root > oper2->ref_root)
- return 1;
- return 0;
-}
+ struct btrfs_qgroup_extent_record *record;
+ struct btrfs_delayed_ref_root *delayed_refs;
+ struct rb_node *node;
+ u64 qgroup_to_skip;
+ int ret = 0;
-static int qgroup_oper_exists(struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_operation *oper)
-{
- struct rb_node *n;
- struct btrfs_qgroup_operation *cur;
- int cmp;
+ delayed_refs = &trans->transaction->delayed_refs;
+ qgroup_to_skip = delayed_refs->qgroup_to_skip;
- spin_lock(&fs_info->qgroup_op_lock);
- n = fs_info->qgroup_op_tree.rb_node;
- while (n) {
- cur = rb_entry(n, struct btrfs_qgroup_operation, n);
- cmp = comp_oper_exist(cur, oper);
- if (cmp < 0) {
- n = n->rb_right;
- } else if (cmp) {
- n = n->rb_left;
- } else {
- spin_unlock(&fs_info->qgroup_op_lock);
- return -EEXIST;
- }
+ /*
+ * No need to lock, since this function will only be called in
+ * btrfs_commit_transaction().
+ */
+ node = rb_first(&delayed_refs->dirty_extent_root);
+ while (node) {
+ record = rb_entry(node, struct btrfs_qgroup_extent_record,
+ node);
+ ret = btrfs_find_all_roots(NULL, fs_info, record->bytenr, 0,
+ &record->old_roots);
+ if (ret < 0)
+ break;
+ if (qgroup_to_skip)
+ ulist_del(record->old_roots, qgroup_to_skip, 0);
+ node = rb_next(node);
}
- spin_unlock(&fs_info->qgroup_op_lock);
- return 0;
-}
-
-static int comp_oper(struct btrfs_qgroup_operation *oper1,
- struct btrfs_qgroup_operation *oper2)
-{
- if (oper1->bytenr < oper2->bytenr)
- return -1;
- if (oper1->bytenr > oper2->bytenr)
- return 1;
- if (oper1->ref_root < oper2->ref_root)
- return -1;
- if (oper1->ref_root > oper2->ref_root)
- return 1;
- if (oper1->seq < oper2->seq)
- return -1;
- if (oper1->seq > oper2->seq)
- return 1;
- if (oper1->type < oper2->type)
- return -1;
- if (oper1->type > oper2->type)
- return 1;
- return 0;
+ return ret;
}
-static int insert_qgroup_oper(struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_operation *oper)
+struct btrfs_qgroup_extent_record
+*btrfs_qgroup_insert_dirty_extent(struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_qgroup_extent_record *record)
{
- struct rb_node **p;
- struct rb_node *parent = NULL;
- struct btrfs_qgroup_operation *cur;
- int cmp;
+ struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node;
+ struct rb_node *parent_node = NULL;
+ struct btrfs_qgroup_extent_record *entry;
+ u64 bytenr = record->bytenr;
- spin_lock(&fs_info->qgroup_op_lock);
- p = &fs_info->qgroup_op_tree.rb_node;
while (*p) {
- parent = *p;
- cur = rb_entry(parent, struct btrfs_qgroup_operation, n);
- cmp = comp_oper(cur, oper);
- if (cmp < 0) {
- p = &(*p)->rb_right;
- } else if (cmp) {
+ parent_node = *p;
+ entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record,
+ node);
+ if (bytenr < entry->bytenr)
p = &(*p)->rb_left;
- } else {
- spin_unlock(&fs_info->qgroup_op_lock);
- return -EEXIST;
- }
- }
- rb_link_node(&oper->n, parent, p);
- rb_insert_color(&oper->n, &fs_info->qgroup_op_tree);
- spin_unlock(&fs_info->qgroup_op_lock);
- return 0;
-}
-
-/*
- * Record a quota operation for processing later on.
- * @trans: the transaction we are adding the delayed op to.
- * @fs_info: the fs_info for this fs.
- * @ref_root: the root of the reference we are acting on,
- * @bytenr: the bytenr we are acting on.
- * @num_bytes: the number of bytes in the reference.
- * @type: the type of operation this is.
- * @mod_seq: do we need to get a sequence number for looking up roots.
- *
- * We just add it to our trans qgroup_ref_list and carry on and process these
- * operations in order at some later point. If the reference root isn't a fs
- * root then we don't bother with doing anything.
- *
- * MUST BE HOLDING THE REF LOCK.
- */
-int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 ref_root,
- u64 bytenr, u64 num_bytes,
- enum btrfs_qgroup_operation_type type, int mod_seq)
-{
- struct btrfs_qgroup_operation *oper;
- int ret;
-
- if (!is_fstree(ref_root) || !fs_info->quota_enabled)
- return 0;
-
- oper = kmalloc(sizeof(*oper), GFP_NOFS);
- if (!oper)
- return -ENOMEM;
-
- oper->ref_root = ref_root;
- oper->bytenr = bytenr;
- oper->num_bytes = num_bytes;
- oper->type = type;
- oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
- INIT_LIST_HEAD(&oper->elem.list);
- oper->elem.seq = 0;
-
- trace_btrfs_qgroup_record_ref(oper);
-
- if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) {
- /*
- * If any operation for this bytenr/ref_root combo
- * exists, then we know it's not exclusively owned and
- * shouldn't be queued up.
- *
- * This also catches the case where we have a cloned
- * extent that gets queued up multiple times during
- * drop snapshot.
- */
- if (qgroup_oper_exists(fs_info, oper)) {
- kfree(oper);
- return 0;
- }
- }
-
- ret = insert_qgroup_oper(fs_info, oper);
- if (ret) {
- /* Shouldn't happen so have an assert for developers */
- ASSERT(0);
- kfree(oper);
- return ret;
+ else if (bytenr > entry->bytenr)
+ p = &(*p)->rb_right;
+ else
+ return entry;
}
- list_add_tail(&oper->list, &trans->qgroup_ref_list);
- if (mod_seq)
- btrfs_get_tree_mod_seq(fs_info, &oper->elem);
-
- return 0;
-}
-
-static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_operation *oper)
-{
- struct ulist *tmp;
- int sign = 0;
- int ret = 0;
-
- tmp = ulist_alloc(GFP_NOFS);
- if (!tmp)
- return -ENOMEM;
-
- spin_lock(&fs_info->qgroup_lock);
- if (!fs_info->quota_root)
- goto out;
-
- switch (oper->type) {
- case BTRFS_QGROUP_OPER_ADD_EXCL:
- sign = 1;
- break;
- case BTRFS_QGROUP_OPER_SUB_EXCL:
- sign = -1;
- break;
- default:
- ASSERT(0);
- }
- ret = __qgroup_excl_accounting(fs_info, tmp, oper->ref_root,
- oper->num_bytes, sign);
-out:
- spin_unlock(&fs_info->qgroup_lock);
- ulist_free(tmp);
- return ret;
+ rb_link_node(&record->node, parent_node, p);
+ rb_insert_color(&record->node, &delayed_refs->dirty_extent_root);
+ return NULL;
}
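
btrfs_qgroup_insert_dirty_extent() keys records by bytenr alone and returns the already-queued record when the same extent shows up twice, so the caller can discard its duplicate. A rough user-space sketch of that insert-or-return-existing pattern, with a plain (unbalanced) binary search tree standing in for the kernel rbtree and hypothetical type names:

#include <stdio.h>
#include <stdint.h>

/*
 * Simplified stand-in for struct btrfs_qgroup_extent_record: one record
 * per extent start (bytenr) that needs qgroup accounting at commit time.
 */
struct extent_record {
	uint64_t bytenr;
	uint64_t num_bytes;
	struct extent_record *left, *right;
};

/*
 * Insert @rec into a tree keyed by bytenr.  Like the kernel helper,
 * return NULL on success and the already-queued record on collision,
 * so the caller knows to discard its duplicate.
 */
static struct extent_record *insert_record(struct extent_record **root,
					    struct extent_record *rec)
{
	while (*root) {
		if (rec->bytenr < (*root)->bytenr)
			root = &(*root)->left;
		else if (rec->bytenr > (*root)->bytenr)
			root = &(*root)->right;
		else
			return *root;	/* this extent is already queued */
	}
	rec->left = rec->right = NULL;
	*root = rec;
	return NULL;
}

int main(void)
{
	struct extent_record *tree = NULL;
	struct extent_record a = { 4096, 16384 };
	struct extent_record again = { 4096, 16384 };

	printf("first insert:  %p\n", (void *)insert_record(&tree, &a));     /* NULL */
	printf("second insert: %p\n", (void *)insert_record(&tree, &again)); /* &a */
	return 0;
}
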
+#define UPDATE_NEW 0
+#define UPDATE_OLD 1
/*
- * Walk all of the roots that pointed to our bytenr and adjust their refcnts as
- * properly.
+ * Walk all of the roots that point to the bytenr and adjust their refcnts.
*/
-static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
- u64 root_to_skip, struct ulist *tmp,
- struct ulist *roots, struct ulist *qgroups,
- u64 seq, int *old_roots, int rescan)
+static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info,
+ struct ulist *roots, struct ulist *tmp,
+ struct ulist *qgroups, u64 seq, int update_old)
{
struct ulist_node *unode;
struct ulist_iterator uiter;
struct ulist_node *tmp_unode;
struct ulist_iterator tmp_uiter;
struct btrfs_qgroup *qg;
- int ret;
+ int ret = 0;
+ if (!roots)
+ return 0;
ULIST_ITER_INIT(&uiter);
while ((unode = ulist_next(roots, &uiter))) {
- /* We don't count our current root here */
- if (unode->val == root_to_skip)
- continue;
qg = find_qgroup_rb(fs_info, unode->val);
if (!qg)
continue;
- /*
- * We could have a pending removal of this same ref so we may
- * not have actually found our ref root when doing
- * btrfs_find_all_roots, so we need to keep track of how many
- * old roots we find in case we removed ours and added a
- * different one at the same time. I don't think this could
- * happen in practice but that sort of thinking leads to pain
- * and suffering and to the dark side.
- */
- (*old_roots)++;
ulist_reinit(tmp);
ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
@@ -1603,29 +1482,10 @@ static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup_list *glist;
qg = u64_to_ptr(tmp_unode->aux);
- /*
- * We use this sequence number to keep from having to
- * run the whole list and 0 out the refcnt every time.
- * We basically use sequnce as the known 0 count and
- * then add 1 everytime we see a qgroup. This is how we
- * get how many of the roots actually point up to the
- * upper level qgroups in order to determine exclusive
- * counts.
- *
- * For rescan we want to set old_refcnt to seq so our
- * exclusive calculations end up correct.
- */
- if (rescan)
- qg->old_refcnt = seq;
- else if (qg->old_refcnt < seq)
- qg->old_refcnt = seq + 1;
+ if (update_old)
+ btrfs_qgroup_update_old_refcnt(qg, seq, 1);
else
- qg->old_refcnt++;
-
- if (qg->new_refcnt < seq)
- qg->new_refcnt = seq + 1;
- else
- qg->new_refcnt++;
+ btrfs_qgroup_update_new_refcnt(qg, seq, 1);
list_for_each_entry(glist, &qg->groups, next_group) {
ret = ulist_add(qgroups, glist->group->qgroupid,
ptr_to_u64(glist->group),
@@ -1644,161 +1504,46 @@ static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
}
/*
- * We need to walk forward in our operation tree and account for any roots that
- * were deleted after we made this operation.
- */
-static int qgroup_account_deleted_refs(struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_operation *oper,
- struct ulist *tmp,
- struct ulist *qgroups, u64 seq,
- int *old_roots)
-{
- struct ulist_node *unode;
- struct ulist_iterator uiter;
- struct btrfs_qgroup *qg;
- struct btrfs_qgroup_operation *tmp_oper;
- struct rb_node *n;
- int ret;
-
- ulist_reinit(tmp);
-
- /*
- * We only walk forward in the tree since we're only interested in
- * removals that happened _after_ our operation.
- */
- spin_lock(&fs_info->qgroup_op_lock);
- n = rb_next(&oper->n);
- spin_unlock(&fs_info->qgroup_op_lock);
- if (!n)
- return 0;
- tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
- while (tmp_oper->bytenr == oper->bytenr) {
- /*
- * If it's not a removal we don't care, additions work out
- * properly with our refcnt tracking.
- */
- if (tmp_oper->type != BTRFS_QGROUP_OPER_SUB_SHARED &&
- tmp_oper->type != BTRFS_QGROUP_OPER_SUB_EXCL)
- goto next;
- qg = find_qgroup_rb(fs_info, tmp_oper->ref_root);
- if (!qg)
- goto next;
- ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
- GFP_ATOMIC);
- if (ret) {
- if (ret < 0)
- return ret;
- /*
- * We only want to increase old_roots if this qgroup is
- * not already in the list of qgroups. If it is already
- * there then that means it must have been re-added or
- * the delete will be discarded because we had an
- * existing ref that we haven't looked up yet. In this
- * case we don't want to increase old_roots. So if ret
- * == 1 then we know that this is the first time we've
- * seen this qgroup and we can bump the old_roots.
- */
- (*old_roots)++;
- ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg),
- GFP_ATOMIC);
- if (ret < 0)
- return ret;
- }
-next:
- spin_lock(&fs_info->qgroup_op_lock);
- n = rb_next(&tmp_oper->n);
- spin_unlock(&fs_info->qgroup_op_lock);
- if (!n)
- break;
- tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
- }
-
- /* Ok now process the qgroups we found */
- ULIST_ITER_INIT(&uiter);
- while ((unode = ulist_next(tmp, &uiter))) {
- struct btrfs_qgroup_list *glist;
-
- qg = u64_to_ptr(unode->aux);
- if (qg->old_refcnt < seq)
- qg->old_refcnt = seq + 1;
- else
- qg->old_refcnt++;
- if (qg->new_refcnt < seq)
- qg->new_refcnt = seq + 1;
- else
- qg->new_refcnt++;
- list_for_each_entry(glist, &qg->groups, next_group) {
- ret = ulist_add(qgroups, glist->group->qgroupid,
- ptr_to_u64(glist->group), GFP_ATOMIC);
- if (ret < 0)
- return ret;
- ret = ulist_add(tmp, glist->group->qgroupid,
- ptr_to_u64(glist->group), GFP_ATOMIC);
- if (ret < 0)
- return ret;
- }
- }
- return 0;
-}
-
-/* Add refcnt for the newly added reference. */
-static int qgroup_calc_new_refcnt(struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_operation *oper,
- struct btrfs_qgroup *qgroup,
- struct ulist *tmp, struct ulist *qgroups,
- u64 seq)
-{
- struct ulist_node *unode;
- struct ulist_iterator uiter;
- struct btrfs_qgroup *qg;
- int ret;
-
- ulist_reinit(tmp);
- ret = ulist_add(qgroups, qgroup->qgroupid, ptr_to_u64(qgroup),
- GFP_ATOMIC);
- if (ret < 0)
- return ret;
- ret = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup),
- GFP_ATOMIC);
- if (ret < 0)
- return ret;
- ULIST_ITER_INIT(&uiter);
- while ((unode = ulist_next(tmp, &uiter))) {
- struct btrfs_qgroup_list *glist;
-
- qg = u64_to_ptr(unode->aux);
- if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
- if (qg->new_refcnt < seq)
- qg->new_refcnt = seq + 1;
- else
- qg->new_refcnt++;
- } else {
- if (qg->old_refcnt < seq)
- qg->old_refcnt = seq + 1;
- else
- qg->old_refcnt++;
- }
- list_for_each_entry(glist, &qg->groups, next_group) {
- ret = ulist_add(tmp, glist->group->qgroupid,
- ptr_to_u64(glist->group), GFP_ATOMIC);
- if (ret < 0)
- return ret;
- ret = ulist_add(qgroups, glist->group->qgroupid,
- ptr_to_u64(glist->group), GFP_ATOMIC);
- if (ret < 0)
- return ret;
- }
- }
- return 0;
-}
-
-/*
- * This adjusts the counters for all referenced qgroups if need be.
+ * Update qgroup rfer/excl counters.
+ * The rfer update is easy; the code can explain itself.
+ *
+ * The excl update is tricky; it is split into two parts.
+ * Part 1: Possible exclusive <-> sharing detect:
+ * | A | !A |
+ * -------------------------------------
+ * B | * | - |
+ * -------------------------------------
+ * !B | + | ** |
+ * -------------------------------------
+ *
+ * Conditions:
+ * A: cur_old_roots < nr_old_roots (not exclusive before)
+ * !A: cur_old_roots == nr_old_roots (possible exclusive before)
+ * B: cur_new_roots < nr_new_roots (not exclusive now)
+ * !B: cur_new_roots == nr_new_roots (possible exclusive now)
+ *
+ * Results:
+ * +: Possible sharing -> exclusive -: Possible exclusive -> sharing
+ * *: Definitely not changed. **: Possible unchanged.
+ *
+ * For the !A and !B conditions, the exception is the cur_old/new_roots == 0 case.
+ *
+ * To make the logic clear, we first use conditions A and B to split the
+ * combinations into 4 results.
+ *
+ * Then, for results "+" and "-", check the old/new_roots == 0 case, since
+ * only one variable may be 0 in them.
+ *
+ * Lastly, check result **: since two variables may be 0 there, split it
+ * again (2x2).
+ * But this time we don't need to consider other things; the code and logic
+ * are now easy to follow.
*/
-static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info,
- u64 root_to_skip, u64 num_bytes,
- struct ulist *qgroups, u64 seq,
- int old_roots, int new_roots, int rescan)
+static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
+ struct ulist *qgroups,
+ u64 nr_old_roots,
+ u64 nr_new_roots,
+ u64 num_bytes, u64 seq)
{
struct ulist_node *unode;
struct ulist_iterator uiter;
@@ -1810,423 +1555,191 @@ static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info,
bool dirty = false;
qg = u64_to_ptr(unode->aux);
- /*
- * Wasn't referenced before but is now, add to the reference
- * counters.
- */
- if (qg->old_refcnt <= seq && qg->new_refcnt > seq) {
+ cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
+ cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);
+
+ /* Rfer update part */
+ if (cur_old_count == 0 && cur_new_count > 0) {
qg->rfer += num_bytes;
qg->rfer_cmpr += num_bytes;
dirty = true;
}
-
- /*
- * Was referenced before but isn't now, subtract from the
- * reference counters.
- */
- if (qg->old_refcnt > seq && qg->new_refcnt <= seq) {
+ if (cur_old_count > 0 && cur_new_count == 0) {
qg->rfer -= num_bytes;
qg->rfer_cmpr -= num_bytes;
dirty = true;
}
- if (qg->old_refcnt < seq)
- cur_old_count = 0;
- else
- cur_old_count = qg->old_refcnt - seq;
- if (qg->new_refcnt < seq)
- cur_new_count = 0;
- else
- cur_new_count = qg->new_refcnt - seq;
-
- /*
- * If our refcount was the same as the roots previously but our
- * new count isn't the same as the number of roots now then we
- * went from having a exclusive reference on this range to not.
- */
- if (old_roots && cur_old_count == old_roots &&
- (cur_new_count != new_roots || new_roots == 0)) {
- WARN_ON(cur_new_count != new_roots && new_roots == 0);
- qg->excl -= num_bytes;
- qg->excl_cmpr -= num_bytes;
- dirty = true;
+ /* Excl update part */
+ /* Exclusive/none -> shared case */
+ if (cur_old_count == nr_old_roots &&
+ cur_new_count < nr_new_roots) {
+ /* Exclusive -> shared */
+ if (cur_old_count != 0) {
+ qg->excl -= num_bytes;
+ qg->excl_cmpr -= num_bytes;
+ dirty = true;
+ }
}
- /*
- * If we didn't reference all the roots before but now we do we
- * have an exclusive reference to this range.
- */
- if ((!old_roots || (old_roots && cur_old_count != old_roots))
- && cur_new_count == new_roots) {
- qg->excl += num_bytes;
- qg->excl_cmpr += num_bytes;
- dirty = true;
+ /* Shared -> exclusive/none case */
+ if (cur_old_count < nr_old_roots &&
+ cur_new_count == nr_new_roots) {
+ /* Shared->exclusive */
+ if (cur_new_count != 0) {
+ qg->excl += num_bytes;
+ qg->excl_cmpr += num_bytes;
+ dirty = true;
+ }
}
+ /* Exclusive/none -> exclusive/none case */
+ if (cur_old_count == nr_old_roots &&
+ cur_new_count == nr_new_roots) {
+ if (cur_old_count == 0) {
+ /* None -> exclusive/none */
+
+ if (cur_new_count != 0) {
+ /* None -> exclusive */
+ qg->excl += num_bytes;
+ qg->excl_cmpr += num_bytes;
+ dirty = true;
+ }
+ /* None -> none, nothing changed */
+ } else {
+ /* Exclusive -> exclusive/none */
+
+ if (cur_new_count == 0) {
+ /* Exclusive -> none */
+ qg->excl -= num_bytes;
+ qg->excl_cmpr -= num_bytes;
+ dirty = true;
+ }
+ /* Exclusive -> exclusive, nothing changed */
+ }
+ }
if (dirty)
qgroup_dirty(fs_info, qg);
}
return 0;
}
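
The decision table in the comment above reduces to comparing this qgroup's own refcount against the total number of old and new roots. A condensed, stand-alone rendering of that logic (hypothetical excl_delta() helper, signed delta for the exclusive counter only):

#include <stdio.h>
#include <stdint.h>

/*
 * Change to apply to qg->excl for one extent of @num_bytes, given how
 * many of the old/new roots reach this qgroup (cur_old/cur_new) versus
 * how many roots there are in total (nr_old/nr_new).  Mirrors the four
 * cases of the table above.
 */
static int64_t excl_delta(uint64_t cur_old, uint64_t nr_old,
			  uint64_t cur_new, uint64_t nr_new,
			  uint64_t num_bytes)
{
	/* exclusive (or unreferenced) before, shared now */
	if (cur_old == nr_old && cur_new < nr_new)
		return cur_old ? -(int64_t)num_bytes : 0;
	/* shared before, exclusive (or unreferenced) now */
	if (cur_old < nr_old && cur_new == nr_new)
		return cur_new ? (int64_t)num_bytes : 0;
	/* possibly exclusive on both sides */
	if (cur_old == nr_old && cur_new == nr_new) {
		if (cur_old == 0 && cur_new != 0)
			return (int64_t)num_bytes;	/* none -> exclusive */
		if (cur_old != 0 && cur_new == 0)
			return -(int64_t)num_bytes;	/* exclusive -> none */
	}
	return 0;
}

int main(void)
{
	/* One root owned the extent exclusively; a second root now shares it. */
	printf("%lld\n", (long long)excl_delta(1, 1, 1, 2, 4096));
	/* The extent was shared by two roots; only this one still holds it. */
	printf("%lld\n", (long long)excl_delta(1, 2, 1, 1, 4096));
	return 0;
}

The two sample calls print -4096 and 4096: the root loses its exclusive claim when a second root starts sharing the extent, and gains it when it becomes the sole remaining owner.
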
-/*
- * If we removed a data extent and there were other references for that bytenr
- * then we need to lookup all referenced roots to make sure we still don't
- * reference this bytenr. If we do then we can just discard this operation.
- */
-static int check_existing_refs(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_operation *oper)
-{
- struct ulist *roots = NULL;
- struct ulist_node *unode;
- struct ulist_iterator uiter;
- int ret = 0;
-
- ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
- oper->elem.seq, &roots);
- if (ret < 0)
- return ret;
- ret = 0;
-
- ULIST_ITER_INIT(&uiter);
- while ((unode = ulist_next(roots, &uiter))) {
- if (unode->val == oper->ref_root) {
- ret = 1;
- break;
- }
- }
- ulist_free(roots);
- btrfs_put_tree_mod_seq(fs_info, &oper->elem);
-
- return ret;
-}
-
-/*
- * If we share a reference across multiple roots then we may need to adjust
- * various qgroups referenced and exclusive counters. The basic premise is this
- *
- * 1) We have seq to represent a 0 count. Instead of looping through all of the
- * qgroups and resetting their refcount to 0 we just constantly bump this
- * sequence number to act as the base reference count. This means that if
- * anybody is equal to or below this sequence they were never referenced. We
- * jack this sequence up by the number of roots we found each time in order to
- * make sure we don't have any overlap.
- *
- * 2) We first search all the roots that reference the area _except_ the root
- * we're acting on currently. This makes up the old_refcnt of all the qgroups
- * before.
- *
- * 3) We walk all of the qgroups referenced by the root we are currently acting
- * on, and will either adjust old_refcnt in the case of a removal or the
- * new_refcnt in the case of an addition.
- *
- * 4) Finally we walk all the qgroups that are referenced by this range
- * including the root we are acting on currently. We will adjust the counters
- * based on the number of roots we had and will have after this operation.
- *
- * Take this example as an illustration
- *
- * [qgroup 1/0]
- * / | \
- * [qg 0/0] [qg 0/1] [qg 0/2]
- * \ | /
- * [ extent ]
- *
- * Say we are adding a reference that is covered by qg 0/0. The first step
- * would give a refcnt of 1 to qg 0/1 and 0/2 and a refcnt of 2 to qg 1/0 with
- * old_roots being 2. Because it is adding new_roots will be 1. We then go
- * through qg 0/0 which will get the new_refcnt set to 1 and add 1 to qg 1/0's
- * new_refcnt, bringing it to 3. We then walk through all of the qgroups, we
- * notice that the old refcnt for qg 0/0 < the new refcnt, so we added a
- * reference and thus must add the size to the referenced bytes. Everything
- * else is the same so nothing else changes.
- */
-static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_operation *oper)
+int
+btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ u64 bytenr, u64 num_bytes,
+ struct ulist *old_roots, struct ulist *new_roots)
{
- struct ulist *roots = NULL;
- struct ulist *qgroups, *tmp;
- struct btrfs_qgroup *qgroup;
- struct seq_list elem = SEQ_LIST_INIT(elem);
+ struct ulist *qgroups = NULL;
+ struct ulist *tmp = NULL;
u64 seq;
- int old_roots = 0;
- int new_roots = 0;
+ u64 nr_new_roots = 0;
+ u64 nr_old_roots = 0;
int ret = 0;
- if (oper->elem.seq) {
- ret = check_existing_refs(trans, fs_info, oper);
- if (ret < 0)
- return ret;
- if (ret)
- return 0;
- }
+ if (new_roots)
+ nr_new_roots = new_roots->nnodes;
+ if (old_roots)
+ nr_old_roots = old_roots->nnodes;
- qgroups = ulist_alloc(GFP_NOFS);
- if (!qgroups)
- return -ENOMEM;
+ if (!fs_info->quota_enabled)
+ goto out_free;
+ BUG_ON(!fs_info->quota_root);
+ qgroups = ulist_alloc(GFP_NOFS);
+ if (!qgroups) {
+ ret = -ENOMEM;
+ goto out_free;
+ }
tmp = ulist_alloc(GFP_NOFS);
if (!tmp) {
- ulist_free(qgroups);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out_free;
}
- btrfs_get_tree_mod_seq(fs_info, &elem);
- ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq,
- &roots);
- btrfs_put_tree_mod_seq(fs_info, &elem);
- if (ret < 0) {
- ulist_free(qgroups);
- ulist_free(tmp);
- return ret;
+ mutex_lock(&fs_info->qgroup_rescan_lock);
+ if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+ if (fs_info->qgroup_rescan_progress.objectid <= bytenr) {
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
+ ret = 0;
+ goto out_free;
+ }
}
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
+
spin_lock(&fs_info->qgroup_lock);
- qgroup = find_qgroup_rb(fs_info, oper->ref_root);
- if (!qgroup)
- goto out;
seq = fs_info->qgroup_seq;
- /*
- * So roots is the list of all the roots currently pointing at the
- * bytenr, including the ref we are adding if we are adding, or not if
- * we are removing a ref. So we pass in the ref_root to skip that root
- * in our calculations. We set old_refnct and new_refcnt cause who the
- * hell knows what everything looked like before, and it doesn't matter
- * except...
- */
- ret = qgroup_calc_old_refcnt(fs_info, oper->ref_root, tmp, roots, qgroups,
- seq, &old_roots, 0);
+ /* Update old refcnts using old_roots */
+ ret = qgroup_update_refcnt(fs_info, old_roots, tmp, qgroups, seq,
+ UPDATE_OLD);
if (ret < 0)
goto out;
- /*
- * Now adjust the refcounts of the qgroups that care about this
- * reference, either the old_count in the case of removal or new_count
- * in the case of an addition.
- */
- ret = qgroup_calc_new_refcnt(fs_info, oper, qgroup, tmp, qgroups,
- seq);
+ /* Update new refcnts using new_roots */
+ ret = qgroup_update_refcnt(fs_info, new_roots, tmp, qgroups, seq,
+ UPDATE_NEW);
if (ret < 0)
goto out;
- /*
- * ...in the case of removals. If we had a removal before we got around
- * to processing this operation then we need to find that guy and count
- * his references as if they really existed so we don't end up screwing
- * up the exclusive counts. Then whenever we go to process the delete
- * everything will be grand and we can account for whatever exclusive
- * changes need to be made there. We also have to pass in old_roots so
- * we have an accurate count of the roots as it pertains to this
- * operations view of the world.
- */
- ret = qgroup_account_deleted_refs(fs_info, oper, tmp, qgroups, seq,
- &old_roots);
- if (ret < 0)
- goto out;
+ qgroup_update_counters(fs_info, qgroups, nr_old_roots, nr_new_roots,
+ num_bytes, seq);
/*
- * We are adding our root, need to adjust up the number of roots,
- * otherwise old_roots is the number of roots we want.
+ * Bump qgroup_seq to avoid seq overlap
*/
- if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
- new_roots = old_roots + 1;
- } else {
- new_roots = old_roots;
- old_roots++;
- }
- fs_info->qgroup_seq += old_roots + 1;
-
-
- /*
- * And now the magic happens, bless Arne for having a pretty elegant
- * solution for this.
- */
- qgroup_adjust_counters(fs_info, oper->ref_root, oper->num_bytes,
- qgroups, seq, old_roots, new_roots, 0);
+ fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1;
out:
spin_unlock(&fs_info->qgroup_lock);
- ulist_free(qgroups);
- ulist_free(roots);
+out_free:
ulist_free(tmp);
+ ulist_free(qgroups);
+ ulist_free(old_roots);
+ ulist_free(new_roots);
return ret;
}
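
btrfs_qgroup_account_extent() reduces every update to one question per extent: which roots referenced it before, and which reference it now. The toy example below walks the referenced-bytes side of that comparison for flat level-0 qgroups (arrays in place of ulists, hypothetical names); the exclusive side follows the decision table sketched earlier.

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* Does root @id appear in @roots[0..n-1]?  Stand-in for a ulist lookup. */
static bool in_roots(const uint64_t *roots, int n, uint64_t id)
{
	for (int i = 0; i < n; i++)
		if (roots[i] == id)
			return true;
	return false;
}

int main(void)
{
	/*
	 * One 16K extent: referenced by roots 257 and 258 before the
	 * transaction, and by 257 plus a new snapshot 259 afterwards.
	 */
	uint64_t old_roots[] = { 257, 258 };
	uint64_t new_roots[] = { 257, 259 };
	uint64_t all[] = { 257, 258, 259 };
	uint64_t num_bytes = 16384;

	for (int i = 0; i < 3; i++) {
		bool was = in_roots(old_roots, 2, all[i]);
		bool now = in_roots(new_roots, 2, all[i]);
		int64_t rfer_delta = 0;

		if (!was && now)
			rfer_delta = (int64_t)num_bytes;	/* gained a reference */
		if (was && !now)
			rfer_delta = -(int64_t)num_bytes;	/* dropped its reference */
		printf("qgroup 0/%llu: rfer %+lld\n",
		       (unsigned long long)all[i], (long long)rfer_delta);
	}
	/* prints: 0/257 +0, 0/258 -16384, 0/259 +16384 */
	return 0;
}
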
-/*
- * Process a reference to a shared subtree. This type of operation is
- * queued during snapshot removal when we encounter extents which are
- * shared between more than one root.
- */
-static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_operation *oper)
-{
- struct ulist *roots = NULL;
- struct ulist_node *unode;
- struct ulist_iterator uiter;
- struct btrfs_qgroup_list *glist;
- struct ulist *parents;
- int ret = 0;
- int err;
- struct btrfs_qgroup *qg;
- u64 root_obj = 0;
- struct seq_list elem = SEQ_LIST_INIT(elem);
-
- parents = ulist_alloc(GFP_NOFS);
- if (!parents)
- return -ENOMEM;
-
- btrfs_get_tree_mod_seq(fs_info, &elem);
- ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
- elem.seq, &roots);
- btrfs_put_tree_mod_seq(fs_info, &elem);
- if (ret < 0)
- goto out;
-
- if (roots->nnodes != 1)
- goto out;
-
- ULIST_ITER_INIT(&uiter);
- unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */
- /*
- * If we find our ref root then that means all refs
- * this extent has to the root have not yet been
- * deleted. In that case, we do nothing and let the
- * last ref for this bytenr drive our update.
- *
- * This can happen for example if an extent is
- * referenced multiple times in a snapshot (clone,
- * etc). If we are in the middle of snapshot removal,
- * queued updates for such an extent will find the
- * root if we have not yet finished removing the
- * snapshot.
- */
- if (unode->val == oper->ref_root)
- goto out;
-
- root_obj = unode->val;
- BUG_ON(!root_obj);
-
- spin_lock(&fs_info->qgroup_lock);
- qg = find_qgroup_rb(fs_info, root_obj);
- if (!qg)
- goto out_unlock;
-
- qg->excl += oper->num_bytes;
- qg->excl_cmpr += oper->num_bytes;
- qgroup_dirty(fs_info, qg);
-
- /*
- * Adjust counts for parent groups. First we find all
- * parents, then in the 2nd loop we do the adjustment
- * while adding parents of the parents to our ulist.
- */
- list_for_each_entry(glist, &qg->groups, next_group) {
- err = ulist_add(parents, glist->group->qgroupid,
- ptr_to_u64(glist->group), GFP_ATOMIC);
- if (err < 0) {
- ret = err;
- goto out_unlock;
- }
- }
-
- ULIST_ITER_INIT(&uiter);
- while ((unode = ulist_next(parents, &uiter))) {
- qg = u64_to_ptr(unode->aux);
- qg->excl += oper->num_bytes;
- qg->excl_cmpr += oper->num_bytes;
- qgroup_dirty(fs_info, qg);
-
- /* Add any parents of the parents */
- list_for_each_entry(glist, &qg->groups, next_group) {
- err = ulist_add(parents, glist->group->qgroupid,
- ptr_to_u64(glist->group), GFP_ATOMIC);
- if (err < 0) {
- ret = err;
- goto out_unlock;
- }
- }
- }
-
-out_unlock:
- spin_unlock(&fs_info->qgroup_lock);
-
-out:
- ulist_free(roots);
- ulist_free(parents);
- return ret;
-}
-
-/*
- * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
- * from the fs. First, all roots referencing the extent are searched, and
- * then the space is accounted accordingly to the different roots. The
- * accounting algorithm works in 3 steps documented inline.
- */
-static int btrfs_qgroup_account(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_operation *oper)
+int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info)
{
+ struct btrfs_qgroup_extent_record *record;
+ struct btrfs_delayed_ref_root *delayed_refs;
+ struct ulist *new_roots = NULL;
+ struct rb_node *node;
+ u64 qgroup_to_skip;
int ret = 0;
- if (!fs_info->quota_enabled)
- return 0;
-
- BUG_ON(!fs_info->quota_root);
+ delayed_refs = &trans->transaction->delayed_refs;
+ qgroup_to_skip = delayed_refs->qgroup_to_skip;
+ while ((node = rb_first(&delayed_refs->dirty_extent_root))) {
+ record = rb_entry(node, struct btrfs_qgroup_extent_record,
+ node);
- mutex_lock(&fs_info->qgroup_rescan_lock);
- if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
- if (fs_info->qgroup_rescan_progress.objectid <= oper->bytenr) {
- mutex_unlock(&fs_info->qgroup_rescan_lock);
- return 0;
+ if (!ret) {
+ /*
+ * Use (u64)-1 as time_seq to do a special search, which
+ * doesn't lock the tree or delayed_refs and searches the
+ * current root. It's safe inside commit_transaction().
+ */
+ ret = btrfs_find_all_roots(trans, fs_info,
+ record->bytenr, (u64)-1, &new_roots);
+ if (ret < 0)
+ goto cleanup;
+ if (qgroup_to_skip)
+ ulist_del(new_roots, qgroup_to_skip, 0);
+ ret = btrfs_qgroup_account_extent(trans, fs_info,
+ record->bytenr, record->num_bytes,
+ record->old_roots, new_roots);
+ record->old_roots = NULL;
+ new_roots = NULL;
}
- }
- mutex_unlock(&fs_info->qgroup_rescan_lock);
+cleanup:
+ ulist_free(record->old_roots);
+ ulist_free(new_roots);
+ new_roots = NULL;
+ rb_erase(node, &delayed_refs->dirty_extent_root);
+ kfree(record);
- ASSERT(is_fstree(oper->ref_root));
-
- trace_btrfs_qgroup_account(oper);
-
- switch (oper->type) {
- case BTRFS_QGROUP_OPER_ADD_EXCL:
- case BTRFS_QGROUP_OPER_SUB_EXCL:
- ret = qgroup_excl_accounting(fs_info, oper);
- break;
- case BTRFS_QGROUP_OPER_ADD_SHARED:
- case BTRFS_QGROUP_OPER_SUB_SHARED:
- ret = qgroup_shared_accounting(trans, fs_info, oper);
- break;
- case BTRFS_QGROUP_OPER_SUB_SUBTREE:
- ret = qgroup_subtree_accounting(trans, fs_info, oper);
- break;
- default:
- ASSERT(0);
- }
- return ret;
-}
-
-/*
- * Needs to be called everytime we run delayed refs, even if there is an error
- * in order to cleanup outstanding operations.
- */
-int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
-{
- struct btrfs_qgroup_operation *oper;
- int ret = 0;
-
- while (!list_empty(&trans->qgroup_ref_list)) {
- oper = list_first_entry(&trans->qgroup_ref_list,
- struct btrfs_qgroup_operation, list);
- list_del_init(&oper->list);
- if (!ret || !trans->aborted)
- ret = btrfs_qgroup_account(trans, fs_info, oper);
- spin_lock(&fs_info->qgroup_op_lock);
- rb_erase(&oper->n, &fs_info->qgroup_op_tree);
- spin_unlock(&fs_info->qgroup_op_lock);
- btrfs_put_tree_mod_seq(fs_info, &oper->elem);
- kfree(oper);
}
return ret;
}
@@ -2637,15 +2150,13 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
*/
static int
qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
- struct btrfs_trans_handle *trans, struct ulist *qgroups,
- struct ulist *tmp, struct extent_buffer *scratch_leaf)
+ struct btrfs_trans_handle *trans,
+ struct extent_buffer *scratch_leaf)
{
struct btrfs_key found;
struct ulist *roots = NULL;
struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
u64 num_bytes;
- u64 seq;
- int new_roots;
int slot;
int ret;
@@ -2695,33 +2206,15 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
else
num_bytes = found.offset;
- ulist_reinit(qgroups);
ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
&roots);
if (ret < 0)
goto out;
- spin_lock(&fs_info->qgroup_lock);
- seq = fs_info->qgroup_seq;
- fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
-
- new_roots = 0;
- ret = qgroup_calc_old_refcnt(fs_info, 0, tmp, roots, qgroups,
- seq, &new_roots, 1);
- if (ret < 0) {
- spin_unlock(&fs_info->qgroup_lock);
- ulist_free(roots);
- goto out;
- }
-
- ret = qgroup_adjust_counters(fs_info, 0, num_bytes, qgroups,
- seq, 0, new_roots, 1);
- if (ret < 0) {
- spin_unlock(&fs_info->qgroup_lock);
- ulist_free(roots);
+ /* For rescan, just pass old_roots as NULL */
+ ret = btrfs_qgroup_account_extent(trans, fs_info,
+ found.objectid, num_bytes, NULL, roots);
+ if (ret < 0)
goto out;
- }
- spin_unlock(&fs_info->qgroup_lock);
- ulist_free(roots);
}
out:
btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
@@ -2735,7 +2228,6 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
qgroup_rescan_work);
struct btrfs_path *path;
struct btrfs_trans_handle *trans = NULL;
- struct ulist *tmp = NULL, *qgroups = NULL;
struct extent_buffer *scratch_leaf = NULL;
int err = -ENOMEM;
int ret = 0;
@@ -2743,12 +2235,6 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
path = btrfs_alloc_path();
if (!path)
goto out;
- qgroups = ulist_alloc(GFP_NOFS);
- if (!qgroups)
- goto out;
- tmp = ulist_alloc(GFP_NOFS);
- if (!tmp)
- goto out;
scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
if (!scratch_leaf)
goto out;
@@ -2764,7 +2250,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
err = -EINTR;
} else {
err = qgroup_rescan_leaf(fs_info, path, trans,
- qgroups, tmp, scratch_leaf);
+ scratch_leaf);
}
if (err > 0)
btrfs_commit_transaction(trans, fs_info->fs_root);
@@ -2774,8 +2260,6 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
out:
kfree(scratch_leaf);
- ulist_free(qgroups);
- ulist_free(tmp);
btrfs_free_path(path);
mutex_lock(&fs_info->qgroup_rescan_lock);