btrfs: qgroup: Cleanup the old ref_node-oriented mechanism.
Goodbye, the old mechanism.

Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: Chris Mason <clm@fb.com>
Parent: 442244c963
Commit: e69bcee376
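For orientation only (not part of the commit, simplified assumptions throughout): the diff below deletes the per-reference "operation" records that the old mechanism queued into fs_info->qgroup_op_tree. The removed comp_oper() ordered those records by bytenr, then ref_root, then seq, then type. A minimal user-space C sketch of that ordering, using a plain array and qsort() instead of the kernel rb-tree, just to make the comparator observable:

/*
 * Hypothetical user-space sketch, not kernel code.  Field types and the
 * sample values are assumptions; the kernel kept these records in an
 * rb-tree protected by qgroup_op_lock rather than sorting an array.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct qgroup_op {              /* simplified stand-in for btrfs_qgroup_operation */
        uint64_t bytenr;
        uint64_t ref_root;
        uint64_t seq;
        int type;
};

/* Same key order as the removed comp_oper(): bytenr, ref_root, seq, type. */
static int comp_oper(const void *a, const void *b)
{
        const struct qgroup_op *o1 = a, *o2 = b;

        if (o1->bytenr != o2->bytenr)
                return o1->bytenr < o2->bytenr ? -1 : 1;
        if (o1->ref_root != o2->ref_root)
                return o1->ref_root < o2->ref_root ? -1 : 1;
        if (o1->seq != o2->seq)
                return o1->seq < o2->seq ? -1 : 1;
        if (o1->type != o2->type)
                return o1->type < o2->type ? -1 : 1;
        return 0;
}

int main(void)
{
        /* example values only */
        struct qgroup_op ops[] = {
                { 8192, 257, 2, 1 },
                { 4096, 257, 3, 0 },
                { 4096, 256, 1, 0 },
        };
        size_t i, n = sizeof(ops) / sizeof(ops[0]);

        qsort(ops, n, sizeof(ops[0]), comp_oper);
        for (i = 0; i < n; i++)
                printf("bytenr=%llu root=%llu seq=%llu type=%d\n",
                       (unsigned long long)ops[i].bytenr,
                       (unsigned long long)ops[i].ref_root,
                       (unsigned long long)ops[i].seq, ops[i].type);
        return 0;
}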
@@ -1736,7 +1736,7 @@ struct btrfs_fs_info {
        /* list of dirty qgroups to be written at next commit */
        struct list_head dirty_qgroups;

        /* used by btrfs_qgroup_record_ref for an efficient tree traversal */
        /* used by qgroup for an efficient tree traversal */
        u64 qgroup_seq;

        /* qgroup rescan items */

@@ -1981,7 +1981,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
        u64 refs;
        int ret;
        int no_quota = node->no_quota;
        enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_ADD_EXCL;

        path = btrfs_alloc_path();
        if (!path)
@@ -2009,8 +2008,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
        btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
        item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
        refs = btrfs_extent_refs(leaf, item);
        if (refs)
                type = BTRFS_QGROUP_OPER_ADD_SHARED;
        btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
        if (extent_op)
                __run_delayed_extent_op(extent_op, leaf, item);
@@ -6112,7 +6109,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
        u64 bytenr = node->bytenr;
        u64 num_bytes = node->num_bytes;
        int last_ref = 0;
        enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_SUB_EXCL;
        bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
                                                 SKINNY_METADATA);

@@ -6293,7 +6289,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
        refs -= refs_to_drop;

        if (refs > 0) {
                type = BTRFS_QGROUP_OPER_SUB_SHARED;
                if (extent_op)
                        __run_delayed_extent_op(extent_op, leaf, ei);
                /*

@@ -34,6 +34,7 @@
#include "extent_io.h"
#include "qgroup.h"

/* TODO XXX FIXME
 * - subvol delete -> delete when ref goes to 0? delete limits also?
 * - reorganize keys
@@ -1387,172 +1388,6 @@ out:
        return ret;
}

static int comp_oper_exist(struct btrfs_qgroup_operation *oper1,
                           struct btrfs_qgroup_operation *oper2)
{
        /*
         * Ignore seq and type here, we're looking for any operation
         * at all related to this extent on that root.
         */
        if (oper1->bytenr < oper2->bytenr)
                return -1;
        if (oper1->bytenr > oper2->bytenr)
                return 1;
        if (oper1->ref_root < oper2->ref_root)
                return -1;
        if (oper1->ref_root > oper2->ref_root)
                return 1;
        return 0;
}

static int qgroup_oper_exists(struct btrfs_fs_info *fs_info,
                              struct btrfs_qgroup_operation *oper)
{
        struct rb_node *n;
        struct btrfs_qgroup_operation *cur;
        int cmp;

        spin_lock(&fs_info->qgroup_op_lock);
        n = fs_info->qgroup_op_tree.rb_node;
        while (n) {
                cur = rb_entry(n, struct btrfs_qgroup_operation, n);
                cmp = comp_oper_exist(cur, oper);
                if (cmp < 0) {
                        n = n->rb_right;
                } else if (cmp) {
                        n = n->rb_left;
                } else {
                        spin_unlock(&fs_info->qgroup_op_lock);
                        return -EEXIST;
                }
        }
        spin_unlock(&fs_info->qgroup_op_lock);
        return 0;
}

static int comp_oper(struct btrfs_qgroup_operation *oper1,
                     struct btrfs_qgroup_operation *oper2)
{
        if (oper1->bytenr < oper2->bytenr)
                return -1;
        if (oper1->bytenr > oper2->bytenr)
                return 1;
        if (oper1->ref_root < oper2->ref_root)
                return -1;
        if (oper1->ref_root > oper2->ref_root)
                return 1;
        if (oper1->seq < oper2->seq)
                return -1;
        if (oper1->seq > oper2->seq)
                return 1;
        if (oper1->type < oper2->type)
                return -1;
        if (oper1->type > oper2->type)
                return 1;
        return 0;
}

static int insert_qgroup_oper(struct btrfs_fs_info *fs_info,
                              struct btrfs_qgroup_operation *oper)
{
        struct rb_node **p;
        struct rb_node *parent = NULL;
        struct btrfs_qgroup_operation *cur;
        int cmp;

        spin_lock(&fs_info->qgroup_op_lock);
        p = &fs_info->qgroup_op_tree.rb_node;
        while (*p) {
                parent = *p;
                cur = rb_entry(parent, struct btrfs_qgroup_operation, n);
                cmp = comp_oper(cur, oper);
                if (cmp < 0) {
                        p = &(*p)->rb_right;
                } else if (cmp) {
                        p = &(*p)->rb_left;
                } else {
                        spin_unlock(&fs_info->qgroup_op_lock);
                        return -EEXIST;
                }
        }
        rb_link_node(&oper->n, parent, p);
        rb_insert_color(&oper->n, &fs_info->qgroup_op_tree);
        spin_unlock(&fs_info->qgroup_op_lock);
        return 0;
}

/*
 * Record a quota operation for processing later on.
 * @trans: the transaction we are adding the delayed op to.
 * @fs_info: the fs_info for this fs.
 * @ref_root: the root of the reference we are acting on,
 * @bytenr: the bytenr we are acting on.
 * @num_bytes: the number of bytes in the reference.
 * @type: the type of operation this is.
 * @mod_seq: do we need to get a sequence number for looking up roots.
 *
 * We just add it to our trans qgroup_ref_list and carry on and process these
 * operations in order at some later point. If the reference root isn't a fs
 * root then we don't bother with doing anything.
 *
 * MUST BE HOLDING THE REF LOCK.
 */
int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
                            struct btrfs_fs_info *fs_info, u64 ref_root,
                            u64 bytenr, u64 num_bytes,
                            enum btrfs_qgroup_operation_type type, int mod_seq)
{
        struct btrfs_qgroup_operation *oper;
        int ret;

        if (!is_fstree(ref_root) || !fs_info->quota_enabled)
                return 0;

        oper = kmalloc(sizeof(*oper), GFP_NOFS);
        if (!oper)
                return -ENOMEM;

        oper->ref_root = ref_root;
        oper->bytenr = bytenr;
        oper->num_bytes = num_bytes;
        oper->type = type;
        oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
        INIT_LIST_HEAD(&oper->elem.list);
        oper->elem.seq = 0;

        trace_btrfs_qgroup_record_ref(oper);

        if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) {
                /*
                 * If any operation for this bytenr/ref_root combo
                 * exists, then we know it's not exclusively owned and
                 * shouldn't be queued up.
                 *
                 * This also catches the case where we have a cloned
                 * extent that gets queued up multiple times during
                 * drop snapshot.
                 */
                if (qgroup_oper_exists(fs_info, oper)) {
                        kfree(oper);
                        return 0;
                }
        }

        ret = insert_qgroup_oper(fs_info, oper);
        if (ret) {
                /* Shouldn't happen so have an assert for developers */
                ASSERT(0);
                kfree(oper);
                return ret;
        }
        list_add_tail(&oper->list, &trans->qgroup_ref_list);

        if (mod_seq)
                btrfs_get_tree_mod_seq(fs_info, &oper->elem);

        return 0;
}

int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
                                         struct btrfs_fs_info *fs_info)
{
@@ -1606,264 +1441,6 @@ struct btrfs_qgroup_extent_record
        return NULL;
}

/*
 * The easy accounting, if we are adding/removing the only ref for an extent
 * then this qgroup and all of the parent qgroups get their refrence and
 * exclusive counts adjusted.
 */
static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
                                  struct btrfs_qgroup_operation *oper)
{
        struct ulist *tmp;
        int sign = 0;
        int ret = 0;

        tmp = ulist_alloc(GFP_NOFS);
        if (!tmp)
                return -ENOMEM;

        spin_lock(&fs_info->qgroup_lock);
        if (!fs_info->quota_root)
                goto out;

        switch (oper->type) {
        case BTRFS_QGROUP_OPER_ADD_EXCL:
                sign = 1;
                break;
        case BTRFS_QGROUP_OPER_SUB_EXCL:
                sign = -1;
                break;
        default:
                ASSERT(0);
        }
        ret = __qgroup_excl_accounting(fs_info, tmp, oper->ref_root,
                                       oper->num_bytes, sign);
out:
        spin_unlock(&fs_info->qgroup_lock);
        ulist_free(tmp);
        return ret;
}

/*
 * Walk all of the roots that pointed to our bytenr and adjust their refcnts as
 * properly.
 */
static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
                                  u64 root_to_skip, struct ulist *tmp,
                                  struct ulist *roots, struct ulist *qgroups,
                                  u64 seq, int *old_roots, int rescan)
{
        struct ulist_node *unode;
        struct ulist_iterator uiter;
        struct ulist_node *tmp_unode;
        struct ulist_iterator tmp_uiter;
        struct btrfs_qgroup *qg;
        int ret;

        ULIST_ITER_INIT(&uiter);
        while ((unode = ulist_next(roots, &uiter))) {
                /* We don't count our current root here */
                if (unode->val == root_to_skip)
                        continue;
                qg = find_qgroup_rb(fs_info, unode->val);
                if (!qg)
                        continue;
                /*
                 * We could have a pending removal of this same ref so we may
                 * not have actually found our ref root when doing
                 * btrfs_find_all_roots, so we need to keep track of how many
                 * old roots we find in case we removed ours and added a
                 * different one at the same time. I don't think this could
                 * happen in practice but that sort of thinking leads to pain
                 * and suffering and to the dark side.
                 */
                (*old_roots)++;

                ulist_reinit(tmp);
                ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
                                GFP_ATOMIC);
                if (ret < 0)
                        return ret;
                ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), GFP_ATOMIC);
                if (ret < 0)
                        return ret;
                ULIST_ITER_INIT(&tmp_uiter);
                while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
                        struct btrfs_qgroup_list *glist;
                        int mod;

                        qg = u64_to_ptr(tmp_unode->aux);
                        /*
                         * We use this sequence number to keep from having to
                         * run the whole list and 0 out the refcnt every time.
                         * We basically use sequnce as the known 0 count and
                         * then add 1 everytime we see a qgroup. This is how we
                         * get how many of the roots actually point up to the
                         * upper level qgroups in order to determine exclusive
                         * counts.
                         *
                         * For rescan none of the extent is recorded before so
                         * we just don't add old_refcnt.
                         */
                        if (rescan)
                                mod = 0;
                        else
                                mod = 1;
                        btrfs_qgroup_update_old_refcnt(qg, seq, mod);
                        btrfs_qgroup_update_new_refcnt(qg, seq, 1);
                        list_for_each_entry(glist, &qg->groups, next_group) {
                                ret = ulist_add(qgroups, glist->group->qgroupid,
                                                ptr_to_u64(glist->group),
                                                GFP_ATOMIC);
                                if (ret < 0)
                                        return ret;
                                ret = ulist_add(tmp, glist->group->qgroupid,
                                                ptr_to_u64(glist->group),
                                                GFP_ATOMIC);
                                if (ret < 0)
                                        return ret;
                        }
                }
        }
        return 0;
}

/*
 * We need to walk forward in our operation tree and account for any roots that
 * were deleted after we made this operation.
 */
static int qgroup_account_deleted_refs(struct btrfs_fs_info *fs_info,
                                       struct btrfs_qgroup_operation *oper,
                                       struct ulist *tmp,
                                       struct ulist *qgroups, u64 seq,
                                       int *old_roots)
{
        struct ulist_node *unode;
        struct ulist_iterator uiter;
        struct btrfs_qgroup *qg;
        struct btrfs_qgroup_operation *tmp_oper;
        struct rb_node *n;
        int ret;

        ulist_reinit(tmp);

        /*
         * We only walk forward in the tree since we're only interested in
         * removals that happened _after_ our operation.
         */
        spin_lock(&fs_info->qgroup_op_lock);
        n = rb_next(&oper->n);
        spin_unlock(&fs_info->qgroup_op_lock);
        if (!n)
                return 0;
        tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
        while (tmp_oper->bytenr == oper->bytenr) {
                /*
                 * If it's not a removal we don't care, additions work out
                 * properly with our refcnt tracking.
                 */
                if (tmp_oper->type != BTRFS_QGROUP_OPER_SUB_SHARED &&
                    tmp_oper->type != BTRFS_QGROUP_OPER_SUB_EXCL)
                        goto next;
                qg = find_qgroup_rb(fs_info, tmp_oper->ref_root);
                if (!qg)
                        goto next;
                ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
                                GFP_ATOMIC);
                if (ret) {
                        if (ret < 0)
                                return ret;
                        /*
                         * We only want to increase old_roots if this qgroup is
                         * not already in the list of qgroups. If it is already
                         * there then that means it must have been re-added or
                         * the delete will be discarded because we had an
                         * existing ref that we haven't looked up yet. In this
                         * case we don't want to increase old_roots. So if ret
                         * == 1 then we know that this is the first time we've
                         * seen this qgroup and we can bump the old_roots.
                         */
                        (*old_roots)++;
                        ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg),
                                        GFP_ATOMIC);
                        if (ret < 0)
                                return ret;
                }
next:
                spin_lock(&fs_info->qgroup_op_lock);
                n = rb_next(&tmp_oper->n);
                spin_unlock(&fs_info->qgroup_op_lock);
                if (!n)
                        break;
                tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
        }

        /* Ok now process the qgroups we found */
        ULIST_ITER_INIT(&uiter);
        while ((unode = ulist_next(tmp, &uiter))) {
                struct btrfs_qgroup_list *glist;

                qg = u64_to_ptr(unode->aux);
                btrfs_qgroup_update_old_refcnt(qg, seq, 1);
                btrfs_qgroup_update_new_refcnt(qg, seq, 1);
                list_for_each_entry(glist, &qg->groups, next_group) {
                        ret = ulist_add(qgroups, glist->group->qgroupid,
                                        ptr_to_u64(glist->group), GFP_ATOMIC);
                        if (ret < 0)
                                return ret;
                        ret = ulist_add(tmp, glist->group->qgroupid,
                                        ptr_to_u64(glist->group), GFP_ATOMIC);
                        if (ret < 0)
                                return ret;
                }
        }
        return 0;
}

/* Add refcnt for the newly added reference. */
static int qgroup_calc_new_refcnt(struct btrfs_fs_info *fs_info,
                                  struct btrfs_qgroup_operation *oper,
                                  struct btrfs_qgroup *qgroup,
                                  struct ulist *tmp, struct ulist *qgroups,
                                  u64 seq)
{
        struct ulist_node *unode;
        struct ulist_iterator uiter;
        struct btrfs_qgroup *qg;
        int ret;

        ulist_reinit(tmp);
        ret = ulist_add(qgroups, qgroup->qgroupid, ptr_to_u64(qgroup),
                        GFP_ATOMIC);
        if (ret < 0)
                return ret;
        ret = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup),
                        GFP_ATOMIC);
        if (ret < 0)
                return ret;
        ULIST_ITER_INIT(&uiter);
        while ((unode = ulist_next(tmp, &uiter))) {
                struct btrfs_qgroup_list *glist;

                qg = u64_to_ptr(unode->aux);
                if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED)
                        btrfs_qgroup_update_new_refcnt(qg, seq, 1);
                else
                        btrfs_qgroup_update_old_refcnt(qg, seq, 1);
                list_for_each_entry(glist, &qg->groups, next_group) {
                        ret = ulist_add(tmp, glist->group->qgroupid,
                                        ptr_to_u64(glist->group), GFP_ATOMIC);
                        if (ret < 0)
                                return ret;
                        ret = ulist_add(qgroups, glist->group->qgroupid,
                                        ptr_to_u64(glist->group), GFP_ATOMIC);
                        if (ret < 0)
                                return ret;
                }
        }
        return 0;
}

#define UPDATE_NEW      0
#define UPDATE_OLD      1
/*
@@ -1925,6 +1502,7 @@ static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info,
/*
 * Update qgroup rfer/excl counters.
 * Rfer update is easy, codes can explain themselves.
 *
 * Excl update is tricky, the update is split into 2 part.
 * Part 1: Possible exclusive <-> sharing detect:
 *      |       A       |       !A      |
@@ -2042,419 +1620,6 @@ static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
        return 0;
}

/*
 * This adjusts the counters for all referenced qgroups if need be.
 */
static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info,
                                  u64 root_to_skip, u64 num_bytes,
                                  struct ulist *qgroups, u64 seq,
                                  int old_roots, int new_roots, int rescan)
{
        struct ulist_node *unode;
        struct ulist_iterator uiter;
        struct btrfs_qgroup *qg;
        u64 cur_new_count, cur_old_count;

        ULIST_ITER_INIT(&uiter);
        while ((unode = ulist_next(qgroups, &uiter))) {
                bool dirty = false;

                qg = u64_to_ptr(unode->aux);
                cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
                cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);

                /*
                 * Wasn't referenced before but is now, add to the reference
                 * counters.
                 */
                if (cur_old_count == 0 && cur_new_count > 0) {
                        qg->rfer += num_bytes;
                        qg->rfer_cmpr += num_bytes;
                        dirty = true;
                }

                /*
                 * Was referenced before but isn't now, subtract from the
                 * reference counters.
                 */
                if (cur_old_count > 0 && cur_new_count == 0) {
                        qg->rfer -= num_bytes;
                        qg->rfer_cmpr -= num_bytes;
                        dirty = true;
                }

                /*
                 * If our refcount was the same as the roots previously but our
                 * new count isn't the same as the number of roots now then we
                 * went from having a exclusive reference on this range to not.
                 */
                if (old_roots && cur_old_count == old_roots &&
                    (cur_new_count != new_roots || new_roots == 0)) {
                        WARN_ON(cur_new_count != new_roots && new_roots == 0);
                        qg->excl -= num_bytes;
                        qg->excl_cmpr -= num_bytes;
                        dirty = true;
                }

                /*
                 * If we didn't reference all the roots before but now we do we
                 * have an exclusive reference to this range.
                 */
                if ((!old_roots || (old_roots && cur_old_count != old_roots))
                    && cur_new_count == new_roots) {
                        qg->excl += num_bytes;
                        qg->excl_cmpr += num_bytes;
                        dirty = true;
                }

                if (dirty)
                        qgroup_dirty(fs_info, qg);
        }
        return 0;
}

/*
 * If we removed a data extent and there were other references for that bytenr
 * then we need to lookup all referenced roots to make sure we still don't
 * reference this bytenr. If we do then we can just discard this operation.
 */
static int check_existing_refs(struct btrfs_trans_handle *trans,
                               struct btrfs_fs_info *fs_info,
                               struct btrfs_qgroup_operation *oper)
{
        struct ulist *roots = NULL;
        struct ulist_node *unode;
        struct ulist_iterator uiter;
        int ret = 0;

        ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
                                   oper->elem.seq, &roots);
        if (ret < 0)
                return ret;
        ret = 0;

        ULIST_ITER_INIT(&uiter);
        while ((unode = ulist_next(roots, &uiter))) {
                if (unode->val == oper->ref_root) {
                        ret = 1;
                        break;
                }
        }
        ulist_free(roots);
        btrfs_put_tree_mod_seq(fs_info, &oper->elem);

        return ret;
}

/*
 * If we share a reference across multiple roots then we may need to adjust
 * various qgroups referenced and exclusive counters. The basic premise is this
 *
 * 1) We have seq to represent a 0 count. Instead of looping through all of the
 * qgroups and resetting their refcount to 0 we just constantly bump this
 * sequence number to act as the base reference count. This means that if
 * anybody is equal to or below this sequence they were never referenced. We
 * jack this sequence up by the number of roots we found each time in order to
 * make sure we don't have any overlap.
 *
 * 2) We first search all the roots that reference the area _except_ the root
 * we're acting on currently. This makes up the old_refcnt of all the qgroups
 * before.
 *
 * 3) We walk all of the qgroups referenced by the root we are currently acting
 * on, and will either adjust old_refcnt in the case of a removal or the
 * new_refcnt in the case of an addition.
 *
 * 4) Finally we walk all the qgroups that are referenced by this range
 * including the root we are acting on currently. We will adjust the counters
 * based on the number of roots we had and will have after this operation.
 *
 * Take this example as an illustration
 *
 *                      [qgroup 1/0]
 *                   /         |          \
 *              [qg 0/0]   [qg 0/1]     [qg 0/2]
 *                   \          |            /
 *                  [        extent           ]
 *
 * Say we are adding a reference that is covered by qg 0/0. The first step
 * would give a refcnt of 1 to qg 0/1 and 0/2 and a refcnt of 2 to qg 1/0 with
 * old_roots being 2. Because it is adding new_roots will be 1. We then go
 * through qg 0/0 which will get the new_refcnt set to 1 and add 1 to qg 1/0's
 * new_refcnt, bringing it to 3. We then walk through all of the qgroups, we
 * notice that the old refcnt for qg 0/0 < the new refcnt, so we added a
 * reference and thus must add the size to the referenced bytes. Everything
 * else is the same so nothing else changes.
 */
static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
                                    struct btrfs_fs_info *fs_info,
                                    struct btrfs_qgroup_operation *oper)
{
        struct ulist *roots = NULL;
        struct ulist *qgroups, *tmp;
        struct btrfs_qgroup *qgroup;
        struct seq_list elem = SEQ_LIST_INIT(elem);
        u64 seq;
        int old_roots = 0;
        int new_roots = 0;
        int ret = 0;

        if (oper->elem.seq) {
                ret = check_existing_refs(trans, fs_info, oper);
                if (ret < 0)
                        return ret;
                if (ret)
                        return 0;
        }

        qgroups = ulist_alloc(GFP_NOFS);
        if (!qgroups)
                return -ENOMEM;

        tmp = ulist_alloc(GFP_NOFS);
        if (!tmp) {
                ulist_free(qgroups);
                return -ENOMEM;
        }

        btrfs_get_tree_mod_seq(fs_info, &elem);
        ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq,
                                   &roots);
        btrfs_put_tree_mod_seq(fs_info, &elem);
        if (ret < 0) {
                ulist_free(qgroups);
                ulist_free(tmp);
                return ret;
        }
        spin_lock(&fs_info->qgroup_lock);
        qgroup = find_qgroup_rb(fs_info, oper->ref_root);
        if (!qgroup)
                goto out;
        seq = fs_info->qgroup_seq;

        /*
         * So roots is the list of all the roots currently pointing at the
         * bytenr, including the ref we are adding if we are adding, or not if
         * we are removing a ref. So we pass in the ref_root to skip that root
         * in our calculations. We set old_refnct and new_refcnt cause who the
         * hell knows what everything looked like before, and it doesn't matter
         * except...
         */
        ret = qgroup_calc_old_refcnt(fs_info, oper->ref_root, tmp, roots, qgroups,
                                     seq, &old_roots, 0);
        if (ret < 0)
                goto out;

        /*
         * Now adjust the refcounts of the qgroups that care about this
         * reference, either the old_count in the case of removal or new_count
         * in the case of an addition.
         */
        ret = qgroup_calc_new_refcnt(fs_info, oper, qgroup, tmp, qgroups,
                                     seq);
        if (ret < 0)
                goto out;

        /*
         * ...in the case of removals. If we had a removal before we got around
         * to processing this operation then we need to find that guy and count
         * his references as if they really existed so we don't end up screwing
         * up the exclusive counts. Then whenever we go to process the delete
         * everything will be grand and we can account for whatever exclusive
         * changes need to be made there. We also have to pass in old_roots so
         * we have an accurate count of the roots as it pertains to this
         * operations view of the world.
         */
        ret = qgroup_account_deleted_refs(fs_info, oper, tmp, qgroups, seq,
                                          &old_roots);
        if (ret < 0)
                goto out;

        /*
         * We are adding our root, need to adjust up the number of roots,
         * otherwise old_roots is the number of roots we want.
         */
        if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
                new_roots = old_roots + 1;
        } else {
                new_roots = old_roots;
                old_roots++;
        }

        /*
         * Bump qgroup_seq to avoid seq overlap
         * XXX: This makes qgroup_seq mismatch with oper->seq.
         */
        fs_info->qgroup_seq += old_roots + 1;

        /*
         * And now the magic happens, bless Arne for having a pretty elegant
         * solution for this.
         */
        qgroup_adjust_counters(fs_info, oper->ref_root, oper->num_bytes,
                               qgroups, seq, old_roots, new_roots, 0);
out:
        spin_unlock(&fs_info->qgroup_lock);
        ulist_free(qgroups);
        ulist_free(roots);
        ulist_free(tmp);
        return ret;
}

/*
 * Process a reference to a shared subtree. This type of operation is
 * queued during snapshot removal when we encounter extents which are
 * shared between more than one root.
 */
static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
                                     struct btrfs_fs_info *fs_info,
                                     struct btrfs_qgroup_operation *oper)
{
        struct ulist *roots = NULL;
        struct ulist_node *unode;
        struct ulist_iterator uiter;
        struct btrfs_qgroup_list *glist;
        struct ulist *parents;
        int ret = 0;
        int err;
        struct btrfs_qgroup *qg;
        u64 root_obj = 0;
        struct seq_list elem = SEQ_LIST_INIT(elem);

        parents = ulist_alloc(GFP_NOFS);
        if (!parents)
                return -ENOMEM;

        btrfs_get_tree_mod_seq(fs_info, &elem);
        ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
                                   elem.seq, &roots);
        btrfs_put_tree_mod_seq(fs_info, &elem);
        if (ret < 0)
                goto out;

        if (roots->nnodes != 1)
                goto out;

        ULIST_ITER_INIT(&uiter);
        unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */
        /*
         * If we find our ref root then that means all refs
         * this extent has to the root have not yet been
         * deleted. In that case, we do nothing and let the
         * last ref for this bytenr drive our update.
         *
         * This can happen for example if an extent is
         * referenced multiple times in a snapshot (clone,
         * etc). If we are in the middle of snapshot removal,
         * queued updates for such an extent will find the
         * root if we have not yet finished removing the
         * snapshot.
         */
        if (unode->val == oper->ref_root)
                goto out;

        root_obj = unode->val;
        BUG_ON(!root_obj);

        spin_lock(&fs_info->qgroup_lock);
        qg = find_qgroup_rb(fs_info, root_obj);
        if (!qg)
                goto out_unlock;

        qg->excl += oper->num_bytes;
        qg->excl_cmpr += oper->num_bytes;
        qgroup_dirty(fs_info, qg);

        /*
         * Adjust counts for parent groups. First we find all
         * parents, then in the 2nd loop we do the adjustment
         * while adding parents of the parents to our ulist.
         */
        list_for_each_entry(glist, &qg->groups, next_group) {
                err = ulist_add(parents, glist->group->qgroupid,
                                ptr_to_u64(glist->group), GFP_ATOMIC);
                if (err < 0) {
                        ret = err;
                        goto out_unlock;
                }
        }

        ULIST_ITER_INIT(&uiter);
        while ((unode = ulist_next(parents, &uiter))) {
                qg = u64_to_ptr(unode->aux);
                qg->excl += oper->num_bytes;
                qg->excl_cmpr += oper->num_bytes;
                qgroup_dirty(fs_info, qg);

                /* Add any parents of the parents */
                list_for_each_entry(glist, &qg->groups, next_group) {
                        err = ulist_add(parents, glist->group->qgroupid,
                                        ptr_to_u64(glist->group), GFP_ATOMIC);
                        if (err < 0) {
                                ret = err;
                                goto out_unlock;
                        }
                }
        }

out_unlock:
        spin_unlock(&fs_info->qgroup_lock);

out:
        ulist_free(roots);
        ulist_free(parents);
        return ret;
}

/*
 * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
 * from the fs. First, all roots referencing the extent are searched, and
 * then the space is accounted accordingly to the different roots. The
 * accounting algorithm works in 3 steps documented inline.
 */
static int btrfs_qgroup_account(struct btrfs_trans_handle *trans,
                                struct btrfs_fs_info *fs_info,
                                struct btrfs_qgroup_operation *oper)
{
        int ret = 0;

        if (!fs_info->quota_enabled)
                return 0;

        BUG_ON(!fs_info->quota_root);

        mutex_lock(&fs_info->qgroup_rescan_lock);
        if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
                if (fs_info->qgroup_rescan_progress.objectid <= oper->bytenr) {
                        mutex_unlock(&fs_info->qgroup_rescan_lock);
                        return 0;
                }
        }
        mutex_unlock(&fs_info->qgroup_rescan_lock);

        ASSERT(is_fstree(oper->ref_root));

        trace_btrfs_qgroup_account(oper);

        switch (oper->type) {
        case BTRFS_QGROUP_OPER_ADD_EXCL:
        case BTRFS_QGROUP_OPER_SUB_EXCL:
                ret = qgroup_excl_accounting(fs_info, oper);
                break;
        case BTRFS_QGROUP_OPER_ADD_SHARED:
        case BTRFS_QGROUP_OPER_SUB_SHARED:
                ret = qgroup_shared_accounting(trans, fs_info, oper);
                break;
        case BTRFS_QGROUP_OPER_SUB_SUBTREE:
                ret = qgroup_subtree_accounting(trans, fs_info, oper);
                break;
        default:
                ASSERT(0);
        }
        return ret;
}

int
btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
                            struct btrfs_fs_info *fs_info,
@@ -2571,31 +1736,6 @@ cleanup:
        return ret;
}

/*
 * Needs to be called everytime we run delayed refs, even if there is an error
 * in order to cleanup outstanding operations.
 */
int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
                                    struct btrfs_fs_info *fs_info)
{
        struct btrfs_qgroup_operation *oper;
        int ret = 0;

        while (!list_empty(&trans->qgroup_ref_list)) {
                oper = list_first_entry(&trans->qgroup_ref_list,
                                        struct btrfs_qgroup_operation, list);
                list_del_init(&oper->list);
                if (!ret || !trans->aborted)
                        ret = btrfs_qgroup_account(trans, fs_info, oper);
                spin_lock(&fs_info->qgroup_op_lock);
                rb_erase(&oper->n, &fs_info->qgroup_op_tree);
                spin_unlock(&fs_info->qgroup_op_lock);
                btrfs_put_tree_mod_seq(fs_info, &oper->elem);
                kfree(oper);
        }
        return ret;
}

/*
 * called from commit_transaction. Writes all changed qgroups to disk.
 */

@@ -22,45 +22,6 @@
#include "ulist.h"
#include "delayed-ref.h"

/*
 * A description of the operations, all of these operations only happen when we
 * are adding the 1st reference for that subvolume in the case of adding space
 * or on the last reference delete in the case of subtraction. The only
 * exception is the last one, which is added for confusion.
 *
 * BTRFS_QGROUP_OPER_ADD_EXCL: adding bytes where this subvolume is the only
 * one pointing at the bytes we are adding. This is called on the first
 * allocation.
 *
 * BTRFS_QGROUP_OPER_ADD_SHARED: adding bytes where this bytenr is going to be
 * shared between subvols. This is called on the creation of a ref that already
 * has refs from a different subvolume, so basically reflink.
 *
 * BTRFS_QGROUP_OPER_SUB_EXCL: removing bytes where this subvolume is the only
 * one referencing the range.
 *
 * BTRFS_QGROUP_OPER_SUB_SHARED: removing bytes where this subvolume shares with
 * refs with other subvolumes.
 */
enum btrfs_qgroup_operation_type {
        BTRFS_QGROUP_OPER_ADD_EXCL,
        BTRFS_QGROUP_OPER_ADD_SHARED,
        BTRFS_QGROUP_OPER_SUB_EXCL,
        BTRFS_QGROUP_OPER_SUB_SHARED,
        BTRFS_QGROUP_OPER_SUB_SUBTREE,
};

struct btrfs_qgroup_operation {
        u64 ref_root;
        u64 bytenr;
        u64 num_bytes;
        u64 seq;
        enum btrfs_qgroup_operation_type type;
        struct seq_list elem;
        struct rb_node n;
        struct list_head list;
};

/*
 * Record a dirty extent, and info qgroup to update quota on it
 * TODO: Use kmem cache to alloc it.
@@ -93,11 +54,6 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
struct btrfs_delayed_extent_op;
int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
                            struct btrfs_fs_info *fs_info, u64 ref_root,
                            u64 bytenr, u64 num_bytes,
                            enum btrfs_qgroup_operation_type type,
                            int mod_seq);
int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
                                         struct btrfs_fs_info *fs_info);
struct btrfs_qgroup_extent_record
@@ -110,11 +66,6 @@ btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
                            struct ulist *old_roots, struct ulist *new_roots);
int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
                                 struct btrfs_fs_info *fs_info);
int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
                                    struct btrfs_fs_info *fs_info);
void btrfs_remove_qgroup_operation(struct btrfs_trans_handle *trans,
                                   struct btrfs_fs_info *fs_info,
                                   struct btrfs_qgroup_operation *oper);
int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
                      struct btrfs_fs_info *fs_info);
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,

@@ -1117,61 +1117,6 @@ DEFINE_EVENT(btrfs__workqueue_done, btrfs_workqueue_destroy,
        TP_ARGS(wq)
);

#define show_oper_type(type)                                    \
        __print_symbolic(type,                                  \
                { BTRFS_QGROUP_OPER_ADD_EXCL,   "OPER_ADD_EXCL" },      \
                { BTRFS_QGROUP_OPER_ADD_SHARED, "OPER_ADD_SHARED" },    \
                { BTRFS_QGROUP_OPER_SUB_EXCL,   "OPER_SUB_EXCL" },      \
                { BTRFS_QGROUP_OPER_SUB_SHARED, "OPER_SUB_SHARED" })

DECLARE_EVENT_CLASS(btrfs_qgroup_oper,

        TP_PROTO(struct btrfs_qgroup_operation *oper),

        TP_ARGS(oper),

        TP_STRUCT__entry(
                __field( u64, ref_root )
                __field( u64, bytenr )
                __field( u64, num_bytes )
                __field( u64, seq )
                __field( int, type )
                __field( u64, elem_seq )
        ),

        TP_fast_assign(
                __entry->ref_root = oper->ref_root;
                __entry->bytenr = oper->bytenr,
                __entry->num_bytes = oper->num_bytes;
                __entry->seq = oper->seq;
                __entry->type = oper->type;
                __entry->elem_seq = oper->elem.seq;
        ),

        TP_printk("ref_root = %llu, bytenr = %llu, num_bytes = %llu, "
                  "seq = %llu, elem.seq = %llu, type = %s",
                  (unsigned long long)__entry->ref_root,
                  (unsigned long long)__entry->bytenr,
                  (unsigned long long)__entry->num_bytes,
                  (unsigned long long)__entry->seq,
                  (unsigned long long)__entry->elem_seq,
                  show_oper_type(__entry->type))
);

DEFINE_EVENT(btrfs_qgroup_oper, btrfs_qgroup_account,

        TP_PROTO(struct btrfs_qgroup_operation *oper),

        TP_ARGS(oper)
);

DEFINE_EVENT(btrfs_qgroup_oper, btrfs_qgroup_record_ref,

        TP_PROTO(struct btrfs_qgroup_operation *oper),

        TP_ARGS(oper)
);

#endif /* _TRACE_BTRFS_H */

/* This part must be outside protection */