btrfs: qgroup: Cleanup the old ref_node-oriented mechanism.

Goodbye, the old mechanisim.

Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: Chris Mason <clm@fb.com>
This commit is contained in:
Qu Wenruo 2015-04-17 10:23:16 +08:00 коммит произвёл Chris Mason
Родитель 442244c963
Коммит e69bcee376
5 изменённых файлов: 3 добавлений и 972 удалений

Просмотреть файл

@ -1736,7 +1736,7 @@ struct btrfs_fs_info {
/* list of dirty qgroups to be written at next commit */
struct list_head dirty_qgroups;
/* used by btrfs_qgroup_record_ref for an efficient tree traversal */
/* used by qgroup for an efficient tree traversal */
u64 qgroup_seq;
/* qgroup rescan items */

Просмотреть файл

@ -1981,7 +1981,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
u64 refs;
int ret;
int no_quota = node->no_quota;
enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_ADD_EXCL;
path = btrfs_alloc_path();
if (!path)
@ -2009,8 +2008,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
refs = btrfs_extent_refs(leaf, item);
if (refs)
type = BTRFS_QGROUP_OPER_ADD_SHARED;
btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
if (extent_op)
__run_delayed_extent_op(extent_op, leaf, item);
@ -6112,7 +6109,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
u64 bytenr = node->bytenr;
u64 num_bytes = node->num_bytes;
int last_ref = 0;
enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_SUB_EXCL;
bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
SKINNY_METADATA);
@ -6293,7 +6289,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
refs -= refs_to_drop;
if (refs > 0) {
type = BTRFS_QGROUP_OPER_SUB_SHARED;
if (extent_op)
__run_delayed_extent_op(extent_op, leaf, ei);
/*

Просмотреть файл

@ -34,6 +34,7 @@
#include "extent_io.h"
#include "qgroup.h"
/* TODO XXX FIXME
* - subvol delete -> delete when ref goes to 0? delete limits also?
* - reorganize keys
@ -1387,172 +1388,6 @@ out:
return ret;
}
static int comp_oper_exist(struct btrfs_qgroup_operation *oper1,
struct btrfs_qgroup_operation *oper2)
{
/*
* Ignore seq and type here, we're looking for any operation
* at all related to this extent on that root.
*/
if (oper1->bytenr < oper2->bytenr)
return -1;
if (oper1->bytenr > oper2->bytenr)
return 1;
if (oper1->ref_root < oper2->ref_root)
return -1;
if (oper1->ref_root > oper2->ref_root)
return 1;
return 0;
}
static int qgroup_oper_exists(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup_operation *oper)
{
struct rb_node *n;
struct btrfs_qgroup_operation *cur;
int cmp;
spin_lock(&fs_info->qgroup_op_lock);
n = fs_info->qgroup_op_tree.rb_node;
while (n) {
cur = rb_entry(n, struct btrfs_qgroup_operation, n);
cmp = comp_oper_exist(cur, oper);
if (cmp < 0) {
n = n->rb_right;
} else if (cmp) {
n = n->rb_left;
} else {
spin_unlock(&fs_info->qgroup_op_lock);
return -EEXIST;
}
}
spin_unlock(&fs_info->qgroup_op_lock);
return 0;
}
static int comp_oper(struct btrfs_qgroup_operation *oper1,
struct btrfs_qgroup_operation *oper2)
{
if (oper1->bytenr < oper2->bytenr)
return -1;
if (oper1->bytenr > oper2->bytenr)
return 1;
if (oper1->ref_root < oper2->ref_root)
return -1;
if (oper1->ref_root > oper2->ref_root)
return 1;
if (oper1->seq < oper2->seq)
return -1;
if (oper1->seq > oper2->seq)
return 1;
if (oper1->type < oper2->type)
return -1;
if (oper1->type > oper2->type)
return 1;
return 0;
}
static int insert_qgroup_oper(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup_operation *oper)
{
struct rb_node **p;
struct rb_node *parent = NULL;
struct btrfs_qgroup_operation *cur;
int cmp;
spin_lock(&fs_info->qgroup_op_lock);
p = &fs_info->qgroup_op_tree.rb_node;
while (*p) {
parent = *p;
cur = rb_entry(parent, struct btrfs_qgroup_operation, n);
cmp = comp_oper(cur, oper);
if (cmp < 0) {
p = &(*p)->rb_right;
} else if (cmp) {
p = &(*p)->rb_left;
} else {
spin_unlock(&fs_info->qgroup_op_lock);
return -EEXIST;
}
}
rb_link_node(&oper->n, parent, p);
rb_insert_color(&oper->n, &fs_info->qgroup_op_tree);
spin_unlock(&fs_info->qgroup_op_lock);
return 0;
}
/*
* Record a quota operation for processing later on.
* @trans: the transaction we are adding the delayed op to.
* @fs_info: the fs_info for this fs.
* @ref_root: the root of the reference we are acting on,
* @bytenr: the bytenr we are acting on.
* @num_bytes: the number of bytes in the reference.
* @type: the type of operation this is.
* @mod_seq: do we need to get a sequence number for looking up roots.
*
* We just add it to our trans qgroup_ref_list and carry on and process these
* operations in order at some later point. If the reference root isn't a fs
* root then we don't bother with doing anything.
*
* MUST BE HOLDING THE REF LOCK.
*/
int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 ref_root,
u64 bytenr, u64 num_bytes,
enum btrfs_qgroup_operation_type type, int mod_seq)
{
struct btrfs_qgroup_operation *oper;
int ret;
if (!is_fstree(ref_root) || !fs_info->quota_enabled)
return 0;
oper = kmalloc(sizeof(*oper), GFP_NOFS);
if (!oper)
return -ENOMEM;
oper->ref_root = ref_root;
oper->bytenr = bytenr;
oper->num_bytes = num_bytes;
oper->type = type;
oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
INIT_LIST_HEAD(&oper->elem.list);
oper->elem.seq = 0;
trace_btrfs_qgroup_record_ref(oper);
if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) {
/*
* If any operation for this bytenr/ref_root combo
* exists, then we know it's not exclusively owned and
* shouldn't be queued up.
*
* This also catches the case where we have a cloned
* extent that gets queued up multiple times during
* drop snapshot.
*/
if (qgroup_oper_exists(fs_info, oper)) {
kfree(oper);
return 0;
}
}
ret = insert_qgroup_oper(fs_info, oper);
if (ret) {
/* Shouldn't happen so have an assert for developers */
ASSERT(0);
kfree(oper);
return ret;
}
list_add_tail(&oper->list, &trans->qgroup_ref_list);
if (mod_seq)
btrfs_get_tree_mod_seq(fs_info, &oper->elem);
return 0;
}
int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
@ -1606,264 +1441,6 @@ struct btrfs_qgroup_extent_record
return NULL;
}
/*
* The easy accounting, if we are adding/removing the only ref for an extent
* then this qgroup and all of the parent qgroups get their refrence and
* exclusive counts adjusted.
*/
static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup_operation *oper)
{
struct ulist *tmp;
int sign = 0;
int ret = 0;
tmp = ulist_alloc(GFP_NOFS);
if (!tmp)
return -ENOMEM;
spin_lock(&fs_info->qgroup_lock);
if (!fs_info->quota_root)
goto out;
switch (oper->type) {
case BTRFS_QGROUP_OPER_ADD_EXCL:
sign = 1;
break;
case BTRFS_QGROUP_OPER_SUB_EXCL:
sign = -1;
break;
default:
ASSERT(0);
}
ret = __qgroup_excl_accounting(fs_info, tmp, oper->ref_root,
oper->num_bytes, sign);
out:
spin_unlock(&fs_info->qgroup_lock);
ulist_free(tmp);
return ret;
}
/*
* Walk all of the roots that pointed to our bytenr and adjust their refcnts as
* properly.
*/
static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
u64 root_to_skip, struct ulist *tmp,
struct ulist *roots, struct ulist *qgroups,
u64 seq, int *old_roots, int rescan)
{
struct ulist_node *unode;
struct ulist_iterator uiter;
struct ulist_node *tmp_unode;
struct ulist_iterator tmp_uiter;
struct btrfs_qgroup *qg;
int ret;
ULIST_ITER_INIT(&uiter);
while ((unode = ulist_next(roots, &uiter))) {
/* We don't count our current root here */
if (unode->val == root_to_skip)
continue;
qg = find_qgroup_rb(fs_info, unode->val);
if (!qg)
continue;
/*
* We could have a pending removal of this same ref so we may
* not have actually found our ref root when doing
* btrfs_find_all_roots, so we need to keep track of how many
* old roots we find in case we removed ours and added a
* different one at the same time. I don't think this could
* happen in practice but that sort of thinking leads to pain
* and suffering and to the dark side.
*/
(*old_roots)++;
ulist_reinit(tmp);
ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
GFP_ATOMIC);
if (ret < 0)
return ret;
ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), GFP_ATOMIC);
if (ret < 0)
return ret;
ULIST_ITER_INIT(&tmp_uiter);
while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
struct btrfs_qgroup_list *glist;
int mod;
qg = u64_to_ptr(tmp_unode->aux);
/*
* We use this sequence number to keep from having to
* run the whole list and 0 out the refcnt every time.
* We basically use sequnce as the known 0 count and
* then add 1 everytime we see a qgroup. This is how we
* get how many of the roots actually point up to the
* upper level qgroups in order to determine exclusive
* counts.
*
* For rescan none of the extent is recorded before so
* we just don't add old_refcnt.
*/
if (rescan)
mod = 0;
else
mod = 1;
btrfs_qgroup_update_old_refcnt(qg, seq, mod);
btrfs_qgroup_update_new_refcnt(qg, seq, 1);
list_for_each_entry(glist, &qg->groups, next_group) {
ret = ulist_add(qgroups, glist->group->qgroupid,
ptr_to_u64(glist->group),
GFP_ATOMIC);
if (ret < 0)
return ret;
ret = ulist_add(tmp, glist->group->qgroupid,
ptr_to_u64(glist->group),
GFP_ATOMIC);
if (ret < 0)
return ret;
}
}
}
return 0;
}
/*
* We need to walk forward in our operation tree and account for any roots that
* were deleted after we made this operation.
*/
static int qgroup_account_deleted_refs(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup_operation *oper,
struct ulist *tmp,
struct ulist *qgroups, u64 seq,
int *old_roots)
{
struct ulist_node *unode;
struct ulist_iterator uiter;
struct btrfs_qgroup *qg;
struct btrfs_qgroup_operation *tmp_oper;
struct rb_node *n;
int ret;
ulist_reinit(tmp);
/*
* We only walk forward in the tree since we're only interested in
* removals that happened _after_ our operation.
*/
spin_lock(&fs_info->qgroup_op_lock);
n = rb_next(&oper->n);
spin_unlock(&fs_info->qgroup_op_lock);
if (!n)
return 0;
tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
while (tmp_oper->bytenr == oper->bytenr) {
/*
* If it's not a removal we don't care, additions work out
* properly with our refcnt tracking.
*/
if (tmp_oper->type != BTRFS_QGROUP_OPER_SUB_SHARED &&
tmp_oper->type != BTRFS_QGROUP_OPER_SUB_EXCL)
goto next;
qg = find_qgroup_rb(fs_info, tmp_oper->ref_root);
if (!qg)
goto next;
ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
GFP_ATOMIC);
if (ret) {
if (ret < 0)
return ret;
/*
* We only want to increase old_roots if this qgroup is
* not already in the list of qgroups. If it is already
* there then that means it must have been re-added or
* the delete will be discarded because we had an
* existing ref that we haven't looked up yet. In this
* case we don't want to increase old_roots. So if ret
* == 1 then we know that this is the first time we've
* seen this qgroup and we can bump the old_roots.
*/
(*old_roots)++;
ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg),
GFP_ATOMIC);
if (ret < 0)
return ret;
}
next:
spin_lock(&fs_info->qgroup_op_lock);
n = rb_next(&tmp_oper->n);
spin_unlock(&fs_info->qgroup_op_lock);
if (!n)
break;
tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
}
/* Ok now process the qgroups we found */
ULIST_ITER_INIT(&uiter);
while ((unode = ulist_next(tmp, &uiter))) {
struct btrfs_qgroup_list *glist;
qg = u64_to_ptr(unode->aux);
btrfs_qgroup_update_old_refcnt(qg, seq, 1);
btrfs_qgroup_update_new_refcnt(qg, seq, 1);
list_for_each_entry(glist, &qg->groups, next_group) {
ret = ulist_add(qgroups, glist->group->qgroupid,
ptr_to_u64(glist->group), GFP_ATOMIC);
if (ret < 0)
return ret;
ret = ulist_add(tmp, glist->group->qgroupid,
ptr_to_u64(glist->group), GFP_ATOMIC);
if (ret < 0)
return ret;
}
}
return 0;
}
/* Add refcnt for the newly added reference. */
static int qgroup_calc_new_refcnt(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup_operation *oper,
struct btrfs_qgroup *qgroup,
struct ulist *tmp, struct ulist *qgroups,
u64 seq)
{
struct ulist_node *unode;
struct ulist_iterator uiter;
struct btrfs_qgroup *qg;
int ret;
ulist_reinit(tmp);
ret = ulist_add(qgroups, qgroup->qgroupid, ptr_to_u64(qgroup),
GFP_ATOMIC);
if (ret < 0)
return ret;
ret = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup),
GFP_ATOMIC);
if (ret < 0)
return ret;
ULIST_ITER_INIT(&uiter);
while ((unode = ulist_next(tmp, &uiter))) {
struct btrfs_qgroup_list *glist;
qg = u64_to_ptr(unode->aux);
if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED)
btrfs_qgroup_update_new_refcnt(qg, seq, 1);
else
btrfs_qgroup_update_old_refcnt(qg, seq, 1);
list_for_each_entry(glist, &qg->groups, next_group) {
ret = ulist_add(tmp, glist->group->qgroupid,
ptr_to_u64(glist->group), GFP_ATOMIC);
if (ret < 0)
return ret;
ret = ulist_add(qgroups, glist->group->qgroupid,
ptr_to_u64(glist->group), GFP_ATOMIC);
if (ret < 0)
return ret;
}
}
return 0;
}
#define UPDATE_NEW 0
#define UPDATE_OLD 1
/*
@ -1925,6 +1502,7 @@ static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info,
/*
* Update qgroup rfer/excl counters.
* Rfer update is easy, codes can explain themselves.
*
* Excl update is tricky, the update is split into 2 part.
* Part 1: Possible exclusive <-> sharing detect:
* | A | !A |
@ -2042,419 +1620,6 @@ static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
return 0;
}
/*
* This adjusts the counters for all referenced qgroups if need be.
*/
static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info,
u64 root_to_skip, u64 num_bytes,
struct ulist *qgroups, u64 seq,
int old_roots, int new_roots, int rescan)
{
struct ulist_node *unode;
struct ulist_iterator uiter;
struct btrfs_qgroup *qg;
u64 cur_new_count, cur_old_count;
ULIST_ITER_INIT(&uiter);
while ((unode = ulist_next(qgroups, &uiter))) {
bool dirty = false;
qg = u64_to_ptr(unode->aux);
cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);
/*
* Wasn't referenced before but is now, add to the reference
* counters.
*/
if (cur_old_count == 0 && cur_new_count > 0) {
qg->rfer += num_bytes;
qg->rfer_cmpr += num_bytes;
dirty = true;
}
/*
* Was referenced before but isn't now, subtract from the
* reference counters.
*/
if (cur_old_count > 0 && cur_new_count == 0) {
qg->rfer -= num_bytes;
qg->rfer_cmpr -= num_bytes;
dirty = true;
}
/*
* If our refcount was the same as the roots previously but our
* new count isn't the same as the number of roots now then we
* went from having a exclusive reference on this range to not.
*/
if (old_roots && cur_old_count == old_roots &&
(cur_new_count != new_roots || new_roots == 0)) {
WARN_ON(cur_new_count != new_roots && new_roots == 0);
qg->excl -= num_bytes;
qg->excl_cmpr -= num_bytes;
dirty = true;
}
/*
* If we didn't reference all the roots before but now we do we
* have an exclusive reference to this range.
*/
if ((!old_roots || (old_roots && cur_old_count != old_roots))
&& cur_new_count == new_roots) {
qg->excl += num_bytes;
qg->excl_cmpr += num_bytes;
dirty = true;
}
if (dirty)
qgroup_dirty(fs_info, qg);
}
return 0;
}
/*
* If we removed a data extent and there were other references for that bytenr
* then we need to lookup all referenced roots to make sure we still don't
* reference this bytenr. If we do then we can just discard this operation.
*/
static int check_existing_refs(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_qgroup_operation *oper)
{
struct ulist *roots = NULL;
struct ulist_node *unode;
struct ulist_iterator uiter;
int ret = 0;
ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
oper->elem.seq, &roots);
if (ret < 0)
return ret;
ret = 0;
ULIST_ITER_INIT(&uiter);
while ((unode = ulist_next(roots, &uiter))) {
if (unode->val == oper->ref_root) {
ret = 1;
break;
}
}
ulist_free(roots);
btrfs_put_tree_mod_seq(fs_info, &oper->elem);
return ret;
}
/*
* If we share a reference across multiple roots then we may need to adjust
* various qgroups referenced and exclusive counters. The basic premise is this
*
* 1) We have seq to represent a 0 count. Instead of looping through all of the
* qgroups and resetting their refcount to 0 we just constantly bump this
* sequence number to act as the base reference count. This means that if
* anybody is equal to or below this sequence they were never referenced. We
* jack this sequence up by the number of roots we found each time in order to
* make sure we don't have any overlap.
*
* 2) We first search all the roots that reference the area _except_ the root
* we're acting on currently. This makes up the old_refcnt of all the qgroups
* before.
*
* 3) We walk all of the qgroups referenced by the root we are currently acting
* on, and will either adjust old_refcnt in the case of a removal or the
* new_refcnt in the case of an addition.
*
* 4) Finally we walk all the qgroups that are referenced by this range
* including the root we are acting on currently. We will adjust the counters
* based on the number of roots we had and will have after this operation.
*
* Take this example as an illustration
*
* [qgroup 1/0]
* / | \
* [qg 0/0] [qg 0/1] [qg 0/2]
* \ | /
* [ extent ]
*
* Say we are adding a reference that is covered by qg 0/0. The first step
* would give a refcnt of 1 to qg 0/1 and 0/2 and a refcnt of 2 to qg 1/0 with
* old_roots being 2. Because it is adding new_roots will be 1. We then go
* through qg 0/0 which will get the new_refcnt set to 1 and add 1 to qg 1/0's
* new_refcnt, bringing it to 3. We then walk through all of the qgroups, we
* notice that the old refcnt for qg 0/0 < the new refcnt, so we added a
* reference and thus must add the size to the referenced bytes. Everything
* else is the same so nothing else changes.
*/
static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_qgroup_operation *oper)
{
struct ulist *roots = NULL;
struct ulist *qgroups, *tmp;
struct btrfs_qgroup *qgroup;
struct seq_list elem = SEQ_LIST_INIT(elem);
u64 seq;
int old_roots = 0;
int new_roots = 0;
int ret = 0;
if (oper->elem.seq) {
ret = check_existing_refs(trans, fs_info, oper);
if (ret < 0)
return ret;
if (ret)
return 0;
}
qgroups = ulist_alloc(GFP_NOFS);
if (!qgroups)
return -ENOMEM;
tmp = ulist_alloc(GFP_NOFS);
if (!tmp) {
ulist_free(qgroups);
return -ENOMEM;
}
btrfs_get_tree_mod_seq(fs_info, &elem);
ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq,
&roots);
btrfs_put_tree_mod_seq(fs_info, &elem);
if (ret < 0) {
ulist_free(qgroups);
ulist_free(tmp);
return ret;
}
spin_lock(&fs_info->qgroup_lock);
qgroup = find_qgroup_rb(fs_info, oper->ref_root);
if (!qgroup)
goto out;
seq = fs_info->qgroup_seq;
/*
* So roots is the list of all the roots currently pointing at the
* bytenr, including the ref we are adding if we are adding, or not if
* we are removing a ref. So we pass in the ref_root to skip that root
* in our calculations. We set old_refnct and new_refcnt cause who the
* hell knows what everything looked like before, and it doesn't matter
* except...
*/
ret = qgroup_calc_old_refcnt(fs_info, oper->ref_root, tmp, roots, qgroups,
seq, &old_roots, 0);
if (ret < 0)
goto out;
/*
* Now adjust the refcounts of the qgroups that care about this
* reference, either the old_count in the case of removal or new_count
* in the case of an addition.
*/
ret = qgroup_calc_new_refcnt(fs_info, oper, qgroup, tmp, qgroups,
seq);
if (ret < 0)
goto out;
/*
* ...in the case of removals. If we had a removal before we got around
* to processing this operation then we need to find that guy and count
* his references as if they really existed so we don't end up screwing
* up the exclusive counts. Then whenever we go to process the delete
* everything will be grand and we can account for whatever exclusive
* changes need to be made there. We also have to pass in old_roots so
* we have an accurate count of the roots as it pertains to this
* operations view of the world.
*/
ret = qgroup_account_deleted_refs(fs_info, oper, tmp, qgroups, seq,
&old_roots);
if (ret < 0)
goto out;
/*
* We are adding our root, need to adjust up the number of roots,
* otherwise old_roots is the number of roots we want.
*/
if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
new_roots = old_roots + 1;
} else {
new_roots = old_roots;
old_roots++;
}
/*
* Bump qgroup_seq to avoid seq overlap
* XXX: This makes qgroup_seq mismatch with oper->seq.
*/
fs_info->qgroup_seq += old_roots + 1;
/*
* And now the magic happens, bless Arne for having a pretty elegant
* solution for this.
*/
qgroup_adjust_counters(fs_info, oper->ref_root, oper->num_bytes,
qgroups, seq, old_roots, new_roots, 0);
out:
spin_unlock(&fs_info->qgroup_lock);
ulist_free(qgroups);
ulist_free(roots);
ulist_free(tmp);
return ret;
}
/*
* Process a reference to a shared subtree. This type of operation is
* queued during snapshot removal when we encounter extents which are
* shared between more than one root.
*/
static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_qgroup_operation *oper)
{
struct ulist *roots = NULL;
struct ulist_node *unode;
struct ulist_iterator uiter;
struct btrfs_qgroup_list *glist;
struct ulist *parents;
int ret = 0;
int err;
struct btrfs_qgroup *qg;
u64 root_obj = 0;
struct seq_list elem = SEQ_LIST_INIT(elem);
parents = ulist_alloc(GFP_NOFS);
if (!parents)
return -ENOMEM;
btrfs_get_tree_mod_seq(fs_info, &elem);
ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
elem.seq, &roots);
btrfs_put_tree_mod_seq(fs_info, &elem);
if (ret < 0)
goto out;
if (roots->nnodes != 1)
goto out;
ULIST_ITER_INIT(&uiter);
unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */
/*
* If we find our ref root then that means all refs
* this extent has to the root have not yet been
* deleted. In that case, we do nothing and let the
* last ref for this bytenr drive our update.
*
* This can happen for example if an extent is
* referenced multiple times in a snapshot (clone,
* etc). If we are in the middle of snapshot removal,
* queued updates for such an extent will find the
* root if we have not yet finished removing the
* snapshot.
*/
if (unode->val == oper->ref_root)
goto out;
root_obj = unode->val;
BUG_ON(!root_obj);
spin_lock(&fs_info->qgroup_lock);
qg = find_qgroup_rb(fs_info, root_obj);
if (!qg)
goto out_unlock;
qg->excl += oper->num_bytes;
qg->excl_cmpr += oper->num_bytes;
qgroup_dirty(fs_info, qg);
/*
* Adjust counts for parent groups. First we find all
* parents, then in the 2nd loop we do the adjustment
* while adding parents of the parents to our ulist.
*/
list_for_each_entry(glist, &qg->groups, next_group) {
err = ulist_add(parents, glist->group->qgroupid,
ptr_to_u64(glist->group), GFP_ATOMIC);
if (err < 0) {
ret = err;
goto out_unlock;
}
}
ULIST_ITER_INIT(&uiter);
while ((unode = ulist_next(parents, &uiter))) {
qg = u64_to_ptr(unode->aux);
qg->excl += oper->num_bytes;
qg->excl_cmpr += oper->num_bytes;
qgroup_dirty(fs_info, qg);
/* Add any parents of the parents */
list_for_each_entry(glist, &qg->groups, next_group) {
err = ulist_add(parents, glist->group->qgroupid,
ptr_to_u64(glist->group), GFP_ATOMIC);
if (err < 0) {
ret = err;
goto out_unlock;
}
}
}
out_unlock:
spin_unlock(&fs_info->qgroup_lock);
out:
ulist_free(roots);
ulist_free(parents);
return ret;
}
/*
* btrfs_qgroup_account_ref is called for every ref that is added to or deleted
* from the fs. First, all roots referencing the extent are searched, and
* then the space is accounted accordingly to the different roots. The
* accounting algorithm works in 3 steps documented inline.
*/
static int btrfs_qgroup_account(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_qgroup_operation *oper)
{
int ret = 0;
if (!fs_info->quota_enabled)
return 0;
BUG_ON(!fs_info->quota_root);
mutex_lock(&fs_info->qgroup_rescan_lock);
if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
if (fs_info->qgroup_rescan_progress.objectid <= oper->bytenr) {
mutex_unlock(&fs_info->qgroup_rescan_lock);
return 0;
}
}
mutex_unlock(&fs_info->qgroup_rescan_lock);
ASSERT(is_fstree(oper->ref_root));
trace_btrfs_qgroup_account(oper);
switch (oper->type) {
case BTRFS_QGROUP_OPER_ADD_EXCL:
case BTRFS_QGROUP_OPER_SUB_EXCL:
ret = qgroup_excl_accounting(fs_info, oper);
break;
case BTRFS_QGROUP_OPER_ADD_SHARED:
case BTRFS_QGROUP_OPER_SUB_SHARED:
ret = qgroup_shared_accounting(trans, fs_info, oper);
break;
case BTRFS_QGROUP_OPER_SUB_SUBTREE:
ret = qgroup_subtree_accounting(trans, fs_info, oper);
break;
default:
ASSERT(0);
}
return ret;
}
int
btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
@ -2571,31 +1736,6 @@ cleanup:
return ret;
}
/*
* Needs to be called everytime we run delayed refs, even if there is an error
* in order to cleanup outstanding operations.
*/
int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
struct btrfs_qgroup_operation *oper;
int ret = 0;
while (!list_empty(&trans->qgroup_ref_list)) {
oper = list_first_entry(&trans->qgroup_ref_list,
struct btrfs_qgroup_operation, list);
list_del_init(&oper->list);
if (!ret || !trans->aborted)
ret = btrfs_qgroup_account(trans, fs_info, oper);
spin_lock(&fs_info->qgroup_op_lock);
rb_erase(&oper->n, &fs_info->qgroup_op_tree);
spin_unlock(&fs_info->qgroup_op_lock);
btrfs_put_tree_mod_seq(fs_info, &oper->elem);
kfree(oper);
}
return ret;
}
/*
* called from commit_transaction. Writes all changed qgroups to disk.
*/

Просмотреть файл

@ -22,45 +22,6 @@
#include "ulist.h"
#include "delayed-ref.h"
/*
* A description of the operations, all of these operations only happen when we
* are adding the 1st reference for that subvolume in the case of adding space
* or on the last reference delete in the case of subtraction. The only
* exception is the last one, which is added for confusion.
*
* BTRFS_QGROUP_OPER_ADD_EXCL: adding bytes where this subvolume is the only
* one pointing at the bytes we are adding. This is called on the first
* allocation.
*
* BTRFS_QGROUP_OPER_ADD_SHARED: adding bytes where this bytenr is going to be
* shared between subvols. This is called on the creation of a ref that already
* has refs from a different subvolume, so basically reflink.
*
* BTRFS_QGROUP_OPER_SUB_EXCL: removing bytes where this subvolume is the only
* one referencing the range.
*
* BTRFS_QGROUP_OPER_SUB_SHARED: removing bytes where this subvolume shares with
* refs with other subvolumes.
*/
enum btrfs_qgroup_operation_type {
BTRFS_QGROUP_OPER_ADD_EXCL,
BTRFS_QGROUP_OPER_ADD_SHARED,
BTRFS_QGROUP_OPER_SUB_EXCL,
BTRFS_QGROUP_OPER_SUB_SHARED,
BTRFS_QGROUP_OPER_SUB_SUBTREE,
};
struct btrfs_qgroup_operation {
u64 ref_root;
u64 bytenr;
u64 num_bytes;
u64 seq;
enum btrfs_qgroup_operation_type type;
struct seq_list elem;
struct rb_node n;
struct list_head list;
};
/*
* Record a dirty extent, and info qgroup to update quota on it
* TODO: Use kmem cache to alloc it.
@ -93,11 +54,6 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
struct btrfs_delayed_extent_op;
int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 ref_root,
u64 bytenr, u64 num_bytes,
enum btrfs_qgroup_operation_type type,
int mod_seq);
int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
struct btrfs_qgroup_extent_record
@ -110,11 +66,6 @@ btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
struct ulist *old_roots, struct ulist *new_roots);
int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
void btrfs_remove_qgroup_operation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_qgroup_operation *oper);
int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,

Просмотреть файл

@ -1117,61 +1117,6 @@ DEFINE_EVENT(btrfs__workqueue_done, btrfs_workqueue_destroy,
TP_ARGS(wq)
);
#define show_oper_type(type) \
__print_symbolic(type, \
{ BTRFS_QGROUP_OPER_ADD_EXCL, "OPER_ADD_EXCL" }, \
{ BTRFS_QGROUP_OPER_ADD_SHARED, "OPER_ADD_SHARED" }, \
{ BTRFS_QGROUP_OPER_SUB_EXCL, "OPER_SUB_EXCL" }, \
{ BTRFS_QGROUP_OPER_SUB_SHARED, "OPER_SUB_SHARED" })
DECLARE_EVENT_CLASS(btrfs_qgroup_oper,
TP_PROTO(struct btrfs_qgroup_operation *oper),
TP_ARGS(oper),
TP_STRUCT__entry(
__field( u64, ref_root )
__field( u64, bytenr )
__field( u64, num_bytes )
__field( u64, seq )
__field( int, type )
__field( u64, elem_seq )
),
TP_fast_assign(
__entry->ref_root = oper->ref_root;
__entry->bytenr = oper->bytenr,
__entry->num_bytes = oper->num_bytes;
__entry->seq = oper->seq;
__entry->type = oper->type;
__entry->elem_seq = oper->elem.seq;
),
TP_printk("ref_root = %llu, bytenr = %llu, num_bytes = %llu, "
"seq = %llu, elem.seq = %llu, type = %s",
(unsigned long long)__entry->ref_root,
(unsigned long long)__entry->bytenr,
(unsigned long long)__entry->num_bytes,
(unsigned long long)__entry->seq,
(unsigned long long)__entry->elem_seq,
show_oper_type(__entry->type))
);
DEFINE_EVENT(btrfs_qgroup_oper, btrfs_qgroup_account,
TP_PROTO(struct btrfs_qgroup_operation *oper),
TP_ARGS(oper)
);
DEFINE_EVENT(btrfs_qgroup_oper, btrfs_qgroup_record_ref,
TP_PROTO(struct btrfs_qgroup_operation *oper),
TP_ARGS(oper)
);
#endif /* _TRACE_BTRFS_H */
/* This part must be outside protection */