btrfs: qgroup: Move half of the qgroup accounting time out of commit trans

Just as Filipe pointed out, the most time consuming parts of qgroup are
btrfs_qgroup_account_extents() and
btrfs_qgroup_prepare_account_extents().
Which both call btrfs_find_all_roots() to get old_roots and new_roots
ulist.

What makes things worse is, we're calling that expensive
btrfs_find_all_roots() at transaction committing time with
TRANS_STATE_COMMIT_DOING, which will blocks all incoming transaction.

Such behavior is necessary for @new_roots search as current
btrfs_find_all_roots() can't do it correctly so we do call it just
before switch commit roots.

However for @old_roots search, it's not necessary as such search is
based on commit_root, so it will always be correct and we can move it
out of transaction committing.

This patch moves the @old_roots search part out of
commit_transaction(), so in theory we can half the time qgroup time
consumption at commit_transaction().

But please note that, this won't speedup qgroup overall, the total time
consumption is still the same, just reduce the performance stall.

Cc: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Qu Wenruo 2017-02-15 10:43:03 +08:00 коммит произвёл David Sterba
Родитель 15b34517a6
Коммит fb235dc06f
3 изменённых файлов: 75 добавлений и 11 удалений

Просмотреть файл

@ -550,13 +550,14 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_node *ref, struct btrfs_delayed_ref_node *ref,
struct btrfs_qgroup_extent_record *qrecord, struct btrfs_qgroup_extent_record *qrecord,
u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved, u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
int action, int is_data) int action, int is_data, int *qrecord_inserted_ret)
{ {
struct btrfs_delayed_ref_head *existing; struct btrfs_delayed_ref_head *existing;
struct btrfs_delayed_ref_head *head_ref = NULL; struct btrfs_delayed_ref_head *head_ref = NULL;
struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_delayed_ref_root *delayed_refs;
int count_mod = 1; int count_mod = 1;
int must_insert_reserved = 0; int must_insert_reserved = 0;
int qrecord_inserted = 0;
/* If reserved is provided, it must be a data extent. */ /* If reserved is provided, it must be a data extent. */
BUG_ON(!is_data && reserved); BUG_ON(!is_data && reserved);
@ -623,6 +624,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
if(btrfs_qgroup_trace_extent_nolock(fs_info, if(btrfs_qgroup_trace_extent_nolock(fs_info,
delayed_refs, qrecord)) delayed_refs, qrecord))
kfree(qrecord); kfree(qrecord);
else
qrecord_inserted = 1;
} }
spin_lock_init(&head_ref->lock); spin_lock_init(&head_ref->lock);
@ -650,6 +653,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
atomic_inc(&delayed_refs->num_entries); atomic_inc(&delayed_refs->num_entries);
trans->delayed_ref_updates++; trans->delayed_ref_updates++;
} }
if (qrecord_inserted_ret)
*qrecord_inserted_ret = qrecord_inserted;
return head_ref; return head_ref;
} }
@ -779,6 +784,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *head_ref; struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL; struct btrfs_qgroup_extent_record *record = NULL;
int qrecord_inserted;
BUG_ON(extent_op && extent_op->is_data); BUG_ON(extent_op && extent_op->is_data);
ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS); ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
@ -806,12 +812,15 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
* the spin lock * the spin lock
*/ */
head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record, head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
bytenr, num_bytes, 0, 0, action, 0); bytenr, num_bytes, 0, 0, action, 0,
&qrecord_inserted);
add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr, add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
num_bytes, parent, ref_root, level, action); num_bytes, parent, ref_root, level, action);
spin_unlock(&delayed_refs->lock); spin_unlock(&delayed_refs->lock);
if (qrecord_inserted)
return btrfs_qgroup_trace_extent_post(fs_info, record);
return 0; return 0;
free_head_ref: free_head_ref:
@ -835,6 +844,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *head_ref; struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL; struct btrfs_qgroup_extent_record *record = NULL;
int qrecord_inserted;
ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS); ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
if (!ref) if (!ref)
@ -868,13 +878,15 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
*/ */
head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record, head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
bytenr, num_bytes, ref_root, reserved, bytenr, num_bytes, ref_root, reserved,
action, 1); action, 1, &qrecord_inserted);
add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr, add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
num_bytes, parent, ref_root, owner, offset, num_bytes, parent, ref_root, owner, offset,
action); action);
spin_unlock(&delayed_refs->lock); spin_unlock(&delayed_refs->lock);
if (qrecord_inserted)
return btrfs_qgroup_trace_extent_post(fs_info, record);
return 0; return 0;
} }
@ -897,7 +909,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
add_delayed_ref_head(fs_info, trans, &head_ref->node, NULL, bytenr, add_delayed_ref_head(fs_info, trans, &head_ref->node, NULL, bytenr,
num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD, num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
extent_op->is_data); extent_op->is_data, NULL);
spin_unlock(&delayed_refs->lock); spin_unlock(&delayed_refs->lock);
return 0; return 0;

Просмотреть файл

@ -1464,8 +1464,9 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
while (node) { while (node) {
record = rb_entry(node, struct btrfs_qgroup_extent_record, record = rb_entry(node, struct btrfs_qgroup_extent_record,
node); node);
ret = btrfs_find_all_roots(NULL, fs_info, record->bytenr, 0, if (WARN_ON(!record->old_roots))
&record->old_roots); ret = btrfs_find_all_roots(NULL, fs_info,
record->bytenr, 0, &record->old_roots);
if (ret < 0) if (ret < 0)
break; break;
if (qgroup_to_skip) if (qgroup_to_skip)
@ -1504,6 +1505,28 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
return 0; return 0;
} }
int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup_extent_record *qrecord)
{
struct ulist *old_root;
u64 bytenr = qrecord->bytenr;
int ret;
ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root);
if (ret < 0)
return ret;
/*
* Here we don't need to get the lock of
* trans->transaction->delayed_refs, since inserted qrecord won't
* be deleted, only qrecord->node may be modified (new qrecord insert)
*
* So modifying qrecord->old_roots is safe here
*/
qrecord->old_roots = old_root;
return 0;
}
int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
gfp_t gfp_flag) gfp_t gfp_flag)
@ -1529,9 +1552,11 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
spin_lock(&delayed_refs->lock); spin_lock(&delayed_refs->lock);
ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record); ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
spin_unlock(&delayed_refs->lock); spin_unlock(&delayed_refs->lock);
if (ret > 0) if (ret > 0) {
kfree(record); kfree(record);
return 0; return 0;
}
return btrfs_qgroup_trace_extent_post(fs_info, record);
} }
int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,

Просмотреть файл

@ -94,9 +94,10 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info); struct btrfs_fs_info *fs_info);
/* /*
* Inform qgroup to trace one dirty extent, its info is recorded in @record. * Inform qgroup to trace one dirty extent, its info is recorded in @record.
* So qgroup can account it at commit trans time. * So qgroup can account it at transaction committing time.
* *
* No lock version, caller must acquire delayed ref lock and allocate memory. * No lock version, caller must acquire delayed ref lock and allocated memory,
* then call btrfs_qgroup_trace_extent_post() after exiting lock context.
* *
* Return 0 for success insert * Return 0 for success insert
* Return >0 for existing record, caller can free @record safely. * Return >0 for existing record, caller can free @record safely.
@ -107,12 +108,38 @@ int btrfs_qgroup_trace_extent_nolock(
struct btrfs_delayed_ref_root *delayed_refs, struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_qgroup_extent_record *record); struct btrfs_qgroup_extent_record *record);
/*
* Post handler after qgroup_trace_extent_nolock().
*
* NOTE: Current qgroup does the expensive backref walk at transaction
* committing time with TRANS_STATE_COMMIT_DOING, this blocks incoming
* new transaction.
* This is designed to allow btrfs_find_all_roots() to get correct new_roots
* result.
*
* However for old_roots there is no need to do backref walk at that time,
* since we search commit roots to walk backref and result will always be
* correct.
*
* Due to the nature of no lock version, we can't do backref there.
* So we must call btrfs_qgroup_trace_extent_post() after exiting
* spinlock context.
*
* TODO: If we can fix and prove btrfs_find_all_roots() can get correct result
* using current root, then we can move all expensive backref walk out of
* transaction committing, but not now as qgroup accounting will be wrong again.
*/
int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup_extent_record *qrecord);
/* /*
* Inform qgroup to trace one dirty extent, specified by @bytenr and * Inform qgroup to trace one dirty extent, specified by @bytenr and
* @num_bytes. * @num_bytes.
* So qgroup can account it at commit trans time. * So qgroup can account it at commit trans time.
* *
* Better encapsulated version. * Better encapsulated version, with memory allocation and backref walk for
* commit roots.
* So this can sleep.
* *
* Return 0 if the operation is done. * Return 0 if the operation is done.
* Return <0 for error, like memory allocation failure or invalid parameter * Return <0 for error, like memory allocation failure or invalid parameter