-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAlrDg0UACgkQxWXV+ddt
 WDvtOQ//QCk0zH2EcPQnqOW6HqAfkkDc7D9P51sK1izNM3vBEYtbuPlY6wp3xnJr
 0hbPjGNU7vMC/4SIwSgEdXyAvlpOr1gm9n+w1GcxpkjLa8l4P+2wt9OX0BSzRUMu
 X7LQxqg2zmQibFy4b1MSmDGsO2dxB2eqvVUT/Ir4b56uqkdtValYRWY75APJIZ5l
 6w0Ja3HVvgOX3pVwSmadCpfMEonN4JE+mfHaP8RajAlTGQcUPq8If9w4BtEoWQRl
 QC7kUCCTmp+isnzH7u4EqEQC6XUqEqeuQH+Bli1pNYTvipHY+9EO1dSxHZoCelgk
 M9PpQz8x+N6ZMcMNtJQVifkfN6tAp/acdWBTZtlpqB8nZR4v5bBndS/5TNBMu3/v
 JfhMEsIUz5o2mWz9qUneyK80RoTRkL5SfdgOclx2Yd7K1fKuJsUjmdXaT08BzotS
 5cxTFZu7EcFJyg4eHemjdyRYr3cUkS19P2uIJle9nj3DAMpCvIyL1c1vn5eB/7MN
 3JeRME6AOQcD7sFgVNuYhGVdIBuwHU6kj4mf1WN27YKGdbsaMZsFz7/HH3SsBLOF
 E0p7Q25HcHSrAimcmLTifN+gol8Y5m6dT0Pjuf9y9QbWvHwh7oRq7DVQ3L8hJkGz
 j64b0jlv43P0QorWvsX6/VJBevWedhe1hZtrBhPFSE0CtJ3Ml4Q=
 =wkSk
 -----END PGP SIGNATURE-----

Merge tag 'for-4.17-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "There are a several user visible changes, the rest is mostly invisible
  and continues to clean up the whole code base.

  User visible changes:
   - new mount option nossd_spread (pair for ssd_spread)

   - mount option subvolid will detect junk after the number and fail
     the mount

   - add message after cancelled device replace

   - direct module dependency on libcrc32, removed own crc wrappers

   - removed user space transaction ioctls

   - use lighter locking when reading /proc/self/mounts, RCU instead of
     mutex to avoid unnecessary contention

  Enhancements:
   - skip writeback of last page when truncating file to same size

   - send: do not issue unnecessary truncate operations

   - mount option token specifiers: use %u for unsigned values, more
     validation

   - selftests: more tree block validations

  qgroups:
   - preparatory work for splitting reservation types for data and
     metadata, this should allow for more accurate tracking and fix some
     issues with underflows or do further enhancements

   - split metadata reservations for started and joined transaction so
     they do not get mixed up and are accounted correctly at commit time

   - with the above, it's possible to revert patch that potentially
     deadlocks when trying to make more space by explicitly committing
     when the quota limit is hit

   - fix root item corruption when multiple same source snapshots are
     created with quota enabled

  RAID56:
   - make sure target is identical to source when raid56 rebuild fails
     after dev-replace

   - faster rebuild during scrub, batch by stripes and not
     block-by-block

   - make more use of cached data when rebuilding from a missing device

  Fixes:
   - null pointer deref when device replace target is missing

   - fix fsync after hole punching when using no-holes feature

   - fix lockdep splat when allocating percpu data with wrong GFP flags

  Cleanups, refactoring, core changes:
   - drop redunant parameters from various functions

   - kill and opencode trivial helpers

   - __cold/__exit function annotations

   - dead code removal

   - continued audit and documentation of memory barriers

   - error handling: handle removal from uuid tree

   - error handling: remove handling of impossible condtitons

   - more debugging or error messages

   - updated tracepoints

   - one VLA use removal (and one still left)"

* tag 'for-4.17-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (164 commits)
  btrfs: lift errors from add_extent_changeset to the callers
  Btrfs: print error messages when failing to read trees
  btrfs: user proper type for btrfs_mask_flags flags
  btrfs: split dev-replace locking helpers for read and write
  btrfs: remove stale comments about fs_mutex
  btrfs: use RCU in btrfs_show_devname for device list traversal
  btrfs: update barrier in should_cow_block
  btrfs: use lockdep_assert_held for mutexes
  btrfs: use lockdep_assert_held for spinlocks
  btrfs: Validate child tree block's level and first key
  btrfs: tests/qgroup: Fix wrong tree backref level
  Btrfs: fix copy_items() return value when logging an inode
  Btrfs: fix fsync after hole punching when using no-holes feature
  btrfs: use helper to set ulist aux from a qgroup
  Revert "btrfs: qgroups: Retry after commit on getting EDQUOT"
  btrfs: qgroup: Update trace events for metadata reservation
  btrfs: qgroup: Use root::qgroup_meta_rsv_* to record qgroup meta reserved space
  btrfs: delayed-inode: Use new qgroup meta rsv for delayed inode and item
  btrfs: qgroup: Use separate meta reservation type for delalloc
  btrfs: qgroup: Introduce function to convert META_PREALLOC into META_PERTRANS
  ...
This commit is contained in:
Linus Torvalds 2018-04-04 13:03:38 -07:00
Родитель 547c43d777 57599c7e77
Коммит 94514bbe9e
68 изменённых файлов: 2079 добавлений и 1702 удалений

Просмотреть файл

@ -1,7 +1,6 @@
config BTRFS_FS
tristate "Btrfs filesystem support"
select CRYPTO
select CRYPTO_CRC32C
select LIBCRC32C
select ZLIB_INFLATE
select ZLIB_DEFLATE
select LZO_COMPRESS

Просмотреть файл

@ -10,7 +10,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
uuid-tree.o props.o hash.o free-space-tree.o tree-checker.o
uuid-tree.o props.o free-space-tree.o tree-checker.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o

Просмотреть файл

@ -46,12 +46,12 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
BUG();
}
size = __btrfs_getxattr(inode, name, "", 0);
size = btrfs_getxattr(inode, name, "", 0);
if (size > 0) {
value = kzalloc(size, GFP_KERNEL);
if (!value)
return ERR_PTR(-ENOMEM);
size = __btrfs_getxattr(inode, name, value, size);
size = btrfs_getxattr(inode, name, value, size);
}
if (size > 0) {
acl = posix_acl_from_xattr(&init_user_ns, value, size);
@ -65,9 +65,6 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
return acl;
}
/*
* Needs to be called with fs_mutex held
*/
static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
struct inode *inode, struct posix_acl *acl, int type)
{
@ -101,7 +98,7 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
goto out;
}
ret = __btrfs_setxattr(trans, inode, name, value, size, 0);
ret = btrfs_setxattr(trans, inode, name, value, size, 0);
out:
kfree(value);
@ -127,11 +124,6 @@ int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
return ret;
}
/*
* btrfs_init_acl is already generally called under fs_mutex, so the locking
* stuff has been fixed to work with that. If the locking stuff changes, we
* need to re-evaluate the acl locking stuff.
*/
int btrfs_init_acl(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *dir)
{

Просмотреть файл

@ -170,7 +170,7 @@ int __init btrfs_prelim_ref_init(void)
return 0;
}
void btrfs_prelim_ref_exit(void)
void __cold btrfs_prelim_ref_exit(void)
{
kmem_cache_destroy(btrfs_prelim_ref_cache);
}
@ -738,7 +738,8 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info,
BUG_ON(ref->key_for_search.type);
BUG_ON(!ref->wanted_disk_byte);
eb = read_tree_block(fs_info, ref->wanted_disk_byte, 0);
eb = read_tree_block(fs_info, ref->wanted_disk_byte, 0,
ref->level - 1, NULL);
if (IS_ERR(eb)) {
free_pref(ref);
return PTR_ERR(eb);
@ -773,15 +774,12 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
struct btrfs_delayed_extent_op *extent_op = head->extent_op;
struct btrfs_key key;
struct btrfs_key tmp_op_key;
struct btrfs_key *op_key = NULL;
struct rb_node *n;
int count;
int ret = 0;
if (extent_op && extent_op->update_key) {
if (extent_op && extent_op->update_key)
btrfs_disk_key_to_cpu(&tmp_op_key, &extent_op->key);
op_key = &tmp_op_key;
}
spin_lock(&head->lock);
for (n = rb_first(&head->ref_tree); n; n = rb_next(n)) {
@ -1291,7 +1289,8 @@ again:
ref->level == 0) {
struct extent_buffer *eb;
eb = read_tree_block(fs_info, ref->parent, 0);
eb = read_tree_block(fs_info, ref->parent, 0,
ref->level, NULL);
if (IS_ERR(eb)) {
ret = PTR_ERR(eb);
goto out;

Просмотреть файл

@ -73,7 +73,7 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr);
int __init btrfs_prelim_ref_init(void);
void btrfs_prelim_ref_exit(void);
void __cold btrfs_prelim_ref_exit(void);
struct prelim_ref {
struct rb_node rbnode;

Просмотреть файл

@ -195,7 +195,6 @@ struct btrfs_inode {
/* Hook into fs_info->delayed_iputs */
struct list_head delayed_iput;
long delayed_iput_count;
/*
* To avoid races between lockless (i_mutex not held) direct IO writes
@ -365,6 +364,4 @@ static inline void btrfs_print_data_csum_error(struct btrfs_inode *inode,
logical_start, csum, csum_expected, mirror_num);
}
bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end);
#endif

Просмотреть файл

@ -96,9 +96,9 @@
#include <linux/blkdev.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/crc32c.h>
#include "ctree.h"
#include "disk-io.h"
#include "hash.h"
#include "transaction.h"
#include "extent_io.h"
#include "volumes.h"
@ -1736,7 +1736,7 @@ static int btrfsic_test_for_metadata(struct btrfsic_state *state,
size_t sublen = i ? PAGE_SIZE :
(PAGE_SIZE - BTRFS_CSUM_SIZE);
crc = btrfs_crc32c(crc, data, sublen);
crc = crc32c(crc, data, sublen);
}
btrfs_csum_final(crc, csum);
if (memcmp(csum, h->csum, state->csum_size))

Просмотреть файл

@ -1133,7 +1133,7 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
return ret;
}
void btrfs_exit_compress(void)
void __cold btrfs_exit_compress(void)
{
free_workspaces();
}

Просмотреть файл

@ -76,7 +76,7 @@ struct compressed_bio {
};
void __init btrfs_init_compress(void);
void btrfs_exit_compress(void);
void __cold btrfs_exit_compress(void);
int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
u64 start, struct page **pages,

Просмотреть файл

@ -41,8 +41,6 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
struct extent_buffer *src_buf);
static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
int level, int slot);
static int tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb);
struct btrfs_path *btrfs_alloc_path(void)
{
@ -301,11 +299,6 @@ enum mod_log_op {
MOD_LOG_ROOT_REPLACE,
};
struct tree_mod_move {
int dst_slot;
int nr_items;
};
struct tree_mod_root {
u64 logical;
u8 level;
@ -328,32 +321,15 @@ struct tree_mod_elem {
u64 blockptr;
/* this is used for op == MOD_LOG_MOVE_KEYS */
struct tree_mod_move move;
struct {
int dst_slot;
int nr_items;
} move;
/* this is used for op == MOD_LOG_ROOT_REPLACE */
struct tree_mod_root old_root;
};
static inline void tree_mod_log_read_lock(struct btrfs_fs_info *fs_info)
{
read_lock(&fs_info->tree_mod_log_lock);
}
static inline void tree_mod_log_read_unlock(struct btrfs_fs_info *fs_info)
{
read_unlock(&fs_info->tree_mod_log_lock);
}
static inline void tree_mod_log_write_lock(struct btrfs_fs_info *fs_info)
{
write_lock(&fs_info->tree_mod_log_lock);
}
static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info)
{
write_unlock(&fs_info->tree_mod_log_lock);
}
/*
* Pull a new tree mod seq number for our operation.
*/
@ -373,14 +349,14 @@ static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem)
{
tree_mod_log_write_lock(fs_info);
write_lock(&fs_info->tree_mod_log_lock);
spin_lock(&fs_info->tree_mod_seq_lock);
if (!elem->seq) {
elem->seq = btrfs_inc_tree_mod_seq(fs_info);
list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
}
spin_unlock(&fs_info->tree_mod_seq_lock);
tree_mod_log_write_unlock(fs_info);
write_unlock(&fs_info->tree_mod_log_lock);
return elem->seq;
}
@ -422,7 +398,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
* anything that's lower than the lowest existing (read: blocked)
* sequence number can be removed from the tree.
*/
tree_mod_log_write_lock(fs_info);
write_lock(&fs_info->tree_mod_log_lock);
tm_root = &fs_info->tree_mod_log;
for (node = rb_first(tm_root); node; node = next) {
next = rb_next(node);
@ -432,7 +408,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
rb_erase(node, tm_root);
kfree(tm);
}
tree_mod_log_write_unlock(fs_info);
write_unlock(&fs_info->tree_mod_log_lock);
}
/*
@ -443,7 +419,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
* for root replace operations, or the logical address of the affected
* block for all other operations.
*
* Note: must be called with write lock (tree_mod_log_write_lock).
* Note: must be called with write lock for fs_info::tree_mod_log_lock.
*/
static noinline int
__tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
@ -481,7 +457,7 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
* Determines if logging can be omitted. Returns 1 if it can. Otherwise, it
* returns zero with the tree_mod_log_lock acquired. The caller must hold
* this until all tree mod log insertions are recorded in the rb tree and then
* call tree_mod_log_write_unlock() to release.
* write unlock fs_info::tree_mod_log_lock.
*/
static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb) {
@ -491,9 +467,9 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
if (eb && btrfs_header_level(eb) == 0)
return 1;
tree_mod_log_write_lock(fs_info);
write_lock(&fs_info->tree_mod_log_lock);
if (list_empty(&(fs_info)->tree_mod_seq_list)) {
tree_mod_log_write_unlock(fs_info);
write_unlock(&fs_info->tree_mod_log_lock);
return 1;
}
@ -536,38 +512,34 @@ alloc_tree_mod_elem(struct extent_buffer *eb, int slot,
return tm;
}
static noinline int
tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int slot,
enum mod_log_op op, gfp_t flags)
static noinline int tree_mod_log_insert_key(struct extent_buffer *eb, int slot,
enum mod_log_op op, gfp_t flags)
{
struct tree_mod_elem *tm;
int ret;
if (!tree_mod_need_log(fs_info, eb))
if (!tree_mod_need_log(eb->fs_info, eb))
return 0;
tm = alloc_tree_mod_elem(eb, slot, op, flags);
if (!tm)
return -ENOMEM;
if (tree_mod_dont_log(fs_info, eb)) {
if (tree_mod_dont_log(eb->fs_info, eb)) {
kfree(tm);
return 0;
}
ret = __tree_mod_log_insert(fs_info, tm);
tree_mod_log_write_unlock(fs_info);
ret = __tree_mod_log_insert(eb->fs_info, tm);
write_unlock(&eb->fs_info->tree_mod_log_lock);
if (ret)
kfree(tm);
return ret;
}
static noinline int
tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int dst_slot, int src_slot,
int nr_items)
static noinline int tree_mod_log_insert_move(struct extent_buffer *eb,
int dst_slot, int src_slot, int nr_items)
{
struct tree_mod_elem *tm = NULL;
struct tree_mod_elem **tm_list = NULL;
@ -575,7 +547,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
int i;
int locked = 0;
if (!tree_mod_need_log(fs_info, eb))
if (!tree_mod_need_log(eb->fs_info, eb))
return 0;
tm_list = kcalloc(nr_items, sizeof(struct tree_mod_elem *), GFP_NOFS);
@ -603,7 +575,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
}
}
if (tree_mod_dont_log(fs_info, eb))
if (tree_mod_dont_log(eb->fs_info, eb))
goto free_tms;
locked = 1;
@ -613,26 +585,26 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
* buffer, i.e. dst_slot < src_slot.
*/
for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
ret = __tree_mod_log_insert(fs_info, tm_list[i]);
ret = __tree_mod_log_insert(eb->fs_info, tm_list[i]);
if (ret)
goto free_tms;
}
ret = __tree_mod_log_insert(fs_info, tm);
ret = __tree_mod_log_insert(eb->fs_info, tm);
if (ret)
goto free_tms;
tree_mod_log_write_unlock(fs_info);
write_unlock(&eb->fs_info->tree_mod_log_lock);
kfree(tm_list);
return 0;
free_tms:
for (i = 0; i < nr_items; i++) {
if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
rb_erase(&tm_list[i]->node, &eb->fs_info->tree_mod_log);
kfree(tm_list[i]);
}
if (locked)
tree_mod_log_write_unlock(fs_info);
write_unlock(&eb->fs_info->tree_mod_log_lock);
kfree(tm_list);
kfree(tm);
@ -660,12 +632,10 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
return 0;
}
static noinline int
tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
struct extent_buffer *old_root,
struct extent_buffer *new_root,
int log_removal)
static noinline int tree_mod_log_insert_root(struct extent_buffer *old_root,
struct extent_buffer *new_root, int log_removal)
{
struct btrfs_fs_info *fs_info = old_root->fs_info;
struct tree_mod_elem *tm = NULL;
struct tree_mod_elem **tm_list = NULL;
int nritems = 0;
@ -713,7 +683,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
if (!ret)
ret = __tree_mod_log_insert(fs_info, tm);
tree_mod_log_write_unlock(fs_info);
write_unlock(&fs_info->tree_mod_log_lock);
if (ret)
goto free_tms;
kfree(tm_list);
@ -740,7 +710,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
struct tree_mod_elem *cur = NULL;
struct tree_mod_elem *found = NULL;
tree_mod_log_read_lock(fs_info);
read_lock(&fs_info->tree_mod_log_lock);
tm_root = &fs_info->tree_mod_log;
node = tm_root->rb_node;
while (node) {
@ -768,7 +738,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
break;
}
}
tree_mod_log_read_unlock(fs_info);
read_unlock(&fs_info->tree_mod_log_lock);
return found;
}
@ -849,7 +819,7 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
goto free_tms;
}
tree_mod_log_write_unlock(fs_info);
write_unlock(&fs_info->tree_mod_log_lock);
kfree(tm_list);
return 0;
@ -861,36 +831,13 @@ free_tms:
kfree(tm_list[i]);
}
if (locked)
tree_mod_log_write_unlock(fs_info);
write_unlock(&fs_info->tree_mod_log_lock);
kfree(tm_list);
return ret;
}
static inline void
tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
int dst_offset, int src_offset, int nr_items)
{
int ret;
ret = tree_mod_log_insert_move(fs_info, dst, dst_offset, src_offset,
nr_items);
BUG_ON(ret < 0);
}
static noinline void
tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int slot, int atomic)
{
int ret;
ret = tree_mod_log_insert_key(fs_info, eb, slot,
MOD_LOG_KEY_REPLACE,
atomic ? GFP_ATOMIC : GFP_NOFS);
BUG_ON(ret < 0);
}
static noinline int
tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
static noinline int tree_mod_log_free_eb(struct extent_buffer *eb)
{
struct tree_mod_elem **tm_list = NULL;
int nritems = 0;
@ -900,7 +847,7 @@ tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
if (btrfs_header_level(eb) == 0)
return 0;
if (!tree_mod_need_log(fs_info, NULL))
if (!tree_mod_need_log(eb->fs_info, NULL))
return 0;
nritems = btrfs_header_nritems(eb);
@ -917,11 +864,11 @@ tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
}
}
if (tree_mod_dont_log(fs_info, eb))
if (tree_mod_dont_log(eb->fs_info, eb))
goto free_tms;
ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
tree_mod_log_write_unlock(fs_info);
ret = __tree_mod_log_free_eb(eb->fs_info, tm_list, nritems);
write_unlock(&eb->fs_info->tree_mod_log_lock);
if (ret)
goto free_tms;
kfree(tm_list);
@ -936,17 +883,6 @@ free_tms:
return ret;
}
static noinline void
tree_mod_log_set_root_pointer(struct btrfs_root *root,
struct extent_buffer *new_root_node,
int log_removal)
{
int ret;
ret = tree_mod_log_insert_root(root->fs_info, root->node,
new_root_node, log_removal);
BUG_ON(ret < 0);
}
/*
* check if the tree block can be shared by multiple trees
*/
@ -1173,7 +1109,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
parent_start = buf->start;
extent_buffer_get(cow);
tree_mod_log_set_root_pointer(root, cow, 1);
ret = tree_mod_log_insert_root(root->node, cow, 1);
BUG_ON(ret < 0);
rcu_assign_pointer(root->node, cow);
btrfs_free_tree_block(trans, root, buf, parent_start,
@ -1182,7 +1119,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
add_root_to_dirty_list(root);
} else {
WARN_ON(trans->transid != btrfs_header_generation(parent));
tree_mod_log_insert_key(fs_info, parent, parent_slot,
tree_mod_log_insert_key(parent, parent_slot,
MOD_LOG_KEY_REPLACE, GFP_NOFS);
btrfs_set_node_blockptr(parent, parent_slot,
cow->start);
@ -1190,7 +1127,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
trans->transid);
btrfs_mark_buffer_dirty(parent);
if (last_ref) {
ret = tree_mod_log_free_eb(fs_info, buf);
ret = tree_mod_log_free_eb(buf);
if (ret) {
btrfs_abort_transaction(trans, ret);
return ret;
@ -1211,9 +1148,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
* returns the logical address of the oldest predecessor of the given root.
* entries older than time_seq are ignored.
*/
static struct tree_mod_elem *
__tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb_root, u64 time_seq)
static struct tree_mod_elem *__tree_mod_log_oldest_root(
struct extent_buffer *eb_root, u64 time_seq)
{
struct tree_mod_elem *tm;
struct tree_mod_elem *found = NULL;
@ -1230,7 +1166,7 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
* first operation that's logged for this root.
*/
while (1) {
tm = tree_mod_log_search_oldest(fs_info, root_logical,
tm = tree_mod_log_search_oldest(eb_root->fs_info, root_logical,
time_seq);
if (!looped && !tm)
return NULL;
@ -1279,7 +1215,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
unsigned long p_size = sizeof(struct btrfs_key_ptr);
n = btrfs_header_nritems(eb);
tree_mod_log_read_lock(fs_info);
read_lock(&fs_info->tree_mod_log_lock);
while (tm && tm->seq >= time_seq) {
/*
* all the operations are recorded with the operator used for
@ -1334,7 +1270,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
if (tm->logical != first_tm->logical)
break;
}
tree_mod_log_read_unlock(fs_info);
read_unlock(&fs_info->tree_mod_log_lock);
btrfs_set_header_nritems(eb, n);
}
@ -1418,9 +1354,10 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
struct tree_mod_root *old_root = NULL;
u64 old_generation = 0;
u64 logical;
int level;
eb_root = btrfs_read_lock_root_node(root);
tm = __tree_mod_log_oldest_root(fs_info, eb_root, time_seq);
tm = __tree_mod_log_oldest_root(eb_root, time_seq);
if (!tm)
return eb_root;
@ -1428,15 +1365,17 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
old_root = &tm->old_root;
old_generation = tm->generation;
logical = old_root->logical;
level = old_root->level;
} else {
logical = eb_root->start;
level = btrfs_header_level(eb_root);
}
tm = tree_mod_log_search(fs_info, logical, time_seq);
if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
btrfs_tree_read_unlock(eb_root);
free_extent_buffer(eb_root);
old = read_tree_block(fs_info, logical, 0);
old = read_tree_block(fs_info, logical, 0, level, NULL);
if (WARN_ON(IS_ERR(old) || !extent_buffer_uptodate(old))) {
if (!IS_ERR(old))
free_extent_buffer(old);
@ -1484,7 +1423,7 @@ int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq)
int level;
struct extent_buffer *eb_root = btrfs_root_node(root);
tm = __tree_mod_log_oldest_root(root->fs_info, eb_root, time_seq);
tm = __tree_mod_log_oldest_root(eb_root, time_seq);
if (tm && tm->op == MOD_LOG_ROOT_REPLACE) {
level = tm->old_root.level;
} else {
@ -1502,8 +1441,8 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans,
if (btrfs_is_testing(root->fs_info))
return 0;
/* ensure we can see the force_cow */
smp_rmb();
/* Ensure we can see the FORCE_COW bit */
smp_mb__before_atomic();
/*
* We do not need to cow a block if
@ -1656,6 +1595,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
btrfs_set_lock_blocking(parent);
for (i = start_slot; i <= end_slot; i++) {
struct btrfs_key first_key;
int close = 1;
btrfs_node_key(parent, &disk_key, i);
@ -1665,6 +1605,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
progress_passed = 1;
blocknr = btrfs_node_blockptr(parent, i);
gen = btrfs_node_ptr_generation(parent, i);
btrfs_node_key_to_cpu(parent, &first_key, i);
if (last_block == 0)
last_block = blocknr;
@ -1688,7 +1629,9 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
uptodate = 0;
if (!cur || !uptodate) {
if (!cur) {
cur = read_tree_block(fs_info, blocknr, gen);
cur = read_tree_block(fs_info, blocknr, gen,
parent_level - 1,
&first_key);
if (IS_ERR(cur)) {
return PTR_ERR(cur);
} else if (!extent_buffer_uptodate(cur)) {
@ -1696,7 +1639,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
return -EIO;
}
} else if (!uptodate) {
err = btrfs_read_buffer(cur, gen);
err = btrfs_read_buffer(cur, gen,
parent_level - 1,&first_key);
if (err) {
free_extent_buffer(cur);
return err;
@ -1849,14 +1793,17 @@ read_node_slot(struct btrfs_fs_info *fs_info, struct extent_buffer *parent,
{
int level = btrfs_header_level(parent);
struct extent_buffer *eb;
struct btrfs_key first_key;
if (slot < 0 || slot >= btrfs_header_nritems(parent))
return ERR_PTR(-ENOENT);
BUG_ON(level == 0);
btrfs_node_key_to_cpu(parent, &first_key, slot);
eb = read_tree_block(fs_info, btrfs_node_blockptr(parent, slot),
btrfs_node_ptr_generation(parent, slot));
btrfs_node_ptr_generation(parent, slot),
level - 1, &first_key);
if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
eb = ERR_PTR(-EIO);
@ -1928,7 +1875,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
goto enospc;
}
tree_mod_log_set_root_pointer(root, child, 1);
ret = tree_mod_log_insert_root(root->node, child, 1);
BUG_ON(ret < 0);
rcu_assign_pointer(root->node, child);
add_root_to_dirty_list(root);
@ -2007,8 +1955,9 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
} else {
struct btrfs_disk_key right_key;
btrfs_node_key(right, &right_key, 0);
tree_mod_log_set_node_key(fs_info, parent,
pslot + 1, 0);
ret = tree_mod_log_insert_key(parent, pslot + 1,
MOD_LOG_KEY_REPLACE, GFP_NOFS);
BUG_ON(ret < 0);
btrfs_set_node_key(parent, &right_key, pslot + 1);
btrfs_mark_buffer_dirty(parent);
}
@ -2052,7 +2001,9 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
/* update the parent key to reflect our changes */
struct btrfs_disk_key mid_key;
btrfs_node_key(mid, &mid_key, 0);
tree_mod_log_set_node_key(fs_info, parent, pslot, 0);
ret = tree_mod_log_insert_key(parent, pslot,
MOD_LOG_KEY_REPLACE, GFP_NOFS);
BUG_ON(ret < 0);
btrfs_set_node_key(parent, &mid_key, pslot);
btrfs_mark_buffer_dirty(parent);
}
@ -2153,7 +2104,9 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
struct btrfs_disk_key disk_key;
orig_slot += left_nr;
btrfs_node_key(mid, &disk_key, 0);
tree_mod_log_set_node_key(fs_info, parent, pslot, 0);
ret = tree_mod_log_insert_key(parent, pslot,
MOD_LOG_KEY_REPLACE, GFP_NOFS);
BUG_ON(ret < 0);
btrfs_set_node_key(parent, &disk_key, pslot);
btrfs_mark_buffer_dirty(parent);
if (btrfs_header_nritems(left) > orig_slot) {
@ -2207,8 +2160,9 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
struct btrfs_disk_key disk_key;
btrfs_node_key(right, &disk_key, 0);
tree_mod_log_set_node_key(fs_info, parent,
pslot + 1, 0);
ret = tree_mod_log_insert_key(parent, pslot + 1,
MOD_LOG_KEY_REPLACE, GFP_NOFS);
BUG_ON(ret < 0);
btrfs_set_node_key(parent, &disk_key, pslot + 1);
btrfs_mark_buffer_dirty(parent);
@ -2445,10 +2399,14 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
u64 gen;
struct extent_buffer *b = *eb_ret;
struct extent_buffer *tmp;
struct btrfs_key first_key;
int ret;
int parent_level;
blocknr = btrfs_node_blockptr(b, slot);
gen = btrfs_node_ptr_generation(b, slot);
parent_level = btrfs_header_level(b);
btrfs_node_key_to_cpu(b, &first_key, slot);
tmp = find_extent_buffer(fs_info, blocknr);
if (tmp) {
@ -2467,7 +2425,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
btrfs_set_path_blocking(p);
/* now we're allowed to do a blocking uptodate check */
ret = btrfs_read_buffer(tmp, gen);
ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key);
if (!ret) {
*eb_ret = tmp;
return 0;
@ -2494,7 +2452,8 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
btrfs_release_path(p);
ret = -EAGAIN;
tmp = read_tree_block(fs_info, blocknr, 0);
tmp = read_tree_block(fs_info, blocknr, 0, parent_level - 1,
&first_key);
if (!IS_ERR(tmp)) {
/*
* If the read above didn't mark this buffer up to date,
@ -3161,13 +3120,17 @@ static void fixup_low_keys(struct btrfs_fs_info *fs_info,
{
int i;
struct extent_buffer *t;
int ret;
for (i = level; i < BTRFS_MAX_LEVEL; i++) {
int tslot = path->slots[i];
if (!path->nodes[i])
break;
t = path->nodes[i];
tree_mod_log_set_node_key(fs_info, t, tslot, 1);
ret = tree_mod_log_insert_key(t, tslot, MOD_LOG_KEY_REPLACE,
GFP_ATOMIC);
BUG_ON(ret < 0);
btrfs_set_node_key(t, key, tslot);
btrfs_mark_buffer_dirty(path->nodes[i]);
if (tslot != 0)
@ -3264,8 +3227,8 @@ static int push_node_left(struct btrfs_trans_handle *trans,
if (push_items < src_nritems) {
/*
* don't call tree_mod_log_eb_move here, key removal was already
* fully logged by tree_mod_log_eb_copy above.
* Don't call tree_mod_log_insert_move here, key removal was
* already fully logged by tree_mod_log_eb_copy above.
*/
memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
btrfs_node_key_ptr_offset(push_items),
@ -3320,7 +3283,8 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
if (max_push < push_items)
push_items = max_push;
tree_mod_log_eb_move(fs_info, dst, push_items, 0, dst_nritems);
ret = tree_mod_log_insert_move(dst, push_items, 0, dst_nritems);
BUG_ON(ret < 0);
memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
btrfs_node_key_ptr_offset(0),
(dst_nritems) *
@ -3363,6 +3327,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
struct extent_buffer *c;
struct extent_buffer *old;
struct btrfs_disk_key lower_key;
int ret;
BUG_ON(path->nodes[level]);
BUG_ON(path->nodes[level-1] != root->node);
@ -3401,7 +3366,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(c);
old = root->node;
tree_mod_log_set_root_pointer(root, c, 0);
ret = tree_mod_log_insert_root(root->node, c, 0);
BUG_ON(ret < 0);
rcu_assign_pointer(root->node, c);
/* the super has an extra ref to root->node */
@ -3438,17 +3404,19 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
BUG_ON(slot > nritems);
BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(fs_info));
if (slot != nritems) {
if (level)
tree_mod_log_eb_move(fs_info, lower, slot + 1,
slot, nritems - slot);
if (level) {
ret = tree_mod_log_insert_move(lower, slot + 1, slot,
nritems - slot);
BUG_ON(ret < 0);
}
memmove_extent_buffer(lower,
btrfs_node_key_ptr_offset(slot + 1),
btrfs_node_key_ptr_offset(slot),
(nritems - slot) * sizeof(struct btrfs_key_ptr));
}
if (level) {
ret = tree_mod_log_insert_key(fs_info, lower, slot,
MOD_LOG_KEY_ADD, GFP_NOFS);
ret = tree_mod_log_insert_key(lower, slot, MOD_LOG_KEY_ADD,
GFP_NOFS);
BUG_ON(ret < 0);
}
btrfs_set_node_key(lower, key, slot);
@ -4911,17 +4879,19 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
nritems = btrfs_header_nritems(parent);
if (slot != nritems - 1) {
if (level)
tree_mod_log_eb_move(fs_info, parent, slot,
slot + 1, nritems - slot - 1);
if (level) {
ret = tree_mod_log_insert_move(parent, slot, slot + 1,
nritems - slot - 1);
BUG_ON(ret < 0);
}
memmove_extent_buffer(parent,
btrfs_node_key_ptr_offset(slot),
btrfs_node_key_ptr_offset(slot + 1),
sizeof(struct btrfs_key_ptr) *
(nritems - slot - 1));
} else if (level) {
ret = tree_mod_log_insert_key(fs_info, parent, slot,
MOD_LOG_KEY_REMOVE, GFP_NOFS);
ret = tree_mod_log_insert_key(parent, slot, MOD_LOG_KEY_REMOVE,
GFP_NOFS);
BUG_ON(ret < 0);
}
@ -5145,9 +5115,6 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
* into min_key, so you can call btrfs_search_slot with cow=1 on the
* key and get a writable path.
*
* This does lock as it descends, and path->keep_locks should be set
* to 1 by the caller.
*
* This honors path->lowest_level to prevent descent past a given level
* of the tree.
*

Просмотреть файл

@ -40,6 +40,7 @@
#include <linux/sizes.h>
#include <linux/dynamic_debug.h>
#include <linux/refcount.h>
#include <linux/crc32c.h>
#include "extent_io.h"
#include "extent_map.h"
#include "async-thread.h"
@ -65,6 +66,8 @@ struct btrfs_ordered_sum;
#define BTRFS_MAX_LEVEL 8
#define BTRFS_OLDEST_GENERATION 0ULL
#define BTRFS_COMPAT_EXTENT_TREE_V0
/*
@ -86,9 +89,9 @@ struct btrfs_ordered_sum;
*/
#define BTRFS_LINK_MAX 65535U
/* four bytes for CRC32 */
static const int btrfs_csum_sizes[] = { 4 };
/* four bytes for CRC32 */
#define BTRFS_EMPTY_DIR_SIZE 0
/* ioprio of readahead is set to idle */
@ -98,6 +101,7 @@ static const int btrfs_csum_sizes[] = { 4 };
#define BTRFS_MAX_EXTENT_SIZE SZ_128M
/*
* Count how many BTRFS_MAX_EXTENT_SIZE cover the @size
*/
@ -381,8 +385,9 @@ struct btrfs_dev_replace {
/* For raid type sysfs entries */
struct raid_kobject {
int raid_type;
u64 flags;
struct kobject kobj;
struct list_head list;
};
struct btrfs_space_info {
@ -707,7 +712,6 @@ struct btrfs_delayed_root;
#define BTRFS_FS_LOG_RECOVERING 4
#define BTRFS_FS_OPEN 5
#define BTRFS_FS_QUOTA_ENABLED 6
#define BTRFS_FS_QUOTA_ENABLING 7
#define BTRFS_FS_UPDATE_UUID_TREE_GEN 9
#define BTRFS_FS_CREATING_FREE_SPACE_TREE 10
#define BTRFS_FS_BTREE_ERR 11
@ -788,7 +792,7 @@ struct btrfs_fs_info {
unsigned long pending_changes;
unsigned long compress_type:4;
unsigned int compress_level;
int commit_interval;
u32 commit_interval;
/*
* It is a suggestive number, the read side is safe even it gets a
* wrong number because we will write out the data into a regular
@ -877,7 +881,6 @@ struct btrfs_fs_info {
struct rb_root tree_mod_log;
atomic_t async_delalloc_pages;
atomic_t open_ioctl_trans;
/*
* this is used to protect the following list -- ordered_roots.
@ -935,9 +938,11 @@ struct btrfs_fs_info {
struct btrfs_workqueue *extent_workers;
struct task_struct *transaction_kthread;
struct task_struct *cleaner_kthread;
int thread_pool_size;
u32 thread_pool_size;
struct kobject *space_info_kobj;
struct list_head pending_raid_kobjs;
spinlock_t pending_raid_kobjs_lock; /* uncontended */
u64 total_pinned;
@ -952,9 +957,9 @@ struct btrfs_fs_info {
struct btrfs_fs_devices *fs_devices;
/*
* the space_info list is almost entirely read only. It only changes
* when we add a new raid type to the FS, and that happens
* very rarely. RCU is used to protect it.
* The space_info list is effectively read only after initial
* setup. It is populated at mount time and cleaned up after
* all block groups are removed. RCU is used to protect it.
*/
struct list_head space_info;
@ -993,8 +998,8 @@ struct btrfs_fs_info {
struct btrfs_balance_control *balance_ctl;
wait_queue_head_t balance_wait_q;
unsigned data_chunk_allocations;
unsigned metadata_ratio;
u32 data_chunk_allocations;
u32 metadata_ratio;
void *bdev_holder;
@ -1260,12 +1265,13 @@ struct btrfs_root {
struct btrfs_subvolume_writers *subv_writers;
atomic_t will_be_snapshotted;
/* For qgroup metadata space reserve */
atomic64_t qgroup_meta_rsv;
/* For qgroup metadata reserved space */
spinlock_t qgroup_meta_rsv_lock;
u64 qgroup_meta_rsv_pertrans;
u64 qgroup_meta_rsv_prealloc;
};
struct btrfs_file_private {
struct btrfs_trans_handle *trans;
void *filldir_buf;
};
@ -2554,6 +2560,20 @@ BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_cursor_right,
((unsigned long)(BTRFS_LEAF_DATA_OFFSET + \
btrfs_item_offset_nr(leaf, slot)))
static inline u64 btrfs_name_hash(const char *name, int len)
{
return crc32c((u32)~1, name, len);
}
/*
* Figure the key offset of an extended inode ref
*/
static inline u64 btrfs_extref_hash(u64 parent_objectid, const char *name,
int len)
{
return (u64) crc32c(parent_objectid, name, len);
}
static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
{
return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) &&
@ -2608,7 +2628,7 @@ void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr);
void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg);
void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, unsigned long count);
unsigned long count);
int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info,
unsigned long count, u64 transid, int wait);
int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len);
@ -2628,7 +2648,6 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
u64 bytenr);
void btrfs_get_block_group(struct btrfs_block_group_cache *cache);
void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
int get_block_group_index(struct btrfs_block_group_cache *cache);
struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 parent, u64 root_objectid,
@ -2668,15 +2687,13 @@ int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info,
u64 start, u64 len);
void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info);
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans);
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner, u64 offset);
int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans);
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
@ -2688,6 +2705,7 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr);
int btrfs_make_block_group(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytes_used,
u64 type, u64 chunk_offset, u64 size);
void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info);
struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
struct btrfs_fs_info *fs_info,
const u64 chunk_offset);
@ -2697,8 +2715,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache);
void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *cache);
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans);
u64 btrfs_data_alloc_profile(struct btrfs_fs_info *fs_info);
u64 btrfs_metadata_alloc_profile(struct btrfs_fs_info *fs_info);
u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info);
@ -2730,11 +2747,10 @@ int btrfs_check_data_free_space(struct inode *inode,
void btrfs_free_reserved_data_space(struct inode *inode,
struct extent_changeset *reserved, u64 start, u64 len);
void btrfs_delalloc_release_space(struct inode *inode,
struct extent_changeset *reserved, u64 start, u64 len);
struct extent_changeset *reserved,
u64 start, u64 len, bool qgroup_free);
void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
u64 len);
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans);
int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode);
@ -2745,10 +2761,12 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
u64 *qgroup_reserved, bool use_global_rsv);
void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes);
void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
bool qgroup_free);
int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes);
void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes);
void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
bool qgroup_free);
int btrfs_delalloc_reserve_space(struct inode *inode,
struct extent_changeset **reserved, u64 start, u64 len);
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type);
@ -2792,7 +2810,6 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range);
int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int __get_raid_index(u64 flags);
int btrfs_start_write_no_snapshotting(struct btrfs_root *root);
void btrfs_end_write_no_snapshotting(struct btrfs_root *root);
void btrfs_wait_for_snapshot_creation(struct btrfs_root *root);
@ -3195,8 +3212,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb);
void btrfs_destroy_inode(struct inode *inode);
int btrfs_drop_inode(struct inode *inode);
int __init btrfs_init_cachep(void);
void btrfs_destroy_cachep(void);
long btrfs_ioctl_trans_end(struct file *file);
void __cold btrfs_destroy_cachep(void);
struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
struct btrfs_root *root, int *was_new);
struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
@ -3246,7 +3262,7 @@ ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
/* file.c */
int __init btrfs_auto_defrag_init(void);
void btrfs_auto_defrag_exit(void);
void __cold btrfs_auto_defrag_exit(void);
int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode);
int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info);
@ -3281,25 +3297,23 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
/* sysfs.c */
int __init btrfs_init_sysfs(void);
void btrfs_exit_sysfs(void);
void __cold btrfs_exit_sysfs(void);
int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info);
void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info);
/* xattr.c */
ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
/* super.c */
int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
unsigned long new_flags);
int btrfs_sync_fs(struct super_block *sb, int wait);
static inline __printf(2, 3)
static inline __printf(2, 3) __cold
void btrfs_no_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
{
}
#ifdef CONFIG_PRINTK
__printf(2, 3)
__cold
void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...);
#else
#define btrfs_printk(fs_info, fmt, args...) \

Просмотреть файл

@ -23,6 +23,7 @@
#include "disk-io.h"
#include "transaction.h"
#include "ctree.h"
#include "qgroup.h"
#define BTRFS_DELAYED_WRITEBACK 512
#define BTRFS_DELAYED_BACKGROUND 128
@ -42,7 +43,7 @@ int __init btrfs_delayed_inode_init(void)
return 0;
}
void btrfs_delayed_inode_exit(void)
void __cold btrfs_delayed_inode_exit(void)
{
kmem_cache_destroy(delayed_node_cache);
}
@ -552,11 +553,12 @@ static struct btrfs_delayed_item *__btrfs_next_delayed_item(
}
static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_root *root,
struct btrfs_delayed_item *item)
{
struct btrfs_block_rsv *src_rsv;
struct btrfs_block_rsv *dst_rsv;
struct btrfs_fs_info *fs_info = root->fs_info;
u64 num_bytes;
int ret;
@ -578,15 +580,17 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
return ret;
}
static void btrfs_delayed_item_release_metadata(struct btrfs_fs_info *fs_info,
static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
struct btrfs_delayed_item *item)
{
struct btrfs_block_rsv *rsv;
struct btrfs_fs_info *fs_info = root->fs_info;
if (!item->bytes_reserved)
return;
rsv = &fs_info->delayed_block_rsv;
btrfs_qgroup_convert_reserved_meta(root, item->bytes_reserved);
trace_btrfs_space_reservation(fs_info, "delayed_item",
item->key.objectid, item->bytes_reserved,
0);
@ -611,6 +615,9 @@ static int btrfs_delayed_inode_reserve_metadata(
num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
if (ret < 0)
return ret;
/*
* btrfs_dirty_inode will update the inode under btrfs_join_transaction
* which doesn't reserve space for speed. This is a problem since we
@ -630,8 +637,10 @@ static int btrfs_delayed_inode_reserve_metadata(
* EAGAIN to make us stop the transaction we have, so return
* ENOSPC instead so that btrfs_dirty_inode knows what to do.
*/
if (ret == -EAGAIN)
if (ret == -EAGAIN) {
ret = -ENOSPC;
btrfs_qgroup_free_meta_prealloc(root, num_bytes);
}
if (!ret) {
node->bytes_reserved = num_bytes;
trace_btrfs_space_reservation(fs_info,
@ -653,7 +662,8 @@ static int btrfs_delayed_inode_reserve_metadata(
}
static void btrfs_delayed_inode_release_metadata(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_node *node)
struct btrfs_delayed_node *node,
bool qgroup_free)
{
struct btrfs_block_rsv *rsv;
@ -665,6 +675,12 @@ static void btrfs_delayed_inode_release_metadata(struct btrfs_fs_info *fs_info,
node->inode_id, node->bytes_reserved, 0);
btrfs_block_rsv_release(fs_info, rsv,
node->bytes_reserved);
if (qgroup_free)
btrfs_qgroup_free_meta_prealloc(node->root,
node->bytes_reserved);
else
btrfs_qgroup_convert_reserved_meta(node->root,
node->bytes_reserved);
node->bytes_reserved = 0;
}
@ -766,7 +782,7 @@ static int btrfs_batch_insert_items(struct btrfs_root *root,
curr->data_len);
slot++;
btrfs_delayed_item_release_metadata(fs_info, curr);
btrfs_delayed_item_release_metadata(root, curr);
list_del(&curr->tree_list);
btrfs_release_delayed_item(curr);
@ -788,7 +804,6 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct btrfs_delayed_item *delayed_item)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *leaf;
char *ptr;
int ret;
@ -806,7 +821,7 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
delayed_item->data_len);
btrfs_mark_buffer_dirty(leaf);
btrfs_delayed_item_release_metadata(fs_info, delayed_item);
btrfs_delayed_item_release_metadata(root, delayed_item);
return 0;
}
@ -858,7 +873,6 @@ static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct btrfs_delayed_item *item)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_delayed_item *curr, *next;
struct extent_buffer *leaf;
struct btrfs_key key;
@ -908,7 +922,7 @@ static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
goto out;
list_for_each_entry_safe(curr, next, &head, tree_list) {
btrfs_delayed_item_release_metadata(fs_info, curr);
btrfs_delayed_item_release_metadata(root, curr);
list_del(&curr->tree_list);
btrfs_release_delayed_item(curr);
}
@ -1051,7 +1065,7 @@ out:
no_iref:
btrfs_release_path(path);
err_out:
btrfs_delayed_inode_release_metadata(fs_info, node);
btrfs_delayed_inode_release_metadata(fs_info, node, (ret < 0));
btrfs_release_delayed_inode(node);
return ret;
@ -1115,9 +1129,9 @@ __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
* Returns < 0 on error and returns with an aborted transaction with any
* outstanding delayed items cleaned up.
*/
static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, int nr)
static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_delayed_root *delayed_root;
struct btrfs_delayed_node *curr_node, *prev_node;
struct btrfs_path *path;
@ -1162,16 +1176,14 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
return ret;
}
int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
int btrfs_run_delayed_items(struct btrfs_trans_handle *trans)
{
return __btrfs_run_delayed_items(trans, fs_info, -1);
return __btrfs_run_delayed_items(trans, -1);
}
int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, int nr)
int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans, int nr)
{
return __btrfs_run_delayed_items(trans, fs_info, nr);
return __btrfs_run_delayed_items(trans, nr);
}
int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
@ -1443,7 +1455,7 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
btrfs_set_stack_dir_type(dir_item, type);
memcpy((char *)(dir_item + 1), name, name_len);
ret = btrfs_delayed_item_reserve_metadata(trans, fs_info, delayed_item);
ret = btrfs_delayed_item_reserve_metadata(trans, dir->root, delayed_item);
/*
* we have reserved enough space when we start a new transaction,
* so reserving metadata failure is impossible
@ -1480,7 +1492,7 @@ static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info,
return 1;
}
btrfs_delayed_item_release_metadata(fs_info, item);
btrfs_delayed_item_release_metadata(node->root, item);
btrfs_release_delayed_item(item);
mutex_unlock(&node->mutex);
return 0;
@ -1515,7 +1527,7 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
item->key = item_key;
ret = btrfs_delayed_item_reserve_metadata(trans, fs_info, item);
ret = btrfs_delayed_item_reserve_metadata(trans, dir->root, item);
/*
* we have reserved enough space when we start a new transaction,
* so reserving metadata failure is impossible.
@ -1880,7 +1892,7 @@ static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
mutex_lock(&delayed_node->mutex);
curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
while (curr_item) {
btrfs_delayed_item_release_metadata(fs_info, curr_item);
btrfs_delayed_item_release_metadata(root, curr_item);
prev_item = curr_item;
curr_item = __btrfs_next_delayed_item(prev_item);
btrfs_release_delayed_item(prev_item);
@ -1888,7 +1900,7 @@ static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
while (curr_item) {
btrfs_delayed_item_release_metadata(fs_info, curr_item);
btrfs_delayed_item_release_metadata(root, curr_item);
prev_item = curr_item;
curr_item = __btrfs_next_delayed_item(prev_item);
btrfs_release_delayed_item(prev_item);
@ -1898,7 +1910,7 @@ static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
btrfs_release_delayed_iref(delayed_node);
if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
btrfs_delayed_inode_release_metadata(fs_info, delayed_node);
btrfs_delayed_inode_release_metadata(fs_info, delayed_node, false);
btrfs_release_delayed_inode(delayed_node);
}
mutex_unlock(&delayed_node->mutex);

Просмотреть файл

@ -111,10 +111,8 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode);
int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, int nr);
int btrfs_run_delayed_items(struct btrfs_trans_handle *trans);
int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans, int nr);
void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info);
@ -151,7 +149,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
/* for init */
int __init btrfs_delayed_inode_init(void);
void btrfs_delayed_inode_exit(void);
void __cold btrfs_delayed_inode_exit(void);
/* for debugging */
void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info);

Просмотреть файл

@ -216,7 +216,7 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_root *delayed_refs;
delayed_refs = &trans->transaction->delayed_refs;
assert_spin_locked(&delayed_refs->lock);
lockdep_assert_held(&delayed_refs->lock);
if (mutex_trylock(&head->mutex))
return 0;
@ -239,7 +239,7 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head,
struct btrfs_delayed_ref_node *ref)
{
assert_spin_locked(&head->lock);
lockdep_assert_held(&head->lock);
rb_erase(&ref->ref_node, &head->ref_tree);
RB_CLEAR_NODE(&ref->ref_node);
if (!list_empty(&ref->add_list))
@ -307,7 +307,7 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
struct rb_node *node;
u64 seq = 0;
assert_spin_locked(&head->lock);
lockdep_assert_held(&head->lock);
if (RB_EMPTY_ROOT(&head->ref_tree))
return;
@ -930,7 +930,7 @@ btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 byt
return find_ref_head(&delayed_refs->href_root, bytenr, 0);
}
void btrfs_delayed_ref_exit(void)
void __cold btrfs_delayed_ref_exit(void)
{
kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);

Просмотреть файл

@ -204,7 +204,7 @@ extern struct kmem_cache *btrfs_delayed_data_ref_cachep;
extern struct kmem_cache *btrfs_delayed_extent_op_cachep;
int __init btrfs_delayed_ref_init(void);
void btrfs_delayed_ref_exit(void);
void __cold btrfs_delayed_ref_exit(void);
static inline struct btrfs_delayed_extent_op *
btrfs_alloc_delayed_extent_op(void)

Просмотреть файл

@ -44,7 +44,6 @@ static void btrfs_dev_replace_update_device_in_mapping_tree(
struct btrfs_fs_info *fs_info,
struct btrfs_device *srcdev,
struct btrfs_device *tgtdev);
static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info);
static int btrfs_dev_replace_kthread(void *data);
static int btrfs_dev_replace_continue_on_mount(struct btrfs_fs_info *fs_info);
@ -174,8 +173,14 @@ no_valid_dev_replace_entry_found:
}
set_bit(BTRFS_DEV_STATE_REPLACE_TGT,
&dev_replace->tgtdev->dev_state);
btrfs_init_dev_replace_tgtdev_for_resume(fs_info,
dev_replace->tgtdev);
WARN_ON(fs_info->fs_devices->rw_devices == 0);
dev_replace->tgtdev->io_width = fs_info->sectorsize;
dev_replace->tgtdev->io_align = fs_info->sectorsize;
dev_replace->tgtdev->sector_size = fs_info->sectorsize;
dev_replace->tgtdev->fs_info = fs_info;
set_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
&dev_replace->tgtdev->dev_state);
}
break;
}
@ -200,13 +205,13 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
struct btrfs_dev_replace_item *ptr;
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
btrfs_dev_replace_lock(dev_replace, 0);
btrfs_dev_replace_read_lock(dev_replace);
if (!dev_replace->is_valid ||
!dev_replace->item_needs_writeback) {
btrfs_dev_replace_unlock(dev_replace, 0);
btrfs_dev_replace_read_unlock(dev_replace);
return 0;
}
btrfs_dev_replace_unlock(dev_replace, 0);
btrfs_dev_replace_read_unlock(dev_replace);
key.objectid = 0;
key.type = BTRFS_DEV_REPLACE_KEY;
@ -264,7 +269,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
ptr = btrfs_item_ptr(eb, path->slots[0],
struct btrfs_dev_replace_item);
btrfs_dev_replace_lock(dev_replace, 1);
btrfs_dev_replace_write_lock(dev_replace);
if (dev_replace->srcdev)
btrfs_set_dev_replace_src_devid(eb, ptr,
dev_replace->srcdev->devid);
@ -287,7 +292,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
btrfs_set_dev_replace_cursor_right(eb, ptr,
dev_replace->cursor_right);
dev_replace->item_needs_writeback = 0;
btrfs_dev_replace_unlock(dev_replace, 1);
btrfs_dev_replace_write_unlock(dev_replace);
btrfs_mark_buffer_dirty(eb);
@ -307,7 +312,7 @@ void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info)
static char* btrfs_dev_name(struct btrfs_device *device)
{
if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
if (!device || test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
return "<missing disk>";
else
return rcu_str_deref(device->name);
@ -352,7 +357,7 @@ int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
return PTR_ERR(trans);
}
btrfs_dev_replace_lock(dev_replace, 1);
btrfs_dev_replace_write_lock(dev_replace);
switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
@ -390,7 +395,7 @@ int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
dev_replace->item_needs_writeback = 1;
atomic64_set(&dev_replace->num_write_errors, 0);
atomic64_set(&dev_replace->num_uncorrectable_read_errors, 0);
btrfs_dev_replace_unlock(dev_replace, 1);
btrfs_dev_replace_write_unlock(dev_replace);
ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device);
if (ret)
@ -402,7 +407,7 @@ int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
btrfs_dev_replace_lock(dev_replace, 1);
btrfs_dev_replace_write_lock(dev_replace);
goto leave;
}
@ -426,7 +431,7 @@ int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
leave:
dev_replace->srcdev = NULL;
dev_replace->tgtdev = NULL;
btrfs_dev_replace_unlock(dev_replace, 1);
btrfs_dev_replace_write_unlock(dev_replace);
btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
return ret;
}
@ -493,18 +498,18 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
/* don't allow cancel or unmount to disturb the finishing procedure */
mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
btrfs_dev_replace_lock(dev_replace, 0);
btrfs_dev_replace_read_lock(dev_replace);
/* was the operation canceled, or is it finished? */
if (dev_replace->replace_state !=
BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED) {
btrfs_dev_replace_unlock(dev_replace, 0);
btrfs_dev_replace_read_unlock(dev_replace);
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return 0;
}
tgt_device = dev_replace->tgtdev;
src_device = dev_replace->srcdev;
btrfs_dev_replace_unlock(dev_replace, 0);
btrfs_dev_replace_read_unlock(dev_replace);
/*
* flush all outstanding I/O and inode extent mappings before the
@ -529,7 +534,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
/* keep away write_all_supers() during the finishing procedure */
mutex_lock(&fs_info->fs_devices->device_list_mutex);
mutex_lock(&fs_info->chunk_mutex);
btrfs_dev_replace_lock(dev_replace, 1);
btrfs_dev_replace_write_lock(dev_replace);
dev_replace->replace_state =
scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED
: BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED;
@ -549,7 +554,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
btrfs_dev_name(src_device),
src_device->devid,
rcu_str_deref(tgt_device->name), scrub_ret);
btrfs_dev_replace_unlock(dev_replace, 1);
btrfs_dev_replace_write_unlock(dev_replace);
mutex_unlock(&fs_info->chunk_mutex);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
mutex_unlock(&uuid_mutex);
@ -586,7 +591,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
fs_info->fs_devices->rw_devices++;
btrfs_dev_replace_unlock(dev_replace, 1);
btrfs_dev_replace_write_unlock(dev_replace);
btrfs_rm_dev_replace_blocked(fs_info);
@ -679,7 +684,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
{
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
btrfs_dev_replace_lock(dev_replace, 0);
btrfs_dev_replace_read_lock(dev_replace);
/* even if !dev_replace_is_valid, the values are good enough for
* the replace_status ioctl */
args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
@ -691,41 +696,36 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
args->status.num_uncorrectable_read_errors =
atomic64_read(&dev_replace->num_uncorrectable_read_errors);
args->status.progress_1000 = btrfs_dev_replace_progress(fs_info);
btrfs_dev_replace_unlock(dev_replace, 0);
btrfs_dev_replace_read_unlock(dev_replace);
}
int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_dev_replace_args *args)
{
args->result = __btrfs_dev_replace_cancel(fs_info);
return 0;
}
static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
{
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
struct btrfs_device *tgt_device = NULL;
struct btrfs_device *src_device = NULL;
struct btrfs_trans_handle *trans;
struct btrfs_root *root = fs_info->tree_root;
u64 result;
int result;
int ret;
if (sb_rdonly(fs_info->sb))
return -EROFS;
mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
btrfs_dev_replace_lock(dev_replace, 1);
btrfs_dev_replace_write_lock(dev_replace);
switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED;
btrfs_dev_replace_unlock(dev_replace, 1);
btrfs_dev_replace_write_unlock(dev_replace);
goto leave;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
tgt_device = dev_replace->tgtdev;
src_device = dev_replace->srcdev;
dev_replace->tgtdev = NULL;
dev_replace->srcdev = NULL;
break;
@ -733,7 +733,7 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED;
dev_replace->time_stopped = get_seconds();
dev_replace->item_needs_writeback = 1;
btrfs_dev_replace_unlock(dev_replace, 1);
btrfs_dev_replace_write_unlock(dev_replace);
btrfs_scrub_cancel(fs_info);
trans = btrfs_start_transaction(root, 0);
@ -743,6 +743,12 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
}
ret = btrfs_commit_transaction(trans);
WARN_ON(ret);
btrfs_info_in_rcu(fs_info,
"dev_replace from %s (devid %llu) to %s canceled",
btrfs_dev_name(src_device), src_device->devid,
btrfs_dev_name(tgt_device));
if (tgt_device)
btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
@ -756,7 +762,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
btrfs_dev_replace_lock(dev_replace, 1);
btrfs_dev_replace_write_lock(dev_replace);
switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
@ -772,7 +778,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
break;
}
btrfs_dev_replace_unlock(dev_replace, 1);
btrfs_dev_replace_write_unlock(dev_replace);
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
}
@ -782,12 +788,12 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
struct task_struct *task;
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
btrfs_dev_replace_lock(dev_replace, 1);
btrfs_dev_replace_write_lock(dev_replace);
switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
btrfs_dev_replace_unlock(dev_replace, 1);
btrfs_dev_replace_write_unlock(dev_replace);
return 0;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
break;
@ -801,10 +807,10 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
"cannot continue dev_replace, tgtdev is missing");
btrfs_info(fs_info,
"you may cancel the operation after 'mount -o degraded'");
btrfs_dev_replace_unlock(dev_replace, 1);
btrfs_dev_replace_write_unlock(dev_replace);
return 0;
}
btrfs_dev_replace_unlock(dev_replace, 1);
btrfs_dev_replace_write_unlock(dev_replace);
WARN_ON(test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));
task = kthread_run(btrfs_dev_replace_kthread, fs_info, "btrfs-devrepl");
@ -873,35 +879,35 @@ int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace)
return 1;
}
void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace, int rw)
void btrfs_dev_replace_read_lock(struct btrfs_dev_replace *dev_replace)
{
read_lock(&dev_replace->lock);
atomic_inc(&dev_replace->read_locks);
}
void btrfs_dev_replace_read_unlock(struct btrfs_dev_replace *dev_replace)
{
ASSERT(atomic_read(&dev_replace->read_locks) > 0);
atomic_dec(&dev_replace->read_locks);
read_unlock(&dev_replace->lock);
}
void btrfs_dev_replace_write_lock(struct btrfs_dev_replace *dev_replace)
{
if (rw == 1) {
/* write */
again:
wait_event(dev_replace->read_lock_wq,
atomic_read(&dev_replace->blocking_readers) == 0);
write_lock(&dev_replace->lock);
if (atomic_read(&dev_replace->blocking_readers)) {
write_unlock(&dev_replace->lock);
goto again;
}
} else {
read_lock(&dev_replace->lock);
atomic_inc(&dev_replace->read_locks);
wait_event(dev_replace->read_lock_wq,
atomic_read(&dev_replace->blocking_readers) == 0);
write_lock(&dev_replace->lock);
if (atomic_read(&dev_replace->blocking_readers)) {
write_unlock(&dev_replace->lock);
goto again;
}
}
void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace, int rw)
void btrfs_dev_replace_write_unlock(struct btrfs_dev_replace *dev_replace)
{
if (rw == 1) {
/* write */
ASSERT(atomic_read(&dev_replace->blocking_readers) == 0);
write_unlock(&dev_replace->lock);
} else {
ASSERT(atomic_read(&dev_replace->read_locks) > 0);
atomic_dec(&dev_replace->read_locks);
read_unlock(&dev_replace->lock);
}
ASSERT(atomic_read(&dev_replace->blocking_readers) == 0);
write_unlock(&dev_replace->lock);
}
/* inc blocking cnt and release read lock */

Просмотреть файл

@ -32,13 +32,14 @@ int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
int read_src);
void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_dev_replace_args *args);
int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_dev_replace_args *args);
int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info);
void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info);
int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info);
int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace);
void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace, int rw);
void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace, int rw);
void btrfs_dev_replace_read_lock(struct btrfs_dev_replace *dev_replace);
void btrfs_dev_replace_read_unlock(struct btrfs_dev_replace *dev_replace);
void btrfs_dev_replace_write_lock(struct btrfs_dev_replace *dev_replace);
void btrfs_dev_replace_write_unlock(struct btrfs_dev_replace *dev_replace);
void btrfs_dev_replace_set_lock_blocking(struct btrfs_dev_replace *dev_replace);
void btrfs_dev_replace_clear_lock_blocking(
struct btrfs_dev_replace *dev_replace);

Просмотреть файл

@ -18,7 +18,6 @@
#include "ctree.h"
#include "disk-io.h"
#include "hash.h"
#include "transaction.h"
/*

Просмотреть файл

@ -31,10 +31,10 @@
#include <linux/uuid.h>
#include <linux/semaphore.h>
#include <linux/error-injection.h>
#include <linux/crc32c.h>
#include <asm/unaligned.h>
#include "ctree.h"
#include "disk-io.h"
#include "hash.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "volumes.h"
@ -110,7 +110,7 @@ int __init btrfs_end_io_wq_init(void)
return 0;
}
void btrfs_end_io_wq_exit(void)
void __cold btrfs_end_io_wq_exit(void)
{
kmem_cache_destroy(btrfs_end_io_wq_cache);
}
@ -124,8 +124,8 @@ struct async_submit_bio {
void *private_data;
struct btrfs_fs_info *fs_info;
struct bio *bio;
extent_submit_bio_hook_t *submit_bio_start;
extent_submit_bio_hook_t *submit_bio_done;
extent_submit_bio_start_t *submit_bio_start;
extent_submit_bio_done_t *submit_bio_done;
int mirror_num;
unsigned long bio_flags;
/*
@ -270,7 +270,7 @@ out:
u32 btrfs_csum_data(const char *data, u32 seed, size_t len)
{
return btrfs_crc32c(seed, data, len);
return crc32c(seed, data, len);
}
void btrfs_csum_final(u32 crc, u8 *result)
@ -403,8 +403,7 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
if (csum_type == BTRFS_CSUM_TYPE_CRC32) {
u32 crc = ~(u32)0;
const int csum_size = sizeof(crc);
char result[csum_size];
char result[sizeof(crc)];
/*
* The super_block structure does not span the whole
@ -415,7 +414,7 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
crc, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
btrfs_csum_final(crc, result);
if (memcmp(raw_disk_sb, result, csum_size))
if (memcmp(raw_disk_sb, result, sizeof(result)))
ret = 1;
}
@ -428,13 +427,59 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
return ret;
}
static int verify_level_key(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int level,
struct btrfs_key *first_key)
{
int found_level;
struct btrfs_key found_key;
int ret;
found_level = btrfs_header_level(eb);
if (found_level != level) {
#ifdef CONFIG_BTRFS_DEBUG
WARN_ON(1);
btrfs_err(fs_info,
"tree level mismatch detected, bytenr=%llu level expected=%u has=%u",
eb->start, level, found_level);
#endif
return -EIO;
}
if (!first_key)
return 0;
if (found_level)
btrfs_node_key_to_cpu(eb, &found_key, 0);
else
btrfs_item_key_to_cpu(eb, &found_key, 0);
ret = btrfs_comp_cpu_keys(first_key, &found_key);
#ifdef CONFIG_BTRFS_DEBUG
if (ret) {
WARN_ON(1);
btrfs_err(fs_info,
"tree first key mismatch detected, bytenr=%llu key expected=(%llu, %u, %llu) has=(%llu, %u, %llu)",
eb->start, first_key->objectid, first_key->type,
first_key->offset, found_key.objectid,
found_key.type, found_key.offset);
}
#endif
return ret;
}
/*
* helper to read a given tree block, doing retries as required when
* the checksums don't match and we have alternate mirrors to try.
*
* @parent_transid: expected transid, skip check if 0
* @level: expected level, mandatory check
* @first_key: expected key of first slot, skip check if NULL
*/
static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb,
u64 parent_transid)
u64 parent_transid, int level,
struct btrfs_key *first_key)
{
struct extent_io_tree *io_tree;
int failed = 0;
@ -449,11 +494,14 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE,
mirror_num);
if (!ret) {
if (!verify_parent_transid(io_tree, eb,
if (verify_parent_transid(io_tree, eb,
parent_transid, 0))
break;
else
ret = -EIO;
else if (verify_level_key(fs_info, eb, level,
first_key))
ret = -EUCLEAN;
else
break;
}
/*
@ -461,7 +509,8 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
* there is no reason to read the other copies, they won't be
* any less wrong.
*/
if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags) ||
ret == -EUCLEAN)
break;
num_copies = btrfs_num_copies(fs_info,
@ -602,12 +651,12 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
* that we don't try and read the other copies of this block, just
* return -EIO.
*/
if (found_level == 0 && btrfs_check_leaf_full(root, eb)) {
if (found_level == 0 && btrfs_check_leaf_full(fs_info, eb)) {
set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
ret = -EIO;
}
if (found_level > 0 && btrfs_check_node(root, eb))
if (found_level > 0 && btrfs_check_node(fs_info, eb))
ret = -EIO;
if (!ret)
@ -710,14 +759,6 @@ blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
return 0;
}
unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
{
unsigned long limit = min_t(unsigned long,
info->thread_pool_size,
info->fs_devices->open_devices);
return 256 * limit;
}
static void run_one_async_start(struct btrfs_work *work)
{
struct async_submit_bio *async;
@ -725,7 +766,6 @@ static void run_one_async_start(struct btrfs_work *work)
async = container_of(work, struct async_submit_bio, work);
ret = async->submit_bio_start(async->private_data, async->bio,
async->mirror_num, async->bio_flags,
async->bio_offset);
if (ret)
async->status = ret;
@ -744,8 +784,7 @@ static void run_one_async_done(struct btrfs_work *work)
return;
}
async->submit_bio_done(async->private_data, async->bio, async->mirror_num,
async->bio_flags, async->bio_offset);
async->submit_bio_done(async->private_data, async->bio, async->mirror_num);
}
static void run_one_async_free(struct btrfs_work *work)
@ -759,8 +798,8 @@ static void run_one_async_free(struct btrfs_work *work)
blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset, void *private_data,
extent_submit_bio_hook_t *submit_bio_start,
extent_submit_bio_hook_t *submit_bio_done)
extent_submit_bio_start_t *submit_bio_start,
extent_submit_bio_done_t *submit_bio_done)
{
struct async_submit_bio *async;
@ -807,8 +846,7 @@ static blk_status_t btree_csum_one_bio(struct bio *bio)
return errno_to_blk_status(ret);
}
static blk_status_t __btree_submit_bio_start(void *private_data, struct bio *bio,
int mirror_num, unsigned long bio_flags,
static blk_status_t btree_submit_bio_start(void *private_data, struct bio *bio,
u64 bio_offset)
{
/*
@ -818,9 +856,8 @@ static blk_status_t __btree_submit_bio_start(void *private_data, struct bio *bio
return btree_csum_one_bio(bio);
}
static blk_status_t __btree_submit_bio_done(void *private_data, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset)
static blk_status_t btree_submit_bio_done(void *private_data, struct bio *bio,
int mirror_num)
{
struct inode *inode = private_data;
blk_status_t ret;
@ -879,8 +916,8 @@ static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio,
*/
ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, 0,
bio_offset, private_data,
__btree_submit_bio_start,
__btree_submit_bio_done);
btree_submit_bio_start,
btree_submit_bio_done);
}
if (ret)
@ -1062,8 +1099,17 @@ void btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
buf->start, buf->start + buf->len - 1);
}
/*
* Read tree block at logical address @bytenr and do variant basic but critical
* verification.
*
* @parent_transid: expected transid of this tree block, skip check if 0
* @level: expected level, mandatory check
* @first_key: expected key in slot 0, skip check if NULL
*/
struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
u64 parent_transid)
u64 parent_transid, int level,
struct btrfs_key *first_key)
{
struct extent_buffer *buf = NULL;
int ret;
@ -1072,7 +1118,8 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
if (IS_ERR(buf))
return buf;
ret = btree_read_extent_buffer_pages(fs_info, buf, parent_transid);
ret = btree_read_extent_buffer_pages(fs_info, buf, parent_transid,
level, first_key);
if (ret) {
free_extent_buffer(buf);
return ERR_PTR(ret);
@ -1108,7 +1155,7 @@ static struct btrfs_subvolume_writers *btrfs_alloc_subvolume_writers(void)
if (!writers)
return ERR_PTR(-ENOMEM);
ret = percpu_counter_init(&writers->counter, 0, GFP_KERNEL);
ret = percpu_counter_init(&writers->counter, 0, GFP_NOFS);
if (ret < 0) {
kfree(writers);
return ERR_PTR(ret);
@ -1160,6 +1207,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
spin_lock_init(&root->accounting_lock);
spin_lock_init(&root->log_extents_lock[0]);
spin_lock_init(&root->log_extents_lock[1]);
spin_lock_init(&root->qgroup_meta_rsv_lock);
mutex_init(&root->objectid_mutex);
mutex_init(&root->log_mutex);
mutex_init(&root->ordered_extent_mutex);
@ -1176,7 +1224,6 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
atomic_set(&root->orphan_inodes, 0);
refcount_set(&root->refs, 1);
atomic_set(&root->will_be_snapshotted, 0);
atomic64_set(&root->qgroup_meta_rsv, 0);
root->log_transid = 0;
root->log_transid_committed = -1;
root->last_log_commit = 0;
@ -1401,6 +1448,7 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
struct btrfs_path *path;
u64 generation;
int ret;
int level;
path = btrfs_alloc_path();
if (!path)
@ -1423,9 +1471,10 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
}
generation = btrfs_root_generation(&root->root_item);
level = btrfs_root_level(&root->root_item);
root->node = read_tree_block(fs_info,
btrfs_root_bytenr(&root->root_item),
generation);
generation, level, NULL);
if (IS_ERR(root->node)) {
ret = PTR_ERR(root->node);
goto find_fail;
@ -1808,12 +1857,10 @@ sleep:
if (unlikely(test_bit(BTRFS_FS_STATE_ERROR,
&fs_info->fs_state)))
btrfs_cleanup_transaction(fs_info);
set_current_state(TASK_INTERRUPTIBLE);
if (!kthread_should_stop() &&
(!btrfs_transaction_blocked(fs_info) ||
cannot_commit))
schedule_timeout(delay);
__set_current_state(TASK_RUNNING);
schedule_timeout_interruptible(delay);
} while (!kthread_should_stop());
return 0;
}
@ -2183,7 +2230,7 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
struct btrfs_fs_devices *fs_devices)
{
int max_active = fs_info->thread_pool_size;
u32 max_active = fs_info->thread_pool_size;
unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
fs_info->workers =
@ -2276,6 +2323,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
struct btrfs_root *log_tree_root;
struct btrfs_super_block *disk_super = fs_info->super_copy;
u64 bytenr = btrfs_super_log_root(disk_super);
int level = btrfs_super_log_root_level(disk_super);
if (fs_devices->rw_devices == 0) {
btrfs_warn(fs_info, "log replay required on RO media");
@ -2289,7 +2337,8 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
__setup_root(log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID);
log_tree_root->node = read_tree_block(fs_info, bytenr,
fs_info->generation + 1);
fs_info->generation + 1,
level, NULL);
if (IS_ERR(log_tree_root->node)) {
btrfs_warn(fs_info, "failed to read log tree");
ret = PTR_ERR(log_tree_root->node);
@ -2334,23 +2383,29 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
location.offset = 0;
root = btrfs_read_tree_root(tree_root, &location);
if (IS_ERR(root))
return PTR_ERR(root);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
goto out;
}
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
fs_info->extent_root = root;
location.objectid = BTRFS_DEV_TREE_OBJECTID;
root = btrfs_read_tree_root(tree_root, &location);
if (IS_ERR(root))
return PTR_ERR(root);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
goto out;
}
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
fs_info->dev_root = root;
btrfs_init_devices_late(fs_info);
location.objectid = BTRFS_CSUM_TREE_OBJECTID;
root = btrfs_read_tree_root(tree_root, &location);
if (IS_ERR(root))
return PTR_ERR(root);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
goto out;
}
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
fs_info->csum_root = root;
@ -2367,7 +2422,7 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
if (IS_ERR(root)) {
ret = PTR_ERR(root);
if (ret != -ENOENT)
return ret;
goto out;
} else {
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
fs_info->uuid_root = root;
@ -2376,13 +2431,19 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
location.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID;
root = btrfs_read_tree_root(tree_root, &location);
if (IS_ERR(root))
return PTR_ERR(root);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
goto out;
}
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
fs_info->free_space_root = root;
}
return 0;
out:
btrfs_warn(fs_info, "failed to read root (objectid=%llu): %d",
location.objectid, ret);
return ret;
}
int open_ctree(struct super_block *sb,
@ -2404,8 +2465,8 @@ int open_ctree(struct super_block *sb,
int err = -EINVAL;
int num_backups_tried = 0;
int backup_index = 0;
int max_active;
int clear_free_space_tree = 0;
int level;
tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
@ -2447,6 +2508,8 @@ int open_ctree(struct super_block *sb,
INIT_LIST_HEAD(&fs_info->delayed_iputs);
INIT_LIST_HEAD(&fs_info->delalloc_roots);
INIT_LIST_HEAD(&fs_info->caching_block_groups);
INIT_LIST_HEAD(&fs_info->pending_raid_kobjs);
spin_lock_init(&fs_info->pending_raid_kobjs_lock);
spin_lock_init(&fs_info->delalloc_root_lock);
spin_lock_init(&fs_info->trans_lock);
spin_lock_init(&fs_info->fs_roots_radix_lock);
@ -2713,8 +2776,6 @@ int open_ctree(struct super_block *sb,
goto fail_alloc;
}
max_active = fs_info->thread_pool_size;
ret = btrfs_init_workqueues(fs_info, fs_devices);
if (ret) {
err = ret;
@ -2741,12 +2802,13 @@ int open_ctree(struct super_block *sb,
}
generation = btrfs_super_chunk_root_generation(disk_super);
level = btrfs_super_chunk_root_level(disk_super);
__setup_root(chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID);
chunk_root->node = read_tree_block(fs_info,
btrfs_super_chunk_root(disk_super),
generation);
generation, level, NULL);
if (IS_ERR(chunk_root->node) ||
!extent_buffer_uptodate(chunk_root->node)) {
btrfs_err(fs_info, "failed to read chunk root");
@ -2768,10 +2830,10 @@ int open_ctree(struct super_block *sb,
}
/*
* keep the device that is marked to be the target device for the
* dev_replace procedure
* Keep the devid that is marked to be the target device for the
* device replace procedure
*/
btrfs_close_extra_devices(fs_devices, 0);
btrfs_free_extra_devids(fs_devices, 0);
if (!fs_devices->latest_bdev) {
btrfs_err(fs_info, "failed to read devices");
@ -2780,10 +2842,11 @@ int open_ctree(struct super_block *sb,
retry_root_backup:
generation = btrfs_super_generation(disk_super);
level = btrfs_super_root_level(disk_super);
tree_root->node = read_tree_block(fs_info,
btrfs_super_root(disk_super),
generation);
generation, level, NULL);
if (IS_ERR(tree_root->node) ||
!extent_buffer_uptodate(tree_root->node)) {
btrfs_warn(fs_info, "failed to read tree root");
@ -2834,7 +2897,7 @@ retry_root_backup:
goto fail_block_groups;
}
btrfs_close_extra_devices(fs_devices, 1);
btrfs_free_extra_devids(fs_devices, 1);
ret = btrfs_sysfs_add_fsid(fs_devices, NULL);
if (ret) {
@ -2953,6 +3016,7 @@ retry_root_backup:
fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location);
if (IS_ERR(fs_info->fs_root)) {
err = PTR_ERR(fs_info->fs_root);
btrfs_warn(fs_info, "failed to read fs tree: %d", err);
goto fail_qgroup;
}
@ -3290,6 +3354,7 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
struct buffer_head *bh;
int i;
int errors = 0;
bool primary_failed = false;
u64 bytenr;
if (max_mirrors == 0)
@ -3306,11 +3371,16 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
BTRFS_SUPER_INFO_SIZE);
if (!bh) {
errors++;
if (i == 0)
primary_failed = true;
continue;
}
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
if (!buffer_uptodate(bh)) {
errors++;
if (i == 0)
primary_failed = true;
}
/* drop our reference */
brelse(bh);
@ -3319,6 +3389,13 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
brelse(bh);
}
/* log error, force error return */
if (primary_failed) {
btrfs_err(device->fs_info, "error writing primary super block to device %llu",
device->devid);
return -1;
}
return errors < i ? 0 : -1;
}
@ -3851,7 +3928,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
* So here we should only check item pointers, not item data.
*/
if (btrfs_header_level(buf) == 0 &&
btrfs_check_leaf_relaxed(root, buf)) {
btrfs_check_leaf_relaxed(fs_info, buf)) {
btrfs_print_leaf(buf);
ASSERT(0);
}
@ -3890,12 +3967,14 @@ void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info)
__btrfs_btree_balance_dirty(fs_info, 0);
}
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level,
struct btrfs_key *first_key)
{
struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
struct btrfs_fs_info *fs_info = root->fs_info;
return btree_read_extent_buffer_pages(fs_info, buf, parent_transid);
return btree_read_extent_buffer_pages(fs_info, buf, parent_transid,
level, first_key);
}
static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info)
@ -4314,11 +4393,6 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans,
cache = list_first_entry(&cur_trans->dirty_bgs,
struct btrfs_block_group_cache,
dirty_list);
if (!cache) {
btrfs_err(fs_info, "orphan block group dirty_bgs list");
spin_unlock(&cur_trans->dirty_bgs_lock);
return;
}
if (!list_empty(&cache->io_list)) {
spin_unlock(&cur_trans->dirty_bgs_lock);
@ -4338,14 +4412,14 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans,
}
spin_unlock(&cur_trans->dirty_bgs_lock);
/*
* Refer to the definition of io_bgs member for details why it's safe
* to use it without any locking
*/
while (!list_empty(&cur_trans->io_bgs)) {
cache = list_first_entry(&cur_trans->io_bgs,
struct btrfs_block_group_cache,
io_list);
if (!cache) {
btrfs_err(fs_info, "orphan block group on io_bgs list");
return;
}
list_del_init(&cache->io_list);
spin_lock(&cache->lock);

Просмотреть файл

@ -52,8 +52,9 @@ static inline u64 btrfs_sb_offset(int mirror)
struct btrfs_device;
struct btrfs_fs_devices;
struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 parent_transid);
struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
u64 parent_transid, int level,
struct btrfs_key *first_key);
void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr);
int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
int mirror_num, struct extent_buffer **eb);
@ -123,7 +124,8 @@ static inline void btrfs_put_fs_root(struct btrfs_root *root)
void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
int atomic);
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level,
struct btrfs_key *first_key);
u32 btrfs_csum_data(const char *data, u32 seed, size_t len);
void btrfs_csum_final(u32 crc, u8 *result);
blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
@ -131,9 +133,8 @@ blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset, void *private_data,
extent_submit_bio_hook_t *submit_bio_start,
extent_submit_bio_hook_t *submit_bio_done);
unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info);
extent_submit_bio_start_t *submit_bio_start,
extent_submit_bio_done_t *submit_bio_done);
int btrfs_write_tree_block(struct extent_buffer *buf);
void btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
@ -154,7 +155,7 @@ struct extent_map *btree_get_extent(struct btrfs_inode *inode,
int create);
int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags);
int __init btrfs_end_io_wq_init(void);
void btrfs_end_io_wq_exit(void);
void __cold btrfs_end_io_wq_exit(void);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void btrfs_init_lockdep(void);

Просмотреть файл

@ -27,7 +27,7 @@
#include <linux/ratelimit.h>
#include <linux/percpu_counter.h>
#include <linux/lockdep.h>
#include "hash.h"
#include <linux/crc32c.h>
#include "tree-log.h"
#include "disk-io.h"
#include "print-tree.h"
@ -535,13 +535,11 @@ static noinline void caching_thread(struct btrfs_work *work)
struct btrfs_block_group_cache *block_group;
struct btrfs_fs_info *fs_info;
struct btrfs_caching_control *caching_ctl;
struct btrfs_root *extent_root;
int ret;
caching_ctl = container_of(work, struct btrfs_caching_control, work);
block_group = caching_ctl->block_group;
fs_info = block_group->fs_info;
extent_root = fs_info->extent_root;
mutex_lock(&caching_ctl->mutex);
down_read(&fs_info->commit_root_sem);
@ -1203,11 +1201,11 @@ static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
__le64 lenum;
lenum = cpu_to_le64(root_objectid);
high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
lenum = cpu_to_le64(owner);
low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
lenum = cpu_to_le64(offset);
low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
return ((u64)high_crc << 31) ^ (u64)low_crc;
}
@ -2652,9 +2650,9 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
* Returns -ENOMEM or -EIO on failure and will abort the transaction.
*/
static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
unsigned long nr)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_delayed_ref_node *ref;
struct btrfs_delayed_ref_head *locked_ref = NULL;
@ -2994,7 +2992,7 @@ static void delayed_ref_async_start(struct btrfs_work *work)
if (trans->transid > async->transid)
goto end;
ret = btrfs_run_delayed_refs(trans, fs_info, async->count);
ret = btrfs_run_delayed_refs(trans, async->count);
if (ret)
async->error = ret;
end:
@ -3053,8 +3051,9 @@ int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info,
* Returns <0 on error and aborts the transaction
*/
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, unsigned long count)
unsigned long count)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct rb_node *node;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_delayed_ref_head *head;
@ -3078,7 +3077,7 @@ again:
delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
#endif
trans->can_flush_pending_bgs = false;
ret = __btrfs_run_delayed_refs(trans, fs_info, count);
ret = __btrfs_run_delayed_refs(trans, count);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
return ret;
@ -3086,7 +3085,7 @@ again:
if (run_all) {
if (!list_empty(&trans->new_bgs))
btrfs_create_pending_block_groups(trans, fs_info);
btrfs_create_pending_block_groups(trans);
spin_lock(&delayed_refs->lock);
node = rb_first(&delayed_refs->href_root);
@ -3660,9 +3659,9 @@ int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
* the commit latency by getting rid of the easy block groups while
* we're still allowing others to join the commit.
*/
int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_block_group_cache *cache;
struct btrfs_transaction *cur_trans = trans->transaction;
int ret = 0;
@ -3686,7 +3685,7 @@ again:
* make sure all the block groups on our dirty list actually
* exist
*/
btrfs_create_pending_block_groups(trans, fs_info);
btrfs_create_pending_block_groups(trans);
if (!path) {
path = btrfs_alloc_path();
@ -3741,8 +3740,9 @@ again:
should_put = 0;
/*
* the cache_write_mutex is protecting
* the io_list
* The cache_write_mutex is protecting the
* io_list, also refer to the definition of
* btrfs_transaction::io_bgs for more details
*/
list_add_tail(&cache->io_list, io);
} else {
@ -3800,7 +3800,7 @@ again:
* go through delayed refs for all the stuff we've just kicked off
* and then loop back (just once)
*/
ret = btrfs_run_delayed_refs(trans, fs_info, 0);
ret = btrfs_run_delayed_refs(trans, 0);
if (!ret && loops == 0) {
loops++;
spin_lock(&cur_trans->dirty_bgs_lock);
@ -3882,7 +3882,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
cache_save_setup(cache, trans, path);
if (!ret)
ret = btrfs_run_delayed_refs(trans, fs_info,
ret = btrfs_run_delayed_refs(trans,
(unsigned long) -1);
if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) {
@ -3934,6 +3934,10 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
}
spin_unlock(&cur_trans->dirty_bgs_lock);
/*
* Refer to the definition of io_bgs member for details why it's safe
* to use it without any locking
*/
while (!list_empty(io)) {
cache = list_first_entry(io, struct btrfs_block_group_cache,
io_list);
@ -4332,8 +4336,7 @@ again:
/* commit the current transaction and try again */
commit_trans:
if (need_commit &&
!atomic_read(&fs_info->open_ioctl_trans)) {
if (need_commit) {
need_commit--;
if (need_commit > 0) {
@ -4541,7 +4544,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans,
* Needed because we can end up allocating a system chunk and for an
* atomic and race free space reservation in the chunk block reserve.
*/
ASSERT(mutex_is_locked(&fs_info->chunk_mutex));
lockdep_assert_held(&fs_info->chunk_mutex);
info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
spin_lock(&info->lock);
@ -4602,11 +4605,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
return -ENOSPC;
space_info = __find_space_info(fs_info, flags);
if (!space_info) {
ret = create_space_info(fs_info, flags, &space_info);
if (ret)
return ret;
}
ASSERT(space_info);
again:
spin_lock(&space_info->lock);
@ -4705,7 +4704,7 @@ out:
*/
if (trans->can_flush_pending_bgs &&
trans->chunk_bytes_reserved >= (u64)SZ_2M) {
btrfs_create_pending_block_groups(trans, fs_info);
btrfs_create_pending_block_groups(trans);
btrfs_trans_release_chunk_metadata(trans);
}
return ret;
@ -4826,7 +4825,6 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
long time_left;
unsigned long nr_pages;
int loops;
enum btrfs_reserve_flush_enum flush;
/* Calc the number of the pages we need flush for space reservation */
items = calc_reclaim_items_nr(fs_info, to_reclaim);
@ -4867,10 +4865,6 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
atomic_read(&fs_info->async_delalloc_pages) <=
(int)max_reclaim);
skip_async:
if (!trans)
flush = BTRFS_RESERVE_FLUSH_ALL;
else
flush = BTRFS_RESERVE_NO_FLUSH;
spin_lock(&space_info->lock);
if (list_empty(&space_info->tickets) &&
list_empty(&space_info->priority_tickets)) {
@ -4993,7 +4987,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
ret = PTR_ERR(trans);
break;
}
ret = btrfs_run_delayed_items_nr(trans, fs_info, nr);
ret = btrfs_run_delayed_items_nr(trans, nr);
btrfs_end_transaction(trans);
break;
case FLUSH_DELALLOC:
@ -5388,10 +5382,15 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
!block_rsv_use_bytes(global_rsv, orig_bytes))
ret = 0;
}
if (ret == -ENOSPC)
if (ret == -ENOSPC) {
trace_btrfs_space_reservation(fs_info, "space_info:enospc",
block_rsv->space_info->flags,
orig_bytes, 1);
if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
dump_space_info(fs_info, block_rsv->space_info,
orig_bytes, 0);
}
return ret;
}
@ -5760,6 +5759,9 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
if (num_bytes == 0)
return 0;
ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
if (ret)
return ret;
ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
if (!ret) {
block_rsv_add_bytes(block_rsv, num_bytes, 0);
@ -5772,11 +5774,15 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
/**
* btrfs_inode_rsv_release - release any excessive reservation.
* @inode - the inode we need to release from.
* @qgroup_free - free or convert qgroup meta.
* Unlike normal operation, qgroup meta reservation needs to know if we are
* freeing qgroup reservation or just converting it into per-trans. Normally
* @qgroup_free is true for error handling, and false for normal release.
*
* This is the same as btrfs_block_rsv_release, except that it handles the
* tracepoint for the reservation.
*/
static void btrfs_inode_rsv_release(struct btrfs_inode *inode)
static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
@ -5792,6 +5798,10 @@ static void btrfs_inode_rsv_release(struct btrfs_inode *inode)
if (released > 0)
trace_btrfs_space_reservation(fs_info, "delalloc",
btrfs_ino(inode), released, 0);
if (qgroup_free)
btrfs_qgroup_free_meta_prealloc(inode->root, released);
else
btrfs_qgroup_convert_reserved_meta(inode->root, released);
}
void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
@ -5892,24 +5902,6 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
}
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
if (!trans->block_rsv) {
ASSERT(!trans->bytes_reserved);
return;
}
if (!trans->bytes_reserved)
return;
ASSERT(trans->block_rsv == &fs_info->trans_block_rsv);
trace_btrfs_space_reservation(fs_info, "transaction",
trans->transid, trans->bytes_reserved, 0);
btrfs_block_rsv_release(fs_info, trans->block_rsv,
trans->bytes_reserved);
trans->bytes_reserved = 0;
}
/*
* To be called after all the new block groups attached to the transaction
@ -5951,7 +5943,7 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
*/
u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
trace_btrfs_space_reservation(fs_info, "orphan", btrfs_ino(inode),
trace_btrfs_space_reservation(fs_info, "orphan", btrfs_ino(inode),
num_bytes, 1);
return btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
}
@ -5995,7 +5987,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
/* One for parent inode, two for dir entries */
num_bytes = 3 * fs_info->nodesize;
ret = btrfs_qgroup_reserve_meta(root, num_bytes, true);
ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
if (ret)
return ret;
} else {
@ -6014,7 +6006,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, 1);
if (ret && *qgroup_reserved)
btrfs_qgroup_free_meta(root, *qgroup_reserved);
btrfs_qgroup_free_meta_prealloc(root, *qgroup_reserved);
return ret;
}
@ -6051,7 +6043,6 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
struct btrfs_root *root = inode->root;
unsigned nr_extents;
enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
int ret = 0;
@ -6068,13 +6059,13 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
if (btrfs_is_free_space_inode(inode)) {
flush = BTRFS_RESERVE_NO_FLUSH;
delalloc_lock = false;
} else if (current->journal_info) {
flush = BTRFS_RESERVE_FLUSH_LIMIT;
}
} else {
if (current->journal_info)
flush = BTRFS_RESERVE_FLUSH_LIMIT;
if (flush != BTRFS_RESERVE_NO_FLUSH &&
btrfs_transaction_in_commit(fs_info))
schedule_timeout(1);
if (btrfs_transaction_in_commit(fs_info))
schedule_timeout(1);
}
if (delalloc_lock)
mutex_lock(&inode->delalloc_mutex);
@ -6089,19 +6080,9 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
btrfs_calculate_inode_block_rsv_size(fs_info, inode);
spin_unlock(&inode->lock);
if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
ret = btrfs_qgroup_reserve_meta(root,
nr_extents * fs_info->nodesize, true);
if (ret)
goto out_fail;
}
ret = btrfs_inode_rsv_refill(inode, flush);
if (unlikely(ret)) {
btrfs_qgroup_free_meta(root,
nr_extents * fs_info->nodesize);
if (unlikely(ret))
goto out_fail;
}
if (delalloc_lock)
mutex_unlock(&inode->delalloc_mutex);
@ -6115,7 +6096,7 @@ out_fail:
btrfs_calculate_inode_block_rsv_size(fs_info, inode);
spin_unlock(&inode->lock);
btrfs_inode_rsv_release(inode);
btrfs_inode_rsv_release(inode, true);
if (delalloc_lock)
mutex_unlock(&inode->delalloc_mutex);
return ret;
@ -6125,12 +6106,14 @@ out_fail:
* btrfs_delalloc_release_metadata - release a metadata reservation for an inode
* @inode: the inode to release the reservation for.
* @num_bytes: the number of bytes we are releasing.
* @qgroup_free: free qgroup reservation or convert it to per-trans reservation
*
* This will release the metadata reservation for an inode. This can be called
* once we complete IO for a given set of bytes to release their metadata
* reservations, or on error for the same reason.
*/
void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
bool qgroup_free)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
@ -6143,13 +6126,14 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
if (btrfs_is_testing(fs_info))
return;
btrfs_inode_rsv_release(inode);
btrfs_inode_rsv_release(inode, qgroup_free);
}
/**
* btrfs_delalloc_release_extents - release our outstanding_extents
* @inode: the inode to balance the reservation for.
* @num_bytes: the number of bytes we originally reserved with
* @qgroup_free: do we need to free qgroup meta reservation or convert them.
*
* When we reserve space we increase outstanding_extents for the extents we may
* add. Once we've set the range as delalloc or created our ordered extents we
@ -6157,7 +6141,8 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
* temporarily tracked outstanding_extents. This _must_ be used in conjunction
* with btrfs_delalloc_reserve_metadata.
*/
void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
bool qgroup_free)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
unsigned num_extents;
@ -6171,7 +6156,7 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
if (btrfs_is_testing(fs_info))
return;
btrfs_inode_rsv_release(inode);
btrfs_inode_rsv_release(inode, qgroup_free);
}
/**
@ -6227,9 +6212,9 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
*/
void btrfs_delalloc_release_space(struct inode *inode,
struct extent_changeset *reserved,
u64 start, u64 len)
u64 start, u64 len, bool qgroup_free)
{
btrfs_delalloc_release_metadata(BTRFS_I(inode), len);
btrfs_delalloc_release_metadata(BTRFS_I(inode), len, qgroup_free);
btrfs_free_reserved_data_space(inode, reserved, start, len);
}
@ -6783,9 +6768,9 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
return 0;
}
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_block_group_cache *block_group, *tmp;
struct list_head *deleted_bgs;
struct extent_io_tree *unpin;
@ -7351,29 +7336,6 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
return ret;
}
int __get_raid_index(u64 flags)
{
if (flags & BTRFS_BLOCK_GROUP_RAID10)
return BTRFS_RAID_RAID10;
else if (flags & BTRFS_BLOCK_GROUP_RAID1)
return BTRFS_RAID_RAID1;
else if (flags & BTRFS_BLOCK_GROUP_DUP)
return BTRFS_RAID_DUP;
else if (flags & BTRFS_BLOCK_GROUP_RAID0)
return BTRFS_RAID_RAID0;
else if (flags & BTRFS_BLOCK_GROUP_RAID5)
return BTRFS_RAID_RAID5;
else if (flags & BTRFS_BLOCK_GROUP_RAID6)
return BTRFS_RAID_RAID6;
return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
}
int get_block_group_index(struct btrfs_block_group_cache *cache)
{
return __get_raid_index(cache->flags);
}
static const char *btrfs_raid_type_names[BTRFS_NR_RAID_TYPES] = {
[BTRFS_RAID_RAID10] = "raid10",
[BTRFS_RAID_RAID1] = "raid1",
@ -7488,7 +7450,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
u64 empty_cluster = 0;
struct btrfs_space_info *space_info;
int loop = 0;
int index = __get_raid_index(flags);
int index = btrfs_bg_flags_to_raid_index(flags);
bool failed_cluster_refill = false;
bool failed_alloc = false;
bool use_cluster = true;
@ -7574,7 +7536,8 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
btrfs_put_block_group(block_group);
up_read(&space_info->groups_sem);
} else {
index = get_block_group_index(block_group);
index = btrfs_bg_flags_to_raid_index(
block_group->flags);
btrfs_lock_block_group(block_group, delalloc);
goto have_block_group;
}
@ -7584,7 +7547,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
}
search:
have_caching_bg = false;
if (index == 0 || index == __get_raid_index(flags))
if (index == 0 || index == btrfs_bg_flags_to_raid_index(flags))
full_search = true;
down_read(&space_info->groups_sem);
list_for_each_entry(block_group, &space_info->block_groups[index],
@ -7842,7 +7805,8 @@ checks:
loop:
failed_cluster_refill = false;
failed_alloc = false;
BUG_ON(index != get_block_group_index(block_group));
BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) !=
index);
btrfs_release_block_group(block_group, delalloc);
cond_resched();
}
@ -7996,6 +7960,51 @@ again:
up_read(&info->groups_sem);
}
/*
* btrfs_reserve_extent - entry point to the extent allocator. Tries to find a
* hole that is at least as big as @num_bytes.
*
* @root - The root that will contain this extent
*
* @ram_bytes - The amount of space in ram that @num_bytes take. This
* is used for accounting purposes. This value differs
* from @num_bytes only in the case of compressed extents.
*
* @num_bytes - Number of bytes to allocate on-disk.
*
* @min_alloc_size - Indicates the minimum amount of space that the
* allocator should try to satisfy. In some cases
* @num_bytes may be larger than what is required and if
* the filesystem is fragmented then allocation fails.
* However, the presence of @min_alloc_size gives a
* chance to try and satisfy the smaller allocation.
*
* @empty_size - A hint that you plan on doing more COW. This is the
* size in bytes the allocator should try to find free
* next to the block it returns. This is just a hint and
* may be ignored by the allocator.
*
* @hint_byte - Hint to the allocator to start searching above the byte
* address passed. It might be ignored.
*
* @ins - This key is modified to record the found hole. It will
* have the following values:
* ins->objectid == start position
* ins->flags = BTRFS_EXTENT_ITEM_KEY
* ins->offset == the size of the hole.
*
* @is_data - Boolean flag indicating whether an extent is
* allocated for data (true) or metadata (false)
*
* @delalloc - Boolean flag indicating whether this allocation is for
* delalloc or not. If 'true' data_rwsem of block groups
* is going to be acquired.
*
*
* Returns 0 when an allocation succeeded or < 0 when an error occurred. In
* case -ENOSPC is returned then @ins->offset will contain the size of the
* largest available hole the allocator managed to find.
*/
int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
u64 num_bytes, u64 min_alloc_size,
u64 empty_size, u64 hint_byte,
@ -8699,6 +8708,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
u64 parent;
u32 blocksize;
struct btrfs_key key;
struct btrfs_key first_key;
struct extent_buffer *next;
int level = wc->level;
int reada = 0;
@ -8719,6 +8729,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
}
bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
btrfs_node_key_to_cpu(path->nodes[level], &first_key,
path->slots[level]);
blocksize = fs_info->nodesize;
next = find_extent_buffer(fs_info, bytenr);
@ -8783,7 +8795,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
if (!next) {
if (reada && level == 1)
reada_walk_down(trans, root, wc, path);
next = read_tree_block(fs_info, bytenr, generation);
next = read_tree_block(fs_info, bytenr, generation, level - 1,
&first_key);
if (IS_ERR(next)) {
return PTR_ERR(next);
} else if (!extent_buffer_uptodate(next)) {
@ -9648,7 +9661,7 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
*/
target = get_restripe_target(fs_info, block_group->flags);
if (target) {
index = __get_raid_index(extended_to_chunk(target));
index = btrfs_bg_flags_to_raid_index(extended_to_chunk(target));
} else {
/*
* this is just a balance, so if we were marked as full
@ -9662,7 +9675,7 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
goto out;
}
index = get_block_group_index(block_group);
index = btrfs_bg_flags_to_raid_index(block_group->flags);
}
if (index == BTRFS_RAID_RAID10) {
@ -9911,10 +9924,40 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
return 0;
}
/* link_block_group will queue up kobjects to add when we're reclaim-safe */
void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info)
{
struct btrfs_space_info *space_info;
struct raid_kobject *rkobj;
LIST_HEAD(list);
int index;
int ret = 0;
spin_lock(&fs_info->pending_raid_kobjs_lock);
list_splice_init(&fs_info->pending_raid_kobjs, &list);
spin_unlock(&fs_info->pending_raid_kobjs_lock);
list_for_each_entry(rkobj, &list, list) {
space_info = __find_space_info(fs_info, rkobj->flags);
index = btrfs_bg_flags_to_raid_index(rkobj->flags);
ret = kobject_add(&rkobj->kobj, &space_info->kobj,
"%s", get_raid_name(index));
if (ret) {
kobject_put(&rkobj->kobj);
break;
}
}
if (ret)
btrfs_warn(fs_info,
"failed to add kobject for block cache, ignoring");
}
static void link_block_group(struct btrfs_block_group_cache *cache)
{
struct btrfs_space_info *space_info = cache->space_info;
int index = get_block_group_index(cache);
struct btrfs_fs_info *fs_info = cache->fs_info;
int index = btrfs_bg_flags_to_raid_index(cache->flags);
bool first = false;
down_write(&space_info->groups_sem);
@ -9924,27 +9967,20 @@ static void link_block_group(struct btrfs_block_group_cache *cache)
up_write(&space_info->groups_sem);
if (first) {
struct raid_kobject *rkobj;
int ret;
rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
if (!rkobj)
goto out_err;
rkobj->raid_type = index;
kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
ret = kobject_add(&rkobj->kobj, &space_info->kobj,
"%s", get_raid_name(index));
if (ret) {
kobject_put(&rkobj->kobj);
goto out_err;
struct raid_kobject *rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
if (!rkobj) {
btrfs_warn(cache->fs_info,
"couldn't alloc memory for raid level kobject");
return;
}
rkobj->flags = cache->flags;
kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
spin_lock(&fs_info->pending_raid_kobjs_lock);
list_add_tail(&rkobj->list, &fs_info->pending_raid_kobjs);
spin_unlock(&fs_info->pending_raid_kobjs_lock);
space_info->block_group_kobjs[index] = &rkobj->kobj;
}
return;
out_err:
btrfs_warn(cache->fs_info,
"failed to add kobject for block cache, ignoring");
}
static struct btrfs_block_group_cache *
@ -10160,6 +10196,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
inc_block_group_ro(cache, 1);
}
btrfs_add_raid_kobjects(info);
init_global_block_rsv(info);
ret = 0;
error:
@ -10167,9 +10204,9 @@ error:
return ret;
}
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_block_group_cache *block_group, *tmp;
struct btrfs_root *extent_root = fs_info->extent_root;
struct btrfs_block_group_item item;
@ -10254,15 +10291,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
* with its ->space_info set.
*/
cache->space_info = __find_space_info(fs_info, cache->flags);
if (!cache->space_info) {
ret = create_space_info(fs_info, cache->flags,
&cache->space_info);
if (ret) {
btrfs_remove_free_space_cache(cache);
btrfs_put_block_group(cache);
return ret;
}
}
ASSERT(cache->space_info);
ret = btrfs_add_block_group_cache(fs_info, cache);
if (ret) {
@ -10334,7 +10363,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
block_group->key.offset);
memcpy(&key, &block_group->key, sizeof(key));
index = get_block_group_index(block_group);
index = btrfs_bg_flags_to_raid_index(block_group->flags);
if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
BTRFS_BLOCK_GROUP_RAID1 |
BTRFS_BLOCK_GROUP_RAID10))

Просмотреть файл

@ -76,8 +76,8 @@ void btrfs_leak_debug_check(void)
while (!list_empty(&buffers)) {
eb = list_entry(buffers.next, struct extent_buffer, leak_list);
pr_err("BTRFS: buffer leak start %llu len %lu refs %d\n",
eb->start, eb->len, atomic_read(&eb->refs));
pr_err("BTRFS: buffer leak start %llu len %lu refs %d bflags %lu\n",
eb->start, eb->len, atomic_read(&eb->refs), eb->bflags);
list_del(&eb->leak_list);
kmem_cache_free(extent_buffer_cache, eb);
}
@ -119,23 +119,22 @@ struct extent_page_data {
unsigned int sync_io:1;
};
static void add_extent_changeset(struct extent_state *state, unsigned bits,
static int add_extent_changeset(struct extent_state *state, unsigned bits,
struct extent_changeset *changeset,
int set)
{
int ret;
if (!changeset)
return;
return 0;
if (set && (state->state & bits) == bits)
return;
return 0;
if (!set && (state->state & bits) == 0)
return;
return 0;
changeset->bytes_changed += state->end - state->start + 1;
ret = ulist_add(&changeset->range_changed, state->start, state->end,
GFP_ATOMIC);
/* ENOMEM */
BUG_ON(ret < 0);
return ret;
}
static void flush_write_bio(struct extent_page_data *epd);
@ -187,7 +186,7 @@ free_state_cache:
return -ENOMEM;
}
void extent_io_exit(void)
void __cold extent_io_exit(void)
{
btrfs_leak_debug_check();
@ -527,6 +526,7 @@ static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
{
struct extent_state *next;
unsigned bits_to_clear = *bits & ~EXTENT_CTLBITS;
int ret;
if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
u64 range = state->end - state->start + 1;
@ -534,7 +534,8 @@ static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
tree->dirty_bytes -= range;
}
clear_state_cb(tree, state, bits);
add_extent_changeset(state, bits_to_clear, changeset, 0);
ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
BUG_ON(ret < 0);
state->state &= ~bits_to_clear;
if (wake)
wake_up(&state->wq);
@ -805,13 +806,15 @@ static void set_state_bits(struct extent_io_tree *tree,
unsigned *bits, struct extent_changeset *changeset)
{
unsigned bits_to_set = *bits & ~EXTENT_CTLBITS;
int ret;
set_state_cb(tree, state, bits);
if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
u64 range = state->end - state->start + 1;
tree->dirty_bytes += range;
}
add_extent_changeset(state, bits_to_set, changeset, 1);
ret = add_extent_changeset(state, bits_to_set, changeset, 1);
BUG_ON(ret < 0);
state->state |= bits_to_set;
}
@ -2744,20 +2747,21 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
return blk_status_to_errno(ret);
}
static int merge_bio(struct extent_io_tree *tree, struct page *page,
unsigned long offset, size_t size, struct bio *bio,
unsigned long bio_flags)
{
int ret = 0;
if (tree->ops)
ret = tree->ops->merge_bio_hook(page, offset, size, bio,
bio_flags);
return ret;
}
/*
* @opf: bio REQ_OP_* and REQ_* flags as one value
* @tree: tree so we can call our merge_bio hook
* @wbc: optional writeback control for io accounting
* @page: page to add to the bio
* @pg_offset: offset of the new bio or to check whether we are adding
* a contiguous page to the previous one
* @size: portion of page that we want to write
* @offset: starting offset in the page
* @bdev: attach newly created bios to this bdev
* @bio_ret: must be valid pointer, newly allocated bio will be stored there
* @end_io_func: end_io callback for new bio
* @mirror_num: desired mirror to read/write
* @prev_bio_flags: flags of previous bio to see if we can merge the current one
* @bio_flags: flags of the current bio to see if we can merge them
*/
static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
struct writeback_control *wbc,
@ -2773,21 +2777,27 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
{
int ret = 0;
struct bio *bio;
int contig = 0;
int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
size_t page_size = min_t(size_t, size, PAGE_SIZE);
sector_t sector = offset >> 9;
if (bio_ret && *bio_ret) {
ASSERT(bio_ret);
if (*bio_ret) {
bool contig;
bool can_merge = true;
bio = *bio_ret;
if (old_compressed)
if (prev_bio_flags & EXTENT_BIO_COMPRESSED)
contig = bio->bi_iter.bi_sector == sector;
else
contig = bio_end_sector(bio) == sector;
if (prev_bio_flags != bio_flags || !contig ||
if (tree->ops && tree->ops->merge_bio_hook(page, offset,
page_size, bio, bio_flags))
can_merge = false;
if (prev_bio_flags != bio_flags || !contig || !can_merge ||
force_bio_submit ||
merge_bio(tree, page, pg_offset, page_size, bio, bio_flags) ||
bio_add_page(bio, page, page_size, pg_offset) < page_size) {
ret = submit_one_bio(bio, mirror_num, prev_bio_flags);
if (ret < 0) {
@ -2813,10 +2823,7 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
wbc_account_io(wbc, page, page_size);
}
if (bio_ret)
*bio_ret = bio;
else
ret = submit_one_bio(bio, mirror_num, bio_flags);
*bio_ret = bio;
return ret;
}
@ -2886,8 +2893,7 @@ static int __do_readpage(struct extent_io_tree *tree,
{
struct inode *inode = page->mapping->host;
u64 start = page_offset(page);
u64 page_end = start + PAGE_SIZE - 1;
u64 end;
const u64 end = start + PAGE_SIZE - 1;
u64 cur = start;
u64 extent_offset;
u64 last_byte = i_size_read(inode);
@ -2905,7 +2911,6 @@ static int __do_readpage(struct extent_io_tree *tree,
set_page_extent_mapped(page);
end = page_end;
if (!PageUptodate(page)) {
if (cleancache_get_page(page) == 0) {
BUG_ON(blocksize != PAGE_SIZE);
@ -5230,11 +5235,6 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb)
}
}
int extent_buffer_uptodate(struct extent_buffer *eb)
{
return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
}
int read_extent_buffer_pages(struct extent_io_tree *tree,
struct extent_buffer *eb, int wait, int mirror_num)
{

Просмотреть файл

@ -83,8 +83,8 @@ static inline int le_test_bit(int nr, const u8 *addr)
return 1U & (addr[BIT_BYTE(nr)] >> (nr & (BITS_PER_BYTE-1)));
}
extern void le_bitmap_set(u8 *map, unsigned int start, int len);
extern void le_bitmap_clear(u8 *map, unsigned int start, int len);
void le_bitmap_set(u8 *map, unsigned int start, int len);
void le_bitmap_clear(u8 *map, unsigned int start, int len);
struct extent_state;
struct btrfs_root;
@ -95,6 +95,13 @@ struct io_failure_record;
typedef blk_status_t (extent_submit_bio_hook_t)(void *private_data, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset);
typedef blk_status_t (extent_submit_bio_start_t)(void *private_data,
struct bio *bio, u64 bio_offset);
typedef blk_status_t (extent_submit_bio_done_t)(void *private_data,
struct bio *bio, int mirror_num);
struct extent_io_ops {
/*
* The following callbacks must be allways defined, the function
@ -286,7 +293,7 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
get_extent_t *get_extent, int mirror_num);
int __init extent_io_init(void);
void extent_io_exit(void);
void __cold extent_io_exit(void);
u64 count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end,
@ -455,6 +462,11 @@ static inline void extent_buffer_get(struct extent_buffer *eb)
atomic_inc(&eb->refs);
}
static inline int extent_buffer_uptodate(struct extent_buffer *eb)
{
return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
}
int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
unsigned long start, unsigned long len);
void read_extent_buffer(const struct extent_buffer *eb, void *dst,
@ -489,7 +501,6 @@ void clear_extent_buffer_dirty(struct extent_buffer *eb);
int set_extent_buffer_dirty(struct extent_buffer *eb);
void set_extent_buffer_uptodate(struct extent_buffer *eb);
void clear_extent_buffer_uptodate(struct extent_buffer *eb);
int extent_buffer_uptodate(struct extent_buffer *eb);
int extent_buffer_under_io(struct extent_buffer *eb);
int map_private_extent_buffer(const struct extent_buffer *eb,
unsigned long offset, unsigned long min_len,

Просмотреть файл

@ -2,7 +2,6 @@
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include "ctree.h"
#include "extent_map.h"
#include "compression.h"
@ -20,7 +19,7 @@ int __init extent_map_init(void)
return 0;
}
void extent_map_exit(void)
void __cold extent_map_exit(void)
{
kmem_cache_destroy(extent_map_cache);
}
@ -552,6 +551,9 @@ int btrfs_add_extent_mapping(struct extent_map_tree *em_tree,
ret = 0;
existing = search_extent_mapping(em_tree, start, len);
trace_btrfs_handle_em_exist(existing, em, start, len);
/*
* existing will always be non-NULL, since there must be
* extent causing the -EEXIST.

Просмотреть файл

@ -86,7 +86,7 @@ void replace_extent_mapping(struct extent_map_tree *tree,
struct extent_map *alloc_extent_map(void);
void free_extent_map(struct extent_map *em);
int __init extent_map_init(void);
void extent_map_exit(void);
void __cold extent_map_exit(void);
int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen);
void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em);
struct extent_map *search_extent_mapping(struct extent_map_tree *tree,

Просмотреть файл

@ -1691,7 +1691,7 @@ again:
force_page_uptodate);
if (ret) {
btrfs_delalloc_release_extents(BTRFS_I(inode),
reserve_bytes);
reserve_bytes, true);
break;
}
@ -1703,7 +1703,7 @@ again:
if (extents_locked == -EAGAIN)
goto again;
btrfs_delalloc_release_extents(BTRFS_I(inode),
reserve_bytes);
reserve_bytes, true);
ret = extents_locked;
break;
}
@ -1738,7 +1738,7 @@ again:
fs_info->sb->s_blocksize_bits;
if (only_release_metadata) {
btrfs_delalloc_release_metadata(BTRFS_I(inode),
release_bytes);
release_bytes, true);
} else {
u64 __pos;
@ -1747,7 +1747,7 @@ again:
(dirty_pages << PAGE_SHIFT);
btrfs_delalloc_release_space(inode,
data_reserved, __pos,
release_bytes);
release_bytes, true);
}
}
@ -1760,7 +1760,8 @@ again:
if (extents_locked)
unlock_extent_cached(&BTRFS_I(inode)->io_tree,
lockstart, lockend, &cached_state);
btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes,
(ret != 0));
if (ret) {
btrfs_drop_pages(pages, num_pages);
break;
@ -1800,11 +1801,11 @@ again:
if (only_release_metadata) {
btrfs_end_write_no_snapshotting(root);
btrfs_delalloc_release_metadata(BTRFS_I(inode),
release_bytes);
release_bytes, true);
} else {
btrfs_delalloc_release_space(inode, data_reserved,
round_down(pos, fs_info->sectorsize),
release_bytes);
release_bytes, true);
}
}
@ -1997,8 +1998,6 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
{
struct btrfs_file_private *private = filp->private_data;
if (private && private->trans)
btrfs_ioctl_trans_end(filp);
if (private && private->filldir_buf)
kfree(private->filldir_buf);
kfree(private);
@ -2189,12 +2188,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
goto out;
}
/*
* ok we haven't committed the transaction yet, lets do a commit
*/
if (file->private_data)
btrfs_ioctl_trans_end(file);
/*
* We use start here because we will need to wait on the IO to complete
* in btrfs_sync_log, which could require joining a transaction (for
@ -2214,7 +2207,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
}
trans->sync = true;
ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx);
ret = btrfs_log_dentry_safe(trans, dentry, start, end, &ctx);
if (ret < 0) {
/* Fallthrough and commit/free transaction. */
ret = 1;
@ -2482,7 +2475,8 @@ static int btrfs_punch_hole_lock_range(struct inode *inode,
if ((!ordered ||
(ordered->file_offset + ordered->len <= lockstart ||
ordered->file_offset > lockend)) &&
!btrfs_page_exists_in_range(inode, lockstart, lockend)) {
!filemap_range_has_page(inode->i_mapping,
lockstart, lockend)) {
if (ordered)
btrfs_put_ordered_extent(ordered);
break;
@ -3378,7 +3372,7 @@ const struct file_operations btrfs_file_operations = {
.dedupe_file_range = btrfs_dedupe_file_range,
};
void btrfs_auto_defrag_exit(void)
void __cold btrfs_auto_defrag_exit(void)
{
kmem_cache_destroy(btrfs_inode_defrag_cachep);
}

Просмотреть файл

@ -3547,7 +3547,7 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
if (ret) {
if (release_metadata)
btrfs_delalloc_release_metadata(BTRFS_I(inode),
inode->i_size);
inode->i_size, true);
#ifdef DEBUG
btrfs_err(fs_info,
"failed to write free ino cache for root %llu",

Просмотреть файл

@ -1071,7 +1071,7 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
path->reada = 1;
path->reada = READA_FORWARD;
path2 = btrfs_alloc_path();
if (!path2) {
@ -1573,7 +1573,7 @@ int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
*/
path->skip_locking = 1;
path->search_commit_root = 1;
path->reada = 1;
path->reada = READA_FORWARD;
info = search_free_space_info(NULL, fs_info, block_group, path, 0);
if (IS_ERR(info)) {

Просмотреть файл

@ -1,54 +0,0 @@
/*
* Copyright (C) 2014 Filipe David Borba Manana <fdmanana@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <crypto/hash.h>
#include <linux/err.h>
#include "hash.h"
static struct crypto_shash *tfm;
int __init btrfs_hash_init(void)
{
tfm = crypto_alloc_shash("crc32c", 0, 0);
return PTR_ERR_OR_ZERO(tfm);
}
const char* btrfs_crc32c_impl(void)
{
return crypto_tfm_alg_driver_name(crypto_shash_tfm(tfm));
}
void btrfs_hash_exit(void)
{
crypto_free_shash(tfm);
}
u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length)
{
SHASH_DESC_ON_STACK(shash, tfm);
u32 *ctx = (u32 *)shash_desc_ctx(shash);
u32 retval;
int err;
shash->tfm = tfm;
shash->flags = 0;
*ctx = crc;
err = crypto_shash_update(shash, address, length);
BUG_ON(err);
retval = *ctx;
barrier_data(ctx);
return retval;
}

Просмотреть файл

@ -1,43 +0,0 @@
/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#ifndef __HASH__
#define __HASH__
int __init btrfs_hash_init(void);
void btrfs_hash_exit(void);
const char* btrfs_crc32c_impl(void);
u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length);
static inline u64 btrfs_name_hash(const char *name, int len)
{
return btrfs_crc32c((u32)~1, name, len);
}
/*
* Figure the key offset of an extended inode ref
*/
static inline u64 btrfs_extref_hash(u64 parent_objectid, const char *name,
int len)
{
return (u64) btrfs_crc32c(parent_objectid, name, len);
}
#endif

Просмотреть файл

@ -18,7 +18,6 @@
#include "ctree.h"
#include "disk-io.h"
#include "hash.h"
#include "transaction.h"
#include "print-tree.h"

Просмотреть файл

@ -500,12 +500,12 @@ again:
ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
prealloc, prealloc, &alloc_hint);
if (ret) {
btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc, true);
goto out_put;
}
ret = btrfs_write_out_ino_cache(root, trans, path, inode);
btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc, false);
out_put:
iput(inode);
out_release:

Просмотреть файл

@ -58,7 +58,6 @@
#include "free-space-cache.h"
#include "inode-map.h"
#include "backref.h"
#include "hash.h"
#include "props.h"
#include "qgroup.h"
#include "dedupe.h"
@ -102,7 +101,7 @@ static const unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
};
static int btrfs_setsize(struct inode *inode, struct iattr *attr);
static int btrfs_truncate(struct inode *inode);
static int btrfs_truncate(struct inode *inode, bool skip_writeback);
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
static noinline int cow_file_range(struct inode *inode,
struct page *locked_page,
@ -277,12 +276,12 @@ fail:
* does the checks required to make sure the data is small enough
* to fit as an inline extent.
*/
static noinline int cow_file_range_inline(struct btrfs_root *root,
struct inode *inode, u64 start,
static noinline int cow_file_range_inline(struct inode *inode, u64 start,
u64 end, size_t compressed_size,
int compress_type,
struct page **compressed_pages)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_trans_handle *trans;
u64 isize = i_size_read(inode);
@ -458,7 +457,6 @@ static noinline void compress_file_range(struct inode *inode,
int *num_added)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 blocksize = fs_info->sectorsize;
u64 actual_end;
u64 isize = i_size_read(inode);
@ -580,11 +578,11 @@ cont:
/* we didn't compress the entire range, try
* to make an uncompressed inline extent.
*/
ret = cow_file_range_inline(root, inode, start, end,
0, BTRFS_COMPRESS_NONE, NULL);
ret = cow_file_range_inline(inode, start, end, 0,
BTRFS_COMPRESS_NONE, NULL);
} else {
/* try making a compressed inline extent */
ret = cow_file_range_inline(root, inode, start, end,
ret = cow_file_range_inline(inode, start, end,
total_compressed,
compress_type, pages);
}
@ -961,7 +959,6 @@ static noinline int cow_file_range(struct inode *inode,
u64 alloc_hint = 0;
u64 num_bytes;
unsigned long ram_size;
u64 disk_num_bytes;
u64 cur_alloc_size = 0;
u64 blocksize = fs_info->sectorsize;
struct btrfs_key ins;
@ -979,14 +976,14 @@ static noinline int cow_file_range(struct inode *inode,
num_bytes = ALIGN(end - start + 1, blocksize);
num_bytes = max(blocksize, num_bytes);
disk_num_bytes = num_bytes;
ASSERT(num_bytes <= btrfs_super_total_bytes(fs_info->super_copy));
inode_should_defrag(BTRFS_I(inode), start, end, num_bytes, SZ_64K);
if (start == 0) {
/* lets try to make an inline extent */
ret = cow_file_range_inline(root, inode, start, end, 0,
BTRFS_COMPRESS_NONE, NULL);
ret = cow_file_range_inline(inode, start, end, 0,
BTRFS_COMPRESS_NONE, NULL);
if (ret == 0) {
/*
* We use DO_ACCOUNTING here because we need the
@ -1010,15 +1007,12 @@ static noinline int cow_file_range(struct inode *inode,
}
}
BUG_ON(disk_num_bytes >
btrfs_super_total_bytes(fs_info->super_copy));
alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
btrfs_drop_extent_cache(BTRFS_I(inode), start,
start + num_bytes - 1, 0);
while (disk_num_bytes > 0) {
cur_alloc_size = disk_num_bytes;
while (num_bytes > 0) {
cur_alloc_size = num_bytes;
ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
fs_info->sectorsize, 0, alloc_hint,
&ins, 1, 1);
@ -1082,11 +1076,10 @@ static noinline int cow_file_range(struct inode *inode,
delalloc_end, locked_page,
EXTENT_LOCKED | EXTENT_DELALLOC,
page_ops);
if (disk_num_bytes < cur_alloc_size)
disk_num_bytes = 0;
if (num_bytes < cur_alloc_size)
num_bytes = 0;
else
disk_num_bytes -= cur_alloc_size;
num_bytes -= cur_alloc_size;
num_bytes -= cur_alloc_size;
alloc_hint = ins.objectid + ins.offset;
start += cur_alloc_size;
extent_reserved = false;
@ -1262,6 +1255,8 @@ static noinline int csum_exist_in_range(struct btrfs_fs_info *fs_info,
list_del(&sums->list);
kfree(sums);
}
if (ret < 0)
return ret;
return 1;
}
@ -1394,10 +1389,23 @@ next_slot:
goto out_check;
if (btrfs_extent_readonly(fs_info, disk_bytenr))
goto out_check;
if (btrfs_cross_ref_exist(root, ino,
found_key.offset -
extent_offset, disk_bytenr))
ret = btrfs_cross_ref_exist(root, ino,
found_key.offset -
extent_offset, disk_bytenr);
if (ret) {
/*
* ret could be -EIO if the above fails to read
* metadata.
*/
if (ret < 0) {
if (cow_start != (u64)-1)
cur_offset = cow_start;
goto error;
}
WARN_ON_ONCE(nolock);
goto out_check;
}
disk_bytenr += extent_offset;
disk_bytenr += cur_offset - found_key.offset;
num_bytes = min(end + 1, extent_end) - cur_offset;
@ -1415,10 +1423,22 @@ next_slot:
* this ensure that csum for a given extent are
* either valid or do not exist.
*/
if (csum_exist_in_range(fs_info, disk_bytenr,
num_bytes)) {
ret = csum_exist_in_range(fs_info, disk_bytenr,
num_bytes);
if (ret) {
if (!nolock)
btrfs_end_write_no_snapshotting(root);
/*
* ret could be -EIO if the above fails to read
* metadata.
*/
if (ret < 0) {
if (cow_start != (u64)-1)
cur_offset = cow_start;
goto error;
}
WARN_ON_ONCE(nolock);
goto out_check;
}
if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr)) {
@ -1847,7 +1867,7 @@ static void btrfs_clear_bit_hook(void *private_data,
*/
if (*bits & EXTENT_CLEAR_META_RESV &&
root != fs_info->tree_root)
btrfs_delalloc_release_metadata(inode, len);
btrfs_delalloc_release_metadata(inode, len, false);
/* For sanity tests. */
if (btrfs_is_testing(fs_info))
@ -1921,8 +1941,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
* At IO completion time the cums attached on the ordered extent record
* are inserted into the btree
*/
static blk_status_t __btrfs_submit_bio_start(void *private_data, struct bio *bio,
int mirror_num, unsigned long bio_flags,
static blk_status_t btrfs_submit_bio_start(void *private_data, struct bio *bio,
u64 bio_offset)
{
struct inode *inode = private_data;
@ -1941,9 +1960,8 @@ static blk_status_t __btrfs_submit_bio_start(void *private_data, struct bio *bio
* At IO completion time the cums attached on the ordered extent record
* are inserted into the btree
*/
static blk_status_t __btrfs_submit_bio_done(void *private_data, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset)
static blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio,
int mirror_num)
{
struct inode *inode = private_data;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@ -2015,8 +2033,8 @@ static blk_status_t btrfs_submit_bio_hook(void *private_data, struct bio *bio,
/* we're doing a write, do the async checksumming */
ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, bio_flags,
bio_offset, inode,
__btrfs_submit_bio_start,
__btrfs_submit_bio_done);
btrfs_submit_bio_start,
btrfs_submit_bio_done);
goto out;
} else if (!skip_sum) {
ret = btrfs_csum_one_bio(inode, bio, 0, 0);
@ -2134,7 +2152,7 @@ again:
ClearPageChecked(page);
set_page_dirty(page);
btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, false);
out:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
&cached_state);
@ -2754,12 +2772,10 @@ static void relink_file_extents(struct new_sa_defrag_extent *new)
struct sa_defrag_extent_backref *backref;
struct sa_defrag_extent_backref *prev = NULL;
struct inode *inode;
struct btrfs_root *root;
struct rb_node *node;
int ret;
inode = new->inode;
root = BTRFS_I(inode)->root;
path = btrfs_alloc_path();
if (!path)
@ -3247,6 +3263,16 @@ static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
start, (size_t)(end - start + 1));
}
/*
* btrfs_add_delayed_iput - perform a delayed iput on @inode
*
* @inode: The inode we want to perform iput on
*
* This function uses the generic vfs_inode::i_count to track whether we should
* just decrement it (in case it's > 1) or if this is the last iput then link
* the inode to the delayed iput machinery. Delayed iputs are processed at
* transaction commit time/superblock commit/cleaner kthread.
*/
void btrfs_add_delayed_iput(struct inode *inode)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@ -3256,12 +3282,8 @@ void btrfs_add_delayed_iput(struct inode *inode)
return;
spin_lock(&fs_info->delayed_iput_lock);
if (binode->delayed_iput_count == 0) {
ASSERT(list_empty(&binode->delayed_iput));
list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs);
} else {
binode->delayed_iput_count++;
}
ASSERT(list_empty(&binode->delayed_iput));
list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs);
spin_unlock(&fs_info->delayed_iput_lock);
}
@ -3274,13 +3296,7 @@ void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
inode = list_first_entry(&fs_info->delayed_iputs,
struct btrfs_inode, delayed_iput);
if (inode->delayed_iput_count) {
inode->delayed_iput_count--;
list_move_tail(&inode->delayed_iput,
&fs_info->delayed_iputs);
} else {
list_del_init(&inode->delayed_iput);
}
list_del_init(&inode->delayed_iput);
spin_unlock(&fs_info->delayed_iput_lock);
iput(&inode->vfs_inode);
spin_lock(&fs_info->delayed_iput_lock);
@ -3350,7 +3366,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans,
struct btrfs_root *root = inode->root;
struct btrfs_block_rsv *block_rsv = NULL;
int reserve = 0;
int insert = 0;
bool insert = false;
int ret;
if (!root->orphan_block_rsv) {
@ -3360,7 +3376,16 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans,
return -ENOMEM;
}
if (!test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
&inode->runtime_flags))
insert = true;
if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
&inode->runtime_flags))
reserve = 1;
spin_lock(&root->orphan_lock);
/* If someone has created ->orphan_block_rsv, be happy to use it. */
if (!root->orphan_block_rsv) {
root->orphan_block_rsv = block_rsv;
} else if (block_rsv) {
@ -3368,26 +3393,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans,
block_rsv = NULL;
}
if (!test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
&inode->runtime_flags)) {
#if 0
/*
* For proper ENOSPC handling, we should do orphan
* cleanup when mounting. But this introduces backward
* compatibility issue.
*/
if (!xchg(&root->orphan_item_inserted, 1))
insert = 2;
else
insert = 1;
#endif
insert = 1;
if (insert)
atomic_inc(&root->orphan_inodes);
}
if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
&inode->runtime_flags))
reserve = 1;
spin_unlock(&root->orphan_lock);
/* grab metadata reservation from transaction handle */
@ -3411,7 +3418,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans,
}
/* insert an orphan item to track this unlinked/truncated file */
if (insert >= 1) {
if (insert) {
ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
if (ret) {
if (reserve) {
@ -3435,15 +3442,6 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans,
ret = 0;
}
/* insert an orphan item to track subvolume contains orphan files */
if (insert >= 2) {
ret = btrfs_insert_orphan_item(trans, fs_info->tree_root,
root->root_key.objectid);
if (ret && ret != -EEXIST) {
btrfs_abort_transaction(trans, ret);
return ret;
}
}
return 0;
}
@ -3644,7 +3642,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
goto out;
}
ret = btrfs_truncate(inode);
ret = btrfs_truncate(inode, false);
if (ret)
btrfs_orphan_del(NULL, BTRFS_I(inode));
} else {
@ -4711,7 +4709,6 @@ delete:
if (updates) {
trans->delayed_ref_updates = 0;
ret = btrfs_run_delayed_refs(trans,
fs_info,
updates * 2);
if (ret && !err)
err = ret;
@ -4751,8 +4748,7 @@ error:
unsigned long updates = trans->delayed_ref_updates;
if (updates) {
trans->delayed_ref_updates = 0;
ret = btrfs_run_delayed_refs(trans, fs_info,
updates * 2);
ret = btrfs_run_delayed_refs(trans, updates * 2);
if (ret && !err)
err = ret;
}
@ -4806,8 +4802,8 @@ again:
page = find_or_create_page(mapping, index, mask);
if (!page) {
btrfs_delalloc_release_space(inode, data_reserved,
block_start, blocksize);
btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
block_start, blocksize, true);
btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, true);
ret = -ENOMEM;
goto out;
}
@ -4874,8 +4870,8 @@ again:
out_unlock:
if (ret)
btrfs_delalloc_release_space(inode, data_reserved, block_start,
blocksize);
btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
blocksize, true);
btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, (ret != 0));
unlock_page(page);
put_page(page);
out:
@ -5130,7 +5126,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
inode_dio_wait(inode);
btrfs_inode_resume_unlocked_dio(BTRFS_I(inode));
ret = btrfs_truncate(inode);
ret = btrfs_truncate(inode, newsize == oldsize);
if (ret && inode->i_nlink) {
int err;
@ -5466,7 +5462,8 @@ no_delete:
/*
* this returns the key found in the dir entry in the location pointer.
* If no dir entries were found, location->objectid is 0.
* If no dir entries were found, returns -ENOENT.
* If found a corrupted location in dir entry, returns -EUCLEAN.
*/
static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
struct btrfs_key *location)
@ -5484,27 +5481,27 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(BTRFS_I(dir)),
name, namelen, 0);
if (IS_ERR(di))
if (!di) {
ret = -ENOENT;
goto out;
}
if (IS_ERR(di)) {
ret = PTR_ERR(di);
if (IS_ERR_OR_NULL(di))
goto out_err;
goto out;
}
btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
if (location->type != BTRFS_INODE_ITEM_KEY &&
location->type != BTRFS_ROOT_ITEM_KEY) {
ret = -EUCLEAN;
btrfs_warn(root->fs_info,
"%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))",
__func__, name, btrfs_ino(BTRFS_I(dir)),
location->objectid, location->type, location->offset);
goto out_err;
}
out:
btrfs_free_path(path);
return ret;
out_err:
location->objectid = 0;
goto out;
}
/*
@ -5807,9 +5804,6 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
if (ret < 0)
return ERR_PTR(ret);
if (location.objectid == 0)
return ERR_PTR(-ENOENT);
if (location.type == BTRFS_INODE_ITEM_KEY) {
inode = btrfs_iget(dir->i_sb, &location, root, NULL);
return inode;
@ -7443,76 +7437,6 @@ out:
return ret;
}
bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end)
{
struct radix_tree_root *root = &inode->i_mapping->page_tree;
bool found = false;
void **pagep = NULL;
struct page *page = NULL;
unsigned long start_idx;
unsigned long end_idx;
start_idx = start >> PAGE_SHIFT;
/*
* end is the last byte in the last page. end == start is legal
*/
end_idx = end >> PAGE_SHIFT;
rcu_read_lock();
/* Most of the code in this while loop is lifted from
* find_get_page. It's been modified to begin searching from a
* page and return just the first page found in that range. If the
* found idx is less than or equal to the end idx then we know that
* a page exists. If no pages are found or if those pages are
* outside of the range then we're fine (yay!) */
while (page == NULL &&
radix_tree_gang_lookup_slot(root, &pagep, NULL, start_idx, 1)) {
page = radix_tree_deref_slot(pagep);
if (unlikely(!page))
break;
if (radix_tree_exception(page)) {
if (radix_tree_deref_retry(page)) {
page = NULL;
continue;
}
/*
* Otherwise, shmem/tmpfs must be storing a swap entry
* here as an exceptional entry: so return it without
* attempting to raise page count.
*/
page = NULL;
break; /* TODO: Is this relevant for this use case? */
}
if (!page_cache_get_speculative(page)) {
page = NULL;
continue;
}
/*
* Has the page moved?
* This is part of the lockless pagecache protocol. See
* include/linux/pagemap.h for details.
*/
if (unlikely(page != *pagep)) {
put_page(page);
page = NULL;
}
}
if (page) {
if (page->index <= end_idx)
found = true;
put_page(page);
}
rcu_read_unlock();
return found;
}
static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
struct extent_state **cached_state, int writing)
{
@ -7538,8 +7462,8 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
* get stale data.
*/
if (!ordered &&
(!writing ||
!btrfs_page_exists_in_range(inode, lockstart, lockend)))
(!writing || !filemap_range_has_page(inode->i_mapping,
lockstart, lockend)))
break;
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
@ -8270,9 +8194,8 @@ static void btrfs_endio_direct_write(struct bio *bio)
bio_put(bio);
}
static blk_status_t __btrfs_submit_bio_start_direct_io(void *private_data,
struct bio *bio, int mirror_num,
unsigned long bio_flags, u64 offset)
static blk_status_t btrfs_submit_bio_start_direct_io(void *private_data,
struct bio *bio, u64 offset)
{
struct inode *inode = private_data;
blk_status_t ret;
@ -8298,13 +8221,13 @@ static void btrfs_end_dio_bio(struct bio *bio)
err = dip->subio_endio(dip->inode, btrfs_io_bio(bio), err);
if (err) {
dip->errors = 1;
/*
* before atomic variable goto zero, we must make sure
* dip->errors is perceived to be set.
* We want to perceive the errors flag being set before
* decrementing the reference count. We don't need a barrier
* since atomic operations with a return value are fully
* ordered as per atomic_t.txt
*/
smp_mb__before_atomic();
dip->errors = 1;
}
/* if there are more bios still pending for this dio, just exit */
@ -8352,9 +8275,8 @@ static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode,
return 0;
}
static inline blk_status_t
__btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset,
int async_submit)
static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
struct inode *inode, u64 file_offset, int async_submit)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_dio_private *dip = bio->bi_private;
@ -8377,8 +8299,8 @@ __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset,
if (write && async_submit) {
ret = btrfs_wq_submit_bio(fs_info, bio, 0, 0,
file_offset, inode,
__btrfs_submit_bio_start_direct_io,
__btrfs_submit_bio_done);
btrfs_submit_bio_start_direct_io,
btrfs_submit_bio_done);
goto err;
} else if (write) {
/*
@ -8464,7 +8386,7 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip)
*/
atomic_inc(&dip->pending_bios);
status = __btrfs_submit_dio_bio(bio, inode, file_offset,
status = btrfs_submit_dio_bio(bio, inode, file_offset,
async_submit);
if (status) {
bio_put(bio);
@ -8484,7 +8406,7 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip)
} while (submit_len > 0);
submit:
status = __btrfs_submit_dio_bio(bio, inode, file_offset, async_submit);
status = btrfs_submit_dio_bio(bio, inode, file_offset, async_submit);
if (!status)
return 0;
@ -8492,10 +8414,11 @@ submit:
out_err:
dip->errors = 1;
/*
* before atomic variable goto zero, we must
* make sure dip->errors is perceived to be set.
* Before atomic variable goto zero, we must make sure dip->errors is
* perceived to be set. This ordering is ensured by the fact that an
* atomic operations with a return value are fully ordered as per
* atomic_t.txt
*/
smp_mb__before_atomic();
if (atomic_dec_and_test(&dip->pending_bios))
bio_io_error(dip->orig_bio);
@ -8713,7 +8636,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
if (ret < 0 && ret != -EIOCBQUEUED) {
if (dio_data.reserve)
btrfs_delalloc_release_space(inode, data_reserved,
offset, dio_data.reserve);
offset, dio_data.reserve, true);
/*
* On error we might have left some ordered extents
* without submitting corresponding bios for them, so
@ -8729,8 +8652,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
false);
} else if (ret >= 0 && (size_t)ret < count)
btrfs_delalloc_release_space(inode, data_reserved,
offset, count - (size_t)ret);
btrfs_delalloc_release_extents(BTRFS_I(inode), count);
offset, count - (size_t)ret, true);
btrfs_delalloc_release_extents(BTRFS_I(inode), count, false);
}
out:
if (wakeup)
@ -9045,7 +8968,8 @@ again:
if (reserved_space < PAGE_SIZE) {
end = page_start + reserved_space - 1;
btrfs_delalloc_release_space(inode, data_reserved,
page_start, PAGE_SIZE - reserved_space);
page_start, PAGE_SIZE - reserved_space,
true);
}
}
@ -9095,23 +9019,23 @@ again:
out_unlock:
if (!ret) {
btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, true);
sb_end_pagefault(inode->i_sb);
extent_changeset_free(data_reserved);
return VM_FAULT_LOCKED;
}
unlock_page(page);
out:
btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, (ret != 0));
btrfs_delalloc_release_space(inode, data_reserved, page_start,
reserved_space);
reserved_space, (ret != 0));
out_noreserve:
sb_end_pagefault(inode->i_sb);
extent_changeset_free(data_reserved);
return ret;
}
static int btrfs_truncate(struct inode *inode)
static int btrfs_truncate(struct inode *inode, bool skip_writeback)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
@ -9122,10 +9046,12 @@ static int btrfs_truncate(struct inode *inode)
u64 mask = fs_info->sectorsize - 1;
u64 min_size = btrfs_calc_trunc_metadata_size(fs_info, 1);
ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask),
(u64)-1);
if (ret)
return ret;
if (!skip_writeback) {
ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask),
(u64)-1);
if (ret)
return ret;
}
/*
* Yes ladies and gentlemen, this is indeed ugly. The fact is we have
@ -9335,7 +9261,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->dir_index = 0;
ei->last_unlink_trans = 0;
ei->last_log_commit = 0;
ei->delayed_iput_count = 0;
spin_lock_init(&ei->lock);
ei->outstanding_extents = 0;
@ -9455,7 +9380,7 @@ static void init_once(void *foo)
inode_init_once(&ei->vfs_inode);
}
void btrfs_destroy_cachep(void)
void __cold btrfs_destroy_cachep(void)
{
/*
* Make sure all delayed rcu free inodes are flushed before we

Просмотреть файл

@ -106,7 +106,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
int no_time_update);
/* Mask out flags that are inappropriate for the given type of inode. */
static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
static unsigned int btrfs_mask_flags(umode_t mode, unsigned int flags)
{
if (S_ISDIR(mode))
return flags;
@ -1197,7 +1197,7 @@ again:
spin_unlock(&BTRFS_I(inode)->lock);
btrfs_delalloc_release_space(inode, data_reserved,
start_index << PAGE_SHIFT,
(page_cnt - i_done) << PAGE_SHIFT);
(page_cnt - i_done) << PAGE_SHIFT, true);
}
@ -1215,7 +1215,8 @@ again:
unlock_page(pages[i]);
put_page(pages[i]);
}
btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT,
false);
extent_changeset_free(data_reserved);
return i_done;
out:
@ -1225,8 +1226,9 @@ out:
}
btrfs_delalloc_release_space(inode, data_reserved,
start_index << PAGE_SHIFT,
page_cnt << PAGE_SHIFT);
btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
page_cnt << PAGE_SHIFT, true);
btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT,
true);
extent_changeset_free(data_reserved);
return ret;
@ -2600,7 +2602,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
range->len = (u64)-1;
}
ret = btrfs_defrag_file(file_inode(file), file,
range, 0, 0);
range, BTRFS_OLDEST_GENERATION, 0);
if (ret > 0)
ret = 0;
kfree(range);
@ -3936,73 +3938,6 @@ int btrfs_clone_file_range(struct file *src_file, loff_t off,
return btrfs_clone_files(dst_file, src_file, off, len, destoff);
}
/*
* there are many ways the trans_start and trans_end ioctls can lead
* to deadlocks. They should only be used by applications that
* basically own the machine, and have a very in depth understanding
* of all the possible deadlocks and enospc problems.
*/
static long btrfs_ioctl_trans_start(struct file *file)
{
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
struct btrfs_file_private *private;
int ret;
static bool warned = false;
ret = -EPERM;
if (!capable(CAP_SYS_ADMIN))
goto out;
if (!warned) {
btrfs_warn(fs_info,
"Userspace transaction mechanism is considered "
"deprecated and slated to be removed in 4.17. "
"If you have a valid use case please "
"speak up on the mailing list");
WARN_ON(1);
warned = true;
}
ret = -EINPROGRESS;
private = file->private_data;
if (private && private->trans)
goto out;
if (!private) {
private = kzalloc(sizeof(struct btrfs_file_private),
GFP_KERNEL);
if (!private)
return -ENOMEM;
file->private_data = private;
}
ret = -EROFS;
if (btrfs_root_readonly(root))
goto out;
ret = mnt_want_write_file(file);
if (ret)
goto out;
atomic_inc(&fs_info->open_ioctl_trans);
ret = -ENOMEM;
trans = btrfs_start_ioctl_transaction(root);
if (IS_ERR(trans))
goto out_drop;
private->trans = trans;
return 0;
out_drop:
atomic_dec(&fs_info->open_ioctl_trans);
mnt_drop_write_file(file);
out:
return ret;
}
static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
{
struct inode *inode = file_inode(file);
@ -4244,30 +4179,6 @@ out:
return ret;
}
/*
* there are many ways the trans_start and trans_end ioctls can lead
* to deadlocks. They should only be used by applications that
* basically own the machine, and have a very in depth understanding
* of all the possible deadlocks and enospc problems.
*/
long btrfs_ioctl_trans_end(struct file *file)
{
struct inode *inode = file_inode(file);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_file_private *private = file->private_data;
if (!private || !private->trans)
return -EINVAL;
btrfs_end_transaction(private->trans);
private->trans = NULL;
atomic_dec(&root->fs_info->open_ioctl_trans);
mnt_drop_write_file(file);
return 0;
}
static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root,
void __user *argp)
{
@ -4429,7 +4340,8 @@ static long btrfs_ioctl_dev_replace(struct btrfs_fs_info *fs_info,
ret = 0;
break;
case BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL:
ret = btrfs_dev_replace_cancel(fs_info, p);
p->result = btrfs_dev_replace_cancel(fs_info);
ret = 0;
break;
default:
ret = -EINVAL;
@ -5138,10 +5050,17 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
received_uuid_changed = memcmp(root_item->received_uuid, sa->uuid,
BTRFS_UUID_SIZE);
if (received_uuid_changed &&
!btrfs_is_empty_uuid(root_item->received_uuid))
btrfs_uuid_tree_rem(trans, fs_info, root_item->received_uuid,
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
root->root_key.objectid);
!btrfs_is_empty_uuid(root_item->received_uuid)) {
ret = btrfs_uuid_tree_rem(trans, fs_info,
root_item->received_uuid,
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
root->root_key.objectid);
if (ret && ret != -ENOENT) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
goto out;
}
}
memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE);
btrfs_set_root_stransid(root_item, sa->stransid);
btrfs_set_root_rtransid(root_item, sa->rtransid);
@ -5574,10 +5493,6 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_dev_info(fs_info, argp);
case BTRFS_IOC_BALANCE:
return btrfs_ioctl_balance(file, NULL);
case BTRFS_IOC_TRANS_START:
return btrfs_ioctl_trans_start(file);
case BTRFS_IOC_TRANS_END:
return btrfs_ioctl_trans_end(file);
case BTRFS_IOC_TREE_SEARCH:
return btrfs_ioctl_tree_search(file, argp);
case BTRFS_IOC_TREE_SEARCH_V2:

Просмотреть файл

@ -290,7 +290,7 @@ void btrfs_tree_unlock(struct extent_buffer *eb)
/*
* Make sure counter is updated before we wake up waiters.
*/
smp_mb();
smp_mb__after_atomic();
if (waitqueue_active(&eb->write_lock_wq))
wake_up(&eb->write_lock_wq);
} else {

Просмотреть файл

@ -382,14 +382,12 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
struct workspace *workspace = list_entry(ws, struct workspace, list);
size_t in_len;
size_t out_len;
size_t tot_len;
int ret = 0;
char *kaddr;
unsigned long bytes;
BUG_ON(srclen < LZO_LEN);
tot_len = read_compress_length(data_in);
data_in += LZO_LEN;
in_len = read_compress_length(data_in);

Просмотреть файл

@ -610,7 +610,7 @@ void btrfs_remove_ordered_extent(struct inode *inode,
btrfs_mod_outstanding_extents(btrfs_inode, -1);
spin_unlock(&btrfs_inode->lock);
if (root != fs_info->tree_root)
btrfs_delalloc_release_metadata(btrfs_inode, entry->len);
btrfs_delalloc_release_metadata(btrfs_inode, entry->len, false);
tree = &btrfs_inode->ordered_tree;
spin_lock_irq(&tree->lock);
@ -1154,7 +1154,7 @@ int __init ordered_data_init(void)
return 0;
}
void ordered_data_exit(void)
void __cold ordered_data_exit(void)
{
kmem_cache_destroy(btrfs_ordered_extent_cache);
}

Просмотреть файл

@ -151,7 +151,9 @@ static inline int btrfs_ordered_sum_size(struct btrfs_fs_info *fs_info,
unsigned long bytes)
{
int num_sectors = (int)DIV_ROUND_UP(bytes, fs_info->sectorsize);
return sizeof(struct btrfs_ordered_sum) + num_sectors * sizeof(u32);
int csum_size = btrfs_super_csum_size(fs_info->super_copy);
return sizeof(struct btrfs_ordered_sum) + num_sectors * csum_size;
}
static inline void
@ -215,5 +217,5 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *log, u64 transid);
void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid);
int __init ordered_data_init(void);
void ordered_data_exit(void);
void __cold ordered_data_exit(void);
#endif

Просмотреть файл

@ -365,9 +365,13 @@ void btrfs_print_tree(struct extent_buffer *c)
btrfs_node_blockptr(c, i));
}
for (i = 0; i < nr; i++) {
struct extent_buffer *next = read_tree_block(fs_info,
btrfs_node_blockptr(c, i),
btrfs_node_ptr_generation(c, i));
struct btrfs_key first_key;
struct extent_buffer *next;
btrfs_node_key_to_cpu(c, &first_key, i);
next = read_tree_block(fs_info, btrfs_node_blockptr(c, i),
btrfs_node_ptr_generation(c, i),
level - 1, &first_key);
if (IS_ERR(next)) {
continue;
} else if (!extent_buffer_uptodate(next)) {

Просмотреть файл

@ -19,8 +19,8 @@
#include <linux/hashtable.h>
#include "props.h"
#include "btrfs_inode.h"
#include "hash.h"
#include "transaction.h"
#include "ctree.h"
#include "xattr.h"
#include "compression.h"
@ -116,7 +116,7 @@ static int __btrfs_set_prop(struct btrfs_trans_handle *trans,
return -EINVAL;
if (value_len == 0) {
ret = __btrfs_setxattr(trans, inode, handler->xattr_name,
ret = btrfs_setxattr(trans, inode, handler->xattr_name,
NULL, 0, flags);
if (ret)
return ret;
@ -130,13 +130,13 @@ static int __btrfs_set_prop(struct btrfs_trans_handle *trans,
ret = handler->validate(value, value_len);
if (ret)
return ret;
ret = __btrfs_setxattr(trans, inode, handler->xattr_name,
ret = btrfs_setxattr(trans, inode, handler->xattr_name,
value, value_len, flags);
if (ret)
return ret;
ret = handler->apply(inode, value, value_len);
if (ret) {
__btrfs_setxattr(trans, inode, handler->xattr_name,
btrfs_setxattr(trans, inode, handler->xattr_name,
NULL, 0, flags);
return ret;
}

Просмотреть файл

@ -47,6 +47,82 @@
* - check all ioctl parameters
*/
/*
* Helpers to access qgroup reservation
*
* Callers should ensure the lock context and type are valid
*/
static u64 qgroup_rsv_total(const struct btrfs_qgroup *qgroup)
{
u64 ret = 0;
int i;
for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
ret += qgroup->rsv.values[i];
return ret;
}
#ifdef CONFIG_BTRFS_DEBUG
static const char *qgroup_rsv_type_str(enum btrfs_qgroup_rsv_type type)
{
if (type == BTRFS_QGROUP_RSV_DATA)
return "data";
if (type == BTRFS_QGROUP_RSV_META_PERTRANS)
return "meta_pertrans";
if (type == BTRFS_QGROUP_RSV_META_PREALLOC)
return "meta_prealloc";
return NULL;
}
#endif
static void qgroup_rsv_add(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup *qgroup, u64 num_bytes,
enum btrfs_qgroup_rsv_type type)
{
trace_qgroup_update_reserve(fs_info, qgroup, num_bytes, type);
qgroup->rsv.values[type] += num_bytes;
}
static void qgroup_rsv_release(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup *qgroup, u64 num_bytes,
enum btrfs_qgroup_rsv_type type)
{
trace_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes, type);
if (qgroup->rsv.values[type] >= num_bytes) {
qgroup->rsv.values[type] -= num_bytes;
return;
}
#ifdef CONFIG_BTRFS_DEBUG
WARN_RATELIMIT(1,
"qgroup %llu %s reserved space underflow, have %llu to free %llu",
qgroup->qgroupid, qgroup_rsv_type_str(type),
qgroup->rsv.values[type], num_bytes);
#endif
qgroup->rsv.values[type] = 0;
}
static void qgroup_rsv_add_by_qgroup(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup *dest,
struct btrfs_qgroup *src)
{
int i;
for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
qgroup_rsv_add(fs_info, dest, src->rsv.values[i], i);
}
static void qgroup_rsv_release_by_qgroup(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup *dest,
struct btrfs_qgroup *src)
{
int i;
for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
qgroup_rsv_release(fs_info, dest, src->rsv.values[i], i);
}
static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq,
int mod)
{
@ -826,10 +902,8 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
int slot;
mutex_lock(&fs_info->qgroup_ioctl_lock);
if (fs_info->quota_root) {
set_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags);
if (fs_info->quota_root)
goto out;
}
fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
if (!fs_info->qgroup_ulist) {
@ -923,8 +997,15 @@ out_add_root:
}
spin_lock(&fs_info->qgroup_lock);
fs_info->quota_root = quota_root;
set_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags);
set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
spin_unlock(&fs_info->qgroup_lock);
ret = qgroup_rescan_init(fs_info, 0, 1);
if (!ret) {
qgroup_rescan_zero_tracking(fs_info);
btrfs_queue_work(fs_info->qgroup_rescan_workers,
&fs_info->qgroup_rescan_work);
}
out_free_path:
btrfs_free_path(path);
out_free_root:
@ -991,33 +1072,29 @@ static void qgroup_dirty(struct btrfs_fs_info *fs_info,
list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
}
static void report_reserved_underflow(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup *qgroup,
u64 num_bytes)
{
#ifdef CONFIG_BTRFS_DEBUG
WARN_ON(qgroup->reserved < num_bytes);
btrfs_debug(fs_info,
"qgroup %llu reserved space underflow, have: %llu, to free: %llu",
qgroup->qgroupid, qgroup->reserved, num_bytes);
#endif
qgroup->reserved = 0;
}
/*
* The easy accounting, if we are adding/removing the only ref for an extent
* then this qgroup and all of the parent qgroups get their reference and
* exclusive counts adjusted.
* The easy accounting, we're updating qgroup relationship whose child qgroup
* only has exclusive extents.
*
* In this case, all exclsuive extents will also be exlusive for parent, so
* excl/rfer just get added/removed.
*
* So is qgroup reservation space, which should also be added/removed to
* parent.
* Or when child tries to release reservation space, parent will underflow its
* reservation (for relationship adding case).
*
* Caller should hold fs_info->qgroup_lock.
*/
static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
struct ulist *tmp, u64 ref_root,
u64 num_bytes, int sign)
struct btrfs_qgroup *src, int sign)
{
struct btrfs_qgroup *qgroup;
struct btrfs_qgroup_list *glist;
struct ulist_node *unode;
struct ulist_iterator uiter;
u64 num_bytes = src->excl;
int ret = 0;
qgroup = find_qgroup_rb(fs_info, ref_root);
@ -1030,13 +1107,11 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
WARN_ON(sign < 0 && qgroup->excl < num_bytes);
qgroup->excl += sign * num_bytes;
qgroup->excl_cmpr += sign * num_bytes;
if (sign > 0) {
trace_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes);
if (qgroup->reserved < num_bytes)
report_reserved_underflow(fs_info, qgroup, num_bytes);
else
qgroup->reserved -= num_bytes;
}
if (sign > 0)
qgroup_rsv_add_by_qgroup(fs_info, qgroup, src);
else
qgroup_rsv_release_by_qgroup(fs_info, qgroup, src);
qgroup_dirty(fs_info, qgroup);
@ -1056,15 +1131,10 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
qgroup->rfer_cmpr += sign * num_bytes;
WARN_ON(sign < 0 && qgroup->excl < num_bytes);
qgroup->excl += sign * num_bytes;
if (sign > 0) {
trace_qgroup_update_reserve(fs_info, qgroup,
-(s64)num_bytes);
if (qgroup->reserved < num_bytes)
report_reserved_underflow(fs_info, qgroup,
num_bytes);
else
qgroup->reserved -= num_bytes;
}
if (sign > 0)
qgroup_rsv_add_by_qgroup(fs_info, qgroup, src);
else
qgroup_rsv_release_by_qgroup(fs_info, qgroup, src);
qgroup->excl_cmpr += sign * num_bytes;
qgroup_dirty(fs_info, qgroup);
@ -1107,7 +1177,7 @@ static int quick_update_accounting(struct btrfs_fs_info *fs_info,
if (qgroup->excl == qgroup->rfer) {
ret = 0;
err = __qgroup_excl_accounting(fs_info, tmp, dst,
qgroup->excl, sign);
qgroup, sign);
if (err < 0) {
ret = err;
goto out;
@ -1414,7 +1484,7 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup_extent_record *entry;
u64 bytenr = record->bytenr;
assert_spin_locked(&delayed_refs->lock);
lockdep_assert_held(&delayed_refs->lock);
trace_btrfs_qgroup_trace_extent(fs_info, record);
while (*p) {
@ -1614,7 +1684,7 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
return 0;
if (!extent_buffer_uptodate(root_eb)) {
ret = btrfs_read_buffer(root_eb, root_gen);
ret = btrfs_read_buffer(root_eb, root_gen, root_level, NULL);
if (ret)
goto out;
}
@ -1645,6 +1715,7 @@ walk_down:
level = root_level;
while (level >= 0) {
if (path->nodes[level] == NULL) {
struct btrfs_key first_key;
int parent_slot;
u64 child_gen;
u64 child_bytenr;
@ -1657,8 +1728,10 @@ walk_down:
parent_slot = path->slots[level + 1];
child_bytenr = btrfs_node_blockptr(eb, parent_slot);
child_gen = btrfs_node_ptr_generation(eb, parent_slot);
btrfs_node_key_to_cpu(eb, &first_key, parent_slot);
eb = read_tree_block(fs_info, child_bytenr, child_gen);
eb = read_tree_block(fs_info, child_bytenr, child_gen,
level, &first_key);
if (IS_ERR(eb)) {
ret = PTR_ERR(eb);
goto out;
@ -2009,9 +2082,9 @@ out_free:
return ret;
}
int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_qgroup_extent_record *record;
struct btrfs_delayed_ref_root *delayed_refs;
struct ulist *new_roots = NULL;
@ -2080,17 +2153,9 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
{
struct btrfs_root *quota_root = fs_info->quota_root;
int ret = 0;
int start_rescan_worker = 0;
if (!quota_root)
goto out;
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
test_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags))
start_rescan_worker = 1;
if (test_and_clear_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags))
set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
return ret;
spin_lock(&fs_info->qgroup_lock);
while (!list_empty(&fs_info->dirty_qgroups)) {
@ -2119,18 +2184,6 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
if (ret)
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
if (!ret && start_rescan_worker) {
ret = qgroup_rescan_init(fs_info, 0, 1);
if (!ret) {
qgroup_rescan_zero_tracking(fs_info);
btrfs_queue_work(fs_info->qgroup_rescan_workers,
&fs_info->qgroup_rescan_work);
}
ret = 0;
}
out:
return ret;
}
@ -2338,24 +2391,24 @@ out:
static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
{
if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
qg->reserved + (s64)qg->rfer + num_bytes > qg->max_rfer)
qgroup_rsv_total(qg) + (s64)qg->rfer + num_bytes > qg->max_rfer)
return false;
if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
qg->reserved + (s64)qg->excl + num_bytes > qg->max_excl)
qgroup_rsv_total(qg) + (s64)qg->excl + num_bytes > qg->max_excl)
return false;
return true;
}
static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
enum btrfs_qgroup_rsv_type type)
{
struct btrfs_root *quota_root;
struct btrfs_qgroup *qgroup;
struct btrfs_fs_info *fs_info = root->fs_info;
u64 ref_root = root->root_key.objectid;
int ret = 0;
int retried = 0;
struct ulist_node *unode;
struct ulist_iterator uiter;
@ -2369,7 +2422,6 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
capable(CAP_SYS_RESOURCE))
enforce = false;
retry:
spin_lock(&fs_info->qgroup_lock);
quota_root = fs_info->quota_root;
if (!quota_root)
@ -2385,7 +2437,7 @@ retry:
*/
ulist_reinit(fs_info->qgroup_ulist);
ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
(uintptr_t)qgroup, GFP_ATOMIC);
qgroup_to_aux(qgroup), GFP_ATOMIC);
if (ret < 0)
goto out;
ULIST_ITER_INIT(&uiter);
@ -2396,27 +2448,6 @@ retry:
qg = unode_aux_to_qgroup(unode);
if (enforce && !qgroup_check_limits(qg, num_bytes)) {
/*
* Commit the tree and retry, since we may have
* deletions which would free up space.
*/
if (!retried && qg->reserved > 0) {
struct btrfs_trans_handle *trans;
spin_unlock(&fs_info->qgroup_lock);
ret = btrfs_start_delalloc_inodes(root, 0);
if (ret)
return ret;
btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return PTR_ERR(trans);
ret = btrfs_commit_transaction(trans);
if (ret)
return ret;
retried++;
goto retry;
}
ret = -EDQUOT;
goto out;
}
@ -2424,7 +2455,7 @@ retry:
list_for_each_entry(glist, &qg->groups, next_group) {
ret = ulist_add(fs_info->qgroup_ulist,
glist->group->qgroupid,
(uintptr_t)glist->group, GFP_ATOMIC);
qgroup_to_aux(glist->group), GFP_ATOMIC);
if (ret < 0)
goto out;
}
@ -2439,8 +2470,8 @@ retry:
qg = unode_aux_to_qgroup(unode);
trace_qgroup_update_reserve(fs_info, qg, num_bytes);
qg->reserved += num_bytes;
trace_qgroup_update_reserve(fs_info, qg, num_bytes, type);
qgroup_rsv_add(fs_info, qg, num_bytes, type);
}
out:
@ -2448,8 +2479,18 @@ out:
return ret;
}
/*
* Free @num_bytes of reserved space with @type for qgroup. (Normally level 0
* qgroup).
*
* Will handle all higher level qgroup too.
*
* NOTE: If @num_bytes is (u64)-1, this means to free all bytes of this qgroup.
* This special case is only used for META_PERTRANS type.
*/
void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
u64 ref_root, u64 num_bytes)
u64 ref_root, u64 num_bytes,
enum btrfs_qgroup_rsv_type type)
{
struct btrfs_root *quota_root;
struct btrfs_qgroup *qgroup;
@ -2463,6 +2504,10 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
if (num_bytes == 0)
return;
if (num_bytes == (u64)-1 && type != BTRFS_QGROUP_RSV_META_PERTRANS) {
WARN(1, "%s: Invalid type to free", __func__);
return;
}
spin_lock(&fs_info->qgroup_lock);
quota_root = fs_info->quota_root;
@ -2473,9 +2518,16 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
if (!qgroup)
goto out;
if (num_bytes == (u64)-1)
/*
* We're freeing all pertrans rsv, get reserved value from
* level 0 qgroup as real num_bytes to free.
*/
num_bytes = qgroup->rsv.values[type];
ulist_reinit(fs_info->qgroup_ulist);
ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
(uintptr_t)qgroup, GFP_ATOMIC);
qgroup_to_aux(qgroup), GFP_ATOMIC);
if (ret < 0)
goto out;
ULIST_ITER_INIT(&uiter);
@ -2485,16 +2537,13 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
qg = unode_aux_to_qgroup(unode);
trace_qgroup_update_reserve(fs_info, qg, -(s64)num_bytes);
if (qg->reserved < num_bytes)
report_reserved_underflow(fs_info, qg, num_bytes);
else
qg->reserved -= num_bytes;
trace_qgroup_update_reserve(fs_info, qg, -(s64)num_bytes, type);
qgroup_rsv_release(fs_info, qg, num_bytes, type);
list_for_each_entry(glist, &qg->groups, next_group) {
ret = ulist_add(fs_info->qgroup_ulist,
glist->group->qgroupid,
(uintptr_t)glist->group, GFP_ATOMIC);
qgroup_to_aux(glist->group), GFP_ATOMIC);
if (ret < 0)
goto out;
}
@ -2877,7 +2926,7 @@ int btrfs_qgroup_reserve_data(struct inode *inode,
to_reserve, QGROUP_RESERVE);
if (ret < 0)
goto cleanup;
ret = qgroup_reserve(root, to_reserve, true);
ret = qgroup_reserve(root, to_reserve, true, BTRFS_QGROUP_RSV_DATA);
if (ret < 0)
goto cleanup;
@ -2940,7 +2989,8 @@ static int qgroup_free_reserved_data(struct inode *inode,
goto out;
freed += changeset.bytes_changed;
}
btrfs_qgroup_free_refroot(root->fs_info, root->objectid, freed);
btrfs_qgroup_free_refroot(root->fs_info, root->objectid, freed,
BTRFS_QGROUP_RSV_DATA);
ret = freed;
out:
extent_changeset_release(&changeset);
@ -2972,7 +3022,7 @@ static int __btrfs_qgroup_release_data(struct inode *inode,
if (free)
btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
BTRFS_I(inode)->root->objectid,
changeset.bytes_changed);
changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
ret = changeset.bytes_changed;
out:
extent_changeset_release(&changeset);
@ -3017,8 +3067,48 @@ int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len)
return __btrfs_qgroup_release_data(inode, NULL, start, len, 0);
}
int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
bool enforce)
static void add_root_meta_rsv(struct btrfs_root *root, int num_bytes,
enum btrfs_qgroup_rsv_type type)
{
if (type != BTRFS_QGROUP_RSV_META_PREALLOC &&
type != BTRFS_QGROUP_RSV_META_PERTRANS)
return;
if (num_bytes == 0)
return;
spin_lock(&root->qgroup_meta_rsv_lock);
if (type == BTRFS_QGROUP_RSV_META_PREALLOC)
root->qgroup_meta_rsv_prealloc += num_bytes;
else
root->qgroup_meta_rsv_pertrans += num_bytes;
spin_unlock(&root->qgroup_meta_rsv_lock);
}
static int sub_root_meta_rsv(struct btrfs_root *root, int num_bytes,
enum btrfs_qgroup_rsv_type type)
{
if (type != BTRFS_QGROUP_RSV_META_PREALLOC &&
type != BTRFS_QGROUP_RSV_META_PERTRANS)
return 0;
if (num_bytes == 0)
return 0;
spin_lock(&root->qgroup_meta_rsv_lock);
if (type == BTRFS_QGROUP_RSV_META_PREALLOC) {
num_bytes = min_t(u64, root->qgroup_meta_rsv_prealloc,
num_bytes);
root->qgroup_meta_rsv_prealloc -= num_bytes;
} else {
num_bytes = min_t(u64, root->qgroup_meta_rsv_pertrans,
num_bytes);
root->qgroup_meta_rsv_pertrans -= num_bytes;
}
spin_unlock(&root->qgroup_meta_rsv_lock);
return num_bytes;
}
int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
enum btrfs_qgroup_rsv_type type, bool enforce)
{
struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
@ -3028,31 +3118,39 @@ int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
return 0;
BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
trace_qgroup_meta_reserve(root, (s64)num_bytes);
ret = qgroup_reserve(root, num_bytes, enforce);
trace_qgroup_meta_reserve(root, type, (s64)num_bytes);
ret = qgroup_reserve(root, num_bytes, enforce, type);
if (ret < 0)
return ret;
atomic64_add(num_bytes, &root->qgroup_meta_rsv);
/*
* Record what we have reserved into root.
*
* To avoid quota disabled->enabled underflow.
* In that case, we may try to free space we haven't reserved
* (since quota was disabled), so record what we reserved into root.
* And ensure later release won't underflow this number.
*/
add_root_meta_rsv(root, num_bytes, type);
return ret;
}
void btrfs_qgroup_free_meta_all(struct btrfs_root *root)
void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root)
{
struct btrfs_fs_info *fs_info = root->fs_info;
u64 reserved;
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
!is_fstree(root->objectid))
return;
reserved = atomic64_xchg(&root->qgroup_meta_rsv, 0);
if (reserved == 0)
return;
trace_qgroup_meta_reserve(root, -(s64)reserved);
btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved);
/* TODO: Update trace point to handle such free */
trace_qgroup_meta_free_all_pertrans(root);
/* Special value -1 means to free all reserved space */
btrfs_qgroup_free_refroot(fs_info, root->objectid, (u64)-1,
BTRFS_QGROUP_RSV_META_PERTRANS);
}
void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes)
void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes,
enum btrfs_qgroup_rsv_type type)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@ -3060,11 +3158,75 @@ void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes)
!is_fstree(root->objectid))
return;
/*
* reservation for META_PREALLOC can happen before quota is enabled,
* which can lead to underflow.
* Here ensure we will only free what we really have reserved.
*/
num_bytes = sub_root_meta_rsv(root, num_bytes, type);
BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
WARN_ON(atomic64_read(&root->qgroup_meta_rsv) < num_bytes);
atomic64_sub(num_bytes, &root->qgroup_meta_rsv);
trace_qgroup_meta_reserve(root, -(s64)num_bytes);
btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes);
trace_qgroup_meta_reserve(root, type, -(s64)num_bytes);
btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes, type);
}
static void qgroup_convert_meta(struct btrfs_fs_info *fs_info, u64 ref_root,
int num_bytes)
{
struct btrfs_root *quota_root = fs_info->quota_root;
struct btrfs_qgroup *qgroup;
struct ulist_node *unode;
struct ulist_iterator uiter;
int ret = 0;
if (num_bytes == 0)
return;
if (!quota_root)
return;
spin_lock(&fs_info->qgroup_lock);
qgroup = find_qgroup_rb(fs_info, ref_root);
if (!qgroup)
goto out;
ulist_reinit(fs_info->qgroup_ulist);
ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
qgroup_to_aux(qgroup), GFP_ATOMIC);
if (ret < 0)
goto out;
ULIST_ITER_INIT(&uiter);
while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
struct btrfs_qgroup *qg;
struct btrfs_qgroup_list *glist;
qg = unode_aux_to_qgroup(unode);
qgroup_rsv_release(fs_info, qg, num_bytes,
BTRFS_QGROUP_RSV_META_PREALLOC);
qgroup_rsv_add(fs_info, qg, num_bytes,
BTRFS_QGROUP_RSV_META_PERTRANS);
list_for_each_entry(glist, &qg->groups, next_group) {
ret = ulist_add(fs_info->qgroup_ulist,
glist->group->qgroupid,
qgroup_to_aux(glist->group), GFP_ATOMIC);
if (ret < 0)
goto out;
}
}
out:
spin_unlock(&fs_info->qgroup_lock);
}
void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes)
{
struct btrfs_fs_info *fs_info = root->fs_info;
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
!is_fstree(root->objectid))
return;
/* Same as btrfs_qgroup_free_meta_prealloc() */
num_bytes = sub_root_meta_rsv(root, num_bytes,
BTRFS_QGROUP_RSV_META_PREALLOC);
trace_qgroup_meta_convert(root, num_bytes);
qgroup_convert_meta(fs_info, root->objectid, num_bytes);
}
/*
@ -3092,7 +3254,7 @@ void btrfs_qgroup_check_reserved_leak(struct inode *inode)
}
btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
BTRFS_I(inode)->root->objectid,
changeset.bytes_changed);
changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
}
extent_changeset_release(&changeset);

Просмотреть файл

@ -61,6 +61,48 @@ struct btrfs_qgroup_extent_record {
struct ulist *old_roots;
};
/*
* Qgroup reservation types:
*
* DATA:
* space reserved for data
*
* META_PERTRANS:
* Space reserved for metadata (per-transaction)
* Due to the fact that qgroup data is only updated at transaction commit
* time, reserved space for metadata must be kept until transaction
* commits.
* Any metadata reserved that are used in btrfs_start_transaction() should
* be of this type.
*
* META_PREALLOC:
* There are cases where metadata space is reserved before starting
* transaction, and then btrfs_join_transaction() to get a trans handle.
* Any metadata reserved for such usage should be of this type.
* And after join_transaction() part (or all) of such reservation should
* be converted into META_PERTRANS.
*/
enum btrfs_qgroup_rsv_type {
BTRFS_QGROUP_RSV_DATA = 0,
BTRFS_QGROUP_RSV_META_PERTRANS,
BTRFS_QGROUP_RSV_META_PREALLOC,
BTRFS_QGROUP_RSV_LAST,
};
/*
* Represents how many bytes we have reserved for this qgroup.
*
* Each type should have different reservation behavior.
* E.g, data follows its io_tree flag modification, while
* *currently* meta is just reserve-and-clear during transcation.
*
* TODO: Add new type for reservation which can survive transaction commit.
* Currect metadata reservation behavior is not suitable for such case.
*/
struct btrfs_qgroup_rsv {
u64 values[BTRFS_QGROUP_RSV_LAST];
};
/*
* one struct for each qgroup, organized in fs_info->qgroup_tree.
*/
@ -87,7 +129,7 @@ struct btrfs_qgroup {
/*
* reservation tracking
*/
u64 reserved;
struct btrfs_qgroup_rsv rsv;
/*
* lists
@ -220,20 +262,21 @@ btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes,
struct ulist *old_roots, struct ulist *new_roots);
int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans);
int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
struct btrfs_qgroup_inherit *inherit);
void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
u64 ref_root, u64 num_bytes);
u64 ref_root, u64 num_bytes,
enum btrfs_qgroup_rsv_type type);
static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info,
u64 ref_root, u64 num_bytes)
{
trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root, num_bytes);
btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes);
btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes,
BTRFS_QGROUP_RSV_DATA);
}
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
@ -248,9 +291,54 @@ int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len);
int btrfs_qgroup_free_data(struct inode *inode,
struct extent_changeset *reserved, u64 start, u64 len);
int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
bool enforce);
void btrfs_qgroup_free_meta_all(struct btrfs_root *root);
void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes);
int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
enum btrfs_qgroup_rsv_type type, bool enforce);
/* Reserve metadata space for pertrans and prealloc type */
static inline int btrfs_qgroup_reserve_meta_pertrans(struct btrfs_root *root,
int num_bytes, bool enforce)
{
return __btrfs_qgroup_reserve_meta(root, num_bytes,
BTRFS_QGROUP_RSV_META_PERTRANS, enforce);
}
static inline int btrfs_qgroup_reserve_meta_prealloc(struct btrfs_root *root,
int num_bytes, bool enforce)
{
return __btrfs_qgroup_reserve_meta(root, num_bytes,
BTRFS_QGROUP_RSV_META_PREALLOC, enforce);
}
void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes,
enum btrfs_qgroup_rsv_type type);
/* Free per-transaction meta reservation for error handling */
static inline void btrfs_qgroup_free_meta_pertrans(struct btrfs_root *root,
int num_bytes)
{
__btrfs_qgroup_free_meta(root, num_bytes,
BTRFS_QGROUP_RSV_META_PERTRANS);
}
/* Pre-allocated meta reservation can be freed at need */
static inline void btrfs_qgroup_free_meta_prealloc(struct btrfs_root *root,
int num_bytes)
{
__btrfs_qgroup_free_meta(root, num_bytes,
BTRFS_QGROUP_RSV_META_PREALLOC);
}
/*
* Per-transaction meta reservation should be all freed at transaction commit
* time
*/
void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root);
/*
* Convert @num_bytes of META_PREALLOCATED reservation to META_PERTRANS.
*
* This is called when preallocated meta reservation needs to be used.
* Normally after btrfs_join_transaction() call.
*/
void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes);
void btrfs_qgroup_check_reserved_leak(struct inode *inode);
#endif /* __BTRFS_QGROUP__ */

Просмотреть файл

@ -1987,7 +1987,13 @@ cleanup:
kfree(pointers);
cleanup_io:
if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
/*
* Similar to READ_REBUILD, REBUILD_MISSING at this point also has a
* valid rbio which is consistent with ondisk content, thus such a
* valid rbio can be cached to avoid further disk reads.
*/
if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
/*
* - In case of two failures, where rbio->failb != -1:
*
@ -2008,8 +2014,6 @@ cleanup_io:
else
clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
rbio_orig_end_io(rbio, err);
} else if (rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
rbio_orig_end_io(rbio, err);
} else if (err == BLK_STS_OK) {
rbio->faila = -1;
@ -2768,24 +2772,8 @@ raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
return rbio;
}
static void missing_raid56_work(struct btrfs_work *work)
{
struct btrfs_raid_bio *rbio;
rbio = container_of(work, struct btrfs_raid_bio, work);
__raid56_parity_recover(rbio);
}
static void async_missing_raid56(struct btrfs_raid_bio *rbio)
{
btrfs_init_work(&rbio->work, btrfs_rmw_helper,
missing_raid56_work, NULL, NULL);
btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
}
void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio)
{
if (!lock_stripe_add(rbio))
async_missing_raid56(rbio);
async_read_rebuild(rbio);
}

Просмотреть файл

@ -395,20 +395,20 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
goto error;
/* insert extent in reada_tree + all per-device trees, all or nothing */
btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
btrfs_dev_replace_read_lock(&fs_info->dev_replace);
spin_lock(&fs_info->reada_lock);
ret = radix_tree_insert(&fs_info->reada_tree, index, re);
if (ret == -EEXIST) {
re_exist = radix_tree_lookup(&fs_info->reada_tree, index);
re_exist->refcnt++;
spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
radix_tree_preload_end();
goto error;
}
if (ret) {
spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
radix_tree_preload_end();
goto error;
}
@ -451,13 +451,13 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
}
radix_tree_delete(&fs_info->reada_tree, index);
spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
goto error;
}
have_zone = 1;
}
spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
if (!have_zone)
goto error;

Просмотреть файл

@ -579,11 +579,16 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
while (level >= 0) {
if (level) {
struct btrfs_key first_key;
block_bytenr = btrfs_node_blockptr(path->nodes[level],
path->slots[level]);
gen = btrfs_node_ptr_generation(path->nodes[level],
path->slots[level]);
eb = read_tree_block(fs_info, block_bytenr, gen);
btrfs_node_key_to_cpu(path->nodes[level], &first_key,
path->slots[level]);
eb = read_tree_block(fs_info, block_bytenr, gen,
level - 1, &first_key);
if (IS_ERR(eb))
return PTR_ERR(eb);
if (!extent_buffer_uptodate(eb)) {

Просмотреть файл

@ -1839,6 +1839,8 @@ again:
parent = eb;
while (1) {
struct btrfs_key first_key;
level = btrfs_header_level(parent);
BUG_ON(level < lowest_level);
@ -1852,6 +1854,7 @@ again:
old_bytenr = btrfs_node_blockptr(parent, slot);
blocksize = fs_info->nodesize;
old_ptr_gen = btrfs_node_ptr_generation(parent, slot);
btrfs_node_key_to_cpu(parent, &key, slot);
if (level <= max_level) {
eb = path->nodes[level];
@ -1876,7 +1879,8 @@ again:
break;
}
eb = read_tree_block(fs_info, old_bytenr, old_ptr_gen);
eb = read_tree_block(fs_info, old_bytenr, old_ptr_gen,
level - 1, &first_key);
if (IS_ERR(eb)) {
ret = PTR_ERR(eb);
break;
@ -2036,6 +2040,8 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
last_snapshot = btrfs_root_last_snapshot(&root->root_item);
for (i = *level; i > 0; i--) {
struct btrfs_key first_key;
eb = path->nodes[i];
nritems = btrfs_header_nritems(eb);
while (path->slots[i] < nritems) {
@ -2056,7 +2062,9 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
}
bytenr = btrfs_node_blockptr(eb, path->slots[i]);
eb = read_tree_block(fs_info, bytenr, ptr_gen);
btrfs_node_key_to_cpu(eb, &first_key, path->slots[i]);
eb = read_tree_block(fs_info, bytenr, ptr_gen, i - 1,
&first_key);
if (IS_ERR(eb)) {
return PTR_ERR(eb);
} else if (!extent_buffer_uptodate(eb)) {
@ -2714,6 +2722,8 @@ static int do_relocation(struct btrfs_trans_handle *trans,
path->lowest_level = node->level + 1;
rc->backref_cache.path[node->level] = node;
list_for_each_entry(edge, &node->upper, list[LOWER]) {
struct btrfs_key first_key;
cond_resched();
upper = edge->node[UPPER];
@ -2779,7 +2789,9 @@ static int do_relocation(struct btrfs_trans_handle *trans,
blocksize = root->fs_info->nodesize;
generation = btrfs_node_ptr_generation(upper->eb, slot);
eb = read_tree_block(fs_info, bytenr, generation);
btrfs_node_key_to_cpu(upper->eb, &first_key, slot);
eb = read_tree_block(fs_info, bytenr, generation,
upper->level - 1, &first_key);
if (IS_ERR(eb)) {
err = PTR_ERR(eb);
goto next;
@ -2944,7 +2956,8 @@ static int get_tree_block_key(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb;
BUG_ON(block->key_ready);
eb = read_tree_block(fs_info, block->bytenr, block->key.offset);
eb = read_tree_block(fs_info, block->bytenr, block->key.offset,
block->level, NULL);
if (IS_ERR(eb)) {
return PTR_ERR(eb);
} else if (!extent_buffer_uptodate(eb)) {
@ -3226,7 +3239,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
mask);
if (!page) {
btrfs_delalloc_release_metadata(BTRFS_I(inode),
PAGE_SIZE);
PAGE_SIZE, true);
ret = -ENOMEM;
goto out;
}
@ -3245,9 +3258,9 @@ static int relocate_file_extent_cluster(struct inode *inode,
unlock_page(page);
put_page(page);
btrfs_delalloc_release_metadata(BTRFS_I(inode),
PAGE_SIZE);
PAGE_SIZE, true);
btrfs_delalloc_release_extents(BTRFS_I(inode),
PAGE_SIZE);
PAGE_SIZE, true);
ret = -EIO;
goto out;
}
@ -3274,9 +3287,9 @@ static int relocate_file_extent_cluster(struct inode *inode,
unlock_page(page);
put_page(page);
btrfs_delalloc_release_metadata(BTRFS_I(inode),
PAGE_SIZE);
PAGE_SIZE, true);
btrfs_delalloc_release_extents(BTRFS_I(inode),
PAGE_SIZE);
PAGE_SIZE, true);
clear_extent_bits(&BTRFS_I(inode)->io_tree,
page_start, page_end,
@ -3292,7 +3305,8 @@ static int relocate_file_extent_cluster(struct inode *inode,
put_page(page);
index++;
btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE,
false);
balance_dirty_pages_ratelimited(inode->i_mapping);
btrfs_throttle(fs_info);
}

Просмотреть файл

@ -371,7 +371,7 @@ static struct full_stripe_lock *insert_full_stripe_lock(
struct full_stripe_lock *entry;
struct full_stripe_lock *ret;
WARN_ON(!mutex_is_locked(&locks_root->lock));
lockdep_assert_held(&locks_root->lock);
p = &locks_root->root.rb_node;
while (*p) {
@ -413,7 +413,7 @@ static struct full_stripe_lock *search_full_stripe_lock(
struct rb_node *node;
struct full_stripe_lock *entry;
WARN_ON(!mutex_is_locked(&locks_root->lock));
lockdep_assert_held(&locks_root->lock);
node = locks_root->root.rb_node;
while (node) {
@ -1111,7 +1111,6 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
struct scrub_ctx *sctx = sblock_to_check->sctx;
struct btrfs_device *dev;
struct btrfs_fs_info *fs_info;
u64 length;
u64 logical;
unsigned int failed_mirror_index;
unsigned int is_metadata;
@ -1139,7 +1138,6 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
spin_unlock(&sctx->stat_lock);
return 0;
}
length = sblock_to_check->page_count * PAGE_SIZE;
logical = sblock_to_check->pagev[0]->logical;
BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1);
failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1;
@ -1412,8 +1410,17 @@ nodatasum_case:
if (!page_bad->io_error && !sctx->is_dev_replace)
continue;
/* try to find no-io-error page in mirrors */
if (page_bad->io_error) {
if (scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
/*
* In case of dev replace, if raid56 rebuild process
* didn't work out correct data, then copy the content
* in sblock_bad to make sure target device is identical
* to source device, instead of writing garbage data in
* sblock_for_recheck array to target device.
*/
sblock_other = NULL;
} else if (page_bad->io_error) {
/* try to find no-io-error page in mirrors */
for (mirror_index = 0;
mirror_index < BTRFS_MAX_MIRRORS &&
sblocks_for_recheck[mirror_index].page_count > 0;
@ -1718,6 +1725,45 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
return blk_status_to_errno(bio->bi_status);
}
static void scrub_recheck_block_on_raid56(struct btrfs_fs_info *fs_info,
struct scrub_block *sblock)
{
struct scrub_page *first_page = sblock->pagev[0];
struct bio *bio;
int page_num;
/* All pages in sblock belong to the same stripe on the same device. */
ASSERT(first_page->dev);
if (!first_page->dev->bdev)
goto out;
bio = btrfs_io_bio_alloc(BIO_MAX_PAGES);
bio_set_dev(bio, first_page->dev->bdev);
for (page_num = 0; page_num < sblock->page_count; page_num++) {
struct scrub_page *page = sblock->pagev[page_num];
WARN_ON(!page->page);
bio_add_page(bio, page->page, PAGE_SIZE, 0);
}
if (scrub_submit_raid56_bio_wait(fs_info, bio, first_page)) {
bio_put(bio);
goto out;
}
bio_put(bio);
scrub_recheck_block_checksum(sblock);
return;
out:
for (page_num = 0; page_num < sblock->page_count; page_num++)
sblock->pagev[page_num]->io_error = 1;
sblock->no_io_error_seen = 0;
}
/*
* this function will check the on disk data for checksum errors, header
* errors and read I/O errors. If any I/O errors happen, the exact pages
@ -1733,6 +1779,10 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
sblock->no_io_error_seen = 1;
/* short cut for raid56 */
if (!retry_failed_mirror && scrub_is_page_on_raid56(sblock->pagev[0]))
return scrub_recheck_block_on_raid56(fs_info, sblock);
for (page_num = 0; page_num < sblock->page_count; page_num++) {
struct bio *bio;
struct scrub_page *page = sblock->pagev[page_num];
@ -1748,19 +1798,12 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
bio_set_dev(bio, page->dev->bdev);
bio_add_page(bio, page->page, PAGE_SIZE, 0);
if (!retry_failed_mirror && scrub_is_page_on_raid56(page)) {
if (scrub_submit_raid56_bio_wait(fs_info, bio, page)) {
page->io_error = 1;
sblock->no_io_error_seen = 0;
}
} else {
bio->bi_iter.bi_sector = page->physical >> 9;
bio_set_op_attrs(bio, REQ_OP_READ, 0);
bio->bi_iter.bi_sector = page->physical >> 9;
bio->bi_opf = REQ_OP_READ;
if (btrfsic_submit_bio_wait(bio)) {
page->io_error = 1;
sblock->no_io_error_seen = 0;
}
if (btrfsic_submit_bio_wait(bio)) {
page->io_error = 1;
sblock->no_io_error_seen = 0;
}
bio_put(bio);
@ -2728,7 +2771,8 @@ static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum)
}
/* scrub extent tries to collect up to 64 kB for each bio */
static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len,
static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
u64 logical, u64 len,
u64 physical, struct btrfs_device *dev, u64 flags,
u64 gen, int mirror_num, u64 physical_for_dev_replace)
{
@ -2737,13 +2781,19 @@ static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len,
u32 blocksize;
if (flags & BTRFS_EXTENT_FLAG_DATA) {
blocksize = sctx->fs_info->sectorsize;
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
blocksize = map->stripe_len;
else
blocksize = sctx->fs_info->sectorsize;
spin_lock(&sctx->stat_lock);
sctx->stat.data_extents_scrubbed++;
sctx->stat.data_bytes_scrubbed += len;
spin_unlock(&sctx->stat_lock);
} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
blocksize = sctx->fs_info->nodesize;
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
blocksize = map->stripe_len;
else
blocksize = sctx->fs_info->nodesize;
spin_lock(&sctx->stat_lock);
sctx->stat.tree_extents_scrubbed++;
sctx->stat.tree_bytes_scrubbed += len;
@ -2883,9 +2933,9 @@ static int scrub_extent_for_parity(struct scrub_parity *sparity,
}
if (flags & BTRFS_EXTENT_FLAG_DATA) {
blocksize = sctx->fs_info->sectorsize;
blocksize = sparity->stripe_len;
} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
blocksize = sctx->fs_info->nodesize;
blocksize = sparity->stripe_len;
} else {
blocksize = sctx->fs_info->sectorsize;
WARN_ON(1);
@ -3595,7 +3645,7 @@ again:
if (ret)
goto out;
ret = scrub_extent(sctx, extent_logical, extent_len,
ret = scrub_extent(sctx, map, extent_logical, extent_len,
extent_physical, extent_dev, flags,
generation, extent_mirror_num,
extent_logical - logical + physical);
@ -3885,11 +3935,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
break;
}
btrfs_dev_replace_lock(&fs_info->dev_replace, 1);
btrfs_dev_replace_write_lock(&fs_info->dev_replace);
dev_replace->cursor_right = found_key.offset + length;
dev_replace->cursor_left = found_key.offset;
dev_replace->item_needs_writeback = 1;
btrfs_dev_replace_unlock(&fs_info->dev_replace, 1);
btrfs_dev_replace_write_unlock(&fs_info->dev_replace);
ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
found_key.offset, cache, is_dev_replace);
@ -3925,10 +3975,10 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
scrub_pause_off(fs_info);
btrfs_dev_replace_lock(&fs_info->dev_replace, 1);
btrfs_dev_replace_write_lock(&fs_info->dev_replace);
dev_replace->cursor_left = dev_replace->cursor_right;
dev_replace->item_needs_writeback = 1;
btrfs_dev_replace_unlock(&fs_info->dev_replace, 1);
btrfs_dev_replace_write_unlock(&fs_info->dev_replace);
if (ro_set)
btrfs_dec_block_group_ro(cache);
@ -4144,16 +4194,16 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
return -EIO;
}
btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
btrfs_dev_replace_read_lock(&fs_info->dev_replace);
if (dev->scrub_ctx ||
(!is_dev_replace &&
btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
mutex_unlock(&fs_info->scrub_lock);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
return -EINPROGRESS;
}
btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
ret = scrub_workers_get(fs_info, is_dev_replace);
if (ret) {
@ -4480,7 +4530,8 @@ static int check_extent_to_block(struct btrfs_inode *inode, u64 start, u64 len,
* move on to the next inode.
*/
if (em->block_start > logical ||
em->block_start + em->block_len < logical + len) {
em->block_start + em->block_len < logical + len ||
test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
free_extent_map(em);
ret = 1;
goto out_unlock;
@ -4620,7 +4671,6 @@ static int write_page_nocow(struct scrub_ctx *sctx,
{
struct bio *bio;
struct btrfs_device *dev;
int ret;
dev = sctx->wr_tgtdev;
if (!dev)
@ -4635,17 +4685,15 @@ static int write_page_nocow(struct scrub_ctx *sctx,
bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
bio_set_dev(bio, dev->bdev);
bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
ret = bio_add_page(bio, page, PAGE_SIZE, 0);
if (ret != PAGE_SIZE) {
leave_with_eio:
/* bio_add_page won't fail on a freshly allocated bio */
bio_add_page(bio, page, PAGE_SIZE, 0);
if (btrfsic_submit_bio_wait(bio)) {
bio_put(bio);
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
return -EIO;
}
if (btrfsic_submit_bio_wait(bio))
goto leave_with_eio;
bio_put(bio);
return 0;
}

Просмотреть файл

@ -27,10 +27,10 @@
#include <linux/vmalloc.h>
#include <linux/string.h>
#include <linux/compat.h>
#include <linux/crc32c.h>
#include "send.h"
#include "backref.h"
#include "hash.h"
#include "locking.h"
#include "disk-io.h"
#include "btrfs_inode.h"
@ -112,6 +112,7 @@ struct send_ctx {
u64 cur_inode_mode;
u64 cur_inode_rdev;
u64 cur_inode_last_extent;
u64 cur_inode_next_write_offset;
u64 send_progress;
@ -270,6 +271,7 @@ struct name_cache_entry {
char name[];
};
__cold
static void inconsistent_snapshot_error(struct send_ctx *sctx,
enum btrfs_compare_tree_result result,
const char *what)
@ -611,9 +613,9 @@ static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
}
#define TLV_PUT(sctx, attrtype, attrlen, data) \
#define TLV_PUT(sctx, attrtype, data, attrlen) \
do { \
ret = tlv_put(sctx, attrtype, attrlen, data); \
ret = tlv_put(sctx, attrtype, data, attrlen); \
if (ret < 0) \
goto tlv_put_failure; \
} while (0)
@ -695,7 +697,7 @@ static int send_cmd(struct send_ctx *sctx)
hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr));
hdr->crc = 0;
crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
hdr->crc = cpu_to_le32(crc);
ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
@ -5029,6 +5031,7 @@ static int send_hole(struct send_ctx *sctx, u64 end)
break;
offset += len;
}
sctx->cur_inode_next_write_offset = offset;
tlv_put_failure:
fs_path_free(p);
return ret;
@ -5264,6 +5267,7 @@ static int send_write_or_clone(struct send_ctx *sctx,
} else {
ret = send_extent_data(sctx, offset, len);
}
sctx->cur_inode_next_write_offset = offset + len;
out:
return ret;
}
@ -5788,6 +5792,7 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
u64 right_gid;
int need_chmod = 0;
int need_chown = 0;
int need_truncate = 1;
int pending_move = 0;
int refs_processed = 0;
@ -5825,9 +5830,13 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
need_chown = 1;
if (!S_ISLNK(sctx->cur_inode_mode))
need_chmod = 1;
if (sctx->cur_inode_next_write_offset == sctx->cur_inode_size)
need_truncate = 0;
} else {
u64 old_size;
ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
NULL, NULL, &right_mode, &right_uid,
&old_size, NULL, &right_mode, &right_uid,
&right_gid, NULL);
if (ret < 0)
goto out;
@ -5836,6 +5845,10 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
need_chown = 1;
if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode)
need_chmod = 1;
if ((old_size == sctx->cur_inode_size) ||
(sctx->cur_inode_size > old_size &&
sctx->cur_inode_next_write_offset == sctx->cur_inode_size))
need_truncate = 0;
}
if (S_ISREG(sctx->cur_inode_mode)) {
@ -5854,10 +5867,13 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
goto out;
}
}
ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen,
sctx->cur_inode_size);
if (ret < 0)
goto out;
if (need_truncate) {
ret = send_truncate(sctx, sctx->cur_ino,
sctx->cur_inode_gen,
sctx->cur_inode_size);
if (ret < 0)
goto out;
}
}
if (need_chown) {
@ -5911,6 +5927,7 @@ static int changed_inode(struct send_ctx *sctx,
sctx->cur_ino = key->objectid;
sctx->cur_inode_new_gen = 0;
sctx->cur_inode_last_extent = (u64)-1;
sctx->cur_inode_next_write_offset = 0;
/*
* Set send_progress to current inode. This will tell all get_cur_xxx

Просмотреть файл

@ -41,6 +41,7 @@
#include <linux/slab.h>
#include <linux/cleancache.h>
#include <linux/ratelimit.h>
#include <linux/crc32c.h>
#include <linux/btrfs.h>
#include "delayed-inode.h"
#include "ctree.h"
@ -48,7 +49,6 @@
#include "transaction.h"
#include "btrfs_inode.h"
#include "print-tree.h"
#include "hash.h"
#include "props.h"
#include "xattr.h"
#include "volumes.h"
@ -308,21 +308,50 @@ static void btrfs_put_super(struct super_block *sb)
}
enum {
Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum,
Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
Opt_space_cache, Opt_space_cache_version, Opt_clear_cache,
Opt_user_subvol_rm_allowed, Opt_enospc_debug, Opt_subvolrootid,
Opt_defrag, Opt_inode_cache, Opt_no_space_cache, Opt_recovery,
Opt_skip_balance, Opt_check_integrity,
Opt_acl, Opt_noacl,
Opt_clear_cache,
Opt_commit_interval,
Opt_compress,
Opt_compress_force,
Opt_compress_force_type,
Opt_compress_type,
Opt_degraded,
Opt_device,
Opt_fatal_errors,
Opt_flushoncommit, Opt_noflushoncommit,
Opt_inode_cache, Opt_noinode_cache,
Opt_max_inline,
Opt_barrier, Opt_nobarrier,
Opt_datacow, Opt_nodatacow,
Opt_datasum, Opt_nodatasum,
Opt_defrag, Opt_nodefrag,
Opt_discard, Opt_nodiscard,
Opt_nologreplay,
Opt_norecovery,
Opt_ratio,
Opt_rescan_uuid_tree,
Opt_skip_balance,
Opt_space_cache, Opt_no_space_cache,
Opt_space_cache_version,
Opt_ssd, Opt_nossd,
Opt_ssd_spread, Opt_nossd_spread,
Opt_subvol,
Opt_subvolid,
Opt_thread_pool,
Opt_treelog, Opt_notreelog,
Opt_usebackuproot,
Opt_user_subvol_rm_allowed,
/* Deprecated options */
Opt_alloc_start,
Opt_recovery,
Opt_subvolrootid,
/* Debugging options */
Opt_check_integrity,
Opt_check_integrity_including_extent_data,
Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree,
Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard,
Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow,
Opt_datasum, Opt_treelog, Opt_noinode_cache, Opt_usebackuproot,
Opt_nologreplay, Opt_norecovery,
Opt_check_integrity_print_mask,
Opt_enospc_debug, Opt_noenospc_debug,
#ifdef CONFIG_BTRFS_DEBUG
Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
#endif
@ -333,58 +362,63 @@ enum {
};
static const match_table_t tokens = {
{Opt_degraded, "degraded"},
{Opt_subvol, "subvol=%s"},
{Opt_subvolid, "subvolid=%s"},
{Opt_device, "device=%s"},
{Opt_nodatasum, "nodatasum"},
{Opt_datasum, "datasum"},
{Opt_nodatacow, "nodatacow"},
{Opt_datacow, "datacow"},
{Opt_nobarrier, "nobarrier"},
{Opt_barrier, "barrier"},
{Opt_max_inline, "max_inline=%s"},
{Opt_alloc_start, "alloc_start=%s"},
{Opt_thread_pool, "thread_pool=%d"},
{Opt_acl, "acl"},
{Opt_noacl, "noacl"},
{Opt_clear_cache, "clear_cache"},
{Opt_commit_interval, "commit=%u"},
{Opt_compress, "compress"},
{Opt_compress_type, "compress=%s"},
{Opt_compress_force, "compress-force"},
{Opt_compress_force_type, "compress-force=%s"},
{Opt_ssd, "ssd"},
{Opt_ssd_spread, "ssd_spread"},
{Opt_nossd, "nossd"},
{Opt_acl, "acl"},
{Opt_noacl, "noacl"},
{Opt_notreelog, "notreelog"},
{Opt_treelog, "treelog"},
{Opt_nologreplay, "nologreplay"},
{Opt_norecovery, "norecovery"},
{Opt_degraded, "degraded"},
{Opt_device, "device=%s"},
{Opt_fatal_errors, "fatal_errors=%s"},
{Opt_flushoncommit, "flushoncommit"},
{Opt_noflushoncommit, "noflushoncommit"},
{Opt_ratio, "metadata_ratio=%d"},
{Opt_discard, "discard"},
{Opt_nodiscard, "nodiscard"},
{Opt_space_cache, "space_cache"},
{Opt_space_cache_version, "space_cache=%s"},
{Opt_clear_cache, "clear_cache"},
{Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
{Opt_enospc_debug, "enospc_debug"},
{Opt_noenospc_debug, "noenospc_debug"},
{Opt_subvolrootid, "subvolrootid=%d"},
{Opt_defrag, "autodefrag"},
{Opt_nodefrag, "noautodefrag"},
{Opt_inode_cache, "inode_cache"},
{Opt_noinode_cache, "noinode_cache"},
{Opt_no_space_cache, "nospace_cache"},
{Opt_recovery, "recovery"}, /* deprecated */
{Opt_usebackuproot, "usebackuproot"},
{Opt_max_inline, "max_inline=%s"},
{Opt_barrier, "barrier"},
{Opt_nobarrier, "nobarrier"},
{Opt_datacow, "datacow"},
{Opt_nodatacow, "nodatacow"},
{Opt_datasum, "datasum"},
{Opt_nodatasum, "nodatasum"},
{Opt_defrag, "autodefrag"},
{Opt_nodefrag, "noautodefrag"},
{Opt_discard, "discard"},
{Opt_nodiscard, "nodiscard"},
{Opt_nologreplay, "nologreplay"},
{Opt_norecovery, "norecovery"},
{Opt_ratio, "metadata_ratio=%u"},
{Opt_rescan_uuid_tree, "rescan_uuid_tree"},
{Opt_skip_balance, "skip_balance"},
{Opt_space_cache, "space_cache"},
{Opt_no_space_cache, "nospace_cache"},
{Opt_space_cache_version, "space_cache=%s"},
{Opt_ssd, "ssd"},
{Opt_nossd, "nossd"},
{Opt_ssd_spread, "ssd_spread"},
{Opt_nossd_spread, "nossd_spread"},
{Opt_subvol, "subvol=%s"},
{Opt_subvolid, "subvolid=%s"},
{Opt_thread_pool, "thread_pool=%u"},
{Opt_treelog, "treelog"},
{Opt_notreelog, "notreelog"},
{Opt_usebackuproot, "usebackuproot"},
{Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
/* Deprecated options */
{Opt_alloc_start, "alloc_start=%s"},
{Opt_recovery, "recovery"},
{Opt_subvolrootid, "subvolrootid=%d"},
/* Debugging options */
{Opt_check_integrity, "check_int"},
{Opt_check_integrity_including_extent_data, "check_int_data"},
{Opt_check_integrity_print_mask, "check_int_print_mask=%d"},
{Opt_rescan_uuid_tree, "rescan_uuid_tree"},
{Opt_fatal_errors, "fatal_errors=%s"},
{Opt_commit_interval, "commit=%d"},
{Opt_check_integrity_print_mask, "check_int_print_mask=%u"},
{Opt_enospc_debug, "enospc_debug"},
{Opt_noenospc_debug, "noenospc_debug"},
#ifdef CONFIG_BTRFS_DEBUG
{Opt_fragment_data, "fragment=data"},
{Opt_fragment_metadata, "fragment=metadata"},
@ -579,6 +613,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
btrfs_set_opt(info->mount_opt, NOSSD);
btrfs_clear_and_info(info, SSD,
"not using ssd optimizations");
/* Fallthrough */
case Opt_nossd_spread:
btrfs_clear_and_info(info, SSD_SPREAD,
"not using spread ssd allocation scheme");
break;
@ -594,12 +630,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
ret = match_int(&args[0], &intarg);
if (ret) {
goto out;
} else if (intarg > 0) {
info->thread_pool_size = intarg;
} else {
} else if (intarg == 0) {
ret = -EINVAL;
goto out;
}
info->thread_pool_size = intarg;
break;
case Opt_max_inline:
num = match_strdup(&args[0]);
@ -658,16 +693,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
break;
case Opt_ratio:
ret = match_int(&args[0], &intarg);
if (ret) {
if (ret)
goto out;
} else if (intarg >= 0) {
info->metadata_ratio = intarg;
btrfs_info(info, "metadata ratio %d",
info->metadata_ratio);
} else {
ret = -EINVAL;
goto out;
}
info->metadata_ratio = intarg;
btrfs_info(info, "metadata ratio %u",
info->metadata_ratio);
break;
case Opt_discard:
btrfs_set_and_info(info, DISCARD,
@ -762,17 +792,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
break;
case Opt_check_integrity_print_mask:
ret = match_int(&args[0], &intarg);
if (ret) {
if (ret)
goto out;
} else if (intarg >= 0) {
info->check_integrity_print_mask = intarg;
btrfs_info(info,
"check_integrity_print_mask 0x%x",
info->check_integrity_print_mask);
} else {
ret = -EINVAL;
goto out;
}
info->check_integrity_print_mask = intarg;
btrfs_info(info, "check_integrity_print_mask 0x%x",
info->check_integrity_print_mask);
break;
#else
case Opt_check_integrity_including_extent_data:
@ -798,24 +822,18 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
case Opt_commit_interval:
intarg = 0;
ret = match_int(&args[0], &intarg);
if (ret < 0) {
btrfs_err(info, "invalid commit interval");
ret = -EINVAL;
if (ret)
goto out;
}
if (intarg > 0) {
if (intarg > 300) {
btrfs_warn(info,
"excessive commit interval %d",
intarg);
}
info->commit_interval = intarg;
} else {
if (intarg == 0) {
btrfs_info(info,
"using default commit interval %ds",
"using default commit interval %us",
BTRFS_DEFAULT_COMMIT_INTERVAL);
info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
intarg = BTRFS_DEFAULT_COMMIT_INTERVAL;
} else if (intarg > 300) {
btrfs_warn(info, "excessive commit interval %d",
intarg);
}
info->commit_interval = intarg;
break;
#ifdef CONFIG_BTRFS_DEBUG
case Opt_fragment_all:
@ -932,8 +950,8 @@ static int btrfs_parse_subvol_options(const char *options, fmode_t flags,
{
substring_t args[MAX_OPT_ARGS];
char *opts, *orig, *p;
char *num = NULL;
int error = 0;
u64 subvolid;
if (!options)
return 0;
@ -963,18 +981,15 @@ static int btrfs_parse_subvol_options(const char *options, fmode_t flags,
}
break;
case Opt_subvolid:
num = match_strdup(&args[0]);
if (num) {
*subvol_objectid = memparse(num, NULL);
kfree(num);
/* we want the original fs_tree */
if (!*subvol_objectid)
*subvol_objectid =
BTRFS_FS_TREE_OBJECTID;
} else {
error = -EINVAL;
error = match_u64(&args[0], &subvolid);
if (error)
goto out;
}
/* we want the original fs_tree */
if (subvolid == 0)
subvolid = BTRFS_FS_TREE_OBJECTID;
*subvol_objectid = subvolid;
break;
case Opt_subvolrootid:
pr_warn("BTRFS: 'subvolrootid' mount option is deprecated and has no effect\n");
@ -1284,7 +1299,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
seq_printf(seq, ",max_inline=%llu", info->max_inline);
if (info->thread_pool_size != min_t(unsigned long,
num_online_cpus() + 2, 8))
seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
seq_printf(seq, ",thread_pool=%u", info->thread_pool_size);
if (btrfs_test_opt(info, COMPRESS)) {
compress_type = btrfs_compress_type2str(info->compress_type);
if (btrfs_test_opt(info, FORCE_COMPRESS))
@ -1340,12 +1355,11 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
info->check_integrity_print_mask);
#endif
if (info->metadata_ratio)
seq_printf(seq, ",metadata_ratio=%d",
info->metadata_ratio);
seq_printf(seq, ",metadata_ratio=%u", info->metadata_ratio);
if (btrfs_test_opt(info, PANIC_ON_FATAL_ERROR))
seq_puts(seq, ",fatal_errors=panic");
if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
seq_printf(seq, ",commit=%d", info->commit_interval);
seq_printf(seq, ",commit=%u", info->commit_interval);
#ifdef CONFIG_BTRFS_DEBUG
if (btrfs_test_opt(info, FRAGMENT_DATA))
seq_puts(seq, ",fragment=data");
@ -1690,7 +1704,7 @@ out:
}
static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
int new_pool_size, int old_pool_size)
u32 new_pool_size, u32 old_pool_size)
{
if (new_pool_size == old_pool_size)
return;
@ -1758,8 +1772,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
unsigned long old_opts = fs_info->mount_opt;
unsigned long old_compress_type = fs_info->compress_type;
u64 old_max_inline = fs_info->max_inline;
int old_thread_pool_size = fs_info->thread_pool_size;
unsigned int old_metadata_ratio = fs_info->metadata_ratio;
u32 old_thread_pool_size = fs_info->thread_pool_size;
u32 old_metadata_ratio = fs_info->metadata_ratio;
int ret;
sync_filesystem(sb);
@ -2290,11 +2304,18 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
struct list_head *head;
struct rcu_string *name;
mutex_lock(&fs_info->fs_devices->device_list_mutex);
/*
* Lightweight locking of the devices. We should not need
* device_list_mutex here as we only read the device data and the list
* is protected by RCU. Even if a device is deleted during the list
* traversals, we'll get valid data, the freeing callback will wait at
* least until until the rcu_read_unlock.
*/
rcu_read_lock();
cur_devices = fs_info->fs_devices;
while (cur_devices) {
head = &cur_devices->devices;
list_for_each_entry(dev, head, dev_list) {
list_for_each_entry_rcu(dev, head, dev_list) {
if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
continue;
if (!dev->name)
@ -2306,14 +2327,12 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
}
if (first_dev) {
rcu_read_lock();
name = rcu_dereference(first_dev->name);
seq_escape(m, name->str, " \t\n\\");
rcu_read_unlock();
} else {
WARN_ON(1);
}
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
rcu_read_unlock();
return 0;
}
@ -2355,7 +2374,7 @@ static int __init btrfs_interface_init(void)
return misc_register(&btrfs_misc);
}
static void btrfs_interface_exit(void)
static __cold void btrfs_interface_exit(void)
{
misc_deregister(&btrfs_misc);
}
@ -2376,22 +2395,18 @@ static void __init btrfs_print_mod_info(void)
", ref-verify=on"
#endif
"\n",
btrfs_crc32c_impl());
crc32c_impl());
}
static int __init init_btrfs_fs(void)
{
int err;
err = btrfs_hash_init();
if (err)
return err;
btrfs_props_init();
err = btrfs_init_sysfs();
if (err)
goto free_hash;
return err;
btrfs_init_compress();
@ -2472,8 +2487,7 @@ free_cachep:
free_compress:
btrfs_exit_compress();
btrfs_exit_sysfs();
free_hash:
btrfs_hash_exit();
return err;
}
@ -2493,7 +2507,6 @@ static void __exit exit_btrfs_fs(void)
btrfs_exit_sysfs();
btrfs_cleanup_fs_uuids();
btrfs_exit_compress();
btrfs_hash_exit();
}
late_initcall(init_btrfs_fs);

Просмотреть файл

@ -272,7 +272,7 @@ static ssize_t raid_bytes_show(struct kobject *kobj,
{
struct btrfs_space_info *sinfo = to_space_info(kobj->parent);
struct btrfs_block_group_cache *block_group;
int index = to_raid_kobj(kobj)->raid_type;
int index = btrfs_bg_flags_to_raid_index(to_raid_kobj(kobj)->flags);
u64 val = 0;
down_read(&sinfo->groups_sem);
@ -923,7 +923,7 @@ out1:
return ret;
}
void btrfs_exit_sysfs(void)
void __cold btrfs_exit_sysfs(void)
{
sysfs_remove_group(&btrfs_kset->kobj, &btrfs_feature_attr_group);
kset_unregister(btrfs_kset);

Просмотреть файл

@ -278,8 +278,7 @@ int btrfs_run_sanity_tests(void)
}
}
ret = btrfs_test_extent_map();
if (ret)
goto out;
out:
btrfs_destroy_test_fs();
return ret;

Просмотреть файл

@ -343,7 +343,7 @@ static void test_case_4(struct extent_map_tree *em_tree)
__test_case_4(em_tree, SZ_4K);
}
int btrfs_test_extent_map()
int btrfs_test_extent_map(void)
{
struct extent_map_tree *em_tree;

Просмотреть файл

@ -63,7 +63,7 @@ static int insert_normal_tree_ref(struct btrfs_root *root, u64 bytenr,
btrfs_set_extent_generation(leaf, item, 1);
btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_TREE_BLOCK);
block_info = (struct btrfs_tree_block_info *)(item + 1);
btrfs_set_tree_block_level(leaf, block_info, 1);
btrfs_set_tree_block_level(leaf, block_info, 0);
iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
if (parent > 0) {
btrfs_set_extent_inline_ref_type(leaf, iref,

Просмотреть файл

@ -37,22 +37,16 @@
static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
[TRANS_STATE_RUNNING] = 0U,
[TRANS_STATE_BLOCKED] = (__TRANS_USERSPACE |
__TRANS_START),
[TRANS_STATE_COMMIT_START] = (__TRANS_USERSPACE |
__TRANS_START |
__TRANS_ATTACH),
[TRANS_STATE_COMMIT_DOING] = (__TRANS_USERSPACE |
__TRANS_START |
[TRANS_STATE_BLOCKED] = __TRANS_START,
[TRANS_STATE_COMMIT_START] = (__TRANS_START | __TRANS_ATTACH),
[TRANS_STATE_COMMIT_DOING] = (__TRANS_START |
__TRANS_ATTACH |
__TRANS_JOIN),
[TRANS_STATE_UNBLOCKED] = (__TRANS_USERSPACE |
__TRANS_START |
[TRANS_STATE_UNBLOCKED] = (__TRANS_START |
__TRANS_ATTACH |
__TRANS_JOIN |
__TRANS_JOIN_NOLOCK),
[TRANS_STATE_COMPLETED] = (__TRANS_USERSPACE |
__TRANS_START |
[TRANS_STATE_COMPLETED] = (__TRANS_START |
__TRANS_ATTACH |
__TRANS_JOIN |
__TRANS_JOIN_NOLOCK),
@ -126,9 +120,9 @@ static void clear_btree_io_tree(struct extent_io_tree *tree)
spin_unlock(&tree->lock);
}
static noinline void switch_commit_roots(struct btrfs_transaction *trans,
struct btrfs_fs_info *fs_info)
static noinline void switch_commit_roots(struct btrfs_transaction *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root, *tmp;
down_write(&fs_info->commit_root_sem);
@ -319,7 +313,7 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans,
if ((test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
root->last_trans < trans->transid) || force) {
WARN_ON(root == fs_info->extent_root);
WARN_ON(root->commit_root != root->node);
WARN_ON(!force && root->commit_root != root->node);
/*
* see below for IN_TRANS_SETUP usage rules
@ -449,11 +443,7 @@ static int may_wait_transaction(struct btrfs_fs_info *fs_info, int type)
if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
return 0;
if (type == TRANS_USERSPACE)
return 1;
if (type == TRANS_START &&
!atomic_read(&fs_info->open_ioctl_trans))
if (type == TRANS_START)
return 1;
return 0;
@ -508,8 +498,8 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
*/
if (num_items && root != fs_info->chunk_root) {
qgroup_reserved = num_items * fs_info->nodesize;
ret = btrfs_qgroup_reserve_meta(root, qgroup_reserved,
enforce_qgroups);
ret = btrfs_qgroup_reserve_meta_pertrans(root, qgroup_reserved,
enforce_qgroups);
if (ret)
return ERR_PTR(ret);
@ -593,7 +583,7 @@ again:
got_it:
btrfs_record_root_in_trans(h, root);
if (!current->journal_info && type != TRANS_USERSPACE)
if (!current->journal_info)
current->journal_info = h;
return h;
@ -606,7 +596,7 @@ alloc_fail:
btrfs_block_rsv_release(fs_info, &fs_info->trans_block_rsv,
num_bytes);
reserve_fail:
btrfs_qgroup_free_meta(root, qgroup_reserved);
btrfs_qgroup_free_meta_pertrans(root, qgroup_reserved);
return ERR_PTR(ret);
}
@ -658,14 +648,6 @@ struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
return trans;
}
struct btrfs_trans_handle *btrfs_start_transaction_lflush(
struct btrfs_root *root,
unsigned int num_items)
{
return start_transaction(root, num_items, TRANS_START,
BTRFS_RESERVE_FLUSH_LIMIT, true);
}
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
{
return start_transaction(root, 0, TRANS_JOIN, BTRFS_RESERVE_NO_FLUSH,
@ -678,12 +660,6 @@ struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root
BTRFS_RESERVE_NO_FLUSH, true);
}
struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root)
{
return start_transaction(root, 0, TRANS_USERSPACE,
BTRFS_RESERVE_NO_FLUSH, true);
}
/*
* btrfs_attach_transaction() - catch the running transaction
*
@ -789,8 +765,7 @@ out:
void btrfs_throttle(struct btrfs_fs_info *fs_info)
{
if (!atomic_read(&fs_info->open_ioctl_trans))
wait_current_trans(fs_info);
wait_current_trans(fs_info);
}
static int should_end_transaction(struct btrfs_trans_handle *trans)
@ -806,7 +781,6 @@ static int should_end_transaction(struct btrfs_trans_handle *trans)
int btrfs_should_end_transaction(struct btrfs_trans_handle *trans)
{
struct btrfs_transaction *cur_trans = trans->transaction;
struct btrfs_fs_info *fs_info = trans->fs_info;
int updates;
int err;
@ -818,7 +792,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans)
updates = trans->delayed_ref_updates;
trans->delayed_ref_updates = 0;
if (updates) {
err = btrfs_run_delayed_refs(trans, fs_info, updates * 2);
err = btrfs_run_delayed_refs(trans, updates * 2);
if (err) /* Error code will also eval true */
return err;
}
@ -826,6 +800,27 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans)
return should_end_transaction(trans);
}
static void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
if (!trans->block_rsv) {
ASSERT(!trans->bytes_reserved);
return;
}
if (!trans->bytes_reserved)
return;
ASSERT(trans->block_rsv == &fs_info->trans_block_rsv);
trace_btrfs_space_reservation(fs_info, "transaction",
trans->transid, trans->bytes_reserved, 0);
btrfs_block_rsv_release(fs_info, trans->block_rsv,
trans->bytes_reserved);
trans->bytes_reserved = 0;
}
static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
int throttle)
{
@ -843,11 +838,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
return 0;
}
btrfs_trans_release_metadata(trans, info);
btrfs_trans_release_metadata(trans);
trans->block_rsv = NULL;
if (!list_empty(&trans->new_bgs))
btrfs_create_pending_block_groups(trans, info);
btrfs_create_pending_block_groups(trans);
trans->delayed_ref_updates = 0;
if (!trans->sync) {
@ -864,16 +859,15 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
must_run_delayed_refs = 2;
}
btrfs_trans_release_metadata(trans, info);
btrfs_trans_release_metadata(trans);
trans->block_rsv = NULL;
if (!list_empty(&trans->new_bgs))
btrfs_create_pending_block_groups(trans, info);
btrfs_create_pending_block_groups(trans);
btrfs_trans_release_chunk_metadata(trans);
if (lock && !atomic_read(&info->open_ioctl_trans) &&
should_end_transaction(trans) &&
if (lock && should_end_transaction(trans) &&
READ_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) {
spin_lock(&info->trans_lock);
if (cur_trans->state == TRANS_STATE_RUNNING)
@ -1072,40 +1066,33 @@ int btrfs_wait_tree_log_extents(struct btrfs_root *log_root, int mark)
}
/*
* when btree blocks are allocated, they have some corresponding bits set for
* them in one of two extent_io trees. This is used to make sure all of
* those extents are on disk for transaction or log commit
* When btree blocks are allocated the corresponding extents are marked dirty.
* This function ensures such extents are persisted on disk for transaction or
* log commit.
*
* @trans: transaction whose dirty pages we'd like to write
*/
static int btrfs_write_and_wait_marked_extents(struct btrfs_fs_info *fs_info,
struct extent_io_tree *dirty_pages, int mark)
static int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans)
{
int ret;
int ret2;
struct extent_io_tree *dirty_pages = &trans->transaction->dirty_pages;
struct btrfs_fs_info *fs_info = trans->fs_info;
struct blk_plug plug;
blk_start_plug(&plug);
ret = btrfs_write_marked_extents(fs_info, dirty_pages, mark);
ret = btrfs_write_marked_extents(fs_info, dirty_pages, EXTENT_DIRTY);
blk_finish_plug(&plug);
ret2 = btrfs_wait_extents(fs_info, dirty_pages);
if (ret)
return ret;
if (ret2)
return ret2;
return 0;
}
static int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
int ret;
ret = btrfs_write_and_wait_marked_extents(fs_info,
&trans->transaction->dirty_pages,
EXTENT_DIRTY);
clear_btree_io_tree(&trans->transaction->dirty_pages);
return ret;
if (ret)
return ret;
else if (ret2)
return ret2;
else
return 0;
}
/*
@ -1155,9 +1142,9 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
* failures will cause the file system to go offline. We still need
* to clean up the delayed refs.
*/
static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct list_head *dirty_bgs = &trans->transaction->dirty_bgs;
struct list_head *io_bgs = &trans->transaction->io_bgs;
struct list_head *next;
@ -1173,7 +1160,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
if (ret)
return ret;
ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1);
ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
if (ret)
return ret;
@ -1192,7 +1179,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
return ret;
/* run_qgroups might have added some more refs */
ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1);
ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
if (ret)
return ret;
again:
@ -1209,7 +1196,7 @@ again:
ret = update_cowonly_root(trans, root);
if (ret)
return ret;
ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1);
ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
if (ret)
return ret;
}
@ -1218,7 +1205,7 @@ again:
ret = btrfs_write_dirty_block_groups(trans, fs_info);
if (ret)
return ret;
ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1);
ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
if (ret)
return ret;
}
@ -1251,9 +1238,9 @@ void btrfs_add_dead_root(struct btrfs_root *root)
/*
* update all the cowonly tree roots on disk
*/
static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *gang[8];
int i;
int ret;
@ -1297,7 +1284,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
spin_lock(&fs_info->fs_roots_radix_lock);
if (err)
break;
btrfs_qgroup_free_meta_all(root);
btrfs_qgroup_free_meta_all_pertrans(root);
}
}
spin_unlock(&fs_info->fs_roots_radix_lock);
@ -1365,16 +1352,24 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
return 0;
/*
* Ensure dirty @src will be commited. Or, after comming
* commit_fs_roots() and switch_commit_roots(), any dirty but not
* recorded root will never be updated again, causing an outdated root
* item.
*/
record_root_in_trans(trans, src, 1);
/*
* We are going to commit transaction, see btrfs_commit_transaction()
* comment for reason locking tree_log_mutex
*/
mutex_lock(&fs_info->tree_log_mutex);
ret = commit_fs_roots(trans, fs_info);
ret = commit_fs_roots(trans);
if (ret)
goto out;
ret = btrfs_qgroup_account_extents(trans, fs_info);
ret = btrfs_qgroup_account_extents(trans);
if (ret < 0)
goto out;
@ -1397,11 +1392,11 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
* like chunk and root tree, as they won't affect qgroup.
* And we don't write super to avoid half committed status.
*/
ret = commit_cowonly_roots(trans, fs_info);
ret = commit_cowonly_roots(trans);
if (ret)
goto out;
switch_commit_roots(trans->transaction, fs_info);
ret = btrfs_write_and_wait_transaction(trans, fs_info);
switch_commit_roots(trans->transaction);
ret = btrfs_write_and_wait_transaction(trans);
if (ret)
btrfs_handle_fs_error(fs_info, ret,
"Error while writing out transaction for qgroup");
@ -1430,9 +1425,10 @@ out:
* the creation of the pending snapshots, just return 0.
*/
static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_pending_snapshot *pending)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_key key;
struct btrfs_root_item *new_root_item;
struct btrfs_root *tree_root = fs_info->tree_root;
@ -1524,7 +1520,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
* otherwise we corrupt the FS during
* snapshot
*/
ret = btrfs_run_delayed_items(trans, fs_info);
ret = btrfs_run_delayed_items(trans);
if (ret) { /* Transaction aborted */
btrfs_abort_transaction(trans, ret);
goto fail;
@ -1620,7 +1616,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
goto fail;
}
ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1);
ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto fail;
@ -1674,7 +1670,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
}
}
ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1);
ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto fail;
@ -1699,8 +1695,7 @@ no_free_objectid:
/*
* create all the snapshots we've scheduled for creation
*/
static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans)
{
struct btrfs_pending_snapshot *pending, *next;
struct list_head *head = &trans->transaction->pending_snapshots;
@ -1708,7 +1703,7 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
list_for_each_entry_safe(pending, next, head, list) {
list_del(&pending->list);
ret = create_pending_snapshot(trans, fs_info, pending);
ret = create_pending_snapshot(trans, pending);
if (ret)
break;
}
@ -1861,10 +1856,9 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
}
static void cleanup_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int err)
static void cleanup_transaction(struct btrfs_trans_handle *trans, int err)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_transaction *cur_trans = trans->transaction;
DEFINE_WAIT(wait);
@ -1904,7 +1898,7 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
btrfs_put_transaction(cur_trans);
btrfs_put_transaction(cur_trans);
trace_btrfs_transaction_commit(root);
trace_btrfs_transaction_commit(trans->root);
if (current->journal_info == trans)
current->journal_info = NULL;
@ -1959,13 +1953,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
/* make a pass through all the delayed refs we have so far
* any runnings procs may add more while we are here
*/
ret = btrfs_run_delayed_refs(trans, fs_info, 0);
ret = btrfs_run_delayed_refs(trans, 0);
if (ret) {
btrfs_end_transaction(trans);
return ret;
}
btrfs_trans_release_metadata(trans, fs_info);
btrfs_trans_release_metadata(trans);
trans->block_rsv = NULL;
cur_trans = trans->transaction;
@ -1978,9 +1972,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
smp_wmb();
if (!list_empty(&trans->new_bgs))
btrfs_create_pending_block_groups(trans, fs_info);
btrfs_create_pending_block_groups(trans);
ret = btrfs_run_delayed_refs(trans, fs_info, 0);
ret = btrfs_run_delayed_refs(trans, 0);
if (ret) {
btrfs_end_transaction(trans);
return ret;
@ -2008,12 +2002,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
run_it = 1;
mutex_unlock(&fs_info->ro_block_group_mutex);
if (run_it)
ret = btrfs_start_dirty_block_groups(trans, fs_info);
}
if (ret) {
btrfs_end_transaction(trans);
return ret;
if (run_it) {
ret = btrfs_start_dirty_block_groups(trans);
if (ret) {
btrfs_end_transaction(trans);
return ret;
}
}
}
spin_lock(&fs_info->trans_lock);
@ -2061,7 +2056,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
if (ret)
goto cleanup_transaction;
ret = btrfs_run_delayed_items(trans, fs_info);
ret = btrfs_run_delayed_items(trans);
if (ret)
goto cleanup_transaction;
@ -2069,7 +2064,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
extwriter_counter_read(cur_trans) == 0);
/* some pending stuffs might be added after the previous flush. */
ret = btrfs_run_delayed_items(trans, fs_info);
ret = btrfs_run_delayed_items(trans);
if (ret)
goto cleanup_transaction;
@ -2106,7 +2101,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
* deal with them in create_pending_snapshot(), which is the
* core function of the snapshot creation.
*/
ret = create_pending_snapshots(trans, fs_info);
ret = create_pending_snapshots(trans);
if (ret) {
mutex_unlock(&fs_info->reloc_mutex);
goto scrub_continue;
@ -2122,13 +2117,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
* because all the tree which are snapshoted will be forced to COW
* the nodes and leaves.
*/
ret = btrfs_run_delayed_items(trans, fs_info);
ret = btrfs_run_delayed_items(trans);
if (ret) {
mutex_unlock(&fs_info->reloc_mutex);
goto scrub_continue;
}
ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1);
ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
if (ret) {
mutex_unlock(&fs_info->reloc_mutex);
goto scrub_continue;
@ -2157,7 +2152,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
*/
mutex_lock(&fs_info->tree_log_mutex);
ret = commit_fs_roots(trans, fs_info);
ret = commit_fs_roots(trans);
if (ret) {
mutex_unlock(&fs_info->tree_log_mutex);
mutex_unlock(&fs_info->reloc_mutex);
@ -2179,7 +2174,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
* commit_fs_roots() can call btrfs_save_ino_cache(), which generates
* new delayed refs. Must handle them or qgroup can be wrong.
*/
ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1);
ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
if (ret) {
mutex_unlock(&fs_info->tree_log_mutex);
mutex_unlock(&fs_info->reloc_mutex);
@ -2190,14 +2185,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
* Since fs roots are all committed, we can get a quite accurate
* new_roots. So let's do quota accounting.
*/
ret = btrfs_qgroup_account_extents(trans, fs_info);
ret = btrfs_qgroup_account_extents(trans);
if (ret < 0) {
mutex_unlock(&fs_info->tree_log_mutex);
mutex_unlock(&fs_info->reloc_mutex);
goto scrub_continue;
}
ret = commit_cowonly_roots(trans, fs_info);
ret = commit_cowonly_roots(trans);
if (ret) {
mutex_unlock(&fs_info->tree_log_mutex);
mutex_unlock(&fs_info->reloc_mutex);
@ -2229,7 +2224,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
list_add_tail(&fs_info->chunk_root->dirty_list,
&cur_trans->switch_commits);
switch_commit_roots(cur_trans, fs_info);
switch_commit_roots(cur_trans);
ASSERT(list_empty(&cur_trans->dirty_bgs));
ASSERT(list_empty(&cur_trans->io_bgs));
@ -2241,7 +2236,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
sizeof(*fs_info->super_copy));
btrfs_update_commit_device_size(fs_info);
btrfs_update_commit_device_bytes_used(fs_info, cur_trans);
btrfs_update_commit_device_bytes_used(cur_trans);
clear_bit(BTRFS_FS_LOG1_ERR, &fs_info->flags);
clear_bit(BTRFS_FS_LOG2_ERR, &fs_info->flags);
@ -2256,7 +2251,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
wake_up(&fs_info->transaction_wait);
ret = btrfs_write_and_wait_transaction(trans, fs_info);
ret = btrfs_write_and_wait_transaction(trans);
if (ret) {
btrfs_handle_fs_error(fs_info, ret,
"Error while writing out transaction");
@ -2273,7 +2268,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
if (ret)
goto scrub_continue;
btrfs_finish_extent_commit(trans, fs_info);
btrfs_finish_extent_commit(trans);
if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &cur_trans->flags))
btrfs_clear_space_info_full(fs_info);
@ -2319,13 +2314,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
scrub_continue:
btrfs_scrub_continue(fs_info);
cleanup_transaction:
btrfs_trans_release_metadata(trans, fs_info);
btrfs_trans_release_metadata(trans);
btrfs_trans_release_chunk_metadata(trans);
trans->block_rsv = NULL;
btrfs_warn(fs_info, "Skipping commit of aborted transaction.");
if (current->journal_info == trans)
current->journal_info = NULL;
cleanup_transaction(trans, trans->root, ret);
cleanup_transaction(trans, ret);
return ret;
}

Просмотреть файл

@ -69,6 +69,22 @@ struct btrfs_transaction {
struct list_head pending_chunks;
struct list_head switch_commits;
struct list_head dirty_bgs;
/*
* There is no explicit lock which protects io_bgs, rather its
* consistency is implied by the fact that all the sites which modify
* it do so under some form of transaction critical section, namely:
*
* - btrfs_start_dirty_block_groups - This function can only ever be
* run by one of the transaction committers. Refer to
* BTRFS_TRANS_DIRTY_BG_RUN usage in btrfs_commit_transaction
*
* - btrfs_write_dirty_blockgroups - this is called by
* commit_cowonly_roots from transaction critical section
* (TRANS_STATE_COMMIT_DOING)
*
* - btrfs_cleanup_dirty_bgs - called on transaction abort
*/
struct list_head io_bgs;
struct list_head dropped_roots;
@ -89,21 +105,18 @@ struct btrfs_transaction {
#define __TRANS_FREEZABLE (1U << 0)
#define __TRANS_USERSPACE (1U << 8)
#define __TRANS_START (1U << 9)
#define __TRANS_ATTACH (1U << 10)
#define __TRANS_JOIN (1U << 11)
#define __TRANS_JOIN_NOLOCK (1U << 12)
#define __TRANS_DUMMY (1U << 13)
#define TRANS_USERSPACE (__TRANS_USERSPACE | __TRANS_FREEZABLE)
#define TRANS_START (__TRANS_START | __TRANS_FREEZABLE)
#define TRANS_ATTACH (__TRANS_ATTACH)
#define TRANS_JOIN (__TRANS_JOIN | __TRANS_FREEZABLE)
#define TRANS_JOIN_NOLOCK (__TRANS_JOIN_NOLOCK)
#define TRANS_EXTWRITERS (__TRANS_USERSPACE | __TRANS_START | \
__TRANS_ATTACH)
#define TRANS_EXTWRITERS (__TRANS_START | __TRANS_ATTACH)
#define BTRFS_SEND_TRANS_STUB ((void *)1)
@ -186,15 +199,11 @@ struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
struct btrfs_root *root,
unsigned int num_items,
int min_factor);
struct btrfs_trans_handle *btrfs_start_transaction_lflush(
struct btrfs_root *root,
unsigned int num_items);
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_attach_transaction_barrier(
struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root);
int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid);
void btrfs_add_dead_root(struct btrfs_root *root);

Просмотреть файл

@ -30,7 +30,6 @@
#include "tree-checker.h"
#include "disk-io.h"
#include "compression.h"
#include "hash.h"
/*
* Error message should follow the following format:
@ -53,7 +52,8 @@
* Allows callers to customize the output.
*/
__printf(4, 5)
static void generic_err(const struct btrfs_root *root,
__cold
static void generic_err(const struct btrfs_fs_info *fs_info,
const struct extent_buffer *eb, int slot,
const char *fmt, ...)
{
@ -65,10 +65,10 @@ static void generic_err(const struct btrfs_root *root,
vaf.fmt = fmt;
vaf.va = &args;
btrfs_crit(root->fs_info,
btrfs_crit(fs_info,
"corrupt %s: root=%llu block=%llu slot=%d, %pV",
btrfs_header_level(eb) == 0 ? "leaf" : "node",
root->objectid, btrfs_header_bytenr(eb), slot, &vaf);
btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, &vaf);
va_end(args);
}
@ -77,7 +77,8 @@ static void generic_err(const struct btrfs_root *root,
* offset has its own meaning.
*/
__printf(4, 5)
static void file_extent_err(const struct btrfs_root *root,
__cold
static void file_extent_err(const struct btrfs_fs_info *fs_info,
const struct extent_buffer *eb, int slot,
const char *fmt, ...)
{
@ -91,10 +92,11 @@ static void file_extent_err(const struct btrfs_root *root,
vaf.fmt = fmt;
vaf.va = &args;
btrfs_crit(root->fs_info,
btrfs_crit(fs_info,
"corrupt %s: root=%llu block=%llu slot=%d ino=%llu file_offset=%llu, %pV",
btrfs_header_level(eb) == 0 ? "leaf" : "node", root->objectid,
btrfs_header_bytenr(eb), slot, key.objectid, key.offset, &vaf);
btrfs_header_level(eb) == 0 ? "leaf" : "node",
btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
key.objectid, key.offset, &vaf);
va_end(args);
}
@ -102,26 +104,26 @@ static void file_extent_err(const struct btrfs_root *root,
* Return 0 if the btrfs_file_extent_##name is aligned to @alignment
* Else return 1
*/
#define CHECK_FE_ALIGNED(root, leaf, slot, fi, name, alignment) \
#define CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, name, alignment) \
({ \
if (!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment))) \
file_extent_err((root), (leaf), (slot), \
file_extent_err((fs_info), (leaf), (slot), \
"invalid %s for file extent, have %llu, should be aligned to %u", \
(#name), btrfs_file_extent_##name((leaf), (fi)), \
(alignment)); \
(!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment))); \
})
static int check_extent_data_item(struct btrfs_root *root,
static int check_extent_data_item(struct btrfs_fs_info *fs_info,
struct extent_buffer *leaf,
struct btrfs_key *key, int slot)
{
struct btrfs_file_extent_item *fi;
u32 sectorsize = root->fs_info->sectorsize;
u32 sectorsize = fs_info->sectorsize;
u32 item_size = btrfs_item_size_nr(leaf, slot);
if (!IS_ALIGNED(key->offset, sectorsize)) {
file_extent_err(root, leaf, slot,
file_extent_err(fs_info, leaf, slot,
"unaligned file_offset for file extent, have %llu should be aligned to %u",
key->offset, sectorsize);
return -EUCLEAN;
@ -130,7 +132,7 @@ static int check_extent_data_item(struct btrfs_root *root,
fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) {
file_extent_err(root, leaf, slot,
file_extent_err(fs_info, leaf, slot,
"invalid type for file extent, have %u expect range [0, %u]",
btrfs_file_extent_type(leaf, fi),
BTRFS_FILE_EXTENT_TYPES);
@ -142,14 +144,14 @@ static int check_extent_data_item(struct btrfs_root *root,
* and must be caught in open_ctree().
*/
if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) {
file_extent_err(root, leaf, slot,
file_extent_err(fs_info, leaf, slot,
"invalid compression for file extent, have %u expect range [0, %u]",
btrfs_file_extent_compression(leaf, fi),
BTRFS_COMPRESS_TYPES);
return -EUCLEAN;
}
if (btrfs_file_extent_encryption(leaf, fi)) {
file_extent_err(root, leaf, slot,
file_extent_err(fs_info, leaf, slot,
"invalid encryption for file extent, have %u expect 0",
btrfs_file_extent_encryption(leaf, fi));
return -EUCLEAN;
@ -157,7 +159,7 @@ static int check_extent_data_item(struct btrfs_root *root,
if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
/* Inline extent must have 0 as key offset */
if (key->offset) {
file_extent_err(root, leaf, slot,
file_extent_err(fs_info, leaf, slot,
"invalid file_offset for inline file extent, have %llu expect 0",
key->offset);
return -EUCLEAN;
@ -171,7 +173,7 @@ static int check_extent_data_item(struct btrfs_root *root,
/* Uncompressed inline extent size must match item size */
if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
btrfs_file_extent_ram_bytes(leaf, fi)) {
file_extent_err(root, leaf, slot,
file_extent_err(fs_info, leaf, slot,
"invalid ram_bytes for uncompressed inline extent, have %u expect %llu",
item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START +
btrfs_file_extent_ram_bytes(leaf, fi));
@ -182,40 +184,41 @@ static int check_extent_data_item(struct btrfs_root *root,
/* Regular or preallocated extent has fixed item size */
if (item_size != sizeof(*fi)) {
file_extent_err(root, leaf, slot,
file_extent_err(fs_info, leaf, slot,
"invalid item size for reg/prealloc file extent, have %u expect %zu",
item_size, sizeof(*fi));
return -EUCLEAN;
}
if (CHECK_FE_ALIGNED(root, leaf, slot, fi, ram_bytes, sectorsize) ||
CHECK_FE_ALIGNED(root, leaf, slot, fi, disk_bytenr, sectorsize) ||
CHECK_FE_ALIGNED(root, leaf, slot, fi, disk_num_bytes, sectorsize) ||
CHECK_FE_ALIGNED(root, leaf, slot, fi, offset, sectorsize) ||
CHECK_FE_ALIGNED(root, leaf, slot, fi, num_bytes, sectorsize))
if (CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, ram_bytes, sectorsize) ||
CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, disk_bytenr, sectorsize) ||
CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, disk_num_bytes, sectorsize) ||
CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, offset, sectorsize) ||
CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, num_bytes, sectorsize))
return -EUCLEAN;
return 0;
}
static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf,
struct btrfs_key *key, int slot)
static int check_csum_item(struct btrfs_fs_info *fs_info,
struct extent_buffer *leaf, struct btrfs_key *key,
int slot)
{
u32 sectorsize = root->fs_info->sectorsize;
u32 csumsize = btrfs_super_csum_size(root->fs_info->super_copy);
u32 sectorsize = fs_info->sectorsize;
u32 csumsize = btrfs_super_csum_size(fs_info->super_copy);
if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) {
generic_err(root, leaf, slot,
generic_err(fs_info, leaf, slot,
"invalid key objectid for csum item, have %llu expect %llu",
key->objectid, BTRFS_EXTENT_CSUM_OBJECTID);
return -EUCLEAN;
}
if (!IS_ALIGNED(key->offset, sectorsize)) {
generic_err(root, leaf, slot,
generic_err(fs_info, leaf, slot,
"unaligned key offset for csum item, have %llu should be aligned to %u",
key->offset, sectorsize);
return -EUCLEAN;
}
if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) {
generic_err(root, leaf, slot,
generic_err(fs_info, leaf, slot,
"unaligned item size for csum item, have %u should be aligned to %u",
btrfs_item_size_nr(leaf, slot), csumsize);
return -EUCLEAN;
@ -228,7 +231,8 @@ static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf,
* which represents inode number
*/
__printf(4, 5)
static void dir_item_err(const struct btrfs_root *root,
__cold
static void dir_item_err(const struct btrfs_fs_info *fs_info,
const struct extent_buffer *eb, int slot,
const char *fmt, ...)
{
@ -242,14 +246,15 @@ static void dir_item_err(const struct btrfs_root *root,
vaf.fmt = fmt;
vaf.va = &args;
btrfs_crit(root->fs_info,
btrfs_crit(fs_info,
"corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV",
btrfs_header_level(eb) == 0 ? "leaf" : "node", root->objectid,
btrfs_header_bytenr(eb), slot, key.objectid, &vaf);
btrfs_header_level(eb) == 0 ? "leaf" : "node",
btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
key.objectid, &vaf);
va_end(args);
}
static int check_dir_item(struct btrfs_root *root,
static int check_dir_item(struct btrfs_fs_info *fs_info,
struct extent_buffer *leaf,
struct btrfs_key *key, int slot)
{
@ -268,7 +273,7 @@ static int check_dir_item(struct btrfs_root *root,
/* header itself should not cross item boundary */
if (cur + sizeof(*di) > item_size) {
dir_item_err(root, leaf, slot,
dir_item_err(fs_info, leaf, slot,
"dir item header crosses item boundary, have %zu boundary %u",
cur + sizeof(*di), item_size);
return -EUCLEAN;
@ -277,7 +282,7 @@ static int check_dir_item(struct btrfs_root *root,
/* dir type check */
dir_type = btrfs_dir_type(leaf, di);
if (dir_type >= BTRFS_FT_MAX) {
dir_item_err(root, leaf, slot,
dir_item_err(fs_info, leaf, slot,
"invalid dir item type, have %u expect [0, %u)",
dir_type, BTRFS_FT_MAX);
return -EUCLEAN;
@ -285,14 +290,14 @@ static int check_dir_item(struct btrfs_root *root,
if (key->type == BTRFS_XATTR_ITEM_KEY &&
dir_type != BTRFS_FT_XATTR) {
dir_item_err(root, leaf, slot,
dir_item_err(fs_info, leaf, slot,
"invalid dir item type for XATTR key, have %u expect %u",
dir_type, BTRFS_FT_XATTR);
return -EUCLEAN;
}
if (dir_type == BTRFS_FT_XATTR &&
key->type != BTRFS_XATTR_ITEM_KEY) {
dir_item_err(root, leaf, slot,
dir_item_err(fs_info, leaf, slot,
"xattr dir type found for non-XATTR key");
return -EUCLEAN;
}
@ -305,21 +310,21 @@ static int check_dir_item(struct btrfs_root *root,
name_len = btrfs_dir_name_len(leaf, di);
data_len = btrfs_dir_data_len(leaf, di);
if (name_len > max_name_len) {
dir_item_err(root, leaf, slot,
dir_item_err(fs_info, leaf, slot,
"dir item name len too long, have %u max %u",
name_len, max_name_len);
return -EUCLEAN;
}
if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root->fs_info)) {
dir_item_err(root, leaf, slot,
if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(fs_info)) {
dir_item_err(fs_info, leaf, slot,
"dir item name and data len too long, have %u max %u",
name_len + data_len,
BTRFS_MAX_XATTR_SIZE(root->fs_info));
BTRFS_MAX_XATTR_SIZE(fs_info));
return -EUCLEAN;
}
if (data_len && dir_type != BTRFS_FT_XATTR) {
dir_item_err(root, leaf, slot,
dir_item_err(fs_info, leaf, slot,
"dir item with invalid data len, have %u expect 0",
data_len);
return -EUCLEAN;
@ -329,7 +334,7 @@ static int check_dir_item(struct btrfs_root *root,
/* header and name/data should not cross item boundary */
if (cur + total_size > item_size) {
dir_item_err(root, leaf, slot,
dir_item_err(fs_info, leaf, slot,
"dir item data crosses item boundary, have %u boundary %u",
cur + total_size, item_size);
return -EUCLEAN;
@ -347,7 +352,7 @@ static int check_dir_item(struct btrfs_root *root,
(unsigned long)(di + 1), name_len);
name_hash = btrfs_name_hash(namebuf, name_len);
if (key->offset != name_hash) {
dir_item_err(root, leaf, slot,
dir_item_err(fs_info, leaf, slot,
"name hash mismatch with key, have 0x%016x expect 0x%016llx",
name_hash, key->offset);
return -EUCLEAN;
@ -362,7 +367,7 @@ static int check_dir_item(struct btrfs_root *root,
/*
* Common point to switch the item-specific validation.
*/
static int check_leaf_item(struct btrfs_root *root,
static int check_leaf_item(struct btrfs_fs_info *fs_info,
struct extent_buffer *leaf,
struct btrfs_key *key, int slot)
{
@ -370,24 +375,23 @@ static int check_leaf_item(struct btrfs_root *root,
switch (key->type) {
case BTRFS_EXTENT_DATA_KEY:
ret = check_extent_data_item(root, leaf, key, slot);
ret = check_extent_data_item(fs_info, leaf, key, slot);
break;
case BTRFS_EXTENT_CSUM_KEY:
ret = check_csum_item(root, leaf, key, slot);
ret = check_csum_item(fs_info, leaf, key, slot);
break;
case BTRFS_DIR_ITEM_KEY:
case BTRFS_DIR_INDEX_KEY:
case BTRFS_XATTR_ITEM_KEY:
ret = check_dir_item(root, leaf, key, slot);
ret = check_dir_item(fs_info, leaf, key, slot);
break;
}
return ret;
}
static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf,
static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
bool check_item_data)
{
struct btrfs_fs_info *fs_info = root->fs_info;
/* No valid key type is 0, so all key should be larger than this key */
struct btrfs_key prev_key = {0, 0, 0};
struct btrfs_key key;
@ -420,7 +424,7 @@ static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf,
eb = btrfs_root_node(check_root);
/* if leaf is the root, then it's fine */
if (leaf != eb) {
generic_err(check_root, leaf, 0,
generic_err(fs_info, leaf, 0,
"invalid nritems, have %u should not be 0 for non-root leaf",
nritems);
free_extent_buffer(eb);
@ -453,7 +457,7 @@ static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf,
/* Make sure the keys are in the right order */
if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) {
generic_err(root, leaf, slot,
generic_err(fs_info, leaf, slot,
"bad key order, prev (%llu %u %llu) current (%llu %u %llu)",
prev_key.objectid, prev_key.type,
prev_key.offset, key.objectid, key.type,
@ -472,7 +476,7 @@ static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf,
item_end_expected = btrfs_item_offset_nr(leaf,
slot - 1);
if (btrfs_item_end_nr(leaf, slot) != item_end_expected) {
generic_err(root, leaf, slot,
generic_err(fs_info, leaf, slot,
"unexpected item end, have %u expect %u",
btrfs_item_end_nr(leaf, slot),
item_end_expected);
@ -486,7 +490,7 @@ static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf,
*/
if (btrfs_item_end_nr(leaf, slot) >
BTRFS_LEAF_DATA_SIZE(fs_info)) {
generic_err(root, leaf, slot,
generic_err(fs_info, leaf, slot,
"slot end outside of leaf, have %u expect range [0, %u]",
btrfs_item_end_nr(leaf, slot),
BTRFS_LEAF_DATA_SIZE(fs_info));
@ -496,7 +500,7 @@ static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf,
/* Also check if the item pointer overlaps with btrfs item. */
if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) >
btrfs_item_ptr_offset(leaf, slot)) {
generic_err(root, leaf, slot,
generic_err(fs_info, leaf, slot,
"slot overlaps with its data, item end %lu data start %lu",
btrfs_item_nr_offset(slot) +
sizeof(struct btrfs_item),
@ -509,7 +513,7 @@ static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf,
* Check if the item size and content meet other
* criteria
*/
ret = check_leaf_item(root, leaf, &key, slot);
ret = check_leaf_item(fs_info, leaf, &key, slot);
if (ret < 0)
return ret;
}
@ -522,18 +526,19 @@ static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf,
return 0;
}
int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf)
int btrfs_check_leaf_full(struct btrfs_fs_info *fs_info,
struct extent_buffer *leaf)
{
return check_leaf(root, leaf, true);
return check_leaf(fs_info, leaf, true);
}
int btrfs_check_leaf_relaxed(struct btrfs_root *root,
int btrfs_check_leaf_relaxed(struct btrfs_fs_info *fs_info,
struct extent_buffer *leaf)
{
return check_leaf(root, leaf, false);
return check_leaf(fs_info, leaf, false);
}
int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node)
int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node)
{
unsigned long nr = btrfs_header_nritems(node);
struct btrfs_key key, next_key;
@ -541,12 +546,12 @@ int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node)
u64 bytenr;
int ret = 0;
if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) {
btrfs_crit(root->fs_info,
if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(fs_info)) {
btrfs_crit(fs_info,
"corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]",
root->objectid, node->start,
btrfs_header_owner(node), node->start,
nr == 0 ? "small" : "large", nr,
BTRFS_NODEPTRS_PER_BLOCK(root->fs_info));
BTRFS_NODEPTRS_PER_BLOCK(fs_info));
return -EUCLEAN;
}
@ -556,21 +561,21 @@ int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node)
btrfs_node_key_to_cpu(node, &next_key, slot + 1);
if (!bytenr) {
generic_err(root, node, slot,
generic_err(fs_info, node, slot,
"invalid NULL node pointer");
ret = -EUCLEAN;
goto out;
}
if (!IS_ALIGNED(bytenr, root->fs_info->sectorsize)) {
generic_err(root, node, slot,
if (!IS_ALIGNED(bytenr, fs_info->sectorsize)) {
generic_err(fs_info, node, slot,
"unaligned pointer, have %llu should be aligned to %u",
bytenr, root->fs_info->sectorsize);
bytenr, fs_info->sectorsize);
ret = -EUCLEAN;
goto out;
}
if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
generic_err(root, node, slot,
generic_err(fs_info, node, slot,
"bad key order, current (%llu %u %llu) next (%llu %u %llu)",
key.objectid, key.type, key.offset,
next_key.objectid, next_key.type,

Просмотреть файл

@ -25,14 +25,15 @@
* Will check not only the item pointers, but also every possible member
* in item data.
*/
int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf);
int btrfs_check_leaf_full(struct btrfs_fs_info *fs_info,
struct extent_buffer *leaf);
/*
* Less strict leaf checker.
* Will only check item pointers, not reading item data.
*/
int btrfs_check_leaf_relaxed(struct btrfs_root *root,
int btrfs_check_leaf_relaxed(struct btrfs_fs_info *fs_info,
struct extent_buffer *leaf);
int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node);
int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node);
#endif

Просмотреть файл

@ -39,7 +39,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
int level;
int next_key_ret = 0;
u64 last_ret = 0;
u64 min_trans = 0;
if (root->fs_info->extent_root == root) {
/*
@ -81,7 +80,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
path->keep_locks = 1;
ret = btrfs_search_forward(root, &key, path, min_trans);
ret = btrfs_search_forward(root, &key, path, BTRFS_OLDEST_GENERATION);
if (ret < 0)
goto out;
if (ret > 0) {
@ -130,7 +129,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
*/
path->slots[1] = btrfs_header_nritems(path->nodes[1]);
next_key_ret = btrfs_find_next_key(root, path, &key, 1,
min_trans);
BTRFS_OLDEST_GENERATION);
if (next_key_ret == 0) {
memcpy(&root->defrag_progress, &key, sizeof(key));
ret = -EAGAIN;

Просмотреть файл

@ -21,12 +21,12 @@
#include <linux/blkdev.h>
#include <linux/list_sort.h>
#include <linux/iversion.h>
#include "ctree.h"
#include "tree-log.h"
#include "disk-io.h"
#include "locking.h"
#include "print-tree.h"
#include "backref.h"
#include "hash.h"
#include "compression.h"
#include "qgroup.h"
#include "inode-map.h"
@ -286,7 +286,7 @@ struct walk_control {
* inside it
*/
int (*process_func)(struct btrfs_root *log, struct extent_buffer *eb,
struct walk_control *wc, u64 gen);
struct walk_control *wc, u64 gen, int level);
};
/*
@ -294,7 +294,7 @@ struct walk_control {
*/
static int process_one_buffer(struct btrfs_root *log,
struct extent_buffer *eb,
struct walk_control *wc, u64 gen)
struct walk_control *wc, u64 gen, int level)
{
struct btrfs_fs_info *fs_info = log->fs_info;
int ret = 0;
@ -304,7 +304,7 @@ static int process_one_buffer(struct btrfs_root *log,
* pin down any logged extents, so we have to read the block.
*/
if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
ret = btrfs_read_buffer(eb, gen);
ret = btrfs_read_buffer(eb, gen, level, NULL);
if (ret)
return ret;
}
@ -853,7 +853,6 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
struct btrfs_inode *dir,
struct btrfs_dir_item *di)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct inode *inode;
char *name;
int name_len;
@ -887,7 +886,7 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
if (ret)
goto out;
else
ret = btrfs_run_delayed_items(trans, fs_info);
ret = btrfs_run_delayed_items(trans);
out:
kfree(name);
iput(inode);
@ -1007,7 +1006,6 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
u64 ref_index, char *name, int namelen,
int *search_done)
{
struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
char *victim_name;
int victim_name_len;
@ -1065,7 +1063,7 @@ again:
kfree(victim_name);
if (ret)
return ret;
ret = btrfs_run_delayed_items(trans, fs_info);
ret = btrfs_run_delayed_items(trans);
if (ret)
return ret;
*search_done = 1;
@ -1136,8 +1134,7 @@ again:
victim_name_len);
if (!ret)
ret = btrfs_run_delayed_items(
trans,
fs_info);
trans);
}
iput(victim_parent);
kfree(victim_name);
@ -2098,7 +2095,6 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
struct inode *dir,
struct btrfs_key *dir_key)
{
struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
struct extent_buffer *eb;
int slot;
@ -2162,7 +2158,7 @@ again:
ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
BTRFS_I(inode), name, name_len);
if (!ret)
ret = btrfs_run_delayed_items(trans, fs_info);
ret = btrfs_run_delayed_items(trans);
kfree(name);
iput(inode);
if (ret)
@ -2410,17 +2406,16 @@ out:
* back refs).
*/
static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
struct walk_control *wc, u64 gen)
struct walk_control *wc, u64 gen, int level)
{
int nritems;
struct btrfs_path *path;
struct btrfs_root *root = wc->replay_dest;
struct btrfs_key key;
int level;
int i;
int ret;
ret = btrfs_read_buffer(eb, gen);
ret = btrfs_read_buffer(eb, gen, level, NULL);
if (ret)
return ret;
@ -2537,6 +2532,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
WARN_ON(*level >= BTRFS_MAX_LEVEL);
while (*level > 0) {
struct btrfs_key first_key;
WARN_ON(*level < 0);
WARN_ON(*level >= BTRFS_MAX_LEVEL);
cur = path->nodes[*level];
@ -2549,6 +2546,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
btrfs_node_key_to_cpu(cur, &first_key, path->slots[*level]);
blocksize = fs_info->nodesize;
parent = path->nodes[*level];
@ -2559,7 +2557,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
return PTR_ERR(next);
if (*level == 1) {
ret = wc->process_func(root, next, wc, ptr_gen);
ret = wc->process_func(root, next, wc, ptr_gen,
*level - 1);
if (ret) {
free_extent_buffer(next);
return ret;
@ -2567,7 +2566,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
path->slots[*level]++;
if (wc->free) {
ret = btrfs_read_buffer(next, ptr_gen);
ret = btrfs_read_buffer(next, ptr_gen,
*level - 1, &first_key);
if (ret) {
free_extent_buffer(next);
return ret;
@ -2597,7 +2597,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
free_extent_buffer(next);
continue;
}
ret = btrfs_read_buffer(next, ptr_gen);
ret = btrfs_read_buffer(next, ptr_gen, *level - 1, &first_key);
if (ret) {
free_extent_buffer(next);
return ret;
@ -2647,7 +2647,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
root_owner = btrfs_header_owner(parent);
ret = wc->process_func(root, path->nodes[*level], wc,
btrfs_header_generation(path->nodes[*level]));
btrfs_header_generation(path->nodes[*level]),
*level);
if (ret)
return ret;
@ -2729,7 +2730,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
/* was the root node processed? if not, catch it here */
if (path->nodes[orig_level]) {
ret = wc->process_func(log, path->nodes[orig_level], wc,
btrfs_header_generation(path->nodes[orig_level]));
btrfs_header_generation(path->nodes[orig_level]),
orig_level);
if (ret)
goto out;
if (wc->free) {
@ -3972,6 +3974,7 @@ fill_holes:
ASSERT(ret == 0);
src = src_path->nodes[0];
i = 0;
need_find_last_extent = true;
}
btrfs_item_key_to_cpu(src, &key, i);
@ -4006,6 +4009,36 @@ fill_holes:
break;
*last_extent = extent_end;
}
/*
* Check if there is a hole between the last extent found in our leaf
* and the first extent in the next leaf. If there is one, we need to
* log an explicit hole so that at replay time we can punch the hole.
*/
if (ret == 0 &&
key.objectid == btrfs_ino(inode) &&
key.type == BTRFS_EXTENT_DATA_KEY &&
i == btrfs_header_nritems(src_path->nodes[0])) {
ret = btrfs_next_leaf(inode->root, src_path);
need_find_last_extent = true;
if (ret > 0) {
ret = 0;
} else if (ret == 0) {
btrfs_item_key_to_cpu(src_path->nodes[0], &key,
src_path->slots[0]);
if (key.objectid == btrfs_ino(inode) &&
key.type == BTRFS_EXTENT_DATA_KEY &&
*last_extent < key.offset) {
const u64 len = key.offset - *last_extent;
ret = btrfs_insert_file_extent(trans, log,
btrfs_ino(inode),
*last_extent, 0,
0, len, 0, len,
0, 0, 0);
}
}
}
/*
* Need to let the callers know we dropped the path so they should
* re-search.
@ -5517,7 +5550,6 @@ out:
* the last committed transaction
*/
static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_inode *inode,
struct dentry *parent,
const loff_t start,
@ -5525,6 +5557,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
int inode_only,
struct btrfs_log_ctx *ctx)
{
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct super_block *sb;
struct dentry *old_parent = NULL;
@ -5550,7 +5583,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
goto end_no_trans;
}
if (root != inode->root || btrfs_root_refs(&root->root_item) == 0) {
if (btrfs_root_refs(&root->root_item) == 0) {
ret = 1;
goto end_no_trans;
}
@ -5682,7 +5715,7 @@ end_no_trans:
* data on disk.
*/
int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct dentry *dentry,
struct dentry *dentry,
const loff_t start,
const loff_t end,
struct btrfs_log_ctx *ctx)
@ -5690,8 +5723,8 @@ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
struct dentry *parent = dget_parent(dentry);
int ret;
ret = btrfs_log_inode_parent(trans, root, BTRFS_I(d_inode(dentry)),
parent, start, end, LOG_INODE_ALL, ctx);
ret = btrfs_log_inode_parent(trans, BTRFS_I(d_inode(dentry)), parent,
start, end, LOG_INODE_ALL, ctx);
dput(parent);
return ret;
@ -5953,7 +5986,6 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
struct dentry *parent)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
struct btrfs_root *root = inode->root;
/*
* this will force the logging code to walk the dentry chain
@ -5970,7 +6002,7 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
(!old_dir || old_dir->logged_trans <= fs_info->last_trans_committed))
return 0;
return btrfs_log_inode_parent(trans, root, inode, parent, 0,
LLONG_MAX, LOG_INODE_EXISTS, NULL);
return btrfs_log_inode_parent(trans, inode, parent, 0, LLONG_MAX,
LOG_INODE_EXISTS, NULL);
}

Просмотреть файл

@ -65,7 +65,7 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_recover_log_trees(struct btrfs_root *tree_root);
int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct dentry *dentry,
struct dentry *dentry,
const loff_t start,
const loff_t end,
struct btrfs_log_ctx *ctx);

Просмотреть файл

@ -282,7 +282,7 @@ int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info,
key.offset = 0;
again_search_slot:
ret = btrfs_search_forward(root, &key, path, 0);
ret = btrfs_search_forward(root, &key, path, BTRFS_OLDEST_GENERATION);
if (ret) {
if (ret > 0)
ret = 0;

Просмотреть файл

@ -27,6 +27,7 @@
#include <linux/raid/pq.h>
#include <linux/semaphore.h>
#include <linux/uuid.h>
#include <linux/list_sort.h>
#include <asm/div64.h>
#include "ctree.h"
#include "extent_map.h"
@ -278,7 +279,7 @@ static void btrfs_kobject_uevent(struct block_device *bdev,
&disk_to_dev(bdev->bd_disk)->kobj);
}
void btrfs_cleanup_fs_uuids(void)
void __exit btrfs_cleanup_fs_uuids(void)
{
struct btrfs_fs_devices *fs_devices;
@ -708,7 +709,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
device->devid != BTRFS_DEV_REPLACE_DEVID) {
fs_devices->rw_devices++;
list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
list_add_tail(&device->dev_alloc_list, &fs_devices->alloc_list);
}
brelse(bh);
@ -895,7 +896,11 @@ error:
return ERR_PTR(-ENOMEM);
}
void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices, int step)
/*
* After we have read the system tree and know devids belonging to
* this filesystem, remove the device which does not belong there.
*/
void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step)
{
struct btrfs_device *device, *next;
struct btrfs_device *latest_dev = NULL;
@ -1103,6 +1108,20 @@ out:
return ret;
}
static int devid_cmp(void *priv, struct list_head *a, struct list_head *b)
{
struct btrfs_device *dev1, *dev2;
dev1 = list_entry(a, struct btrfs_device, dev_list);
dev2 = list_entry(b, struct btrfs_device, dev_list);
if (dev1->devid < dev2->devid)
return -1;
else if (dev1->devid > dev2->devid)
return 1;
return 0;
}
int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
fmode_t flags, void *holder)
{
@ -1113,6 +1132,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
fs_devices->opened++;
ret = 0;
} else {
list_sort(NULL, &fs_devices->devices, devid_cmp);
ret = __btrfs_open_devices(fs_devices, flags, holder);
}
mutex_unlock(&uuid_mutex);
@ -1916,12 +1936,12 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
mutex_lock(&uuid_mutex);
num_devices = fs_info->fs_devices->num_devices;
btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
btrfs_dev_replace_read_lock(&fs_info->dev_replace);
if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
WARN_ON(num_devices < 1);
num_devices--;
}
btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1);
if (ret)
@ -2047,7 +2067,7 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info,
{
struct btrfs_fs_devices *fs_devices;
WARN_ON(!mutex_is_locked(&fs_info->fs_devices->device_list_mutex));
lockdep_assert_held(&fs_info->fs_devices->device_list_mutex);
/*
* in case of fs with no seed, srcdev->fs_devices will point
@ -2237,7 +2257,7 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
struct btrfs_device *device;
u64 super_flags;
BUG_ON(!mutex_is_locked(&uuid_mutex));
lockdep_assert_held(&uuid_mutex);
if (!fs_devices->seeding)
return -EINVAL;
@ -2642,7 +2662,6 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
device->total_bytes = btrfs_device_get_total_bytes(srcdev);
device->disk_total_bytes = btrfs_device_get_disk_total_bytes(srcdev);
device->bytes_used = btrfs_device_get_bytes_used(srcdev);
ASSERT(list_empty(&srcdev->resized_list));
device->commit_total_bytes = srcdev->commit_total_bytes;
device->commit_bytes_used = device->bytes_used;
device->fs_info = fs_info;
@ -2666,19 +2685,6 @@ error:
return ret;
}
void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info,
struct btrfs_device *tgtdev)
{
u32 sectorsize = fs_info->sectorsize;
WARN_ON(fs_info->fs_devices->rw_devices == 0);
tgtdev->io_width = sectorsize;
tgtdev->io_align = sectorsize;
tgtdev->sector_size = sectorsize;
tgtdev->fs_info = fs_info;
set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &tgtdev->dev_state);
}
static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
struct btrfs_device *device)
{
@ -2984,7 +2990,7 @@ static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
* we release the path used to search the chunk/dev tree and before
* the current task acquires this mutex and calls us.
*/
ASSERT(mutex_is_locked(&fs_info->delete_unused_bgs_mutex));
lockdep_assert_held(&fs_info->delete_unused_bgs_mutex);
ret = btrfs_can_relocate(fs_info, chunk_offset);
if (ret)
@ -2997,6 +3003,16 @@ static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
if (ret)
return ret;
/*
* We add the kobjects here (and after forcing data chunk creation)
* since relocation is the only place we'll create chunks of a new
* type at runtime. The only place where we'll remove the last
* chunk of a type is the call immediately below this one. Even
* so, we're protected against races with the cleaner thread since
* we're covered by the delete_unused_bgs_mutex.
*/
btrfs_add_raid_kobjects(fs_info);
trans = btrfs_start_trans_remove_block_group(root->fs_info,
chunk_offset);
if (IS_ERR(trans)) {
@ -3124,6 +3140,8 @@ static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info,
if (ret < 0)
return ret;
btrfs_add_raid_kobjects(fs_info);
return 1;
}
}
@ -3892,12 +3910,12 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
}
num_devices = fs_info->fs_devices->num_devices;
btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
btrfs_dev_replace_read_lock(&fs_info->dev_replace);
if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
BUG_ON(num_devices < 1);
num_devices--;
}
btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE | BTRFS_BLOCK_GROUP_DUP;
if (num_devices > 1)
allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
@ -4202,7 +4220,8 @@ static int btrfs_uuid_scan_kthread(void *data)
key.offset = 0;
while (1) {
ret = btrfs_search_forward(root, &key, path, 0);
ret = btrfs_search_forward(root, &key, path,
BTRFS_OLDEST_GENERATION);
if (ret) {
if (ret > 0)
ret = 0;
@ -4672,7 +4691,7 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
btrfs_set_fs_incompat(info, RAID56);
}
#define BTRFS_MAX_DEVS(r) ((BTRFS_MAX_ITEM_SIZE(r->fs_info) \
#define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info) \
- sizeof(struct btrfs_chunk)) \
/ sizeof(struct btrfs_stripe) + 1)
@ -4713,10 +4732,13 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
BUG_ON(!alloc_profile_is_valid(type, 0));
if (list_empty(&fs_devices->alloc_list))
if (list_empty(&fs_devices->alloc_list)) {
if (btrfs_test_opt(info, ENOSPC_DEBUG))
btrfs_debug(info, "%s: no writable device", __func__);
return -ENOSPC;
}
index = __get_raid_index(type);
index = btrfs_bg_flags_to_raid_index(type);
sub_stripes = btrfs_raid_array[index].sub_stripes;
dev_stripes = btrfs_raid_array[index].dev_stripes;
@ -4729,7 +4751,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
max_stripe_size = SZ_1G;
max_chunk_size = 10 * max_stripe_size;
if (!devs_max)
devs_max = BTRFS_MAX_DEVS(info->chunk_root);
devs_max = BTRFS_MAX_DEVS(info);
} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
/* for larger filesystems, use larger metadata chunks */
if (fs_devices->total_rw_bytes > 50ULL * SZ_1G)
@ -4738,7 +4760,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
max_stripe_size = SZ_256M;
max_chunk_size = max_stripe_size;
if (!devs_max)
devs_max = BTRFS_MAX_DEVS(info->chunk_root);
devs_max = BTRFS_MAX_DEVS(info);
} else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
max_stripe_size = SZ_32M;
max_chunk_size = 2 * max_stripe_size;
@ -4797,8 +4819,14 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
if (ret == 0)
max_avail = max_stripe_size * dev_stripes;
if (max_avail < BTRFS_STRIPE_LEN * dev_stripes)
if (max_avail < BTRFS_STRIPE_LEN * dev_stripes) {
if (btrfs_test_opt(info, ENOSPC_DEBUG))
btrfs_debug(info,
"%s: devid %llu has no free space, have=%llu want=%u",
__func__, device->devid, max_avail,
BTRFS_STRIPE_LEN * dev_stripes);
continue;
}
if (ndevs == fs_devices->rw_devices) {
WARN(1, "%s: found more than %llu devices\n",
@ -4821,8 +4849,13 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
/* round down to number of usable stripes */
ndevs = round_down(ndevs, devs_increment);
if (ndevs < devs_increment * sub_stripes || ndevs < devs_min) {
if (ndevs < devs_min) {
ret = -ENOSPC;
if (btrfs_test_opt(info, ENOSPC_DEBUG)) {
btrfs_debug(info,
"%s: not enough devices with free space: have=%d minimum required=%d",
__func__, ndevs, devs_min);
}
goto error;
}
@ -4856,18 +4889,17 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
* and compare that answer with the max chunk size
*/
if (stripe_size * data_stripes > max_chunk_size) {
u64 mask = (1ULL << 24) - 1;
stripe_size = div_u64(max_chunk_size, data_stripes);
/* bump the answer up to a 16MB boundary */
stripe_size = (stripe_size + mask) & ~mask;
stripe_size = round_up(stripe_size, SZ_16M);
/* but don't go higher than the limits we found
* while searching for free extents
/*
* But don't go higher than the limits we found while searching
* for free extents
*/
if (stripe_size > devices_info[ndevs-1].max_avail)
stripe_size = devices_info[ndevs-1].max_avail;
stripe_size = min(devices_info[ndevs - 1].max_avail,
stripe_size);
}
/* align to BTRFS_STRIPE_LEN */
@ -5068,7 +5100,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
{
u64 chunk_offset;
ASSERT(mutex_is_locked(&fs_info->chunk_mutex));
lockdep_assert_held(&fs_info->chunk_mutex);
chunk_offset = find_next_chunk(fs_info);
return __btrfs_alloc_chunk(trans, chunk_offset, type);
}
@ -5209,11 +5241,11 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
ret = 1;
free_extent_map(em);
btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
btrfs_dev_replace_read_lock(&fs_info->dev_replace);
if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace) &&
fs_info->dev_replace.tgtdev)
ret++;
btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
return ret;
}
@ -5254,13 +5286,25 @@ int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
}
static int find_live_mirror(struct btrfs_fs_info *fs_info,
struct map_lookup *map, int first, int num,
int optimal, int dev_replace_is_ongoing)
struct map_lookup *map, int first,
int dev_replace_is_ongoing)
{
int i;
int num_stripes;
int preferred_mirror;
int tolerance;
struct btrfs_device *srcdev;
ASSERT((map->type &
(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10)));
if (map->type & BTRFS_BLOCK_GROUP_RAID10)
num_stripes = map->sub_stripes;
else
num_stripes = map->num_stripes;
preferred_mirror = first + current->pid % num_stripes;
if (dev_replace_is_ongoing &&
fs_info->dev_replace.cont_reading_from_srcdev_mode ==
BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID)
@ -5274,10 +5318,10 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
* mirror is available
*/
for (tolerance = 0; tolerance < 2; tolerance++) {
if (map->stripes[optimal].dev->bdev &&
(tolerance || map->stripes[optimal].dev != srcdev))
return optimal;
for (i = first; i < first + num; i++) {
if (map->stripes[preferred_mirror].dev->bdev &&
(tolerance || map->stripes[preferred_mirror].dev != srcdev))
return preferred_mirror;
for (i = first; i < first + num_stripes; i++) {
if (map->stripes[i].dev->bdev &&
(tolerance || map->stripes[i].dev != srcdev))
return i;
@ -5287,7 +5331,7 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
/* we couldn't find one that doesn't fail. Just return something
* and the io error handling code will clean up eventually
*/
return optimal;
return preferred_mirror;
}
static inline int parity_smaller(u64 a, u64 b)
@ -5779,10 +5823,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
if (!bbio_ret)
goto out;
btrfs_dev_replace_lock(dev_replace, 0);
btrfs_dev_replace_read_lock(dev_replace);
dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
if (!dev_replace_is_ongoing)
btrfs_dev_replace_unlock(dev_replace, 0);
btrfs_dev_replace_read_unlock(dev_replace);
else
btrfs_dev_replace_set_lock_blocking(dev_replace);
@ -5814,8 +5858,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
stripe_index = mirror_num - 1;
else {
stripe_index = find_live_mirror(fs_info, map, 0,
map->num_stripes,
current->pid % map->num_stripes,
dev_replace_is_ongoing);
mirror_num = stripe_index + 1;
}
@ -5843,8 +5885,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
int old_stripe_index = stripe_index;
stripe_index = find_live_mirror(fs_info, map,
stripe_index,
map->sub_stripes, stripe_index +
current->pid % map->sub_stripes,
dev_replace_is_ongoing);
mirror_num = stripe_index - old_stripe_index + 1;
}
@ -5984,7 +6024,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
out:
if (dev_replace_is_ongoing) {
btrfs_dev_replace_clear_lock_blocking(dev_replace);
btrfs_dev_replace_unlock(dev_replace, 0);
btrfs_dev_replace_read_unlock(dev_replace);
}
free_extent_map(em);
return ret;
@ -6618,7 +6658,7 @@ static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info,
struct btrfs_fs_devices *fs_devices;
int ret;
BUG_ON(!mutex_is_locked(&uuid_mutex));
lockdep_assert_held(&uuid_mutex);
ASSERT(fsid);
fs_devices = fs_info->fs_devices->seed;
@ -7358,20 +7398,20 @@ void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info)
}
/* Must be invoked during the transaction commit */
void btrfs_update_commit_device_bytes_used(struct btrfs_fs_info *fs_info,
struct btrfs_transaction *transaction)
void btrfs_update_commit_device_bytes_used(struct btrfs_transaction *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct extent_map *em;
struct map_lookup *map;
struct btrfs_device *dev;
int i;
if (list_empty(&transaction->pending_chunks))
if (list_empty(&trans->pending_chunks))
return;
/* In order to kick the device replace finish process */
mutex_lock(&fs_info->chunk_mutex);
list_for_each_entry(em, &transaction->pending_chunks, list) {
list_for_each_entry(em, &trans->pending_chunks, list) {
map = em->map_lookup;
for (i = 0; i < map->num_stripes; i++) {

Просмотреть файл

@ -422,7 +422,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
struct btrfs_fs_devices **fs_devices_ret);
int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices, int step);
void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step);
void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info,
struct btrfs_device *device, struct btrfs_device *this_dev);
int btrfs_find_device_missing_or_by_path(struct btrfs_fs_info *fs_info,
@ -436,7 +436,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
const u8 *uuid);
int btrfs_rm_device(struct btrfs_fs_info *fs_info,
const char *device_path, u64 devid);
void btrfs_cleanup_fs_uuids(void);
void __exit btrfs_cleanup_fs_uuids(void);
int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
int btrfs_grow_device(struct btrfs_trans_handle *trans,
struct btrfs_device *device, u64 new_size);
@ -476,8 +476,6 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
struct btrfs_device *srcdev);
void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
struct btrfs_device *tgtdev);
void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info,
struct btrfs_device *tgtdev);
void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path);
int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
u64 logical, u64 len);
@ -546,9 +544,30 @@ static inline void btrfs_dev_stat_reset(struct btrfs_device *dev,
btrfs_dev_stat_set(dev, index, 0);
}
/*
* Convert block group flags (BTRFS_BLOCK_GROUP_*) to btrfs_raid_types, which
* can be used as index to access btrfs_raid_array[].
*/
static inline enum btrfs_raid_types btrfs_bg_flags_to_raid_index(u64 flags)
{
if (flags & BTRFS_BLOCK_GROUP_RAID10)
return BTRFS_RAID_RAID10;
else if (flags & BTRFS_BLOCK_GROUP_RAID1)
return BTRFS_RAID_RAID1;
else if (flags & BTRFS_BLOCK_GROUP_DUP)
return BTRFS_RAID_DUP;
else if (flags & BTRFS_BLOCK_GROUP_RAID0)
return BTRFS_RAID_RAID0;
else if (flags & BTRFS_BLOCK_GROUP_RAID5)
return BTRFS_RAID_RAID5;
else if (flags & BTRFS_BLOCK_GROUP_RAID6)
return BTRFS_RAID_RAID6;
return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
}
void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info);
void btrfs_update_commit_device_bytes_used(struct btrfs_fs_info *fs_info,
struct btrfs_transaction *transaction);
void btrfs_update_commit_device_bytes_used(struct btrfs_transaction *trans);
struct list_head *btrfs_get_fs_uuids(void);
void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info);

Просмотреть файл

@ -33,7 +33,7 @@
#include "locking.h"
ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
int btrfs_getxattr(struct inode *inode, const char *name,
void *buffer, size_t size)
{
struct btrfs_dir_item *di;
@ -233,7 +233,7 @@ out:
/*
* @value: "" makes the attribute to empty, NULL removes it
*/
int __btrfs_setxattr(struct btrfs_trans_handle *trans,
int btrfs_setxattr(struct btrfs_trans_handle *trans,
struct inode *inode, const char *name,
const void *value, size_t size, int flags)
{
@ -374,7 +374,7 @@ static int btrfs_xattr_handler_get(const struct xattr_handler *handler,
const char *name, void *buffer, size_t size)
{
name = xattr_full_name(handler, name);
return __btrfs_getxattr(inode, name, buffer, size);
return btrfs_getxattr(inode, name, buffer, size);
}
static int btrfs_xattr_handler_set(const struct xattr_handler *handler,
@ -383,7 +383,7 @@ static int btrfs_xattr_handler_set(const struct xattr_handler *handler,
size_t size, int flags)
{
name = xattr_full_name(handler, name);
return __btrfs_setxattr(NULL, inode, name, buffer, size, flags);
return btrfs_setxattr(NULL, inode, name, buffer, size, flags);
}
static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
@ -448,8 +448,8 @@ static int btrfs_initxattrs(struct inode *inode,
}
strcpy(name, XATTR_SECURITY_PREFIX);
strcpy(name + XATTR_SECURITY_PREFIX_LEN, xattr->name);
err = __btrfs_setxattr(trans, inode, name,
xattr->value, xattr->value_len, 0);
err = btrfs_setxattr(trans, inode, name, xattr->value,
xattr->value_len, 0);
kfree(name);
if (err < 0)
break;

Просмотреть файл

@ -23,13 +23,14 @@
extern const struct xattr_handler *btrfs_xattr_handlers[];
extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
int btrfs_getxattr(struct inode *inode, const char *name,
void *buffer, size_t size);
extern int __btrfs_setxattr(struct btrfs_trans_handle *trans,
int btrfs_setxattr(struct btrfs_trans_handle *trans,
struct inode *inode, const char *name,
const void *value, size_t size, int flags);
ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
extern int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *dir,
const struct qstr *qstr);

Просмотреть файл

@ -5,6 +5,7 @@
#include <linux/types.h>
extern u32 crc32c(u32 crc, const void *address, unsigned int length);
extern const char *crc32c_impl(void);
/* This macro exists for backwards-compatibility. */
#define crc32c_le crc32c

Просмотреть файл

@ -71,6 +71,12 @@ TRACE_DEFINE_ENUM(COMMIT_TRANS);
{ BTRFS_FILE_EXTENT_REG, "REG" }, \
{ BTRFS_FILE_EXTENT_PREALLOC, "PREALLOC"})
#define show_qgroup_rsv_type(type) \
__print_symbolic(type, \
{ BTRFS_QGROUP_RSV_DATA, "DATA" }, \
{ BTRFS_QGROUP_RSV_META_PERTRANS, "META_PERTRANS" }, \
{ BTRFS_QGROUP_RSV_META_PREALLOC, "META_PREALLOC" })
#define BTRFS_GROUP_FLAGS \
{ BTRFS_BLOCK_GROUP_DATA, "DATA"}, \
{ BTRFS_BLOCK_GROUP_SYSTEM, "SYSTEM"}, \
@ -248,6 +254,41 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
__entry->refs, __entry->compress_type)
);
TRACE_EVENT(btrfs_handle_em_exist,
TP_PROTO(const struct extent_map *existing, const struct extent_map *map, u64 start, u64 len),
TP_ARGS(existing, map, start, len),
TP_STRUCT__entry(
__field( u64, e_start )
__field( u64, e_len )
__field( u64, map_start )
__field( u64, map_len )
__field( u64, start )
__field( u64, len )
),
TP_fast_assign(
__entry->e_start = existing->start;
__entry->e_len = existing->len;
__entry->map_start = map->start;
__entry->map_len = map->len;
__entry->start = start;
__entry->len = len;
),
TP_printk("start=%llu len=%llu "
"existing(start=%llu len=%llu) "
"em(start=%llu len=%llu)",
(unsigned long long)__entry->start,
(unsigned long long)__entry->len,
(unsigned long long)__entry->e_start,
(unsigned long long)__entry->e_len,
(unsigned long long)__entry->map_start,
(unsigned long long)__entry->map_len)
);
/* file extent item */
DECLARE_EVENT_CLASS(btrfs__file_extent_item_regular,
@ -1598,28 +1639,52 @@ TRACE_EVENT(qgroup_update_counters,
TRACE_EVENT(qgroup_update_reserve,
TP_PROTO(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *qgroup,
s64 diff),
s64 diff, int type),
TP_ARGS(fs_info, qgroup, diff),
TP_ARGS(fs_info, qgroup, diff, type),
TP_STRUCT__entry_btrfs(
__field( u64, qgid )
__field( u64, cur_reserved )
__field( s64, diff )
__field( int, type )
),
TP_fast_assign_btrfs(fs_info,
__entry->qgid = qgroup->qgroupid;
__entry->cur_reserved = qgroup->reserved;
__entry->cur_reserved = qgroup->rsv.values[type];
__entry->diff = diff;
),
TP_printk_btrfs("qgid=%llu cur_reserved=%llu diff=%lld",
__entry->qgid, __entry->cur_reserved, __entry->diff)
TP_printk_btrfs("qgid=%llu type=%s cur_reserved=%llu diff=%lld",
__entry->qgid, show_qgroup_rsv_type(__entry->type),
__entry->cur_reserved, __entry->diff)
);
TRACE_EVENT(qgroup_meta_reserve,
TP_PROTO(struct btrfs_root *root, s64 diff, int type),
TP_ARGS(root, diff, type),
TP_STRUCT__entry_btrfs(
__field( u64, refroot )
__field( s64, diff )
__field( int, type )
),
TP_fast_assign_btrfs(root->fs_info,
__entry->refroot = root->objectid;
__entry->diff = diff;
),
TP_printk_btrfs("refroot=%llu(%s) type=%s diff=%lld",
show_root_type(__entry->refroot),
show_qgroup_rsv_type(__entry->type), __entry->diff)
);
TRACE_EVENT(qgroup_meta_convert,
TP_PROTO(struct btrfs_root *root, s64 diff),
TP_ARGS(root, diff),
@ -1627,6 +1692,7 @@ TRACE_EVENT(qgroup_meta_reserve,
TP_STRUCT__entry_btrfs(
__field( u64, refroot )
__field( s64, diff )
__field( int, type )
),
TP_fast_assign_btrfs(root->fs_info,
@ -1634,8 +1700,36 @@ TRACE_EVENT(qgroup_meta_reserve,
__entry->diff = diff;
),
TP_printk_btrfs("refroot=%llu(%s) diff=%lld",
show_root_type(__entry->refroot), __entry->diff)
TP_printk_btrfs("refroot=%llu(%s) type=%s->%s diff=%lld",
show_root_type(__entry->refroot),
show_qgroup_rsv_type(BTRFS_QGROUP_RSV_META_PREALLOC),
show_qgroup_rsv_type(BTRFS_QGROUP_RSV_META_PERTRANS),
__entry->diff)
);
TRACE_EVENT(qgroup_meta_free_all_pertrans,
TP_PROTO(struct btrfs_root *root),
TP_ARGS(root),
TP_STRUCT__entry_btrfs(
__field( u64, refroot )
__field( s64, diff )
__field( int, type )
),
TP_fast_assign_btrfs(root->fs_info,
__entry->refroot = root->objectid;
spin_lock(&root->qgroup_meta_rsv_lock);
__entry->diff = -(s64)root->qgroup_meta_rsv_pertrans;
spin_unlock(&root->qgroup_meta_rsv_lock);
__entry->type = BTRFS_QGROUP_RSV_META_PERTRANS;
),
TP_printk_btrfs("refroot=%llu(%s) type=%s diff=%lld",
show_root_type(__entry->refroot),
show_qgroup_rsv_type(__entry->type), __entry->diff)
);
DECLARE_EVENT_CLASS(btrfs__prelim_ref,

Просмотреть файл

@ -71,6 +71,12 @@ static void __exit libcrc32c_mod_fini(void)
crypto_free_shash(tfm);
}
const char *crc32c_impl(void)
{
return crypto_shash_driver_name(tfm);
}
EXPORT_SYMBOL(crc32c_impl);
module_init(libcrc32c_mod_init);
module_exit(libcrc32c_mod_fini);