for-5.11-tag

-----BEGIN PGP SIGNATURE-----

iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAl/XdB4ACgkQxWXV+ddt
WDv41g//dOkrwjAVBfDUwRT/yKqojyEsZB1aNyHlPHFw8KEw5oIW7wxR4oqXi2ed
/i9KIJe4E9AfqAiexhLvA+Wyt/Sgwz+k4ys82PKhhRNQn7LE4tvhSBUu6JYJDU09
6I1jagya7ILa8akFXZTmVbXdliI4Ab+pcXWAmQYK/xPVDxYTSsBf4o4MilNBA9FS
lTwwBh5GTEtIkubr2yVd3pKfF4fT2g1hd+yglpHaOzpcrLMNN4hj4sUFlLbx/FlJ
MWo+914cSNKJoebbnqhK9djD9hggaaXnNooqfBOXUhZN0VN9rQoKb5tW+TREQmFm
shrmBSqN7CaqKfSOMZs7WOnTuTvmV/825PnLqDqcTUaLw+BgdyacpO9WflgfSs16
Cdvagr1SqbrSQ/3WYCpbqPLDNP3XuZ6+m5OWizf6fhyo8xdFcUHZgRC8qejDlycy
V/zP0c5OYOMi5vo6x/zhrD7Uft7xoFUVcSJCe8WPri082d9LbA2BqwCsullD60PQ
K/fsmlHs5Uxxy3MFgBPVDdWGgaa9rQ2vXequezbozBIIeeVL+Q9zkeyBFSYuFeE8
HToRE9B9BUEUh+p1JxPjOdFH/m+sKe1WMdmRLQthMzfOiNWW7pp/nL5rl4BUVmjm
58dQS73Cj/YNdBomRJXPPtgKIJPAWRrzU/JBcwAdMoKy57oh9NQ=
=5YAS
-----END PGP SIGNATURE-----

Merge tag 'for-5.11-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "We have a mix of all kinds of changes, feature updates, core stuff,
  performance improvements and lots of cleanups and preparatory changes.

  User visible:

   - export filesystem generation in sysfs

   - new features for mount option 'rescue':
      - what's currently supported is exported in sysfs
      - 'ignorebadroots'/'ibadroots' - continue even if some essential
        tree roots are not usable (extent, uuid, data reloc, device,
        csum, free space)
      - 'ignoredatacsums'/'idatacsums' - skip checksum verification on
        data
      - 'all' - now enables 'ignorebadroots' + 'ignoredatacsums' +
        'nologreplay'

   - export read mirror policy settings to sysfs, new policies will be
     added in the future

   - remove inode number cache feature (mount -o inode_cache),
     obsoleted in 5.9

  User visible fixes:

   - async discard scheduling fixes on high loads

   - update inode byte counter atomically so stat() does not report
     wrong value in some cases

   - free space tree fixes:
      - correctly report status of v2 after remount
      - clear v1 cache inodes when v2 is newly enabled after remount

  Core:

   - switch own tree lock implementation to standard rw semaphore:
      - one-level lock nesting is not required anymore, the last use of
        this was in free space that's now loaded asynchronously
      - own implementation of adaptive spinning before taking mutex has
        been part of rwsem
      - performance seems to be better in general, much better (+tens
        of percents) for some workloads
      - lockdep does not complain

   - finish direct IO conversion to iomap infrastructure, remove
     temporary workaround for DSYNC after iomap API updates

   - preparatory work to support data and metadata blocks smaller than
     page:
      - generalize code that assumes sectorsize == PAGE_SIZE, lots of
        refactoring
      - planned namely for 64K pages (eg. arm64, ppc64)
      - scrub read-only support

   - preparatory work for zoned allocation mode (SMR/ZBC/ZNS friendly):
      - disable incompatible features
      - round-robin superblock write

   - free space cache (v1) is loaded asynchronously, remove tree path
     recursion

   - slightly improved time tracking for transaction kthread wake ups

  Performance improvements (note that the numbers depend on load type
  or other features and weren't run on the same machine):

   - skip unnecessary work:
      - do not start readahead for csum tree when scrubbing non-data
        block groups
      - do not start and wait for delalloc on snapshot roots on
        transaction commit
      - fix race when defragmenting leads to unnecessary IO

   - dbench speedups (+throughput%/-max latency%):
      - skip unnecessary searches for xattrs when logging an inode
        (+10.8/-8.2)
      - stop incrementing log batch when joining log transaction (1-2)
      - unlock path before checking if extent is shared during nocow
        writeback (+5.0/-20.5), on fio load +9.7% throughput/-9.8%
        runtime
      - several tree log improvements, eg. removing unnecessary
        operations, fixing races that lead to additional work
        (+12.7/-8.2)

   - tree-checker error branches annotated with unlikely() (+3%
     throughput)

  Other:

   - cleanups

   - lockdep fixes

   - more btrfs_inode conversions

   - error variable cleanups"

* tag 'for-5.11-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (198 commits)
  btrfs: scrub: allow scrub to work with subpage sectorsize
  btrfs: scrub: support subpage data scrub
  btrfs: scrub: support subpage tree block scrub
  btrfs: scrub: always allocate one full page for one sector for RAID56
  btrfs: scrub: reduce width of extent_len/stripe_len from 64 to 32 bits
  btrfs: refactor btrfs_lookup_bio_sums to handle out-of-order bvecs
  btrfs: remove btrfs_find_ordered_sum call from btrfs_lookup_bio_sums
  btrfs: handle sectorsize < PAGE_SIZE case for extent buffer accessors
  btrfs: update num_extent_pages to support subpage sized extent buffer
  btrfs: don't allow tree block to cross page boundary for subpage support
  btrfs: calculate inline extent buffer page size based on page size
  btrfs: factor out btree page submission code to a helper
  btrfs: make btrfs_verify_data_csum follow sector size
  btrfs: pass bio_offset to check_data_csum() directly
  btrfs: rename bio_offset of extent_submit_bio_start_t to dio_file_offset
  btrfs: fix lockdep warning when creating free space tree
  btrfs: skip space_cache v1 setup when not using it
  btrfs: remove free space items when disabling space cache v1
  btrfs: warn when remount will not change the free space tree
  btrfs: use superblock state to print space_cache mount option
  ...
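The new rescue options are ordinary btrfs mount options and can be combined.
A minimal sketch of a read-only recovery mount using them via mount(2)
follows; the device path and mount point are placeholders, and nologreplay
(also implied by rescue=all) is only valid for read-only mounts, hence
MS_RDONLY:

	#include <stdio.h>
	#include <sys/mount.h>

	int main(void)
	{
		/* Hypothetical device and mount point, for illustration only. */
		if (mount("/dev/sdb1", "/mnt/rescue", "btrfs", MS_RDONLY,
			  "rescue=ignorebadroots,rescue=ignoredatacsums,rescue=nologreplay")) {
			perror("mount");
			return 1;
		}
		return 0;
	}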
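For the tree lock rework, the diffs below mostly delete
btrfs_set_lock_blocking_read/write() calls and replace the
*_unlock_blocking() variants with plain unlocks. A hedged sketch of the
resulting shape, using illustrative names rather than the verbatim kernel
definitions:

	#include <linux/rwsem.h>

	/* Illustrative stand-in for the extent buffer's embedded lock. */
	struct eb_lock_example {
		struct rw_semaphore lock;
	};

	static inline void example_tree_read_lock(struct eb_lock_example *eb)
	{
		/*
		 * The rwsem itself does adaptive spinning before sleeping,
		 * so callers no longer flip between spinning and blocking.
		 */
		down_read(&eb->lock);
	}

	static inline void example_tree_read_unlock(struct eb_lock_example *eb)
	{
		up_read(&eb->lock);
	}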
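The subpage preparation replaces open-coded page math with helpers such as
get_eb_page_index() and get_eb_offset_in_page(), visible in the ctree.c hunk
further down. A hedged sketch of what such helpers compute; the bodies are an
illustration of the sectorsize <= PAGE_SIZE cases, not the verbatim kernel
implementation:

	#include <linux/mm.h>
	#include <linux/types.h>

	static inline unsigned long example_eb_page_index(unsigned long offset)
	{
		/* With sectorsize == PAGE_SIZE this is the old open-coded shift. */
		return offset >> PAGE_SHIFT;
	}

	static inline size_t example_eb_offset_in_page(u64 eb_start,
						       unsigned long offset)
	{
		/*
		 * For subpage (sectorsize < PAGE_SIZE) an extent buffer does
		 * not start at a page boundary, so its start offset within
		 * the page has to be added in.
		 */
		return offset_in_page(eb_start + offset);
	}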
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -3,7 +3,7 @@
 obj-$(CONFIG_BTRFS_FS) := btrfs.o
 
 btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
-	   file-item.o inode-item.o inode-map.o disk-io.o \
+	   file-item.o inode-item.o disk-io.o \
 	   transaction.o inode.o file.o tree-defrag.o \
 	   extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
 	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
@@ -16,6 +16,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
 btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
 btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o
+btrfs-$(CONFIG_BLK_DEV_ZONED) += zoned.o
 
 btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
 	tests/extent-buffer-tests.o tests/btrfs-tests.o \
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -783,8 +783,8 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info,
 		BUG_ON(ref->key_for_search.type);
 		BUG_ON(!ref->wanted_disk_byte);
 
-		eb = read_tree_block(fs_info, ref->wanted_disk_byte, 0,
-				     ref->level - 1, NULL);
+		eb = read_tree_block(fs_info, ref->wanted_disk_byte,
+				     ref->root_id, 0, ref->level - 1, NULL);
 		if (IS_ERR(eb)) {
 			free_pref(ref);
 			return PTR_ERR(eb);
@@ -1331,7 +1331,7 @@ again:
 			struct extent_buffer *eb;
 
 			eb = read_tree_block(fs_info, ref->parent, 0,
-					     ref->level, NULL);
+					     0, ref->level, NULL);
 			if (IS_ERR(eb)) {
 				ret = PTR_ERR(eb);
 				goto out;
@@ -1341,14 +1341,12 @@ again:
 				goto out;
 			}
 
-			if (!path->skip_locking) {
+			if (!path->skip_locking)
 				btrfs_tree_read_lock(eb);
-				btrfs_set_lock_blocking_read(eb);
-			}
 			ret = find_extent_in_eb(eb, bytenr,
 						*extent_item_pos, &eie, ignore_offset);
 			if (!path->skip_locking)
-				btrfs_tree_read_unlock_blocking(eb);
+				btrfs_tree_read_unlock(eb);
 			free_extent_buffer(eb);
 			if (ret < 0)
 				goto out;
@@ -1671,13 +1669,11 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
 	s64 bytes_left = ((s64)size) - 1;
 	struct extent_buffer *eb = eb_in;
 	struct btrfs_key found_key;
-	int leave_spinning = path->leave_spinning;
 	struct btrfs_inode_ref *iref;
 
 	if (bytes_left >= 0)
 		dest[bytes_left] = '\0';
 
-	path->leave_spinning = 1;
 	while (1) {
 		bytes_left -= name_len;
 		if (bytes_left >= 0)
@@ -1685,7 +1681,7 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
 					   name_off, name_len);
 		if (eb != eb_in) {
 			if (!path->skip_locking)
-				btrfs_tree_read_unlock_blocking(eb);
+				btrfs_tree_read_unlock(eb);
 			free_extent_buffer(eb);
 		}
 		ret = btrfs_find_item(fs_root, path, parent, 0,
@@ -1705,8 +1701,6 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
 		eb = path->nodes[0];
 		/* make sure we can use eb after releasing the path */
 		if (eb != eb_in) {
-			if (!path->skip_locking)
-				btrfs_set_lock_blocking_read(eb);
 			path->nodes[0] = NULL;
 			path->locks[0] = 0;
 		}
@@ -1723,7 +1717,6 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
 	}
 
 	btrfs_release_path(path);
-	path->leave_spinning = leave_spinning;
 
 	if (ret)
 		return ERR_PTR(ret);
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -424,6 +424,23 @@ int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache)
 	return ret;
 }
 
+static bool space_cache_v1_done(struct btrfs_block_group *cache)
+{
+	bool ret;
+
+	spin_lock(&cache->lock);
+	ret = cache->cached != BTRFS_CACHE_FAST;
+	spin_unlock(&cache->lock);
+
+	return ret;
+}
+
+void btrfs_wait_space_cache_v1_finished(struct btrfs_block_group *cache,
+				struct btrfs_caching_control *caching_ctl)
+{
+	wait_event(caching_ctl->wait, space_cache_v1_done(cache));
+}
+
 #ifdef CONFIG_BTRFS_DEBUG
 static void fragment_free_space(struct btrfs_block_group *block_group)
 {
@@ -639,11 +656,28 @@ static noinline void caching_thread(struct btrfs_work *work)
 	mutex_lock(&caching_ctl->mutex);
 	down_read(&fs_info->commit_root_sem);
 
+	if (btrfs_test_opt(fs_info, SPACE_CACHE)) {
+		ret = load_free_space_cache(block_group);
+		if (ret == 1) {
+			ret = 0;
+			goto done;
+		}
+
+		/*
+		 * We failed to load the space cache, set ourselves to
+		 * CACHE_STARTED and carry on.
+		 */
+		spin_lock(&block_group->lock);
+		block_group->cached = BTRFS_CACHE_STARTED;
+		spin_unlock(&block_group->lock);
+		wake_up(&caching_ctl->wait);
+	}
+
 	if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
 		ret = load_free_space_tree(caching_ctl);
 	else
 		ret = load_extent_tree_free(caching_ctl);
-
+done:
 	spin_lock(&block_group->lock);
 	block_group->caching_ctl = NULL;
 	block_group->cached = ret ? BTRFS_CACHE_ERROR : BTRFS_CACHE_FINISHED;
@@ -679,7 +713,7 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only
 {
 	DEFINE_WAIT(wait);
 	struct btrfs_fs_info *fs_info = cache->fs_info;
-	struct btrfs_caching_control *caching_ctl;
+	struct btrfs_caching_control *caching_ctl = NULL;
 	int ret = 0;
 
 	caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
@@ -691,119 +725,41 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only
 	init_waitqueue_head(&caching_ctl->wait);
 	caching_ctl->block_group = cache;
 	caching_ctl->progress = cache->start;
-	refcount_set(&caching_ctl->count, 1);
+	refcount_set(&caching_ctl->count, 2);
 	btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL);
 
 	spin_lock(&cache->lock);
-	/*
-	 * This should be a rare occasion, but this could happen I think in the
-	 * case where one thread starts to load the space cache info, and then
-	 * some other thread starts a transaction commit which tries to do an
-	 * allocation while the other thread is still loading the space cache
-	 * info. The previous loop should have kept us from choosing this block
-	 * group, but if we've moved to the state where we will wait on caching
-	 * block groups we need to first check if we're doing a fast load here,
-	 * so we can wait for it to finish, otherwise we could end up allocating
-	 * from a block group who's cache gets evicted for one reason or
-	 * another.
-	 */
-	while (cache->cached == BTRFS_CACHE_FAST) {
-		struct btrfs_caching_control *ctl;
-
-		ctl = cache->caching_ctl;
-		refcount_inc(&ctl->count);
-		prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
-		spin_unlock(&cache->lock);
-
-		schedule();
-
-		finish_wait(&ctl->wait, &wait);
-		btrfs_put_caching_control(ctl);
-		spin_lock(&cache->lock);
-	}
-
 	if (cache->cached != BTRFS_CACHE_NO) {
-		spin_unlock(&cache->lock);
 		kfree(caching_ctl);
-		return 0;
+
+		caching_ctl = cache->caching_ctl;
+		if (caching_ctl)
+			refcount_inc(&caching_ctl->count);
+		spin_unlock(&cache->lock);
+		goto out;
 	}
 	WARN_ON(cache->caching_ctl);
 	cache->caching_ctl = caching_ctl;
-	cache->cached = BTRFS_CACHE_FAST;
+	if (btrfs_test_opt(fs_info, SPACE_CACHE))
+		cache->cached = BTRFS_CACHE_FAST;
+	else
+		cache->cached = BTRFS_CACHE_STARTED;
+	cache->has_caching_ctl = 1;
 	spin_unlock(&cache->lock);
 
-	if (btrfs_test_opt(fs_info, SPACE_CACHE)) {
-		mutex_lock(&caching_ctl->mutex);
-		ret = load_free_space_cache(cache);
-
-		spin_lock(&cache->lock);
-		if (ret == 1) {
-			cache->caching_ctl = NULL;
-			cache->cached = BTRFS_CACHE_FINISHED;
-			cache->last_byte_to_unpin = (u64)-1;
-			caching_ctl->progress = (u64)-1;
-		} else {
-			if (load_cache_only) {
-				cache->caching_ctl = NULL;
-				cache->cached = BTRFS_CACHE_NO;
-			} else {
-				cache->cached = BTRFS_CACHE_STARTED;
-				cache->has_caching_ctl = 1;
-			}
-		}
-		spin_unlock(&cache->lock);
-#ifdef CONFIG_BTRFS_DEBUG
-		if (ret == 1 &&
-		    btrfs_should_fragment_free_space(cache)) {
-			u64 bytes_used;
-
-			spin_lock(&cache->space_info->lock);
-			spin_lock(&cache->lock);
-			bytes_used = cache->length - cache->used;
-			cache->space_info->bytes_used += bytes_used >> 1;
-			spin_unlock(&cache->lock);
-			spin_unlock(&cache->space_info->lock);
-			fragment_free_space(cache);
-		}
-#endif
-		mutex_unlock(&caching_ctl->mutex);
-
-		wake_up(&caching_ctl->wait);
-		if (ret == 1) {
-			btrfs_put_caching_control(caching_ctl);
-			btrfs_free_excluded_extents(cache);
-			return 0;
-		}
-	} else {
-		/*
-		 * We're either using the free space tree or no caching at all.
-		 * Set cached to the appropriate value and wakeup any waiters.
-		 */
-		spin_lock(&cache->lock);
-		if (load_cache_only) {
-			cache->caching_ctl = NULL;
-			cache->cached = BTRFS_CACHE_NO;
-		} else {
-			cache->cached = BTRFS_CACHE_STARTED;
-			cache->has_caching_ctl = 1;
-		}
-		spin_unlock(&cache->lock);
-		wake_up(&caching_ctl->wait);
-	}
-
-	if (load_cache_only) {
-		btrfs_put_caching_control(caching_ctl);
-		return 0;
-	}
-
-	down_write(&fs_info->commit_root_sem);
+	spin_lock(&fs_info->block_group_cache_lock);
 	refcount_inc(&caching_ctl->count);
 	list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
-	up_write(&fs_info->commit_root_sem);
+	spin_unlock(&fs_info->block_group_cache_lock);
 
 	btrfs_get_block_group(cache);
 
 	btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);
 
+out:
+	if (load_cache_only && caching_ctl)
+		btrfs_wait_space_cache_v1_finished(cache, caching_ctl);
+	if (caching_ctl)
+		btrfs_put_caching_control(caching_ctl);
+
 	return ret;
 }
@@ -892,8 +848,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	struct btrfs_path *path;
 	struct btrfs_block_group *block_group;
 	struct btrfs_free_cluster *cluster;
-	struct btrfs_root *tree_root = fs_info->tree_root;
-	struct btrfs_key key;
 	struct inode *inode;
 	struct kobject *kobj = NULL;
 	int ret;
@@ -971,42 +925,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	spin_unlock(&trans->transaction->dirty_bgs_lock);
 	mutex_unlock(&trans->transaction->cache_write_mutex);
 
-	if (!IS_ERR(inode)) {
-		ret = btrfs_orphan_add(trans, BTRFS_I(inode));
-		if (ret) {
-			btrfs_add_delayed_iput(inode);
-			goto out;
-		}
-		clear_nlink(inode);
-		/* One for the block groups ref */
-		spin_lock(&block_group->lock);
-		if (block_group->iref) {
-			block_group->iref = 0;
-			block_group->inode = NULL;
-			spin_unlock(&block_group->lock);
-			iput(inode);
-		} else {
-			spin_unlock(&block_group->lock);
-		}
-		/* One for our lookup ref */
-		btrfs_add_delayed_iput(inode);
-	}
-
-	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
-	key.type = 0;
-	key.offset = block_group->start;
-
-	ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
-	if (ret < 0)
+	ret = btrfs_remove_free_space_inode(trans, inode, block_group);
+	if (ret)
 		goto out;
-	if (ret > 0)
-		btrfs_release_path(path);
-	if (ret == 0) {
-		ret = btrfs_del_item(trans, tree_root, path);
-		if (ret)
-			goto out;
-		btrfs_release_path(path);
-	}
 
 	spin_lock(&fs_info->block_group_cache_lock);
 	rb_erase(&block_group->cache_node,
@@ -1043,7 +964,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	if (block_group->cached == BTRFS_CACHE_STARTED)
 		btrfs_wait_block_group_cache_done(block_group);
 	if (block_group->has_caching_ctl) {
-		down_write(&fs_info->commit_root_sem);
+		spin_lock(&fs_info->block_group_cache_lock);
 		if (!caching_ctl) {
 			struct btrfs_caching_control *ctl;
 
@@ -1057,7 +978,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 		}
 		if (caching_ctl)
 			list_del_init(&caching_ctl->list);
-		up_write(&fs_info->commit_root_sem);
+		spin_unlock(&fs_info->block_group_cache_lock);
 		if (caching_ctl) {
 			/* Once for the caching bgs list and once for us. */
 			btrfs_put_caching_control(caching_ctl);
@@ -1723,6 +1644,7 @@ out:
 static int exclude_super_stripes(struct btrfs_block_group *cache)
 {
 	struct btrfs_fs_info *fs_info = cache->fs_info;
+	const bool zoned = btrfs_is_zoned(fs_info);
 	u64 bytenr;
 	u64 *logical;
 	int stripe_len;
@@ -1744,6 +1666,14 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
 		if (ret)
 			return ret;
 
+		/* Shouldn't have super stripes in sequential zones */
+		if (zoned && nr) {
+			btrfs_err(fs_info,
+			"zoned: block group %llu must not contain super block",
+				  cache->start);
+			return -EUCLEAN;
+		}
+
 		while (nr--) {
 			u64 len = min_t(u64, stripe_len,
 				cache->start + cache->length - logical[nr]);
@@ -1805,7 +1735,7 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
 	INIT_LIST_HEAD(&cache->discard_list);
 	INIT_LIST_HEAD(&cache->dirty_list);
 	INIT_LIST_HEAD(&cache->io_list);
-	btrfs_init_free_space_ctl(cache);
+	btrfs_init_free_space_ctl(cache, cache->free_space_ctl);
 	atomic_set(&cache->frozen, 0);
 	mutex_init(&cache->free_space_lock);
 	btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root);
@@ -1985,6 +1915,51 @@ error:
 	return ret;
 }
 
+static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
+{
+	struct extent_map_tree *em_tree = &fs_info->mapping_tree;
+	struct btrfs_space_info *space_info;
+	struct rb_node *node;
+	int ret = 0;
+
+	for (node = rb_first_cached(&em_tree->map); node; node = rb_next(node)) {
+		struct extent_map *em;
+		struct map_lookup *map;
+		struct btrfs_block_group *bg;
+
+		em = rb_entry(node, struct extent_map, rb_node);
+		map = em->map_lookup;
+		bg = btrfs_create_block_group_cache(fs_info, em->start);
+		if (!bg) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		/* Fill dummy cache as FULL */
+		bg->length = em->len;
+		bg->flags = map->type;
+		bg->last_byte_to_unpin = (u64)-1;
+		bg->cached = BTRFS_CACHE_FINISHED;
+		bg->used = em->len;
+		bg->flags = map->type;
+		ret = btrfs_add_block_group_cache(fs_info, bg);
+		if (ret) {
+			btrfs_remove_free_space_cache(bg);
+			btrfs_put_block_group(bg);
+			break;
+		}
+		btrfs_update_space_info(fs_info, bg->flags, em->len, em->len,
+					0, &space_info);
+		bg->space_info = space_info;
+		link_block_group(bg);
+
+		set_avail_alloc_bits(fs_info, bg->flags);
+	}
+	if (!ret)
+		btrfs_init_global_block_rsv(fs_info);
+	return ret;
+}
+
 int btrfs_read_block_groups(struct btrfs_fs_info *info)
 {
 	struct btrfs_path *path;
@@ -1995,6 +1970,9 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
 	int need_clear = 0;
 	u64 cache_gen;
 
+	if (!info->extent_root)
+		return fill_dummy_bgs(info);
+
 	key.objectid = 0;
 	key.offset = 0;
 	key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
@@ -2152,7 +2130,8 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
 	cache->flags = type;
 	cache->last_byte_to_unpin = (u64)-1;
 	cache->cached = BTRFS_CACHE_FINISHED;
-	cache->needs_free_space = 1;
+	if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
+		cache->needs_free_space = 1;
 	ret = exclude_super_stripes(cache);
 	if (ret) {
 		/* We may have excluded something, so call this just in case */
@@ -2361,6 +2340,9 @@ static int cache_save_setup(struct btrfs_block_group *block_group,
 	int retries = 0;
 	int ret = 0;
 
+	if (!btrfs_test_opt(fs_info, SPACE_CACHE))
+		return 0;
+
 	/*
 	 * If this block group is smaller than 100 megs don't bother caching the
 	 * block group.
@@ -2401,7 +2383,7 @@ again:
 	 * time.
 	 */
 	BTRFS_I(inode)->generation = 0;
-	ret = btrfs_update_inode(trans, root, inode);
+	ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
 	if (ret) {
 		/*
 		 * So theoretically we could recover from this, simply set the
@@ -3307,14 +3289,14 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
 	struct btrfs_caching_control *caching_ctl;
 	struct rb_node *n;
 
-	down_write(&info->commit_root_sem);
+	spin_lock(&info->block_group_cache_lock);
 	while (!list_empty(&info->caching_block_groups)) {
 		caching_ctl = list_entry(info->caching_block_groups.next,
 					 struct btrfs_caching_control, list);
 		list_del(&caching_ctl->list);
 		btrfs_put_caching_control(caching_ctl);
 	}
-	up_write(&info->commit_root_sem);
+	spin_unlock(&info->block_group_cache_lock);
 
 	spin_lock(&info->unused_bgs_lock);
 	while (!list_empty(&info->unused_bgs)) {
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -268,6 +268,8 @@ void check_system_chunk(struct btrfs_trans_handle *trans, const u64 type);
 u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags);
 void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
 int btrfs_free_block_groups(struct btrfs_fs_info *info);
+void btrfs_wait_space_cache_v1_finished(struct btrfs_block_group *cache,
+				struct btrfs_caching_control *caching_ctl);
 
 static inline u64 btrfs_data_alloc_profile(struct btrfs_fs_info *fs_info)
 {
--- a/fs/btrfs/block-rsv.c
+++ b/fs/btrfs/block-rsv.c
@@ -426,6 +426,14 @@ void btrfs_init_global_block_rsv(struct btrfs_fs_info *fs_info)
 	fs_info->delayed_block_rsv.space_info = space_info;
 	fs_info->delayed_refs_rsv.space_info = space_info;
 
+	/*
+	 * Our various recovery options can leave us with NULL roots, so check
+	 * here and just bail before we go dereferencing NULLs everywhere.
+	 */
+	if (!fs_info->extent_root || !fs_info->csum_root ||
+	    !fs_info->dev_root || !fs_info->chunk_root || !fs_info->tree_root)
+		return;
+
 	fs_info->extent_root->block_rsv = &fs_info->delayed_refs_rsv;
 	fs_info->csum_root->block_rsv = &fs_info->delayed_refs_rsv;
 	fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -35,6 +35,13 @@ enum {
 	BTRFS_INODE_IN_DELALLOC_LIST,
 	BTRFS_INODE_HAS_PROPS,
 	BTRFS_INODE_SNAPSHOT_FLUSH,
+	/*
+	 * Set and used when logging an inode and it serves to signal that an
+	 * inode does not have xattrs, so subsequent fsyncs can avoid searching
+	 * for xattrs to log. This bit must be cleared whenever a xattr is added
+	 * to an inode.
+	 */
+	BTRFS_INODE_NO_XATTRS,
 };
 
 /* in memory btrfs inode */
@@ -50,7 +57,8 @@ struct btrfs_inode {
 	/*
 	 * Lock for counters and all fields used to determine if the inode is in
	 * the log or not (last_trans, last_sub_trans, last_log_commit,
-	 * logged_trans).
+	 * logged_trans), to access/update new_delalloc_bytes and to update the
+	 * VFS' inode number of bytes used.
 	 */
 	spinlock_t lock;
 
@@ -203,16 +211,6 @@ struct btrfs_inode {
 	/* Hook into fs_info->delayed_iputs */
 	struct list_head delayed_iput;
 
-	/*
-	 * To avoid races between lockless (i_mutex not held) direct IO writes
-	 * and concurrent fsync requests. Direct IO writes must acquire read
-	 * access on this semaphore for creating an extent map and its
-	 * corresponding ordered extent. The fast fsync path must acquire write
-	 * access on this semaphore before it collects ordered extents and
-	 * extent maps.
-	 */
-	struct rw_semaphore dio_sem;
-
 	struct inode vfs_inode;
 };
 
@@ -341,8 +339,7 @@ static inline void btrfs_print_data_csum_error(struct btrfs_inode *inode,
 		u64 logical_start, u8 *csum, u8 *csum_expected, int mirror_num)
 {
 	struct btrfs_root *root = inode->root;
-	struct btrfs_super_block *sb = root->fs_info->super_copy;
-	const u16 csum_size = btrfs_super_csum_size(sb);
+	const u32 csum_size = root->fs_info->csum_size;
 
 	/* Output minus objectid, which is more meaningful */
 	if (root->root_key.objectid >= BTRFS_LAST_FREE_OBJECTID)
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -233,7 +233,6 @@ struct btrfsic_stack_frame {
 struct btrfsic_state {
 	u32 print_mask;
 	int include_extent_data;
-	int csum_size;
 	struct list_head all_blocks_list;
 	struct btrfsic_block_hashtable block_hashtable;
 	struct btrfsic_block_link_hashtable block_link_hashtable;
@@ -660,8 +659,6 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
 		return -1;
 	}
 
-	state->csum_size = btrfs_super_csum_size(selected_super);
-
 	for (pass = 0; pass < 3; pass++) {
 		int num_copies;
 		int mirror_num;
@@ -954,7 +951,7 @@ static noinline_for_stack int btrfsic_process_metablock(
 	sf->prev = NULL;
 
 continue_with_new_stack_frame:
-	sf->block->generation = le64_to_cpu(sf->hdr->generation);
+	sf->block->generation = btrfs_stack_header_generation(sf->hdr);
 	if (0 == sf->hdr->level) {
 		struct btrfs_leaf *const leafhdr =
 		    (struct btrfs_leaf *)sf->hdr;
@@ -1723,7 +1720,7 @@ static noinline_for_stack int btrfsic_test_for_metadata(
 		crypto_shash_update(shash, data, sublen);
 	}
 	crypto_shash_final(shash, csum);
-	if (memcmp(csum, h->csum, state->csum_size))
+	if (memcmp(csum, h->csum, fs_info->csum_size))
 		return 1;
 
 	return 0; /* is metadata */
@@ -2695,8 +2692,7 @@ static void __btrfsic_submit_bio(struct bio *bio)
 			    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
 				pr_info("submit_bio(rw=%d,0x%x, bi_vcnt=%u, bi_sector=%llu (bytenr %llu), bi_disk=%p)\n",
 				       bio_op(bio), bio->bi_opf, segs,
-				       (unsigned long long)bio->bi_iter.bi_sector,
-				       dev_bytenr, bio->bi_disk);
+				       bio->bi_iter.bi_sector, dev_bytenr, bio->bi_disk);
 
 		mapped_datav = kmalloc_array(segs,
 					     sizeof(*mapped_datav), GFP_NOFS);
@@ -2797,7 +2793,6 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
 	state->fs_info = fs_info;
 	state->print_mask = print_mask;
 	state->include_extent_data = including_extent_data;
-	state->csum_size = 0;
 	state->metablock_size = fs_info->nodesize;
 	state->datablock_size = fs_info->sectorsize;
 	INIT_LIST_HEAD(&state->all_blocks_list);
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -131,10 +131,8 @@ static int btrfs_decompress_bio(struct compressed_bio *cb);
 static inline int compressed_bio_size(struct btrfs_fs_info *fs_info,
 				      unsigned long disk_size)
 {
-	u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
-
 	return sizeof(struct compressed_bio) +
-		(DIV_ROUND_UP(disk_size, fs_info->sectorsize)) * csum_size;
+		(DIV_ROUND_UP(disk_size, fs_info->sectorsize)) * fs_info->csum_size;
 }
 
 static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
@@ -142,7 +140,7 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
 {
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
-	const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
+	const u32 csum_size = fs_info->csum_size;
 	struct page *page;
 	unsigned long i;
 	char *kaddr;
@@ -150,7 +148,7 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
 	struct compressed_bio *cb = bio->bi_private;
 	u8 *cb_sum = cb->sums;
 
-	if (inode->flags & BTRFS_INODE_NODATASUM)
+	if (!fs_info->csum_root || (inode->flags & BTRFS_INODE_NODATASUM))
 		return 0;
 
 	shash->tfm = fs_info->csum_shash;
@@ -220,7 +218,7 @@ static void end_compressed_bio_read(struct bio *bio)
 
 	inode = cb->inode;
 	ret = check_compressed_csum(BTRFS_I(inode), bio,
-				    (u64)bio->bi_iter.bi_sector << 9);
+				    bio->bi_iter.bi_sector << 9);
 	if (ret)
 		goto csum_failed;
 
@@ -622,13 +620,12 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	unsigned long pg_index;
 	struct page *page;
 	struct bio *comp_bio;
-	u64 cur_disk_byte = (u64)bio->bi_iter.bi_sector << 9;
+	u64 cur_disk_byte = bio->bi_iter.bi_sector << 9;
 	u64 em_len;
 	u64 em_start;
 	struct extent_map *em;
 	blk_status_t ret = BLK_STS_RESOURCE;
 	int faili = 0;
-	const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
 	u8 *sums;
 
 	em_tree = &BTRFS_I(inode)->extent_tree;
@@ -722,15 +719,12 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 			 */
 			refcount_inc(&cb->pending_bios);
 
-			if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
-				ret = btrfs_lookup_bio_sums(inode, comp_bio,
-							    (u64)-1, sums);
-				BUG_ON(ret); /* -ENOMEM */
-			}
+			ret = btrfs_lookup_bio_sums(inode, comp_bio, sums);
+			BUG_ON(ret); /* -ENOMEM */
 
 			nr_sectors = DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
 						  fs_info->sectorsize);
-			sums += csum_size * nr_sectors;
+			sums += fs_info->csum_size * nr_sectors;
 
 			ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
 			if (ret) {
@@ -751,10 +745,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA);
 	BUG_ON(ret); /* -ENOMEM */
 
-	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
-		ret = btrfs_lookup_bio_sums(inode, comp_bio, (u64)-1, sums);
-		BUG_ON(ret); /* -ENOMEM */
-	}
+	ret = btrfs_lookup_bio_sums(inode, comp_bio, sums);
+	BUG_ON(ret); /* -ENOMEM */
 
 	ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
 	if (ret) {
258
fs/btrfs/ctree.c
258
fs/btrfs/ctree.c
|
@ -1278,14 +1278,11 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
|
||||||
if (!tm)
|
if (!tm)
|
||||||
return eb;
|
return eb;
|
||||||
|
|
||||||
btrfs_set_path_blocking(path);
|
|
||||||
btrfs_set_lock_blocking_read(eb);
|
|
||||||
|
|
||||||
if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
|
if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
|
||||||
BUG_ON(tm->slot != 0);
|
BUG_ON(tm->slot != 0);
|
||||||
eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start);
|
eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start);
|
||||||
if (!eb_rewin) {
|
if (!eb_rewin) {
|
||||||
btrfs_tree_read_unlock_blocking(eb);
|
btrfs_tree_read_unlock(eb);
|
||||||
free_extent_buffer(eb);
|
free_extent_buffer(eb);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -1297,13 +1294,13 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
|
||||||
} else {
|
} else {
|
||||||
eb_rewin = btrfs_clone_extent_buffer(eb);
|
eb_rewin = btrfs_clone_extent_buffer(eb);
|
||||||
if (!eb_rewin) {
|
if (!eb_rewin) {
|
||||||
btrfs_tree_read_unlock_blocking(eb);
|
btrfs_tree_read_unlock(eb);
|
||||||
free_extent_buffer(eb);
|
free_extent_buffer(eb);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
btrfs_tree_read_unlock_blocking(eb);
|
btrfs_tree_read_unlock(eb);
|
||||||
free_extent_buffer(eb);
|
free_extent_buffer(eb);
|
||||||
|
|
||||||
btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb_rewin),
|
btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb_rewin),
|
||||||
|
@ -1356,7 +1353,8 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
|
||||||
if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
|
if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
|
||||||
btrfs_tree_read_unlock(eb_root);
|
btrfs_tree_read_unlock(eb_root);
|
||||||
free_extent_buffer(eb_root);
|
free_extent_buffer(eb_root);
|
||||||
old = read_tree_block(fs_info, logical, 0, level, NULL);
|
old = read_tree_block(fs_info, logical, root->root_key.objectid,
|
||||||
|
0, level, NULL);
|
||||||
if (WARN_ON(IS_ERR(old) || !extent_buffer_uptodate(old))) {
|
if (WARN_ON(IS_ERR(old) || !extent_buffer_uptodate(old))) {
|
||||||
if (!IS_ERR(old))
|
if (!IS_ERR(old))
|
||||||
free_extent_buffer(old);
|
free_extent_buffer(old);
|
||||||
|
@ -1373,9 +1371,8 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
|
||||||
free_extent_buffer(eb_root);
|
free_extent_buffer(eb_root);
|
||||||
eb = alloc_dummy_extent_buffer(fs_info, logical);
|
eb = alloc_dummy_extent_buffer(fs_info, logical);
|
||||||
} else {
|
} else {
|
||||||
btrfs_set_lock_blocking_read(eb_root);
|
|
||||||
eb = btrfs_clone_extent_buffer(eb_root);
|
eb = btrfs_clone_extent_buffer(eb_root);
|
||||||
btrfs_tree_read_unlock_blocking(eb_root);
|
btrfs_tree_read_unlock(eb_root);
|
||||||
free_extent_buffer(eb_root);
|
free_extent_buffer(eb_root);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1483,10 +1480,6 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
|
||||||
|
|
||||||
search_start = buf->start & ~((u64)SZ_1G - 1);
|
search_start = buf->start & ~((u64)SZ_1G - 1);
|
||||||
|
|
||||||
if (parent)
|
|
||||||
btrfs_set_lock_blocking_write(parent);
|
|
||||||
btrfs_set_lock_blocking_write(buf);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Before CoWing this block for later modification, check if it's
|
* Before CoWing this block for later modification, check if it's
|
||||||
* the subtree root and do the delayed subtree trace if needed.
|
* the subtree root and do the delayed subtree trace if needed.
|
||||||
|
@ -1578,7 +1571,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||||
struct extent_buffer *cur;
|
struct extent_buffer *cur;
|
||||||
u64 blocknr;
|
u64 blocknr;
|
||||||
u64 gen;
|
|
||||||
u64 search_start = *last_ret;
|
u64 search_start = *last_ret;
|
||||||
u64 last_block = 0;
|
u64 last_block = 0;
|
||||||
u64 other;
|
u64 other;
|
||||||
|
@ -1586,14 +1578,10 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
|
||||||
int end_slot;
|
int end_slot;
|
||||||
int i;
|
int i;
|
||||||
int err = 0;
|
int err = 0;
|
||||||
int parent_level;
|
|
||||||
int uptodate;
|
|
||||||
u32 blocksize;
|
u32 blocksize;
|
||||||
int progress_passed = 0;
|
int progress_passed = 0;
|
||||||
struct btrfs_disk_key disk_key;
|
struct btrfs_disk_key disk_key;
|
||||||
|
|
||||||
parent_level = btrfs_header_level(parent);
|
|
||||||
|
|
||||||
WARN_ON(trans->transaction != fs_info->running_transaction);
|
WARN_ON(trans->transaction != fs_info->running_transaction);
|
||||||
WARN_ON(trans->transid != fs_info->generation);
|
WARN_ON(trans->transid != fs_info->generation);
|
||||||
|
|
||||||
|
@ -1604,10 +1592,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
|
||||||
if (parent_nritems <= 1)
|
if (parent_nritems <= 1)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
btrfs_set_lock_blocking_write(parent);
|
|
||||||
|
|
||||||
for (i = start_slot; i <= end_slot; i++) {
|
for (i = start_slot; i <= end_slot; i++) {
|
||||||
struct btrfs_key first_key;
|
|
||||||
int close = 1;
|
int close = 1;
|
||||||
|
|
||||||
btrfs_node_key(parent, &disk_key, i);
|
btrfs_node_key(parent, &disk_key, i);
|
||||||
|
@ -1616,8 +1601,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
|
||||||
|
|
||||||
progress_passed = 1;
|
progress_passed = 1;
|
||||||
blocknr = btrfs_node_blockptr(parent, i);
|
blocknr = btrfs_node_blockptr(parent, i);
|
||||||
gen = btrfs_node_ptr_generation(parent, i);
|
|
||||||
btrfs_node_key_to_cpu(parent, &first_key, i);
|
|
||||||
if (last_block == 0)
|
if (last_block == 0)
|
||||||
last_block = blocknr;
|
last_block = blocknr;
|
||||||
|
|
||||||
|
@ -1634,36 +1617,13 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
cur = find_extent_buffer(fs_info, blocknr);
|
cur = btrfs_read_node_slot(parent, i);
|
||||||
if (cur)
|
if (IS_ERR(cur))
|
||||||
uptodate = btrfs_buffer_uptodate(cur, gen, 0);
|
return PTR_ERR(cur);
|
||||||
else
|
|
||||||
uptodate = 0;
|
|
||||||
if (!cur || !uptodate) {
|
|
||||||
if (!cur) {
|
|
||||||
cur = read_tree_block(fs_info, blocknr, gen,
|
|
||||||
parent_level - 1,
|
|
||||||
&first_key);
|
|
||||||
if (IS_ERR(cur)) {
|
|
||||||
return PTR_ERR(cur);
|
|
||||||
} else if (!extent_buffer_uptodate(cur)) {
|
|
||||||
free_extent_buffer(cur);
|
|
||||||
return -EIO;
|
|
||||||
}
|
|
||||||
} else if (!uptodate) {
|
|
||||||
err = btrfs_read_buffer(cur, gen,
|
|
||||||
parent_level - 1,&first_key);
|
|
||||||
if (err) {
|
|
||||||
free_extent_buffer(cur);
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (search_start == 0)
|
if (search_start == 0)
|
||||||
search_start = last_block;
|
search_start = last_block;
|
||||||
|
|
||||||
btrfs_tree_lock(cur);
|
btrfs_tree_lock(cur);
|
||||||
btrfs_set_lock_blocking_write(cur);
|
|
||||||
err = __btrfs_cow_block(trans, root, cur, parent, i,
|
err = __btrfs_cow_block(trans, root, cur, parent, i,
|
||||||
&cur, search_start,
|
&cur, search_start,
|
||||||
min(16 * blocksize,
|
min(16 * blocksize,
|
||||||
|
@ -1723,9 +1683,10 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
|
||||||
oip = offset_in_page(offset);
|
oip = offset_in_page(offset);
|
||||||
|
|
||||||
if (oip + key_size <= PAGE_SIZE) {
|
if (oip + key_size <= PAGE_SIZE) {
|
||||||
const unsigned long idx = offset >> PAGE_SHIFT;
|
const unsigned long idx = get_eb_page_index(offset);
|
||||||
char *kaddr = page_address(eb->pages[idx]);
|
char *kaddr = page_address(eb->pages[idx]);
|
||||||
|
|
||||||
|
oip = get_eb_offset_in_page(eb, offset);
|
||||||
tmp = (struct btrfs_disk_key *)(kaddr + oip);
|
tmp = (struct btrfs_disk_key *)(kaddr + oip);
|
||||||
} else {
|
} else {
|
||||||
read_extent_buffer(eb, &unaligned, offset, key_size);
|
read_extent_buffer(eb, &unaligned, offset, key_size);
|
||||||
|
@ -1801,6 +1762,7 @@ struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
|
||||||
|
|
||||||
btrfs_node_key_to_cpu(parent, &first_key, slot);
|
btrfs_node_key_to_cpu(parent, &first_key, slot);
|
||||||
eb = read_tree_block(parent->fs_info, btrfs_node_blockptr(parent, slot),
|
eb = read_tree_block(parent->fs_info, btrfs_node_blockptr(parent, slot),
|
||||||
|
btrfs_header_owner(parent),
|
||||||
btrfs_node_ptr_generation(parent, slot),
|
btrfs_node_ptr_generation(parent, slot),
|
||||||
level - 1, &first_key);
|
level - 1, &first_key);
|
||||||
if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
|
if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
|
||||||
|
@ -1835,8 +1797,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
|
||||||
|
|
||||||
mid = path->nodes[level];
|
mid = path->nodes[level];
|
||||||
|
|
||||||
WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK &&
|
WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK);
|
||||||
path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING);
|
|
||||||
WARN_ON(btrfs_header_generation(mid) != trans->transid);
|
WARN_ON(btrfs_header_generation(mid) != trans->transid);
|
||||||
|
|
||||||
orig_ptr = btrfs_node_blockptr(mid, orig_slot);
|
orig_ptr = btrfs_node_blockptr(mid, orig_slot);
|
||||||
|
@ -1865,7 +1826,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
|
||||||
}
|
}
|
||||||
|
|
||||||
btrfs_tree_lock(child);
|
btrfs_tree_lock(child);
|
||||||
btrfs_set_lock_blocking_write(child);
|
|
||||||
ret = btrfs_cow_block(trans, root, child, mid, 0, &child,
|
ret = btrfs_cow_block(trans, root, child, mid, 0, &child,
|
||||||
BTRFS_NESTING_COW);
|
BTRFS_NESTING_COW);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
|
@ -1904,7 +1864,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
|
||||||
|
|
||||||
if (left) {
|
if (left) {
|
||||||
__btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
|
__btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
|
||||||
-		btrfs_set_lock_blocking_write(left);
 		wret = btrfs_cow_block(trans, root, left,
 				       parent, pslot - 1, &left,
 				       BTRFS_NESTING_LEFT_COW);
@@ -1920,7 +1879,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 
 	if (right) {
 		__btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
-		btrfs_set_lock_blocking_write(right);
 		wret = btrfs_cow_block(trans, root, right,
 				       parent, pslot + 1, &right,
 				       BTRFS_NESTING_RIGHT_COW);
@@ -2084,7 +2042,6 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
 		u32 left_nr;
 
 		__btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
-		btrfs_set_lock_blocking_write(left);
 
 		left_nr = btrfs_header_nritems(left);
 		if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
@@ -2139,7 +2096,6 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
 		u32 right_nr;
 
 		__btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
-		btrfs_set_lock_blocking_write(right);
 
 		right_nr = btrfs_header_nritems(right);
 		if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
@@ -2243,7 +2199,7 @@ static void reada_for_search(struct btrfs_fs_info *fs_info,
 		search = btrfs_node_blockptr(node, nr);
 		if ((search <= target && target - search <= 65536) ||
 		    (search > target && search - target <= 65536)) {
-			readahead_tree_block(fs_info, search);
+			btrfs_readahead_node_child(node, nr);
 			nread += blocksize;
 		}
 		nscan++;
@@ -2252,16 +2208,11 @@ static void reada_for_search(struct btrfs_fs_info *fs_info,
 	}
 }
 
-static noinline void reada_for_balance(struct btrfs_fs_info *fs_info,
-				       struct btrfs_path *path, int level)
+static noinline void reada_for_balance(struct btrfs_path *path, int level)
 {
+	struct extent_buffer *parent;
 	int slot;
 	int nritems;
-	struct extent_buffer *parent;
-	struct extent_buffer *eb;
-	u64 gen;
-	u64 block1 = 0;
-	u64 block2 = 0;
 
 	parent = path->nodes[level + 1];
 	if (!parent)
@@ -2270,32 +2221,10 @@ static noinline void reada_for_balance(struct btrfs_fs_info *fs_info,
 	nritems = btrfs_header_nritems(parent);
 	slot = path->slots[level + 1];
 
-	if (slot > 0) {
-		block1 = btrfs_node_blockptr(parent, slot - 1);
-		gen = btrfs_node_ptr_generation(parent, slot - 1);
-		eb = find_extent_buffer(fs_info, block1);
-		/*
-		 * if we get -eagain from btrfs_buffer_uptodate, we
-		 * don't want to return eagain here. That will loop
-		 * forever
-		 */
-		if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
-			block1 = 0;
-		free_extent_buffer(eb);
-	}
-	if (slot + 1 < nritems) {
-		block2 = btrfs_node_blockptr(parent, slot + 1);
-		gen = btrfs_node_ptr_generation(parent, slot + 1);
-		eb = find_extent_buffer(fs_info, block2);
-		if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
-			block2 = 0;
-		free_extent_buffer(eb);
-	}
-
-	if (block1)
-		readahead_tree_block(fs_info, block1);
-	if (block2)
-		readahead_tree_block(fs_info, block2);
+	if (slot > 0)
+		btrfs_readahead_node_child(parent, slot - 1);
+	if (slot + 1 < nritems)
+		btrfs_readahead_node_child(parent, slot + 1);
 }
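The reworked reada_for_balance() above reduces to bounds-checked readahead of the two siblings of the slot being rebalanced, dropping all the manual block pointer, generation and uptodate bookkeeping. A self-contained userspace sketch of that control flow; the readahead itself is stubbed out and the names are illustrative stand-ins for the kernel helpers, not the kernel API:

#include <stddef.h>
#include <stdio.h>

struct node { size_t nritems; };

/* stand-in for btrfs_readahead_node_child() */
static void readahead_node_child(struct node *parent, size_t slot)
{
	printf("readahead child %zu of node %p\n", slot, (void *)parent);
}

static void reada_siblings(struct node *parent, size_t slot)
{
	if (slot > 0)
		readahead_node_child(parent, slot - 1);	/* left sibling */
	if (slot + 1 < parent->nritems)
		readahead_node_child(parent, slot + 1);	/* right sibling */
}

int main(void)
{
	struct node parent = { .nritems = 8 };

	reada_siblings(&parent, 3);
	return 0;
}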
@@ -2399,14 +2328,6 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
 			return 0;
 		}
 
-		/* the pages were up to date, but we failed
-		 * the generation number check. Do a full
-		 * read for the generation number that is correct.
-		 * We must do this without dropping locks so
-		 * we can trust our generation number
-		 */
-		btrfs_set_path_blocking(p);
-
 		/* now we're allowed to do a blocking uptodate check */
 		ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key);
 		if (!ret) {
@@ -2426,14 +2347,13 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
 	 * out which blocks to read.
 	 */
 	btrfs_unlock_up_safe(p, level + 1);
-	btrfs_set_path_blocking(p);
 
 	if (p->reada != READA_NONE)
 		reada_for_search(fs_info, p, level, slot, key->objectid);
 
 	ret = -EAGAIN;
-	tmp = read_tree_block(fs_info, blocknr, gen, parent_level - 1,
-			      &first_key);
+	tmp = read_tree_block(fs_info, blocknr, root->root_key.objectid,
+			      gen, parent_level - 1, &first_key);
 	if (!IS_ERR(tmp)) {
 		/*
 		 * If the read above didn't mark this buffer up to date,
@@ -2468,58 +2388,42 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
 		       int *write_lock_level)
 {
 	struct btrfs_fs_info *fs_info = root->fs_info;
-	int ret;
+	int ret = 0;
 
 	if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
 	    BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3) {
-		int sret;
-
 		if (*write_lock_level < level + 1) {
 			*write_lock_level = level + 1;
 			btrfs_release_path(p);
-			goto again;
+			return -EAGAIN;
 		}
 
-		btrfs_set_path_blocking(p);
-		reada_for_balance(fs_info, p, level);
-		sret = split_node(trans, root, p, level);
-
-		BUG_ON(sret > 0);
-		if (sret) {
-			ret = sret;
-			goto done;
-		}
+		reada_for_balance(p, level);
+		ret = split_node(trans, root, p, level);
 		b = p->nodes[level];
 	} else if (ins_len < 0 && btrfs_header_nritems(b) <
 		   BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 2) {
-		int sret;
-
 		if (*write_lock_level < level + 1) {
 			*write_lock_level = level + 1;
 			btrfs_release_path(p);
-			goto again;
+			return -EAGAIN;
 		}
 
-		btrfs_set_path_blocking(p);
-		reada_for_balance(fs_info, p, level);
-		sret = balance_level(trans, root, p, level);
-
-		if (sret) {
-			ret = sret;
-			goto done;
-		}
+		reada_for_balance(p, level);
+		ret = balance_level(trans, root, p, level);
+		if (ret)
+			return ret;
 		b = p->nodes[level];
 		if (!b) {
 			btrfs_release_path(p);
-			goto again;
+			return -EAGAIN;
 		}
 		BUG_ON(btrfs_header_nritems(b) == 1);
 	}
-	return 0;
-
-again:
-	ret = -EAGAIN;
-done:
 	return ret;
 }
@@ -2616,7 +2520,7 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
 	 * We don't know the level of the root node until we actually
 	 * have it read locked
 	 */
-	b = __btrfs_read_lock_root_node(root, p->recurse);
+	b = btrfs_read_lock_root_node(root);
 	level = btrfs_header_level(b);
 	if (level > write_lock_level)
 		goto out;
@@ -2752,7 +2656,6 @@ again:
 			goto again;
 		}
 
-		btrfs_set_path_blocking(p);
 		if (last_level)
 			err = btrfs_cow_block(trans, root, b, NULL, 0,
 					      &b,
@@ -2822,7 +2725,6 @@ cow_done:
 				goto again;
 			}
 
-			btrfs_set_path_blocking(p);
 			err = split_leaf(trans, root, key,
 					 p, ins_len, ret == 0);
 
@@ -2884,17 +2786,10 @@ cow_done:
 		if (!p->skip_locking) {
 			level = btrfs_header_level(b);
 			if (level <= write_lock_level) {
-				if (!btrfs_try_tree_write_lock(b)) {
-					btrfs_set_path_blocking(p);
-					btrfs_tree_lock(b);
-				}
+				btrfs_tree_lock(b);
 				p->locks[level] = BTRFS_WRITE_LOCK;
 			} else {
-				if (!btrfs_tree_read_lock_atomic(b)) {
-					btrfs_set_path_blocking(p);
-					__btrfs_tree_read_lock(b, BTRFS_NESTING_NORMAL,
-							       p->recurse);
-				}
+				btrfs_tree_read_lock(b);
 				p->locks[level] = BTRFS_READ_LOCK;
 			}
 			p->nodes[level] = b;
@@ -2902,12 +2797,6 @@ cow_done:
 	}
 	ret = 1;
 done:
-	/*
-	 * we don't really know what they plan on doing with the path
-	 * from here on, so for now just mark it as blocking
-	 */
-	if (!p->leave_spinning)
-		btrfs_set_path_blocking(p);
 	if (ret < 0 && !p->skip_release_on_error)
 		btrfs_release_path(p);
 	return ret;
@@ -2999,10 +2888,7 @@ again:
 	}
 
 	level = btrfs_header_level(b);
-	if (!btrfs_tree_read_lock_atomic(b)) {
-		btrfs_set_path_blocking(p);
-		btrfs_tree_read_lock(b);
-	}
+	btrfs_tree_read_lock(b);
 	b = tree_mod_log_rewind(fs_info, p, b, time_seq);
 	if (!b) {
 		ret = -ENOMEM;
@@ -3013,8 +2899,6 @@ again:
 	}
 	ret = 1;
 done:
-	if (!p->leave_spinning)
-		btrfs_set_path_blocking(p);
 	if (ret < 0)
 		btrfs_release_path(p);
 
@@ -3441,7 +3325,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 	add_root_to_dirty_list(root);
 	atomic_inc(&c->refs);
 	path->nodes[level] = c;
-	path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
+	path->locks[level] = BTRFS_WRITE_LOCK;
 	path->slots[level] = 0;
 	return 0;
 }
@@ -3562,7 +3446,6 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
 			   (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
 	btrfs_set_header_nritems(split, c_nritems - mid);
 	btrfs_set_header_nritems(c, mid);
-	ret = 0;
 
 	btrfs_mark_buffer_dirty(c);
 	btrfs_mark_buffer_dirty(split);
@@ -3580,7 +3463,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
 		btrfs_tree_unlock(split);
 		free_extent_buffer(split);
 	}
-	return ret;
+	return 0;
 }
 
 /*
@@ -3814,7 +3697,6 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
 		return 1;
 
 	__btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
-	btrfs_set_lock_blocking_write(right);
 
 	free_space = btrfs_leaf_free_space(right);
 	if (free_space < data_size)
@@ -4053,7 +3935,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
 		return 1;
 
 	__btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
-	btrfs_set_lock_blocking_write(left);
 
 	free_space = btrfs_leaf_free_space(left);
 	if (free_space < data_size) {
@@ -4448,7 +4329,6 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
 		goto err;
 	}
 
-	btrfs_set_path_blocking(path);
 	ret = split_leaf(trans, root, &key, path, ins_len, 1);
 	if (ret)
 		goto err;
@@ -4478,8 +4358,6 @@ static noinline int split_item(struct btrfs_path *path,
 	leaf = path->nodes[0];
 	BUG_ON(btrfs_leaf_free_space(leaf) < sizeof(struct btrfs_item));
 
-	btrfs_set_path_blocking(path);
-
 	item = btrfs_item_nr(path->slots[0]);
 	orig_offset = btrfs_item_offset(leaf, item);
 	item_size = btrfs_item_size(leaf, item);
@@ -5055,7 +4933,6 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		if (leaf == root->node) {
 			btrfs_set_header_level(leaf, 0);
 		} else {
-			btrfs_set_path_blocking(path);
 			btrfs_clean_tree_block(leaf);
 			btrfs_del_leaf(trans, root, path, leaf);
 		}
@@ -5077,7 +4954,6 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 			slot = path->slots[1];
 			atomic_inc(&leaf->refs);
 
-			btrfs_set_path_blocking(path);
 			wret = push_leaf_left(trans, root, path, 1, 1,
 					      1, (u32)-1);
 			if (wret < 0 && wret != -ENOSPC)
@@ -5248,7 +5124,6 @@ find_next_key:
 		 */
 		if (slot >= nritems) {
 			path->slots[level] = slot;
-			btrfs_set_path_blocking(path);
 			sret = btrfs_find_next_key(root, path, min_key, level,
 						   min_trans);
 			if (sret == 0) {
@@ -5265,7 +5140,6 @@ find_next_key:
 			ret = 0;
 			goto out;
 		}
-		btrfs_set_path_blocking(path);
 		cur = btrfs_read_node_slot(cur, slot);
 		if (IS_ERR(cur)) {
 			ret = PTR_ERR(cur);
@@ -5282,7 +5156,6 @@ out:
 	path->keep_locks = keep_locks;
 	if (ret == 0) {
 		btrfs_unlock_up_safe(path, path->lowest_level + 1);
-		btrfs_set_path_blocking(path);
 		memcpy(min_key, &found_key, sizeof(found_key));
 	}
 	return ret;
@@ -5384,8 +5257,7 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
 	struct btrfs_key key;
 	u32 nritems;
 	int ret;
-	int old_spinning = path->leave_spinning;
-	int next_rw_lock = 0;
+	int i;
 
 	nritems = btrfs_header_nritems(path->nodes[0]);
 	if (nritems == 0)
@@ -5395,11 +5267,9 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
 again:
 	level = 1;
 	next = NULL;
-	next_rw_lock = 0;
 	btrfs_release_path(path);
 
 	path->keep_locks = 1;
-	path->leave_spinning = 1;
 
 	if (time_seq)
 		ret = btrfs_search_old_slot(root, &key, path, time_seq);
@@ -5459,13 +5329,22 @@ again:
 			continue;
 		}
 
-		if (next) {
-			btrfs_tree_unlock_rw(next, next_rw_lock);
-			free_extent_buffer(next);
+		/*
+		 * Our current level is where we're going to start from, and to
+		 * make sure lockdep doesn't complain we need to drop our locks
+		 * and nodes from 0 to our current level.
+		 */
+		for (i = 0; i < level; i++) {
+			if (path->locks[level]) {
+				btrfs_tree_read_unlock(path->nodes[i]);
+				path->locks[i] = 0;
+			}
+			free_extent_buffer(path->nodes[i]);
+			path->nodes[i] = NULL;
 		}
 
 		next = c;
-		next_rw_lock = path->locks[level];
 		ret = read_block_for_search(root, path, &next, level,
 					    slot, &key);
 		if (ret == -EAGAIN)
@@ -5491,28 +5370,18 @@ again:
 			cond_resched();
 			goto again;
 		}
-		if (!ret) {
-			btrfs_set_path_blocking(path);
-			__btrfs_tree_read_lock(next,
-					       BTRFS_NESTING_RIGHT,
-					       path->recurse);
-		}
-		next_rw_lock = BTRFS_READ_LOCK;
+		if (!ret)
+			btrfs_tree_read_lock(next);
 		}
 		break;
 	}
 	path->slots[level] = slot;
 	while (1) {
 		level--;
-		c = path->nodes[level];
-		if (path->locks[level])
-			btrfs_tree_unlock_rw(c, path->locks[level]);
-
-		free_extent_buffer(c);
 		path->nodes[level] = next;
 		path->slots[level] = 0;
 		if (!path->skip_locking)
-			path->locks[level] = next_rw_lock;
+			path->locks[level] = BTRFS_READ_LOCK;
 		if (!level)
 			break;
@@ -5526,23 +5395,12 @@ again:
 			goto done;
 		}
 
-		if (!path->skip_locking) {
-			ret = btrfs_try_tree_read_lock(next);
-			if (!ret) {
-				btrfs_set_path_blocking(path);
-				__btrfs_tree_read_lock(next,
-						       BTRFS_NESTING_RIGHT,
-						       path->recurse);
-			}
-			next_rw_lock = BTRFS_READ_LOCK;
-		}
+		if (!path->skip_locking)
+			btrfs_tree_read_lock(next);
 	}
 	ret = 0;
 done:
 	unlock_up(path, 0, 1, 0, NULL);
-	path->leave_spinning = old_spinning;
-	if (!old_spinning)
-		btrfs_set_path_blocking(path);
 
 	return ret;
 }
@@ -5564,7 +5422,6 @@ int btrfs_previous_item(struct btrfs_root *root,
 
 	while (1) {
 		if (path->slots[0] == 0) {
-			btrfs_set_path_blocking(path);
 			ret = btrfs_prev_leaf(root, path);
 			if (ret != 0)
 				return ret;
@@ -5606,7 +5463,6 @@ int btrfs_previous_extent_item(struct btrfs_root *root,
 
 	while (1) {
 		if (path->slots[0] == 0) {
-			btrfs_set_path_blocking(path);
 			ret = btrfs_prev_leaf(root, path);
 			if (ret != 0)
 				return ret;
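The locking hunks in btrfs_search_slot() and btrfs_next_old_leaf() all follow the same pattern: with the extent buffer locks now backed by a sleeping read-write semaphore, the old "try the lock, mark the path blocking, then take the lock for real" dance collapses into one plain blocking lock call. A small pthread-based sketch of the before/after shape; this is purely illustrative, pthread rwlocks merely stand in for the kernel's rwsem-backed tree locks:

#include <pthread.h>

static pthread_rwlock_t eb_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Old shape: opportunistic try-lock, fall back to a blocking lock
 * (the fallback also had to mark the whole path as blocking first). */
static void lock_old_style(void)
{
	if (pthread_rwlock_trywrlock(&eb_lock) != 0) {
		/* btrfs_set_path_blocking() went here */
		pthread_rwlock_wrlock(&eb_lock);
	}
}

/* New shape: the lock sleeps (and spins adaptively) on its own. */
static void lock_new_style(void)
{
	pthread_rwlock_wrlock(&eb_lock);
}

int main(void)
{
	lock_old_style();
	pthread_rwlock_unlock(&eb_lock);
	lock_new_style();
	pthread_rwlock_unlock(&eb_lock);
	return 0;
}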
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
@@ -27,6 +27,7 @@
 #include <linux/dynamic_debug.h>
 #include <linux/refcount.h>
 #include <linux/crc32c.h>
+#include <linux/iomap.h>
 #include "extent-io-tree.h"
 #include "extent_io.h"
 #include "extent_map.h"
@@ -65,12 +66,6 @@ struct btrfs_ref;
 
 #define BTRFS_OLDEST_GENERATION	0ULL
 
-/*
- * the max metadata block size. This limit is somewhat artificial,
- * but the memmove costs go through the roof for larger blocks.
- */
-#define BTRFS_MAX_METADATA_BLOCKSIZE 65536
-
 /*
  * we can actually store much bigger names, but lets not confuse the rest
  * of linux
@@ -369,11 +364,9 @@ struct btrfs_path {
 	unsigned int search_for_split:1;
 	unsigned int keep_locks:1;
 	unsigned int skip_locking:1;
-	unsigned int leave_spinning:1;
 	unsigned int search_commit_root:1;
 	unsigned int need_commit_sem:1;
 	unsigned int skip_release_on_error:1;
-	unsigned int recurse:1;
 };
 #define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r->fs_info) >> 4) - \
 					sizeof(struct btrfs_item))
@@ -468,10 +461,11 @@ struct btrfs_discard_ctl {
 	struct btrfs_block_group *block_group;
 	struct list_head discard_list[BTRFS_NR_DISCARD_LISTS];
 	u64 prev_discard;
+	u64 prev_discard_time;
 	atomic_t discardable_extents;
 	atomic64_t discardable_bytes;
 	u64 max_discard_size;
-	unsigned long delay;
+	u64 delay_ms;
 	u32 iops_limit;
 	u32 kbps_limit;
 	u64 discard_extent_bytes;
@@ -558,6 +552,9 @@ enum {
 
 	/* Indicate that the discard workqueue can service discards. */
 	BTRFS_FS_DISCARD_RUNNING,
+
+	/* Indicate that we need to cleanup space cache v1 */
+	BTRFS_FS_CLEANUP_SPACE_CACHE_V1,
 };
 
 /*
@@ -911,6 +908,7 @@ struct btrfs_fs_info {
 
 	/* Extent buffer radix tree */
 	spinlock_t buffer_lock;
+	/* Entries are eb->start / sectorsize */
 	struct radix_tree_root buffer_radix;
 
 	/* next backup root to be overwritten */
@@ -933,6 +931,10 @@ struct btrfs_fs_info {
 	/* Cached block sizes */
 	u32 nodesize;
 	u32 sectorsize;
+	/* ilog2 of sectorsize, use to avoid 64bit division */
+	u32 sectorsize_bits;
+	u32 csum_size;
+	u32 csums_per_leaf;
 	u32 stripesize;
 
 	/* Block groups and devices containing active swapfiles. */
@@ -950,6 +952,18 @@ struct btrfs_fs_info {
 	/* Type of exclusive operation running */
 	unsigned long exclusive_operation;
 
+	/*
+	 * Zone size > 0 when in ZONED mode, otherwise it's used for a check
+	 * if the mode is enabled
+	 */
+	union {
+		u64 zone_size;
+		u64 zoned;
+	};
+
+	/* Max size to emit ZONE_APPEND write command */
+	u64 max_zone_append_size;
+
 #ifdef CONFIG_BTRFS_FS_REF_VERIFY
 	spinlock_t ref_verify_lock;
 	struct rb_root block_tree;
@@ -1020,7 +1034,7 @@ enum {
 	BTRFS_ROOT_DEAD_RELOC_TREE,
 	/* Mark dead root stored on device whose cleanup needs to be resumed */
 	BTRFS_ROOT_DEAD_TREE,
-	/* The root has a log tree. Used only for subvolume roots. */
+	/* The root has a log tree. Used for subvolume roots and the tree root. */
 	BTRFS_ROOT_HAS_LOG_TREE,
 	/* Qgroup flushing is in progress */
 	BTRFS_ROOT_QGROUP_FLUSHING,
@@ -1059,15 +1073,6 @@ struct btrfs_root {
 	spinlock_t accounting_lock;
 	struct btrfs_block_rsv *block_rsv;
 
-	/* free ino cache stuff */
-	struct btrfs_free_space_ctl *free_ino_ctl;
-	enum btrfs_caching_type ino_cache_state;
-	spinlock_t ino_cache_lock;
-	wait_queue_head_t ino_cache_wait;
-	struct btrfs_free_space_ctl *free_ino_pinned;
-	u64 ino_cache_progress;
-	struct inode *ino_cache_inode;
-
 	struct mutex log_mutex;
 	wait_queue_head_t log_writer_wait;
 	wait_queue_head_t log_commit_wait[2];
@@ -1226,6 +1231,63 @@ struct btrfs_replace_extent_info {
 	int insertions;
 };
 
+/* Arguments for btrfs_drop_extents() */
+struct btrfs_drop_extents_args {
+	/* Input parameters */
+
+	/*
+	 * If NULL, btrfs_drop_extents() will allocate and free its own path.
+	 * If 'replace_extent' is true, this must not be NULL. Also the path
+	 * is always released except if 'replace_extent' is true and
+	 * btrfs_drop_extents() sets 'extent_inserted' to true, in which case
+	 * the path is kept locked.
+	 */
+	struct btrfs_path *path;
+	/* Start offset of the range to drop extents from */
+	u64 start;
+	/* End (exclusive, last byte + 1) of the range to drop extents from */
+	u64 end;
+	/* If true drop all the extent maps in the range */
+	bool drop_cache;
+	/*
+	 * If true it means we want to insert a new extent after dropping all
+	 * the extents in the range. If this is true, the 'extent_item_size'
+	 * parameter must be set as well and the 'extent_inserted' field will
+	 * be set to true by btrfs_drop_extents() if it could insert the new
+	 * extent.
+	 * Note: when this is set to true the path must not be NULL.
+	 */
+	bool replace_extent;
+	/*
+	 * Used if 'replace_extent' is true. Size of the file extent item to
+	 * insert after dropping all existing extents in the range
+	 */
+	u32 extent_item_size;
+
+	/* Output parameters */
+
+	/*
+	 * Set to the minimum between the input parameter 'end' and the end
+	 * (exclusive, last byte + 1) of the last dropped extent. This is always
+	 * set even if btrfs_drop_extents() returns an error.
+	 */
+	u64 drop_end;
+	/*
+	 * The number of allocated bytes found in the range. This can be smaller
+	 * than the range's length when there are holes in the range.
+	 */
+	u64 bytes_found;
+	/*
+	 * Only set if 'replace_extent' is true. Set to true if we were able
+	 * to insert a replacement extent after dropping all extents in the
+	 * range, otherwise set to false by btrfs_drop_extents().
+	 * Also, if btrfs_drop_extents() has set this to true it means it
+	 * returned with the path locked, otherwise if it has set this to
+	 * false it has returned with the path released.
+	 */
+	bool extent_inserted;
+};
+
 struct btrfs_file_private {
 	void *filldir_buf;
 };
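The struct above defines the whole contract of the reworked btrfs_drop_extents() interface: the caller fills the input fields, the callee fills drop_end and bytes_found. A toy userspace model of that contract, with a flat array of [start, end) ranges standing in for the real extent tree; everything here is illustrative, not the kernel implementation:

#include <stdint.h>
#include <stdio.h>

struct drop_extents_args {
	/* inputs */
	uint64_t start;
	uint64_t end;		/* exclusive */
	/* outputs */
	uint64_t drop_end;	/* min(end, end of last dropped extent) */
	uint64_t bytes_found;	/* allocated bytes seen in the range */
};

struct extent { uint64_t start, end; };

static void toy_drop_extents(const struct extent *ext, int n,
			     struct drop_extents_args *args)
{
	args->drop_end = args->start;
	args->bytes_found = 0;
	for (int i = 0; i < n; i++) {
		uint64_t s = ext[i].start > args->start ? ext[i].start : args->start;
		uint64_t e = ext[i].end < args->end ? ext[i].end : args->end;

		if (s >= e)
			continue;	/* no overlap with the range */
		args->bytes_found += e - s;
		if (e > args->drop_end)
			args->drop_end = e;
	}
}

int main(void)
{
	struct extent ext[] = { { 0, 4096 }, { 8192, 16384 } };	/* hole in between */
	struct drop_extents_args args = { .start = 0, .end = 12288 };

	toy_drop_extents(ext, 2, &args);
	/* prints drop_end=12288 bytes_found=8192: smaller than the 12288-byte
	 * range because of the hole, exactly as the field comment describes */
	printf("drop_end=%llu bytes_found=%llu\n",
	       (unsigned long long)args.drop_end,
	       (unsigned long long)args.bytes_found);
	return 0;
}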
@@ -1284,7 +1346,7 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
 #define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED	(1 << 14)
 #define BTRFS_MOUNT_ENOSPC_DEBUG	 	(1 << 15)
 #define BTRFS_MOUNT_AUTO_DEFRAG		(1 << 16)
-#define BTRFS_MOUNT_INODE_MAP_CACHE	(1 << 17)
+/* bit 17 is free */
 #define BTRFS_MOUNT_USEBACKUPROOT	(1 << 18)
 #define BTRFS_MOUNT_SKIP_BALANCE	(1 << 19)
 #define BTRFS_MOUNT_CHECK_INTEGRITY	(1 << 20)
@@ -1297,6 +1359,8 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
 #define BTRFS_MOUNT_NOLOGREPLAY		(1 << 27)
 #define BTRFS_MOUNT_REF_VERIFY		(1 << 28)
 #define BTRFS_MOUNT_DISCARD_ASYNC	(1 << 29)
+#define BTRFS_MOUNT_IGNOREBADROOTS	(1 << 30)
+#define BTRFS_MOUNT_IGNOREDATACSUMS	(1 << 31)
 
 #define BTRFS_DEFAULT_COMMIT_INTERVAL	(30)
 #define BTRFS_DEFAULT_MAX_INLINE	(2048)
@@ -1329,9 +1393,7 @@ do {								\
  * transaction commit)
  */
 
-#define BTRFS_PENDING_SET_INODE_MAP_CACHE	(0)
-#define BTRFS_PENDING_CLEAR_INODE_MAP_CACHE	(1)
-#define BTRFS_PENDING_COMMIT			(2)
+#define BTRFS_PENDING_COMMIT			(0)
 
 #define btrfs_test_pending(info, opt)	\
 	test_bit(BTRFS_PENDING_##opt, &(info)->pending_changes)
@@ -1404,7 +1466,7 @@ struct btrfs_map_token {
 };
 
 #define BTRFS_BYTES_TO_BLKS(fs_info, bytes) \
-				((bytes) >> (fs_info)->sb->s_blocksize_bits)
+				((bytes) >> (fs_info)->sectorsize_bits)
 
 static inline void btrfs_init_map_token(struct btrfs_map_token *token,
 					struct extent_buffer *eb)
@@ -1489,13 +1551,14 @@ static inline void btrfs_set_token_##name(struct btrfs_map_token *token,\
 #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits)		\
 static inline u##bits btrfs_##name(const struct extent_buffer *eb)	\
 {									\
-	const type *p = page_address(eb->pages[0]);			\
+	const type *p = page_address(eb->pages[0]) +			\
+			offset_in_page(eb->start);			\
 	return get_unaligned_le##bits(&p->member);			\
 }									\
 static inline void btrfs_set_##name(const struct extent_buffer *eb,	\
 				    u##bits val)			\
 {									\
-	type *p = page_address(eb->pages[0]);				\
+	type *p = page_address(eb->pages[0]) + offset_in_page(eb->start); \
 	put_unaligned_le##bits(val, &p->member);			\
 }
@@ -2085,6 +2148,7 @@ BTRFS_SETGET_FUNCS(disk_root_level, struct btrfs_root_item, level, 8);
 BTRFS_SETGET_STACK_FUNCS(root_generation, struct btrfs_root_item,
 			 generation, 64);
 BTRFS_SETGET_STACK_FUNCS(root_bytenr, struct btrfs_root_item, bytenr, 64);
+BTRFS_SETGET_STACK_FUNCS(root_drop_level, struct btrfs_root_item, drop_level, 8);
 BTRFS_SETGET_STACK_FUNCS(root_level, struct btrfs_root_item, level, 8);
 BTRFS_SETGET_STACK_FUNCS(root_dirid, struct btrfs_root_item, root_dirid, 64);
 BTRFS_SETGET_STACK_FUNCS(root_refs, struct btrfs_root_item, refs, 32);
@@ -2517,7 +2581,17 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
 				     enum btrfs_inline_ref_type is_data);
 u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset);
 
-u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes);
+/*
+ * Take the number of bytes to be checksummmed and figure out how many leaves
+ * it would require to store the csums for that many bytes.
+ */
+static inline u64 btrfs_csum_bytes_to_leaves(
+			const struct btrfs_fs_info *fs_info, u64 csum_bytes)
+{
+	const u64 num_csums = csum_bytes >> fs_info->sectorsize_bits;
+
+	return DIV_ROUND_UP_ULL(num_csums, fs_info->csums_per_leaf);
+}
 
 /*
  * Use this if we would be adding new items, as we could split nodes as we cow
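The new inline above is pure integer math: shift the byte count by sectorsize_bits to get the number of checksums, then round up by the per-leaf capacity. A standalone sketch of the same arithmetic; the constants are assumptions for illustration (4KiB sectors, an example csums-per-leaf value), not what a real filesystem would compute:

#include <stdint.h>
#include <stdio.h>

#define SECTORSIZE_BITS	12	/* ilog2(4096); the shift avoids a 64-bit division */
#define CSUMS_PER_LEAF	2048	/* example capacity of one csum-tree leaf */

static uint64_t csum_bytes_to_leaves(uint64_t csum_bytes)
{
	uint64_t num_csums = csum_bytes >> SECTORSIZE_BITS;

	/* DIV_ROUND_UP */
	return (num_csums + CSUMS_PER_LEAF - 1) / CSUMS_PER_LEAF;
}

int main(void)
{
	/* 1 GiB of data -> 262144 csums -> 128 leaves with these constants */
	printf("%llu\n", (unsigned long long)csum_bytes_to_leaves(1ULL << 30));
	return 0;
}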
@@ -2592,7 +2666,6 @@ int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
 			       u64 start, u64 len, int delalloc);
 int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start,
 			      u64 len);
-void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info);
 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans);
 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 			 struct btrfs_ref *generic_ref);
@@ -2939,8 +3012,7 @@ struct btrfs_inode_extref *btrfs_find_name_in_ext_backref(
 struct btrfs_dio_private;
 int btrfs_del_csums(struct btrfs_trans_handle *trans,
 		    struct btrfs_root *root, u64 bytenr, u64 len);
-blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
-				   u64 offset, u8 *dst);
+blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst);
 int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root,
 			     u64 objectid, u64 pos,
@@ -2967,13 +3039,13 @@ int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start,
 					u64 len);
 int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start,
 				      u64 len);
-void btrfs_inode_safe_disk_i_size_write(struct inode *inode, u64 new_i_size);
+void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_size);
 u64 btrfs_file_extent_end(const struct btrfs_path *path);
 
 /* inode.c */
 blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
 				   int mirror_num, unsigned long bio_flags);
-int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u64 phy_offset,
+int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset,
 			   struct page *page, u64 start, u64 end, int mirror);
 struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
 					   u64 start, u64 len);
@@ -2993,11 +3065,11 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
 		   struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
 		   const char *name, int name_len, int add_backref, u64 index);
 int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry);
-int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
+int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
 			 int front);
 int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root,
-			       struct inode *inode, u64 new_size,
+			       struct btrfs_inode *inode, u64 new_size,
 			       u32 min_type);
 
 int btrfs_start_delalloc_snapshot(struct btrfs_root *root);
@@ -3037,14 +3109,13 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
 				    struct page *page, size_t pg_offset,
 				    u64 start, u64 end);
 int btrfs_update_inode(struct btrfs_trans_handle *trans,
-		       struct btrfs_root *root,
-		       struct inode *inode);
+		       struct btrfs_root *root, struct btrfs_inode *inode);
 int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
-				struct btrfs_root *root, struct inode *inode);
+				struct btrfs_root *root, struct btrfs_inode *inode);
 int btrfs_orphan_add(struct btrfs_trans_handle *trans,
 		     struct btrfs_inode *inode);
 int btrfs_orphan_cleanup(struct btrfs_root *root);
-int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size);
+int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size);
 void btrfs_add_delayed_iput(struct inode *inode);
 void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info);
 int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info);
@@ -3062,7 +3133,18 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end);
 void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
 					  u64 end, int uptodate);
 extern const struct dentry_operations btrfs_dentry_operations;
-ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
+extern const struct iomap_ops btrfs_dio_iomap_ops;
+extern const struct iomap_dio_ops btrfs_dio_ops;
+
+/* Inode locking type flags, by default the exclusive lock is taken */
+#define BTRFS_ILOCK_SHARED	(1U << 0)
+#define BTRFS_ILOCK_TRY 	(1U << 1)
+
+int btrfs_inode_lock(struct inode *inode, unsigned int ilock_flags);
+void btrfs_inode_unlock(struct inode *inode, unsigned int ilock_flags);
+void btrfs_update_inode_bytes(struct btrfs_inode *inode,
+			      const u64 add_bytes,
+			      const u64 del_bytes);
 
 /* ioctl.c */
 long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
@@ -3092,16 +3174,9 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
 void btrfs_drop_extent_cache(struct btrfs_inode *inode, u64 start, u64 end,
 			     int skip_pinned);
 extern const struct file_operations btrfs_file_operations;
-int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
-			 struct btrfs_root *root, struct btrfs_inode *inode,
-			 struct btrfs_path *path, u64 start, u64 end,
-			 u64 *drop_end, int drop_cache,
-			 int replace_extent,
-			 u32 extent_item_size,
-			 int *key_inserted);
 int btrfs_drop_extents(struct btrfs_trans_handle *trans,
-		       struct btrfs_root *root, struct inode *inode, u64 start,
-		       u64 end, int drop_cache);
+		       struct btrfs_root *root, struct btrfs_inode *inode,
+		       struct btrfs_drop_extents_args *args);
 int btrfs_replace_file_extents(struct inode *inode, struct btrfs_path *path,
 			   const u64 start, const u64 end,
 			   struct btrfs_replace_extent_info *extent_info,
@@ -3111,7 +3186,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
 int btrfs_release_file(struct inode *inode, struct file *file);
 int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
 		      size_t num_pages, loff_t pos, size_t write_bytes,
-		      struct extent_state **cached);
+		      struct extent_state **cached, bool noreserve);
 int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
 int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
 			   size_t *write_bytes);
@@ -3290,6 +3365,39 @@ static inline void assertfail(const char *expr, const char* file, int line) { }
 #define ASSERT(expr)	(void)(expr)
 #endif
 
+/*
+ * Get the correct offset inside the page of extent buffer.
+ *
+ * @eb:		target extent buffer
+ * @start:	offset inside the extent buffer
+ *
+ * Will handle both sectorsize == PAGE_SIZE and sectorsize < PAGE_SIZE cases.
+ */
+static inline size_t get_eb_offset_in_page(const struct extent_buffer *eb,
+					   unsigned long offset)
+{
+	/*
+	 * For sectorsize == PAGE_SIZE case, eb->start will always be aligned
+	 * to PAGE_SIZE, thus adding it won't cause any difference.
+	 *
+	 * For sectorsize < PAGE_SIZE, we must only read the data that belongs
+	 * to the eb, thus we have to take the eb->start into consideration.
+	 */
+	return offset_in_page(offset + eb->start);
+}
+
+static inline unsigned long get_eb_page_index(unsigned long offset)
+{
+	/*
+	 * For sectorsize == PAGE_SIZE case, plain >> PAGE_SHIFT is enough.
+	 *
+	 * For sectorsize < PAGE_SIZE case, we only support 64K PAGE_SIZE,
+	 * and have ensured that all tree blocks are contained in one page,
+	 * thus we always get index == 0.
+	 */
+	return offset >> PAGE_SHIFT;
+}
+
 /*
  * Use that for functions that are conditionally exported for sanity tests but
  * otherwise static
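get_eb_offset_in_page() above is the core of the subpage preparation: fold eb->start into the in-page offset so a tree block that shares a 64K page with others reads only its own slice. A userspace sketch of the same math, with PAGE_SIZE assumed to be 64K purely for illustration:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 65536UL	/* assumed 64K page, e.g. arm64/ppc64 configs */

static unsigned long offset_in_page(uint64_t off)
{
	return (unsigned long)(off & (PAGE_SIZE - 1));
}

static unsigned long get_eb_offset_in_page(uint64_t eb_start, uint64_t offset)
{
	/* With 4K sectors and 4K pages eb_start is page aligned, so the
	 * addition is a no-op; with subpage it shifts into the eb's slice. */
	return offset_in_page(offset + eb_start);
}

int main(void)
{
	/* a tree block at 0x34000 inside a 64K page: byte 100 of the eb
	 * lives at in-page offset 16484, not 100 */
	printf("%lu\n", get_eb_offset_in_page(0x34000, 100));
	return 0;
}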
@@ -3599,4 +3707,9 @@ static inline int btrfs_is_testing(struct btrfs_fs_info *fs_info)
 }
 #endif
 
+static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info)
+{
+	return fs_info->zoned != 0;
+}
+
 #endif
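btrfs_is_zoned() relies on the zone_size/zoned union added to btrfs_fs_info earlier in this file: the same u64 doubles as the zone size when ZONED mode is active and as an is-enabled flag otherwise. A minimal standalone illustration of that layout (illustrative types only):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct fs_info {
	union {
		uint64_t zone_size;	/* > 0 when in ZONED mode */
		uint64_t zoned;		/* same bits, read as a flag */
	};
};

static bool is_zoned(const struct fs_info *fs)
{
	return fs->zoned != 0;
}

int main(void)
{
	struct fs_info fs = { .zone_size = 256ULL * 1024 * 1024 };

	printf("zoned=%d zone_size=%llu\n", is_zoned(&fs),
	       (unsigned long long)fs.zone_size);
	return 0;
}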
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
@@ -740,13 +740,6 @@ static int btrfs_batch_insert_items(struct btrfs_root *root,
 		goto out;
 	}
 
-	/*
-	 * we need allocate some memory space, but it might cause the task
-	 * to sleep, so we set all locked nodes in the path to blocking locks
-	 * first.
-	 */
-	btrfs_set_path_blocking(path);
-
 	keys = kmalloc_array(nitems, sizeof(struct btrfs_key), GFP_NOFS);
 	if (!keys) {
 		ret = -ENOMEM;
@@ -1154,7 +1147,6 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
-	path->leave_spinning = 1;
 
 	block_rsv = trans->block_rsv;
 	trans->block_rsv = &fs_info->delayed_block_rsv;
@@ -1219,7 +1211,6 @@ int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
 		btrfs_release_delayed_node(delayed_node);
 		return -ENOMEM;
 	}
-	path->leave_spinning = 1;
 
 	block_rsv = trans->block_rsv;
 	trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
@@ -1264,7 +1255,6 @@ int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode)
 		ret = -ENOMEM;
 		goto trans_out;
 	}
-	path->leave_spinning = 1;
 
 	block_rsv = trans->block_rsv;
 	trans->block_rsv = &fs_info->delayed_block_rsv;
@@ -1333,7 +1323,6 @@ static void btrfs_async_run_delayed_root(struct btrfs_work *work)
 		if (!delayed_node)
 			break;
 
-		path->leave_spinning = 1;
 		root = delayed_node->root;
 
 		trans = btrfs_join_transaction(root);
@@ -1826,27 +1815,29 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
 }
 
 int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
-			       struct btrfs_root *root, struct inode *inode)
+			       struct btrfs_root *root,
+			       struct btrfs_inode *inode)
 {
 	struct btrfs_delayed_node *delayed_node;
 	int ret = 0;
 
-	delayed_node = btrfs_get_or_create_delayed_node(BTRFS_I(inode));
+	delayed_node = btrfs_get_or_create_delayed_node(inode);
 	if (IS_ERR(delayed_node))
 		return PTR_ERR(delayed_node);
 
 	mutex_lock(&delayed_node->mutex);
 	if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
-		fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
+		fill_stack_inode_item(trans, &delayed_node->inode_item,
+				      &inode->vfs_inode);
 		goto release_node;
 	}
 
-	ret = btrfs_delayed_inode_reserve_metadata(trans, root, BTRFS_I(inode),
+	ret = btrfs_delayed_inode_reserve_metadata(trans, root, inode,
 						   delayed_node);
 	if (ret)
 		goto release_node;
 
-	fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
+	fill_stack_inode_item(trans, &delayed_node->inode_item, &inode->vfs_inode);
 	set_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
 	delayed_node->count++;
 	atomic_inc(&root->fs_info->delayed_root->items);

diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
@@ -110,7 +110,8 @@ int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode);
 
 int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
-			       struct btrfs_root *root, struct inode *inode);
+			       struct btrfs_root *root,
+			       struct btrfs_inode *inode);
 int btrfs_fill_inode(struct inode *inode, u32 *rdev);
 int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode);
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
@@ -21,6 +21,7 @@
 #include "rcu-string.h"
 #include "dev-replace.h"
 #include "sysfs.h"
+#include "zoned.h"
 
 /*
  * Device replace overview
@@ -96,7 +97,7 @@ no_valid_dev_replace_entry_found:
 		 * a replace target, fail the mount.
 		 */
 		if (btrfs_find_device(fs_info->fs_devices,
-				      BTRFS_DEV_REPLACE_DEVID, NULL, NULL, false)) {
+				      BTRFS_DEV_REPLACE_DEVID, NULL, NULL)) {
 			btrfs_err(fs_info,
 			"found replace target device without a valid replace item");
 			ret = -EUCLEAN;
@@ -159,7 +160,7 @@ no_valid_dev_replace_entry_found:
 			 * replace target, fail the mount.
 			 */
 			if (btrfs_find_device(fs_info->fs_devices,
-					      BTRFS_DEV_REPLACE_DEVID, NULL, NULL, false)) {
+					      BTRFS_DEV_REPLACE_DEVID, NULL, NULL)) {
 				btrfs_err(fs_info,
 			"replace devid present without an active replace item");
 				ret = -EUCLEAN;
@@ -171,10 +172,10 @@ no_valid_dev_replace_entry_found:
 	case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
 	case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
 		dev_replace->srcdev = btrfs_find_device(fs_info->fs_devices,
-						src_devid, NULL, NULL, true);
+						src_devid, NULL, NULL);
 		dev_replace->tgtdev = btrfs_find_device(fs_info->fs_devices,
 							BTRFS_DEV_REPLACE_DEVID,
-							NULL, NULL, true);
+							NULL, NULL);
 		/*
 		 * allow 'btrfs dev replace_cancel' if src/tgt device is
 		 * missing
@@ -259,6 +260,13 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
 		return PTR_ERR(bdev);
 	}
 
+	if (!btrfs_check_device_zone_type(fs_info, bdev)) {
+		btrfs_err(fs_info,
+		"dev-replace: zoned type of target device mismatch with filesystem");
+		ret = -EINVAL;
+		goto error;
+	}
+
 	sync_blockdev(bdev);
 
 	list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
@@ -313,6 +321,10 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
 	set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
 	device->fs_devices = fs_info->fs_devices;
 
+	ret = btrfs_get_dev_zone_info(device);
+	if (ret)
+		goto error;
+
 	mutex_lock(&fs_info->fs_devices->device_list_mutex);
 	list_add(&device->dev_list, &fs_info->fs_devices->devices);
 	fs_info->fs_devices->num_devices++;
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
@@ -127,7 +127,6 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name,
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
-	path->leave_spinning = 1;
 
 	btrfs_cpu_key_to_disk(&disk_key, location);
 
diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c
@@ -355,7 +355,7 @@ void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
 
 	block_group = find_next_block_group(discard_ctl, now);
 	if (block_group) {
-		unsigned long delay = discard_ctl->delay;
+		u64 delay = discard_ctl->delay_ms * NSEC_PER_MSEC;
 		u32 kbps_limit = READ_ONCE(discard_ctl->kbps_limit);
 
 		/*
@@ -366,9 +366,9 @@ void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
 		if (kbps_limit && discard_ctl->prev_discard) {
 			u64 bps_limit = ((u64)kbps_limit) * SZ_1K;
 			u64 bps_delay = div64_u64(discard_ctl->prev_discard *
-						  MSEC_PER_SEC, bps_limit);
+						  NSEC_PER_SEC, bps_limit);
 
-			delay = max(delay, msecs_to_jiffies(bps_delay));
+			delay = max(delay, bps_delay);
 		}
 
 		/*
@@ -378,11 +378,20 @@ void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
 		if (now < block_group->discard_eligible_time) {
 			u64 bg_timeout = block_group->discard_eligible_time - now;
 
-			delay = max(delay, nsecs_to_jiffies(bg_timeout));
+			delay = max(delay, bg_timeout);
+		}
+
+		if (override && discard_ctl->prev_discard) {
+			u64 elapsed = now - discard_ctl->prev_discard_time;
+
+			if (delay > elapsed)
+				delay -= elapsed;
+			else
+				delay = 0;
 		}
 
 		mod_delayed_work(discard_ctl->discard_workers,
-				 &discard_ctl->work, delay);
+				 &discard_ctl->work, nsecs_to_jiffies(delay));
 	}
 out:
 	spin_unlock(&discard_ctl->lock);
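
The hunks above move the discard scheduling delay from jiffies to nanoseconds, converting only at the final mod_delayed_work() call, and subtract the time already elapsed since the previous discard when override is set. A minimal standalone sketch of the bandwidth-throttle arithmetic (user-space C; the kernel constant values are assumed):

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL
#define SZ_1K 1024ULL

/* Delay (ns) so that prev_discard bytes spread out to at most kbps_limit. */
static uint64_t throttle_delay_ns(uint64_t base_delay_ns,
				  uint64_t prev_discard_bytes,
				  uint32_t kbps_limit)
{
	if (kbps_limit && prev_discard_bytes) {
		uint64_t bps_limit = (uint64_t)kbps_limit * SZ_1K;
		uint64_t bps_delay = prev_discard_bytes * NSEC_PER_SEC / bps_limit;

		if (bps_delay > base_delay_ns)
			base_delay_ns = bps_delay;
	}
	return base_delay_ns;
}

int main(void)
{
	/* 64 MiB discarded, limited to 100 MiB/s -> ~0.64 s between requests */
	printf("%llu ns\n", (unsigned long long)
	       throttle_delay_ns(0, 64 << 20, 100 << 10));
	return 0;
}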
@@ -465,7 +474,12 @@ static void btrfs_discard_workfn(struct work_struct *work)
 			discard_ctl->discard_extent_bytes += trimmed;
 	}
 
+	/*
+	 * Updated without locks as this is inside the workfn and nothing else
+	 * is reading the values
+	 */
 	discard_ctl->prev_discard = trimmed;
+	discard_ctl->prev_discard_time = ktime_get_ns();
 
 	/* Determine next steps for a block_group */
 	if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
@@ -519,7 +533,6 @@ void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl)
 	s64 discardable_bytes;
 	u32 iops_limit;
 	unsigned long delay;
-	unsigned long lower_limit = BTRFS_DISCARD_MIN_DELAY_MSEC;
 
 	discardable_extents = atomic_read(&discard_ctl->discardable_extents);
 	if (!discardable_extents)
@@ -550,12 +563,13 @@ void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl)
 
 	iops_limit = READ_ONCE(discard_ctl->iops_limit);
 	if (iops_limit)
-		lower_limit = max_t(unsigned long, lower_limit,
-				    MSEC_PER_SEC / iops_limit);
+		delay = MSEC_PER_SEC / iops_limit;
+	else
+		delay = BTRFS_DISCARD_TARGET_MSEC / discardable_extents;
 
-	delay = BTRFS_DISCARD_TARGET_MSEC / discardable_extents;
-	delay = clamp(delay, lower_limit, BTRFS_DISCARD_MAX_DELAY_MSEC);
-	discard_ctl->delay = msecs_to_jiffies(delay);
+	delay = clamp(delay, BTRFS_DISCARD_MIN_DELAY_MSEC,
+		      BTRFS_DISCARD_MAX_DELAY_MSEC);
+	discard_ctl->delay_ms = delay;
 
 	spin_unlock(&discard_ctl->lock);
 }
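
btrfs_discard_calc_delay() now derives the delay directly: MSEC_PER_SEC / iops_limit when an iops limit is set, otherwise BTRFS_DISCARD_TARGET_MSEC spread across the discardable extents, clamped between min and max and stored in milliseconds. The same computation as a sketch (the constant values here mirror what the defaults appear to be; treat them as assumptions):

#include <stdint.h>
#include <stdio.h>

#define MSEC_PER_SEC		1000UL
#define DISCARD_TARGET_MSEC	(6 * 60 * 60 * MSEC_PER_SEC)
#define DISCARD_MIN_DELAY_MSEC	1UL
#define DISCARD_MAX_DELAY_MSEC	1000UL

static unsigned long clamp_ul(unsigned long v, unsigned long lo, unsigned long hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

static unsigned long calc_delay_ms(uint32_t iops_limit, long discardable_extents)
{
	unsigned long delay;

	if (iops_limit)
		delay = MSEC_PER_SEC / iops_limit;
	else
		delay = DISCARD_TARGET_MSEC / discardable_extents;

	return clamp_ul(delay, DISCARD_MIN_DELAY_MSEC, DISCARD_MAX_DELAY_MSEC);
}

int main(void)
{
	printf("%lu ms\n", calc_delay_ms(10, 1));     /* 100 ms from iops limit */
	printf("%lu ms\n", calc_delay_ms(0, 100000)); /* 216 ms spread over extents */
	return 0;
}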
@@ -563,15 +577,14 @@ void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl)
 /**
  * btrfs_discard_update_discardable - propagate discard counters
  * @block_group: block_group of interest
- * @ctl: free_space_ctl of @block_group
  *
  * This propagates deltas of counters up to the discard_ctl. It maintains a
  * current counter and a previous counter passing the delta up to the global
  * stat. Then the current counter value becomes the previous counter value.
  */
-void btrfs_discard_update_discardable(struct btrfs_block_group *block_group,
-				      struct btrfs_free_space_ctl *ctl)
+void btrfs_discard_update_discardable(struct btrfs_block_group *block_group)
 {
+	struct btrfs_free_space_ctl *ctl;
 	struct btrfs_discard_ctl *discard_ctl;
 	s32 extents_delta;
 	s64 bytes_delta;
@@ -581,8 +594,10 @@ void btrfs_discard_update_discardable(struct btrfs_block_group *block_group)
 	    !btrfs_is_block_group_data_only(block_group))
 		return;
 
+	ctl = block_group->free_space_ctl;
 	discard_ctl = &block_group->fs_info->discard_ctl;
 
+	lockdep_assert_held(&ctl->tree_lock);
 	extents_delta = ctl->discardable_extents[BTRFS_STAT_CURR] -
 			ctl->discardable_extents[BTRFS_STAT_PREV];
 	if (extents_delta) {
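
The counter pair indexed by BTRFS_STAT_CURR/BTRFS_STAT_PREV lets each block group push only its delta up to the global discard counters, with no rescan; after the push, current becomes previous. Reduced to its essentials:

#include <stdio.h>

enum { STAT_CURR, STAT_PREV };

/* Push the change since the last call up to a global counter. */
static void propagate_delta(long bg_extents[2], long *global_extents)
{
	long delta = bg_extents[STAT_CURR] - bg_extents[STAT_PREV];

	if (delta) {
		*global_extents += delta;
		bg_extents[STAT_PREV] = bg_extents[STAT_CURR];
	}
}

int main(void)
{
	long bg[2] = { 5, 0 }, global = 0;

	propagate_delta(bg, &global);	/* global = 5 */
	bg[STAT_CURR] = 3;
	propagate_delta(bg, &global);	/* delta -2, global = 3 */
	printf("%ld\n", global);
	return 0;
}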
@@ -684,10 +699,11 @@ void btrfs_discard_init(struct btrfs_fs_info *fs_info)
 		INIT_LIST_HEAD(&discard_ctl->discard_list[i]);
 
 	discard_ctl->prev_discard = 0;
+	discard_ctl->prev_discard_time = 0;
 	atomic_set(&discard_ctl->discardable_extents, 0);
 	atomic64_set(&discard_ctl->discardable_bytes, 0);
 	discard_ctl->max_discard_size = BTRFS_ASYNC_DISCARD_DEFAULT_MAX_SIZE;
-	discard_ctl->delay = BTRFS_DISCARD_MAX_DELAY_MSEC;
+	discard_ctl->delay_ms = BTRFS_DISCARD_MAX_DELAY_MSEC;
 	discard_ctl->iops_limit = BTRFS_DISCARD_MAX_IOPS;
 	discard_ctl->kbps_limit = 0;
 	discard_ctl->discard_extent_bytes = 0;
diff --git a/fs/btrfs/discard.h b/fs/btrfs/discard.h
@@ -28,8 +28,7 @@ bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl);
 
 /* Update operations */
 void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl);
-void btrfs_discard_update_discardable(struct btrfs_block_group *block_group,
-				      struct btrfs_free_space_ctl *ctl);
+void btrfs_discard_update_discardable(struct btrfs_block_group *block_group);
 
 /* Setup/cleanup operations */
 void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info);
[diff for one file not shown because of its large size]
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
@@ -43,13 +43,15 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info);
 int btrfs_verify_level_key(struct extent_buffer *eb, int level,
 			   struct btrfs_key *first_key, u64 parent_transid);
 struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
-				      u64 parent_transid, int level,
-				      struct btrfs_key *first_key);
-void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr);
+				      u64 owner_root, u64 parent_transid,
+				      int level, struct btrfs_key *first_key);
 struct extent_buffer *btrfs_find_create_tree_block(
 						struct btrfs_fs_info *fs_info,
-						u64 bytenr);
+						u64 bytenr, u64 owner_root,
+						int level);
 void btrfs_clean_tree_block(struct extent_buffer *buf);
+void btrfs_clear_oneshot_options(struct btrfs_fs_info *fs_info);
+int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info);
 int __cold open_ctree(struct super_block *sb,
 		      struct btrfs_fs_devices *fs_devices,
 		      char *options);
@@ -79,7 +81,7 @@ void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info);
 void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info);
 void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
 				 struct btrfs_root *root);
-int btrfs_validate_metadata_buffer(struct btrfs_io_bio *io_bio, u64 phy_offset,
+int btrfs_validate_metadata_buffer(struct btrfs_io_bio *io_bio,
 				   struct page *page, u64 start, u64 end,
 				   int mirror);
 blk_status_t btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio,
@@ -112,10 +114,10 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level,
 		      struct btrfs_key *first_key);
 blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
 				 enum btrfs_wq_endio_type metadata);
-blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
+blk_status_t btrfs_wq_submit_bio(struct inode *inode, struct bio *bio,
 				 int mirror_num, unsigned long bio_flags,
-				 u64 bio_offset, void *private_data,
+				 u64 dio_file_offset,
 				 extent_submit_bio_start_t *submit_bio_start);
 blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio,
 				   int mirror_num);
 int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
@@ -131,16 +133,15 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
 int btree_lock_page_hook(struct page *page, void *data,
 			 void (*flush_fn)(void *));
 int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags);
+int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid);
+int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid);
 int __init btrfs_end_io_wq_init(void);
 void __cold btrfs_end_io_wq_exit(void);
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-void btrfs_init_lockdep(void);
 void btrfs_set_buffer_lockdep_class(u64 objectid,
 				    struct extent_buffer *eb, int level);
 #else
-static inline void btrfs_init_lockdep(void)
-{ }
 static inline void btrfs_set_buffer_lockdep_class(u64 objectid,
 					struct extent_buffer *eb, int level)
 {
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
@@ -222,7 +222,6 @@ static int btrfs_get_name(struct dentry *parent, char *name,
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
-	path->leave_spinning = 1;
 
 	if (ino == BTRFS_FIRST_FREE_OBJECTID) {
 		key.objectid = BTRFS_I(inode)->root->root_key.objectid;
diff --git a/fs/btrfs/extent-io-tree.h b/fs/btrfs/extent-io-tree.h
@@ -21,10 +21,24 @@ struct io_failure_record;
 #define EXTENT_NORESERVE	(1U << 11)
 #define EXTENT_QGROUP_RESERVED	(1U << 12)
 #define EXTENT_CLEAR_DATA_RESV	(1U << 13)
+/*
+ * Must be cleared only during ordered extent completion or on error paths if we
+ * did not manage to submit bios and create the ordered extents for the range.
+ * Should not be cleared during page release and page invalidation (if there is
+ * an ordered extent in flight), that is left for the ordered extent completion.
+ */
 #define EXTENT_DELALLOC_NEW	(1U << 14)
+/*
+ * When an ordered extent successfully completes for a region marked as a new
+ * delalloc range, use this flag when clearing a new delalloc range to indicate
+ * that the VFS' inode number of bytes should be incremented and the inode's new
+ * delalloc bytes decremented, in an atomic way to prevent races with stat(2).
+ */
+#define EXTENT_ADD_INODE_BYTES  (1U << 15)
 #define EXTENT_DO_ACCOUNTING    (EXTENT_CLEAR_META_RESV | \
 				 EXTENT_CLEAR_DATA_RESV)
-#define EXTENT_CTLBITS		(EXTENT_DO_ACCOUNTING)
+#define EXTENT_CTLBITS		(EXTENT_DO_ACCOUNTING | \
+				 EXTENT_ADD_INODE_BYTES)
 
 /*
  * Redefined bits above which are used only in the device allocation tree,
@@ -73,7 +87,7 @@ struct extent_state {
 	/* ADD NEW ELEMENTS AFTER THIS */
 	wait_queue_head_t wq;
 	refcount_t refs;
-	unsigned state;
+	u32 state;
 
 	struct io_failure_record *failrec;
 
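
EXTENT_ADD_INODE_BYTES is folded into EXTENT_CTLBITS so that clearing a range also carries the "bump the inode byte counter" instruction, and the state word becomes an explicit u32. A small demonstration of the mask composition (bit positions other than 13 to 15 are illustrative assumptions):

#include <stdint.h>
#include <stdio.h>

#define EXTENT_CLEAR_META_RESV	(1U << 9)
#define EXTENT_CLEAR_DATA_RESV	(1U << 13)
#define EXTENT_ADD_INODE_BYTES	(1U << 15)

#define EXTENT_DO_ACCOUNTING	(EXTENT_CLEAR_META_RESV | EXTENT_CLEAR_DATA_RESV)
#define EXTENT_CTLBITS		(EXTENT_DO_ACCOUNTING | EXTENT_ADD_INODE_BYTES)

int main(void)
{
	uint32_t clear_request = EXTENT_ADD_INODE_BYTES | (1U << 14);

	/* A clear operation only honors control bits present in CTLBITS. */
	if (clear_request & EXTENT_CTLBITS & EXTENT_ADD_INODE_BYTES)
		printf("inode byte counter would be updated on clear\n");
	printf("ctlbits = 0x%x\n", EXTENT_CTLBITS);
	return 0;
}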
@@ -105,19 +119,18 @@ void __cold extent_io_exit(void);
 
 u64 count_range_bits(struct extent_io_tree *tree,
 		     u64 *start, u64 search_end,
-		     u64 max_bytes, unsigned bits, int contig);
+		     u64 max_bytes, u32 bits, int contig);
 
 void free_extent_state(struct extent_state *state);
 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
-		   unsigned bits, int filled,
-		   struct extent_state *cached_state);
+		   u32 bits, int filled, struct extent_state *cached_state);
 int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
-			     unsigned bits, struct extent_changeset *changeset);
+			     u32 bits, struct extent_changeset *changeset);
 int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-		     unsigned bits, int wake, int delete,
+		     u32 bits, int wake, int delete,
 		     struct extent_state **cached);
 int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-		       unsigned bits, int wake, int delete,
+		       u32 bits, int wake, int delete,
 		       struct extent_state **cached, gfp_t mask,
 		       struct extent_changeset *changeset);
 
@@ -141,7 +154,7 @@ static inline int unlock_extent_cached_atomic(struct extent_io_tree *tree,
 }
 
 static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
-				    u64 end, unsigned bits)
+				    u64 end, u32 bits)
 {
 	int wake = 0;
 
@@ -152,17 +165,19 @@ static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
 }
 
 int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
-			   unsigned bits, struct extent_changeset *changeset);
+			   u32 bits, struct extent_changeset *changeset);
 int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-		   unsigned bits, u64 *failed_start,
-		   struct extent_state **cached_state, gfp_t mask);
+		   u32 bits, unsigned exclusive_bits, u64 *failed_start,
+		   struct extent_state **cached_state, gfp_t mask,
+		   struct extent_changeset *changeset);
 int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start, u64 end,
-			   unsigned bits);
+			   u32 bits);
 
 static inline int set_extent_bits(struct extent_io_tree *tree, u64 start,
-				  u64 end, unsigned bits)
+				  u64 end, u32 bits)
 {
-	return set_extent_bit(tree, start, end, bits, NULL, NULL, GFP_NOFS);
+	return set_extent_bit(tree, start, end, bits, 0, NULL, NULL, GFP_NOFS,
+			      NULL);
 }
 
 static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
@@ -175,8 +190,8 @@ static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
 static inline int set_extent_dirty(struct extent_io_tree *tree, u64 start,
 				   u64 end, gfp_t mask)
 {
-	return set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL,
-			      NULL, mask);
+	return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, NULL,
+			      mask, NULL);
 }
 
 static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start,
@@ -188,16 +203,16 @@ static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start,
 }
 
 int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-		       unsigned bits, unsigned clear_bits,
+		       u32 bits, u32 clear_bits,
 		       struct extent_state **cached_state);
 
 static inline int set_extent_delalloc(struct extent_io_tree *tree, u64 start,
-				      u64 end, unsigned int extra_bits,
+				      u64 end, u32 extra_bits,
 				      struct extent_state **cached_state)
 {
 	return set_extent_bit(tree, start, end,
 			      EXTENT_DELALLOC | EXTENT_UPTODATE | extra_bits,
-			      NULL, cached_state, GFP_NOFS);
+			      0, NULL, cached_state, GFP_NOFS, NULL);
 }
 
 static inline int set_extent_defrag(struct extent_io_tree *tree, u64 start,
@@ -205,30 +220,30 @@ static inline int set_extent_defrag(struct extent_io_tree *tree, u64 start,
 {
 	return set_extent_bit(tree, start, end,
 			      EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG,
-			      NULL, cached_state, GFP_NOFS);
+			      0, NULL, cached_state, GFP_NOFS, NULL);
 }
 
 static inline int set_extent_new(struct extent_io_tree *tree, u64 start,
				 u64 end)
 {
-	return set_extent_bit(tree, start, end, EXTENT_NEW, NULL, NULL,
-			      GFP_NOFS);
+	return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, NULL,
+			      GFP_NOFS, NULL);
 }
 
 static inline int set_extent_uptodate(struct extent_io_tree *tree, u64 start,
		u64 end, struct extent_state **cached_state, gfp_t mask)
 {
-	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, NULL,
-			      cached_state, mask);
+	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
+			      cached_state, mask, NULL);
 }
 
 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
-			  u64 *start_ret, u64 *end_ret, unsigned bits,
+			  u64 *start_ret, u64 *end_ret, u32 bits,
			  struct extent_state **cached_state);
 void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
-				 u64 *start_ret, u64 *end_ret, unsigned bits);
+				 u64 *start_ret, u64 *end_ret, u32 bits);
 int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
-			       u64 *start_ret, u64 *end_ret, unsigned bits);
+			       u64 *start_ret, u64 *end_ret, u32 bits);
 int extent_invalidatepage(struct extent_io_tree *tree,
			  struct page *page, unsigned long offset);
 bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
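
Every inline wrapper above is updated to pass the two new set_extent_bit() arguments, an exclusive_bits mask and an optional changeset, as 0 and NULL. The shape of that refactor, widen the core function and keep the thin wrappers source-compatible, in a self-contained sketch:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct changeset { long ranges; };

/* Widened core: new exclusive_bits and changeset parameters. */
static int set_bits(uint32_t *state, uint32_t bits, uint32_t exclusive_bits,
		    struct changeset *cs)
{
	if (exclusive_bits && (*state & exclusive_bits))
		return -1;	/* caller wanted exclusivity; range is busy */
	*state |= bits;
	if (cs)
		cs->ranges++;
	return 0;
}

/* Old-style convenience wrapper keeps existing call sites one-liners. */
static int set_bits_simple(uint32_t *state, uint32_t bits)
{
	return set_bits(state, bits, 0, NULL);
}

int main(void)
{
	uint32_t state = 0;

	printf("%d\n", set_bits_simple(&state, 1U << 3)); /* 0, state now 0x8 */
	return 0;
}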
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
@@ -1465,7 +1465,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 	if (!path)
 		return -ENOMEM;
 
-	path->leave_spinning = 1;
 	/* this will setup the path even if it fails to insert the back ref */
 	ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes,
 					   parent, root_objectid, owner,
@@ -1489,7 +1488,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(leaf);
 	btrfs_release_path(path);
 
-	path->leave_spinning = 1;
 	/* now insert the actual backref */
 	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
 		BUG_ON(refs_to_add != 1);
@@ -1605,7 +1603,6 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
 	}
 
 again:
-	path->leave_spinning = 1;
 	ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 1);
 	if (ret < 0) {
 		err = ret;
@@ -2132,25 +2129,6 @@ static u64 find_middle(struct rb_root *root)
 }
 #endif
 
-/*
- * Takes the number of bytes to be csumm'ed and figures out how many leaves it
- * would require to store the csums for that many bytes.
- */
-u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes)
-{
-	u64 csum_size;
-	u64 num_csums_per_leaf;
-	u64 num_csums;
-
-	csum_size = BTRFS_MAX_ITEM_SIZE(fs_info);
-	num_csums_per_leaf = div64_u64(csum_size,
-			(u64)btrfs_super_csum_size(fs_info->super_copy));
-	num_csums = div64_u64(csum_bytes, fs_info->sectorsize);
-	num_csums += num_csums_per_leaf - 1;
-	num_csums = div64_u64(num_csums, num_csums_per_leaf);
-	return num_csums;
-}
-
 /*
  * this starts processing the delayed reference count updates and
  * extent insertions we have queued up so far. count can be
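
The removed btrfs_csum_bytes_to_leaves() computed how many leaves are needed to hold the checksums for a given number of bytes; this excerpt only shows the deletion, and the series reworks csum sizing elsewhere (fs_info->csum_size and csums_per_leaf appear in later hunks). Its round-up arithmetic, kept as a runnable sketch with assumed 4 KiB sectors and crc32c checksums:

#include <stdint.h>
#include <stdio.h>

#define MAX_ITEM_SIZE	16283ULL	/* illustrative leaf item capacity */
#define CSUM_SIZE	4ULL		/* crc32c */
#define SECTORSIZE	4096ULL

/* Leaves needed to store csums for csum_bytes of data (rounded up). */
static uint64_t csum_bytes_to_leaves(uint64_t csum_bytes)
{
	uint64_t csums_per_leaf = MAX_ITEM_SIZE / CSUM_SIZE;
	uint64_t num_csums = csum_bytes / SECTORSIZE;

	return (num_csums + csums_per_leaf - 1) / csums_per_leaf;
}

int main(void)
{
	/* 1 GiB of data -> 262144 csums -> 65 leaves at ~4070 csums/leaf */
	printf("%llu\n", (unsigned long long)csum_bytes_to_leaves(1ULL << 30));
	return 0;
}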
@@ -2663,6 +2641,11 @@ static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
 		BUG_ON(!btrfs_block_group_done(block_group));
 		ret = btrfs_remove_free_space(block_group, start, num_bytes);
 	} else {
+		/*
+		 * We must wait for v1 caching to finish, otherwise we may not
+		 * remove our space.
+		 */
+		btrfs_wait_space_cache_v1_finished(block_group, caching_ctl);
 		mutex_lock(&caching_ctl->mutex);
 
 		if (start >= caching_ctl->progress) {
@@ -2730,31 +2713,6 @@ btrfs_inc_block_group_reservations(struct btrfs_block_group *bg)
 	atomic_inc(&bg->reservations);
 }
 
-void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info)
-{
-	struct btrfs_caching_control *next;
-	struct btrfs_caching_control *caching_ctl;
-	struct btrfs_block_group *cache;
-
-	down_write(&fs_info->commit_root_sem);
-
-	list_for_each_entry_safe(caching_ctl, next,
-				 &fs_info->caching_block_groups, list) {
-		cache = caching_ctl->block_group;
-		if (btrfs_block_group_done(cache)) {
-			cache->last_byte_to_unpin = (u64)-1;
-			list_del_init(&caching_ctl->list);
-			btrfs_put_caching_control(caching_ctl);
-		} else {
-			cache->last_byte_to_unpin = caching_ctl->progress;
-		}
-	}
-
-	up_write(&fs_info->commit_root_sem);
-
-	btrfs_update_global_block_rsv(fs_info);
-}
-
 /*
  * Returns the free cluster for the given space info and sets empty_cluster to
  * what it should be based on the mount options.
@@ -2816,11 +2774,13 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
 		len = cache->start + cache->length - start;
 		len = min(len, end + 1 - start);
 
-		if (start < cache->last_byte_to_unpin) {
-			len = min(len, cache->last_byte_to_unpin - start);
-			if (return_free_space)
-				btrfs_add_free_space(cache, start, len);
+		down_read(&fs_info->commit_root_sem);
+		if (start < cache->last_byte_to_unpin && return_free_space) {
+			u64 add_len = min(len, cache->last_byte_to_unpin - start);
+
+			btrfs_add_free_space(cache, start, add_len);
 		}
+		up_read(&fs_info->commit_root_sem);
 
 		start += len;
 		total_unpinned += len;
@@ -3040,8 +3000,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 	if (!path)
 		return -ENOMEM;
 
-	path->leave_spinning = 1;
-
 	is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
 
 	if (!is_data && refs_to_drop != 1) {
@@ -3106,7 +3064,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 			goto out;
 		}
 		btrfs_release_path(path);
-		path->leave_spinning = 1;
 
 		/* Slow path to locate EXTENT/METADATA_ITEM */
 		key.objectid = bytenr;
@@ -4448,7 +4405,6 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
 	if (!path)
 		return -ENOMEM;
 
-	path->leave_spinning = 1;
 	ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
 				      ins, size);
 	if (ret) {
@@ -4533,7 +4489,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
 	if (!path)
 		return -ENOMEM;
 
-	path->leave_spinning = 1;
 	ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
 				      &extent_key, size);
 	if (ret) {
@@ -4662,7 +4617,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct extent_buffer *buf;
 
-	buf = btrfs_find_create_tree_block(fs_info, bytenr);
+	buf = btrfs_find_create_tree_block(fs_info, bytenr, owner, level);
 	if (IS_ERR(buf))
 		return buf;
 
@@ -4679,12 +4634,16 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		return ERR_PTR(-EUCLEAN);
 	}
 
+	/*
+	 * This needs to stay, because we could allocate a freed block from an
+	 * old tree into a new tree, so we need to make sure this new block is
+	 * set to the appropriate level and owner.
+	 */
 	btrfs_set_buffer_lockdep_class(owner, buf, level);
 	__btrfs_tree_lock(buf, nest);
 	btrfs_clean_tree_block(buf);
 	clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
 
-	btrfs_set_lock_blocking_write(buf);
 	set_extent_buffer_uptodate(buf);
 
 	memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header));
@@ -4905,7 +4864,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
 			continue;
 		}
 reada:
-		readahead_tree_block(fs_info, bytenr);
+		btrfs_readahead_node_child(eb, slot);
 		nread++;
 	}
 	wc->reada_slot = slot;
@@ -5064,16 +5023,13 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 
 	next = find_extent_buffer(fs_info, bytenr);
 	if (!next) {
-		next = btrfs_find_create_tree_block(fs_info, bytenr);
+		next = btrfs_find_create_tree_block(fs_info, bytenr,
+				root->root_key.objectid, level - 1);
 		if (IS_ERR(next))
 			return PTR_ERR(next);
-
-		btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
-					       level - 1);
 		reada = 1;
 	}
 	btrfs_tree_lock(next);
-	btrfs_set_lock_blocking_write(next);
 
 	ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
 				       &wc->refs[level - 1],
@@ -5124,8 +5080,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 	if (!next) {
 		if (reada && level == 1)
 			reada_walk_down(trans, root, wc, path);
-		next = read_tree_block(fs_info, bytenr, generation, level - 1,
-				       &first_key);
+		next = read_tree_block(fs_info, bytenr, root->root_key.objectid,
+				       generation, level - 1, &first_key);
 		if (IS_ERR(next)) {
 			return PTR_ERR(next);
 		} else if (!extent_buffer_uptodate(next)) {
@@ -5133,7 +5089,6 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 			return -EIO;
 		}
 		btrfs_tree_lock(next);
-		btrfs_set_lock_blocking_write(next);
 	}
 
 	level--;
@@ -5145,7 +5100,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 	}
 	path->nodes[level] = next;
 	path->slots[level] = 0;
-	path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
+	path->locks[level] = BTRFS_WRITE_LOCK;
 	wc->level = level;
 	if (wc->level == 1)
 		wc->reada_slot = 0;
@@ -5273,8 +5228,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
 		if (!path->locks[level]) {
 			BUG_ON(level == 0);
 			btrfs_tree_lock(eb);
-			btrfs_set_lock_blocking_write(eb);
-			path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
+			path->locks[level] = BTRFS_WRITE_LOCK;
 
 			ret = btrfs_lookup_extent_info(trans, fs_info,
 						       eb->start, level, 1,
@@ -5317,8 +5271,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
 		if (!path->locks[level] &&
 		    btrfs_header_generation(eb) == trans->transid) {
 			btrfs_tree_lock(eb);
-			btrfs_set_lock_blocking_write(eb);
-			path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
+			path->locks[level] = BTRFS_WRITE_LOCK;
 		}
 		btrfs_clean_tree_block(eb);
 	}
@@ -5486,9 +5439,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
 	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
 		level = btrfs_header_level(root->node);
 		path->nodes[level] = btrfs_lock_root_node(root);
-		btrfs_set_lock_blocking_write(path->nodes[level]);
 		path->slots[level] = 0;
-		path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
+		path->locks[level] = BTRFS_WRITE_LOCK;
 		memset(&wc->update_progress, 0,
 		       sizeof(wc->update_progress));
 	} else {
@@ -5496,7 +5448,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
 		memcpy(&wc->update_progress, &key,
 		       sizeof(wc->update_progress));
 
-		level = root_item->drop_level;
+		level = btrfs_root_drop_level(root_item);
 		BUG_ON(level == 0);
 		path->lowest_level = level;
 		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@ -5516,8 +5468,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
 		level = btrfs_header_level(root->node);
 		while (1) {
 			btrfs_tree_lock(path->nodes[level]);
-			btrfs_set_lock_blocking_write(path->nodes[level]);
-			path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
+			path->locks[level] = BTRFS_WRITE_LOCK;
 
 			ret = btrfs_lookup_extent_info(trans, fs_info,
 						path->nodes[level]->start,
@@ -5529,7 +5480,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
 			}
 			BUG_ON(wc->refs[level] == 0);
 
-			if (level == root_item->drop_level)
+			if (level == btrfs_root_drop_level(root_item))
 				break;
 
 			btrfs_tree_unlock(path->nodes[level]);
@@ -5574,7 +5525,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
 		}
 		btrfs_cpu_key_to_disk(&root_item->drop_progress,
 				      &wc->drop_progress);
-		root_item->drop_level = wc->drop_level;
+		btrfs_set_root_drop_level(root_item, wc->drop_level);
 
 		BUG_ON(wc->level == 0);
 		if (btrfs_should_end_transaction(trans) ||
@@ -5704,7 +5655,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
 	level = btrfs_header_level(node);
 	path->nodes[level] = node;
 	path->slots[level] = 0;
-	path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
+	path->locks[level] = BTRFS_WRITE_LOCK;
 
 	wc->refs[parent_level] = 1;
 	wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
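
Direct field access (root_item->drop_level) gives way to btrfs_root_drop_level()/btrfs_set_root_drop_level() accessors; btrfs reads and writes on-disk structures through generated getters and setters that handle byte order at the access point. A user-space sketch of the pattern with a hypothetical struct and glibc endian helpers (the real drop_level is a single byte, so this is purely illustrative):

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

struct disk_root_item {
	uint64_t drop_level;	/* stored little-endian on disk */
} __attribute__((packed));

static uint64_t root_drop_level(const struct disk_root_item *ri)
{
	return le64toh(ri->drop_level);
}

static void set_root_drop_level(struct disk_root_item *ri, uint64_t level)
{
	ri->drop_level = htole64(level);
}

int main(void)
{
	struct disk_root_item ri;

	set_root_drop_level(&ri, 2);
	printf("%llu\n", (unsigned long long)root_drop_level(&ri));
	return 0;
}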
[diff for one file not shown because of its large size]
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
@@ -6,6 +6,7 @@
 #include <linux/rbtree.h>
 #include <linux/refcount.h>
 #include <linux/fiemap.h>
+#include <linux/btrfs_tree.h>
 #include "ulist.h"
 
 /*
@@ -71,11 +72,10 @@ typedef blk_status_t (submit_bio_hook_t)(struct inode *inode, struct bio *bio,
 					 int mirror_num,
 					 unsigned long bio_flags);
 
-typedef blk_status_t (extent_submit_bio_start_t)(void *private_data,
-		struct bio *bio, u64 bio_offset);
+typedef blk_status_t (extent_submit_bio_start_t)(struct inode *inode,
+		struct bio *bio, u64 dio_file_offset);
 
-#define INLINE_EXTENT_BUFFER_PAGES 16
-#define MAX_INLINE_EXTENT_BUFFER_SIZE (INLINE_EXTENT_BUFFER_PAGES * PAGE_SIZE)
+#define INLINE_EXTENT_BUFFER_PAGES	(BTRFS_MAX_METADATA_BLOCKSIZE / PAGE_SIZE)
 struct extent_buffer {
 	u64 start;
 	unsigned long len;
@@ -87,31 +87,13 @@ struct extent_buffer {
 	int read_mirror;
 	struct rcu_head rcu_head;
 	pid_t lock_owner;
-
-	int blocking_writers;
-	atomic_t blocking_readers;
-	bool lock_recursed;
 	/* >= 0 if eb belongs to a log tree, -1 otherwise */
-	short log_index;
+	s8 log_index;
 
-	/* protects write locks */
-	rwlock_t lock;
-
-	/* readers use lock_wq while they wait for the write
-	 * lock holders to unlock
-	 */
-	wait_queue_head_t write_lock_wq;
+	struct rw_semaphore lock;
 
-	/* writers use read_lock_wq while they wait for readers
-	 * to unlock
-	 */
-	wait_queue_head_t read_lock_wq;
 	struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
 #ifdef CONFIG_BTRFS_DEBUG
-	int spinning_writers;
-	atomic_t spinning_readers;
-	atomic_t read_locks;
-	int write_locks;
 	struct list_head leak_list;
 #endif
 };
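
The struct diff above is the visible side of the tree-lock rework: the hand-rolled blocking/spinning reader-writer machinery (two wait queues, blocking counters, four debug counters) collapses into a single struct rw_semaphore. A user-space analogue of that simplification, using a pthread rwlock (a sketch only; build with -lpthread):

#include <pthread.h>
#include <stdio.h>

/* Before: counters plus wait queues managed by hand.
 * After: one standard lock object carries all of that state. */
struct extent_buffer_like {
	unsigned long start;
	unsigned long len;
	pthread_rwlock_t lock;	/* stands in for struct rw_semaphore */
};

static void read_block(struct extent_buffer_like *eb)
{
	pthread_rwlock_rdlock(&eb->lock);
	/* ... parse the block ... */
	pthread_rwlock_unlock(&eb->lock);
}

int main(void)
{
	struct extent_buffer_like eb = { 0, 4096, PTHREAD_RWLOCK_INITIALIZER };

	read_block(&eb);
	puts("ok");
	return 0;
}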
@@ -199,7 +181,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
 void set_page_extent_mapped(struct page *page);
 
 struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
-					  u64 start);
+					  u64 start, u64 owner_root, int level);
 struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
 						  u64 start, unsigned long len);
 struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
@@ -215,11 +197,20 @@ void free_extent_buffer_stale(struct extent_buffer *eb);
 int read_extent_buffer_pages(struct extent_buffer *eb, int wait,
 			     int mirror_num);
 void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
+void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info,
+				u64 bytenr, u64 owner_root, u64 gen, int level);
+void btrfs_readahead_node_child(struct extent_buffer *node, int slot);
 
 static inline int num_extent_pages(const struct extent_buffer *eb)
 {
-	return (round_up(eb->start + eb->len, PAGE_SIZE) >> PAGE_SHIFT) -
-	       (eb->start >> PAGE_SHIFT);
+	/*
+	 * For sectorsize == PAGE_SIZE case, since nodesize is always aligned to
+	 * sectorsize, it's just eb->len >> PAGE_SHIFT.
+	 *
+	 * For sectorsize < PAGE_SIZE case, we could have nodesize < PAGE_SIZE,
+	 * thus have to ensure we get at least one page.
+	 */
+	return (eb->len >> PAGE_SHIFT) ?: 1;
 }
 
 static inline int extent_buffer_uptodate(const struct extent_buffer *eb)
@@ -270,8 +261,7 @@ void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
 void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
 void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
 				  struct page *locked_page,
-				  unsigned bits_to_clear,
-				  unsigned long page_ops);
+				  u32 bits_to_clear, unsigned long page_ops);
 struct bio *btrfs_bio_alloc(u64 first_byte);
 struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs);
 struct bio *btrfs_bio_clone(struct bio *bio);
@@ -307,7 +297,7 @@ struct io_failure_record {
 
 
 blk_status_t btrfs_submit_read_repair(struct inode *inode,
-				      struct bio *failed_bio, u64 phy_offset,
+				      struct bio *failed_bio, u32 bio_offset,
 				      struct page *page, unsigned int pgoff,
 				      u64 start, u64 end, int failed_mirror,
 				      submit_bio_hook_t *submit_bio_hook);
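
The new num_extent_pages() is the archetypal subpage-support change: with 64K pages and a 16K nodesize, eb->len >> PAGE_SHIFT evaluates to 0, so the ?: 1 guarantees at least one backing page. Checking both regimes:

#include <stdio.h>

static int num_extent_pages(unsigned long len, unsigned int page_shift)
{
	int pages = len >> page_shift;

	return pages ? pages : 1;	/* subpage: at least one page */
}

int main(void)
{
	printf("%d\n", num_extent_pages(16384, 12)); /* 4K pages: 4 pages */
	printf("%d\n", num_extent_pages(16384, 16)); /* 64K pages: 1 page */
	return 0;
}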
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
@@ -38,27 +38,27 @@
  * Finally new_i_size should only be set in the case of truncate where we're not
  * ready to use i_size_read() as the limiter yet.
  */
-void btrfs_inode_safe_disk_i_size_write(struct inode *inode, u64 new_i_size)
+void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_size)
 {
-	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	u64 start, end, i_size;
 	int ret;
 
-	i_size = new_i_size ?: i_size_read(inode);
+	i_size = new_i_size ?: i_size_read(&inode->vfs_inode);
 	if (btrfs_fs_incompat(fs_info, NO_HOLES)) {
-		BTRFS_I(inode)->disk_i_size = i_size;
+		inode->disk_i_size = i_size;
 		return;
 	}
 
-	spin_lock(&BTRFS_I(inode)->lock);
-	ret = find_contiguous_extent_bit(&BTRFS_I(inode)->file_extent_tree, 0,
-					 &start, &end, EXTENT_DIRTY);
+	spin_lock(&inode->lock);
+	ret = find_contiguous_extent_bit(&inode->file_extent_tree, 0, &start,
+					 &end, EXTENT_DIRTY);
 	if (!ret && start == 0)
 		i_size = min(i_size, end + 1);
 	else
 		i_size = 0;
-	BTRFS_I(inode)->disk_i_size = i_size;
-	spin_unlock(&BTRFS_I(inode)->lock);
+	inode->disk_i_size = i_size;
+	spin_unlock(&inode->lock);
 }
 
 /**
@@ -142,7 +142,6 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
 	file_key.offset = pos;
 	file_key.type = BTRFS_EXTENT_DATA_KEY;
 
-	path->leave_spinning = 1;
 	ret = btrfs_insert_empty_item(trans, root, path, &file_key,
 				      sizeof(*item));
 	if (ret < 0)
@@ -181,7 +180,7 @@ btrfs_lookup_csum(struct btrfs_trans_handle *trans,
 	struct btrfs_csum_item *item;
 	struct extent_buffer *leaf;
 	u64 csum_offset = 0;
-	u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
+	const u32 csum_size = fs_info->csum_size;
 	int csums_in_item;
 
 	file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
@@ -201,7 +200,7 @@ btrfs_lookup_csum(struct btrfs_trans_handle *trans,
 		goto fail;
 
 	csum_offset = (bytenr - found_key.offset) >>
-			fs_info->sb->s_blocksize_bits;
+			fs_info->sectorsize_bits;
 	csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]);
 	csums_in_item /= csum_size;
 
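
btrfs_inode_safe_disk_i_size_write() only lets the on-disk i_size cover a prefix of the file known to be safely on disk: it looks for the contiguous EXTENT_DIRTY run starting at offset 0 and caps i_size at its end, or forces 0 when no such run exists. The capping rule in isolation:

#include <stdint.h>
#include <stdio.h>

/* Given a contiguous completed range [0, end] (found=1 when it starts at 0),
 * the on-disk size may only cover bytes we know are safely on disk. */
static uint64_t safe_disk_i_size(uint64_t i_size, int found, uint64_t end)
{
	if (found)
		return i_size < end + 1 ? i_size : end + 1;
	return 0;
}

int main(void)
{
	/* prints 8192: only the first two 4K blocks have completed */
	printf("%llu\n", (unsigned long long)safe_disk_i_size(10000, 1, 8191));
	return 0;
}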
@@ -239,12 +238,117 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
+/*
+ * Find checksums for logical bytenr range [disk_bytenr, disk_bytenr + len) and
+ * restore the result to @dst.
+ *
+ * Return >0 for the number of sectors we found.
+ * Return 0 for the range [disk_bytenr, disk_bytenr + sectorsize) has no csum
+ * for it. Caller may want to try next sector until one range is hit.
+ * Return <0 for fatal error.
+ */
+static int search_csum_tree(struct btrfs_fs_info *fs_info,
+			    struct btrfs_path *path, u64 disk_bytenr,
+			    u64 len, u8 *dst)
+{
+	struct btrfs_csum_item *item = NULL;
+	struct btrfs_key key;
+	const u32 sectorsize = fs_info->sectorsize;
+	const u32 csum_size = fs_info->csum_size;
+	u32 itemsize;
+	int ret;
+	u64 csum_start;
+	u64 csum_len;
+
+	ASSERT(IS_ALIGNED(disk_bytenr, sectorsize) &&
+	       IS_ALIGNED(len, sectorsize));
+
+	/* Check if the current csum item covers disk_bytenr */
+	if (path->nodes[0]) {
+		item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+				      struct btrfs_csum_item);
+		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+		itemsize = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
+
+		csum_start = key.offset;
+		csum_len = (itemsize / csum_size) * sectorsize;
+
+		if (in_range(disk_bytenr, csum_start, csum_len))
+			goto found;
+	}
+
+	/* Current item doesn't contain the desired range, search again */
+	btrfs_release_path(path);
+	item = btrfs_lookup_csum(NULL, fs_info->csum_root, path, disk_bytenr, 0);
+	if (IS_ERR(item)) {
+		ret = PTR_ERR(item);
+		goto out;
+	}
+	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+	itemsize = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
+
+	csum_start = key.offset;
+	csum_len = (itemsize / csum_size) * sectorsize;
+	ASSERT(in_range(disk_bytenr, csum_start, csum_len));
+
+found:
+	ret = (min(csum_start + csum_len, disk_bytenr + len) -
+	       disk_bytenr) >> fs_info->sectorsize_bits;
+	read_extent_buffer(path->nodes[0], dst, (unsigned long)item,
+			   ret * csum_size);
+out:
+	if (ret == -ENOENT)
+		ret = 0;
+	return ret;
+}
+
+/*
+ * Locate the file_offset of @cur_disk_bytenr of a @bio.
+ *
+ * Bio of btrfs represents read range of
+ * [bi_sector << 9, bi_sector << 9 + bi_size).
+ * Knowing this, we can iterate through each bvec to locate the page belong to
+ * @cur_disk_bytenr and get the file offset.
+ *
+ * @inode is used to determine if the bvec page really belongs to @inode.
+ *
+ * Return 0 if we can't find the file offset
+ * Return >0 if we find the file offset and restore it to @file_offset_ret
+ */
+static int search_file_offset_in_bio(struct bio *bio, struct inode *inode,
+				     u64 disk_bytenr, u64 *file_offset_ret)
+{
+	struct bvec_iter iter;
+	struct bio_vec bvec;
+	u64 cur = bio->bi_iter.bi_sector << SECTOR_SHIFT;
+	int ret = 0;
+
+	bio_for_each_segment(bvec, bio, iter) {
+		struct page *page = bvec.bv_page;
+
+		if (cur > disk_bytenr)
+			break;
+		if (cur + bvec.bv_len <= disk_bytenr) {
+			cur += bvec.bv_len;
+			continue;
+		}
+		ASSERT(in_range(disk_bytenr, cur, bvec.bv_len));
+		if (page->mapping && page->mapping->host &&
+		    page->mapping->host == inode) {
+			ret = 1;
+			*file_offset_ret = page_offset(page) + bvec.bv_offset +
+					   disk_bytenr - cur;
+			break;
+		}
+	}
+	return ret;
+}
+
 /**
- * btrfs_lookup_bio_sums - Look up checksums for a bio.
+ * Lookup the checksum for the read bio in csum tree.
+ *
  * @inode: inode that the bio is for.
  * @bio: bio to look up.
- * @offset: Unless (u64)-1, look up checksums for this offset in the file.
- *          If (u64)-1, use the page offsets from the bio instead.
  * @dst: Buffer of size nblocks * btrfs_super_csum_size() used to return
  *       checksum (nblocks = bio->bi_iter.bi_size / fs_info->sectorsize). If
  *       NULL, the checksum buffer is allocated and returned in
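
search_csum_tree() converts one csum item into a sector count: an item at csum_start holding itemsize / csum_size checksums covers csum_len bytes, and the return value is how many of the requested sectors land inside it. That index math, extracted into a sketch (sector and csum sizes are assumed):

#include <stdint.h>
#include <stdio.h>

#define SECTORSIZE	4096ULL
#define SECTOR_BITS	12
#define CSUM_SIZE	4ULL	/* crc32c */

static uint64_t min_u64(uint64_t a, uint64_t b) { return a < b ? a : b; }

/* Sectors of [disk_bytenr, disk_bytenr+len) covered by one csum item. */
static int sectors_covered(uint64_t csum_start, uint32_t itemsize,
			   uint64_t disk_bytenr, uint64_t len)
{
	uint64_t csum_len = (itemsize / CSUM_SIZE) * SECTORSIZE;

	return (min_u64(csum_start + csum_len, disk_bytenr + len) -
		disk_bytenr) >> SECTOR_BITS;
}

int main(void)
{
	/* item holds 256 csums = 1 MiB of data; request 2 MiB from its start */
	printf("%d\n", sectors_covered(0, 1024, 0, 2 << 20)); /* 256 */
	return 0;
}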
@ -252,31 +356,40 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
|
||||||
*
|
*
|
||||||
* Return: BLK_STS_RESOURCE if allocating memory fails, BLK_STS_OK otherwise.
|
* Return: BLK_STS_RESOURCE if allocating memory fails, BLK_STS_OK otherwise.
|
||||||
*/
|
*/
|
||||||
blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
|
blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst)
|
||||||
u64 offset, u8 *dst)
|
|
||||||
{
|
{
|
||||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||||
struct bio_vec bvec;
|
|
||||||
struct bvec_iter iter;
|
|
||||||
struct btrfs_csum_item *item = NULL;
|
|
||||||
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
|
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
|
||||||
struct btrfs_path *path;
|
struct btrfs_path *path;
|
||||||
const bool page_offsets = (offset == (u64)-1);
|
const u32 sectorsize = fs_info->sectorsize;
|
||||||
|
const u32 csum_size = fs_info->csum_size;
|
||||||
|
u32 orig_len = bio->bi_iter.bi_size;
|
||||||
|
u64 orig_disk_bytenr = bio->bi_iter.bi_sector << SECTOR_SHIFT;
|
||||||
|
u64 cur_disk_bytenr;
|
||||||
u8 *csum;
|
u8 *csum;
|
||||||
u64 item_start_offset = 0;
|
const unsigned int nblocks = orig_len >> fs_info->sectorsize_bits;
|
||||||
u64 item_last_offset = 0;
|
|
||||||
u64 disk_bytenr;
|
|
||||||
u64 page_bytes_left;
|
|
||||||
u32 diff;
|
|
||||||
int nblocks;
|
|
||||||
int count = 0;
|
int count = 0;
|
||||||
u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
|
|
||||||
|
|
||||||
|
if (!fs_info->csum_root || (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
|
||||||
|
return BLK_STS_OK;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This function is only called for read bio.
|
||||||
|
*
|
||||||
|
* This means two things:
|
||||||
|
* - All our csums should only be in csum tree
|
||||||
|
* No ordered extents csums, as ordered extents are only for write
|
||||||
|
* path.
|
||||||
|
* - No need to bother any other info from bvec
|
||||||
|
* Since we're looking up csums, the only important info is the
|
||||||
|
* disk_bytenr and the length, which can be extracted from bi_iter
|
||||||
|
* directly.
|
||||||
|
*/
|
||||||
|
ASSERT(bio_op(bio) == REQ_OP_READ);
|
||||||
 	path = btrfs_alloc_path();
 	if (!path)
 		return BLK_STS_RESOURCE;
 
-	nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
 	if (!dst) {
 		struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio);
 
@@ -295,7 +408,11 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
 		csum = dst;
 	}
 
-	if (bio->bi_iter.bi_size > PAGE_SIZE * 8)
+	/*
+	 * If requested number of sectors is larger than one leaf can contain,
+	 * kick the readahead for csum tree.
+	 */
+	if (nblocks > fs_info->csums_per_leaf)
 		path->reada = READA_FORWARD;
 
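The new threshold reads naturally: only kick csum-tree readahead when the read spans more checksums than a single leaf can hold. A minimal userspace sketch of the arithmetic behind that bound; the nodesize, header sizes, and crc32c csum size below are assumptions for illustration, not values taken from this diff:

#include <stdio.h>

/* Illustrative only: btrfs precomputes fs_info->csums_per_leaf at mount. */
int main(void)
{
	/* Assumed: 16KiB leaf minus ~101B header and ~25B item overhead. */
	unsigned int leaf_item_space = 16384 - 101 - 25;
	unsigned int csum_size = 4;		/* assumed crc32c */
	unsigned int sectorsize = 4096;		/* assumed 4KiB sectors */

	unsigned int csums_per_leaf = leaf_item_space / csum_size;
	unsigned long long data_per_leaf =
		(unsigned long long)csums_per_leaf * sectorsize;

	/* Reads larger than this make csum-tree readahead worthwhile. */
	printf("csums per leaf: %u (covers ~%llu MiB of data)\n",
	       csums_per_leaf, data_per_leaf >> 20);
	return 0;
}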
 	/*
@@ -309,85 +426,62 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
 		path->skip_locking = 1;
 	}
 
-	disk_bytenr = (u64)bio->bi_iter.bi_sector << 9;
+	for (cur_disk_bytenr = orig_disk_bytenr;
+	     cur_disk_bytenr < orig_disk_bytenr + orig_len;
+	     cur_disk_bytenr += (count * sectorsize)) {
+		u64 search_len = orig_disk_bytenr + orig_len - cur_disk_bytenr;
+		unsigned int sector_offset;
+		u8 *csum_dst;
+
-	bio_for_each_segment(bvec, bio, iter) {
-		page_bytes_left = bvec.bv_len;
-		if (count)
-			goto next;
-
-		if (page_offsets)
-			offset = page_offset(bvec.bv_page) + bvec.bv_offset;
-		count = btrfs_find_ordered_sum(BTRFS_I(inode), offset,
-					       disk_bytenr, csum, nblocks);
-		if (count)
-			goto found;
-
-		if (!item || disk_bytenr < item_start_offset ||
-		    disk_bytenr >= item_last_offset) {
-			struct btrfs_key found_key;
-			u32 item_size;
-
-			if (item)
-				btrfs_release_path(path);
-			item = btrfs_lookup_csum(NULL, fs_info->csum_root,
-						 path, disk_bytenr, 0);
-			if (IS_ERR(item)) {
-				count = 1;
-				memset(csum, 0, csum_size);
-				if (BTRFS_I(inode)->root->root_key.objectid ==
-				    BTRFS_DATA_RELOC_TREE_OBJECTID) {
-					set_extent_bits(io_tree, offset,
-						offset + fs_info->sectorsize - 1,
-						EXTENT_NODATASUM);
-				} else {
-					btrfs_info_rl(fs_info,
-						"no csum found for inode %llu start %llu",
-						btrfs_ino(BTRFS_I(inode)), offset);
-				}
-				item = NULL;
-				btrfs_release_path(path);
-				goto found;
-			}
-			btrfs_item_key_to_cpu(path->nodes[0], &found_key,
-					      path->slots[0]);
-
-			item_start_offset = found_key.offset;
-			item_size = btrfs_item_size_nr(path->nodes[0],
-						       path->slots[0]);
-			item_last_offset = item_start_offset +
-				(item_size / csum_size) *
-				fs_info->sectorsize;
-			item = btrfs_item_ptr(path->nodes[0], path->slots[0],
-					      struct btrfs_csum_item);
-		}
 		/*
-		 * this byte range must be able to fit inside
-		 * a single leaf so it will also fit inside a u32
+		 * Although both cur_disk_bytenr and orig_disk_bytenr is u64,
+		 * we're calculating the offset to the bio start.
+		 *
+		 * Bio size is limited to UINT_MAX, thus unsigned int is large
+		 * enough to contain the raw result, not to mention the right
+		 * shifted result.
 		 */
-		diff = disk_bytenr - item_start_offset;
-		diff = diff / fs_info->sectorsize;
-		diff = diff * csum_size;
-		count = min_t(int, nblocks, (item_last_offset - disk_bytenr) >>
-					    inode->i_sb->s_blocksize_bits);
-		read_extent_buffer(path->nodes[0], csum,
-				   ((unsigned long)item) + diff,
-				   csum_size * count);
-found:
-		csum += count * csum_size;
-		nblocks -= count;
-next:
-		while (count > 0) {
-			count--;
-			disk_bytenr += fs_info->sectorsize;
-			offset += fs_info->sectorsize;
-			page_bytes_left -= fs_info->sectorsize;
-			if (!page_bytes_left)
-				break; /* move to next bio */
+		ASSERT(cur_disk_bytenr - orig_disk_bytenr < UINT_MAX);
+		sector_offset = (cur_disk_bytenr - orig_disk_bytenr) >>
+				fs_info->sectorsize_bits;
+		csum_dst = csum + sector_offset * csum_size;
+
+		count = search_csum_tree(fs_info, path, cur_disk_bytenr,
+					 search_len, csum_dst);
+		if (count <= 0) {
+			/*
+			 * Either we hit a critical error or we didn't find
+			 * the csum.
+			 * Either way, we put zero into the csums dst, and skip
+			 * to the next sector.
+			 */
+			memset(csum_dst, 0, csum_size);
+			count = 1;
+
+			/*
+			 * For data reloc inode, we need to mark the range
+			 * NODATASUM so that balance won't report false csum
+			 * error.
+			 */
+			if (BTRFS_I(inode)->root->root_key.objectid ==
+			    BTRFS_DATA_RELOC_TREE_OBJECTID) {
+				u64 file_offset;
+				int ret;
+
+				ret = search_file_offset_in_bio(bio, inode,
+						cur_disk_bytenr, &file_offset);
+				if (ret)
+					set_extent_bits(io_tree, file_offset,
+						file_offset + sectorsize - 1,
+						EXTENT_NODATASUM);
+			} else {
+				btrfs_warn_rl(fs_info,
+			"csum hole found for disk bytenr range [%llu, %llu)",
+				cur_disk_bytenr, cur_disk_bytenr + sectorsize);
+			}
 		}
 	}
 
-	WARN_ON_ONCE(count);
 	btrfs_free_path(path);
 	return BLK_STS_OK;
 }
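Since the rewritten loop above derives everything from disk bytenrs, the per-sector destination math is worth seeing in isolation. A self-contained sketch, with search_csum_tree() replaced by a stub and all geometry assumed:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define SECTORSIZE_BITS	12			/* assumed 4KiB sectors */
#define SECTORSIZE	(1U << SECTORSIZE_BITS)
#define CSUM_SIZE	4			/* assumed crc32c */

/* Stub for search_csum_tree(): pretend we found one csum per call. */
static int search_csum_tree_stub(uint64_t disk_bytenr, uint64_t len, uint8_t *dst)
{
	(void)disk_bytenr; (void)len;
	memset(dst, 0xab, CSUM_SIZE);
	return 1;
}

int main(void)
{
	uint64_t orig_disk_bytenr = 1U << 30;	/* assumed bio start */
	uint32_t orig_len = 8 * SECTORSIZE;	/* assumed bio length */
	uint8_t csum[8 * CSUM_SIZE];
	uint64_t cur;
	int count;

	for (cur = orig_disk_bytenr; cur < orig_disk_bytenr + orig_len;
	     cur += (uint64_t)count * SECTORSIZE) {
		/* Offset of this sector inside the bio, in sectors. */
		unsigned int sector_offset =
			(cur - orig_disk_bytenr) >> SECTORSIZE_BITS;
		uint8_t *csum_dst = csum + sector_offset * CSUM_SIZE;

		count = search_csum_tree_stub(cur,
				orig_disk_bytenr + orig_len - cur, csum_dst);
		assert(count > 0);	/* the kernel zero-fills on a hole */
	}
	printf("looked up %u csums\n", orig_len >> SECTORSIZE_BITS);
	return 0;
}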
@@ -406,7 +500,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
 	int ret;
 	size_t size;
 	u64 csum_end;
-	u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
+	const u32 csum_size = fs_info->csum_size;
 
 	ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
 	       IS_ALIGNED(end + 1, fs_info->sectorsize));
@@ -433,8 +527,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
 		btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
 		if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
 		    key.type == BTRFS_EXTENT_CSUM_KEY) {
-			offset = (start - key.offset) >>
-				 fs_info->sb->s_blocksize_bits;
+			offset = (start - key.offset) >> fs_info->sectorsize_bits;
 			if (offset * csum_size <
 			    btrfs_item_size_nr(leaf, path->slots[0] - 1))
 				path->slots[0]--;
@@ -484,10 +577,9 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
 		sums->bytenr = start;
 		sums->len = (int)size;
 
-		offset = (start - key.offset) >>
-			 fs_info->sb->s_blocksize_bits;
+		offset = (start - key.offset) >> fs_info->sectorsize_bits;
 		offset *= csum_size;
-		size >>= fs_info->sb->s_blocksize_bits;
+		size >>= fs_info->sectorsize_bits;
 
 		read_extent_buffer(path->nodes[0],
 				   sums->sums,
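All of these conversions swap a division through the superblock's s_blocksize_bits indirection for a shift count cached directly in fs_info. Because the sector size is always a power of two, the two forms are equivalent; a one-assert sketch with assumed values:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t sectorsize = 4096;	/* assumed; always a power of two */
	uint32_t sectorsize_bits = 12;	/* ilog2(sectorsize) */
	uint64_t start = 123 * 4096, key_offset = 7 * 4096;

	/* The shift form replaces the old s_blocksize_bits indirection. */
	assert(((start - key_offset) / sectorsize) ==
	       ((start - key_offset) >> sectorsize_bits));
	return 0;
}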
@@ -539,7 +631,6 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
 	int i;
 	u64 offset;
 	unsigned nofs_flag;
-	const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
 
 	nofs_flag = memalloc_nofs_save();
 	sums = kvzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
@@ -557,7 +648,7 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
 	else
 		offset = 0; /* shut up gcc */
 
-	sums->bytenr = (u64)bio->bi_iter.bi_sector << 9;
+	sums->bytenr = bio->bi_iter.bi_sector << 9;
 	index = 0;
 
 	shash->tfm = fs_info->csum_shash;
@@ -596,7 +687,7 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
 			ordered = btrfs_lookup_ordered_extent(inode,
 							      offset);
 			ASSERT(ordered); /* Logic error */
-			sums->bytenr = ((u64)bio->bi_iter.bi_sector << 9)
+			sums->bytenr = (bio->bi_iter.bi_sector << 9)
 				+ total_bytes;
 			index = 0;
 		}
@@ -607,7 +698,7 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
 					      fs_info->sectorsize,
 					      sums->sums + index);
 		kunmap_atomic(data);
-		index += csum_size;
+		index += fs_info->csum_size;
 		offset += fs_info->sectorsize;
 		this_sum_bytes += fs_info->sectorsize;
 		total_bytes += fs_info->sectorsize;
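The recurring "u16 csum_size = btrfs_super_csum_size(...)" to "const u32 csum_size = fs_info->csum_size" change replaces a superblock lookup in hot paths with a value cached once at mount. For reference, the checksum sizes involved (the algorithm names and sizes match btrfs's supported checksums; the table form itself is illustrative):

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative table only; the kernel derives fs_info->csum_size once from
 * the superblock's csum_type instead of recomputing it in every loop.
 */
static const struct { const char *name; uint16_t size; } csums[] = {
	{ "crc32c",   4 },
	{ "xxhash64", 8 },
	{ "sha256",  32 },
	{ "blake2b", 32 },
};

int main(void)
{
	for (unsigned int i = 0; i < sizeof(csums) / sizeof(csums[0]); i++)
		printf("%-8s -> %2u bytes per sector\n",
		       csums[i].name, csums[i].size);
	return 0;
}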
@@ -637,14 +728,14 @@ static noinline void truncate_one_csum(struct btrfs_fs_info *fs_info,
 			      u64 bytenr, u64 len)
 {
 	struct extent_buffer *leaf;
-	u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
+	const u32 csum_size = fs_info->csum_size;
 	u64 csum_end;
 	u64 end_byte = bytenr + len;
-	u32 blocksize_bits = fs_info->sb->s_blocksize_bits;
+	u32 blocksize_bits = fs_info->sectorsize_bits;
 
 	leaf = path->nodes[0];
 	csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size;
-	csum_end <<= fs_info->sb->s_blocksize_bits;
+	csum_end <<= blocksize_bits;
 	csum_end += key->offset;
 
 	if (key->offset < bytenr && csum_end <= end_byte) {
@@ -691,8 +782,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
 	u64 csum_end;
 	struct extent_buffer *leaf;
 	int ret;
-	u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
-	int blocksize_bits = fs_info->sb->s_blocksize_bits;
+	const u32 csum_size = fs_info->csum_size;
+	u32 blocksize_bits = fs_info->sectorsize_bits;
 
 	ASSERT(root == fs_info->csum_root ||
 	       root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
@@ -706,7 +797,6 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
 	key.offset = end_byte - 1;
 	key.type = BTRFS_EXTENT_CSUM_KEY;
 
-	path->leave_spinning = 1;
 	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
 	if (ret > 0) {
 		if (path->slots[0] == 0)
@@ -846,7 +936,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
 	int index = 0;
 	int found_next;
 	int ret;
-	u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
+	const u32 csum_size = fs_info->csum_size;
 
 	path = btrfs_alloc_path();
 	if (!path)
@@ -921,7 +1011,7 @@ again:
 	if (btrfs_leaf_free_space(leaf) >= csum_size) {
 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 		csum_offset = (bytenr - found_key.offset) >>
-			      fs_info->sb->s_blocksize_bits;
+			      fs_info->sectorsize_bits;
 		goto extend_csum;
 	}
 
@@ -939,8 +1029,7 @@ again:
 
 	leaf = path->nodes[0];
 	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-	csum_offset = (bytenr - found_key.offset) >>
-		      fs_info->sb->s_blocksize_bits;
+	csum_offset = (bytenr - found_key.offset) >> fs_info->sectorsize_bits;
 
 	if (found_key.type != BTRFS_EXTENT_CSUM_KEY ||
 	    found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
@@ -956,7 +1045,7 @@ extend_csum:
 		u32 diff;
 
 		tmp = sums->len - total_bytes;
-		tmp >>= fs_info->sb->s_blocksize_bits;
+		tmp >>= fs_info->sectorsize_bits;
 		WARN_ON(tmp < 1);
 
 		extend_nr = max_t(int, 1, (int)tmp);
@@ -981,9 +1070,9 @@ insert:
 		u64 tmp;
 
 		tmp = sums->len - total_bytes;
-		tmp >>= fs_info->sb->s_blocksize_bits;
+		tmp >>= fs_info->sectorsize_bits;
 		tmp = min(tmp, (next_offset - file_key.offset) >>
-			       fs_info->sb->s_blocksize_bits);
+			       fs_info->sectorsize_bits);
 
 		tmp = max_t(u64, 1, tmp);
 		tmp = min_t(u64, tmp, MAX_CSUM_ITEMS(fs_info, csum_size));
@@ -991,10 +1080,8 @@ insert:
 	} else {
 		ins_size = csum_size;
 	}
-	path->leave_spinning = 1;
 	ret = btrfs_insert_empty_item(trans, root, path, &file_key,
 				      ins_size);
-	path->leave_spinning = 0;
 	if (ret < 0)
 		goto out;
 	if (WARN_ON(ret != 0))
@@ -1007,8 +1094,7 @@ csum:
 	item = (struct btrfs_csum_item *)((unsigned char *)item +
 					  csum_offset * csum_size);
 found:
-	ins_size = (u32)(sums->len - total_bytes) >>
-		   fs_info->sb->s_blocksize_bits;
+	ins_size = (u32)(sums->len - total_bytes) >> fs_info->sectorsize_bits;
 	ins_size *= csum_size;
 	ins_size = min_t(u32, (unsigned long)item_end - (unsigned long)item,
 			 ins_size);
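The "found:" arithmetic above converts the bytes still to be checksummed into a byte count of csum item space. A worked userspace sketch with assumed sizes:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t sectorsize_bits = 12;		/* assumed 4KiB sectors */
	uint32_t csum_size = 4;			/* assumed crc32c */
	uint32_t sums_len = 1 << 20;		/* assumed 1MiB ordered extent */
	uint32_t total_bytes = 256 << 10;	/* assumed bytes already written */

	/* Mirrors: ins_size = (sums->len - total_bytes) >> sectorsize_bits */
	uint32_t ins_size = (sums_len - total_bytes) >> sectorsize_bits;
	ins_size *= csum_size;	/* sectors -> bytes of csum item payload */

	printf("%u csum bytes still to insert\n", ins_size);	/* prints 768 */
	return 0;
}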
fs/btrfs/file.c (737 lines changed; diff for this file not shown because of its large size)
fs/btrfs/free-space-cache.c

@@ -16,7 +16,6 @@
 #include "transaction.h"
 #include "disk-io.h"
 #include "extent_io.h"
-#include "inode-map.h"
 #include "volumes.h"
 #include "space-info.h"
 #include "delalloc-space.h"
@@ -33,16 +32,18 @@ struct btrfs_trim_range {
 	struct list_head list;
 };
 
-static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl,
-				struct btrfs_free_space *bitmap_info);
 static int link_free_space(struct btrfs_free_space_ctl *ctl,
 			   struct btrfs_free_space *info);
 static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
 			      struct btrfs_free_space *info);
-static int btrfs_wait_cache_io_root(struct btrfs_root *root,
-				    struct btrfs_trans_handle *trans,
-				    struct btrfs_io_ctl *io_ctl,
-				    struct btrfs_path *path);
+static int search_bitmap(struct btrfs_free_space_ctl *ctl,
+			 struct btrfs_free_space *bitmap_info, u64 *offset,
+			 u64 *bytes, bool for_alloc);
+static void free_bitmap(struct btrfs_free_space_ctl *ctl,
+			struct btrfs_free_space *bitmap_info);
+static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
+			      struct btrfs_free_space *info, u64 offset,
+			      u64 bytes);
 
 static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
 					       struct btrfs_path *path,
@@ -141,17 +142,15 @@ static int __create_free_space_inode(struct btrfs_root *root,
 	struct btrfs_free_space_header *header;
 	struct btrfs_inode_item *inode_item;
 	struct extent_buffer *leaf;
-	u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
+	/* We inline CRCs for the free disk space cache */
+	const u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC |
+			  BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
 	int ret;
 
 	ret = btrfs_insert_empty_inode(trans, root, path, ino);
 	if (ret)
 		return ret;
 
-	/* We inline crc's for the free disk space cache */
-	if (ino != BTRFS_FREE_INO_OBJECTID)
-		flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
-
 	leaf = path->nodes[0];
 	inode_item = btrfs_item_ptr(leaf, path->slots[0],
 				    struct btrfs_inode_item);
@@ -207,6 +206,65 @@ int create_free_space_inode(struct btrfs_trans_handle *trans,
 					 ino, block_group->start);
 }
 
+/*
+ * inode is an optional sink: if it is NULL, btrfs_remove_free_space_inode
+ * handles lookup, otherwise it takes ownership and iputs the inode.
+ * Don't reuse an inode pointer after passing it into this function.
+ */
+int btrfs_remove_free_space_inode(struct btrfs_trans_handle *trans,
+				  struct inode *inode,
+				  struct btrfs_block_group *block_group)
+{
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	int ret = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	if (!inode)
+		inode = lookup_free_space_inode(block_group, path);
+	if (IS_ERR(inode)) {
+		if (PTR_ERR(inode) != -ENOENT)
+			ret = PTR_ERR(inode);
+		goto out;
+	}
+	ret = btrfs_orphan_add(trans, BTRFS_I(inode));
+	if (ret) {
+		btrfs_add_delayed_iput(inode);
+		goto out;
+	}
+	clear_nlink(inode);
+	/* One for the block groups ref */
+	spin_lock(&block_group->lock);
+	if (block_group->iref) {
+		block_group->iref = 0;
+		block_group->inode = NULL;
+		spin_unlock(&block_group->lock);
+		iput(inode);
+	} else {
+		spin_unlock(&block_group->lock);
+	}
+	/* One for the lookup ref */
+	btrfs_add_delayed_iput(inode);
+
+	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+	key.type = 0;
+	key.offset = block_group->start;
+	ret = btrfs_search_slot(trans, trans->fs_info->tree_root, &key, path,
+				-1, 1);
+	if (ret) {
+		if (ret > 0)
+			ret = 0;
+		goto out;
+	}
+	ret = btrfs_del_item(trans, trans->fs_info->tree_root, path);
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
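A short usage sketch for the new helper, not complete kernel code; the NULL-inode form is exactly how cleanup_free_space_cache_v1() further down this diff calls it:

/* With a NULL inode the function performs the lookup itself: */
ret = btrfs_remove_free_space_inode(trans, NULL, block_group);

/* With a non-NULL inode the caller hands over its reference: */
ret = btrfs_remove_free_space_inode(trans, inode, block_group);
/* 'inode' must not be used (or iput) by the caller after this point. */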
 int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
 				       struct btrfs_block_rsv *rsv)
 {
@@ -267,12 +325,12 @@ int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
 	 * We skip the throttling logic for free space cache inodes, so we don't
 	 * need to check for -EAGAIN.
 	 */
-	ret = btrfs_truncate_inode_items(trans, root, inode,
+	ret = btrfs_truncate_inode_items(trans, root, BTRFS_I(inode),
 					 0, BTRFS_EXTENT_DATA_KEY);
 	if (ret)
 		goto fail;
 
-	ret = btrfs_update_inode(trans, root, inode);
+	ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
 
 fail:
 	if (locked)
@@ -304,16 +362,11 @@ static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
 			int write)
 {
 	int num_pages;
-	int check_crcs = 0;
 
 	num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
 
-	if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FREE_INO_OBJECTID)
-		check_crcs = 1;
-
 	/* Make sure we can fit our crcs and generation into the first page */
-	if (write && check_crcs &&
-	    (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE)
+	if (write && (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE)
 		return -ENOSPC;
 
 	memset(io_ctl, 0, sizeof(struct btrfs_io_ctl));
@@ -324,7 +377,6 @@ static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
 
 	io_ctl->num_pages = num_pages;
 	io_ctl->fs_info = btrfs_sb(inode->i_sb);
-	io_ctl->check_crcs = check_crcs;
 	io_ctl->inode = inode;
 
 	return 0;
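The simplified -ENOSPC check bounds how many cache pages the format supports, since one u32 crc per page plus the u64 generation must fit in page 0. A standalone sketch of that bound, assuming 4KiB pages:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t page_size = 4096;	/* assumed PAGE_SIZE */
	/* One u32 crc per page plus a u64 generation live in page 0. */
	uint32_t max_pages = (page_size - sizeof(uint64_t)) / sizeof(uint32_t);

	printf("max cache pages: %u (~%u MiB cache file)\n",
	       max_pages, max_pages * page_size >> 20);	/* 1022, ~3 MiB */
	return 0;
}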
@@ -419,13 +471,8 @@ static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
 	 * Skip the csum areas. If we don't check crcs then we just have a
 	 * 64bit chunk at the front of the first page.
 	 */
-	if (io_ctl->check_crcs) {
-		io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
-		io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
-	} else {
-		io_ctl->cur += sizeof(u64);
-		io_ctl->size -= sizeof(u64) * 2;
-	}
+	io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
+	io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
 
 	put_unaligned_le64(generation, io_ctl->cur);
 	io_ctl->cur += sizeof(u64);
@@ -439,14 +486,8 @@ static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
 	 * Skip the crc area. If we don't check crcs then we just have a 64bit
 	 * chunk at the front of the first page.
 	 */
-	if (io_ctl->check_crcs) {
-		io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
-		io_ctl->size -= sizeof(u64) +
-			(sizeof(u32) * io_ctl->num_pages);
-	} else {
-		io_ctl->cur += sizeof(u64);
-		io_ctl->size -= sizeof(u64) * 2;
-	}
+	io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
+	io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
 
 	cache_gen = get_unaligned_le64(io_ctl->cur);
 	if (cache_gen != generation) {
@@ -466,11 +507,6 @@ static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index)
 	u32 crc = ~(u32)0;
 	unsigned offset = 0;
 
-	if (!io_ctl->check_crcs) {
-		io_ctl_unmap_page(io_ctl);
-		return;
-	}
-
 	if (index == 0)
 		offset = sizeof(u32) * io_ctl->num_pages;
 
@@ -488,11 +524,6 @@ static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index)
 	u32 crc = ~(u32)0;
 	unsigned offset = 0;
 
-	if (!io_ctl->check_crcs) {
-		io_ctl_map_page(io_ctl, 0);
-		return 0;
-	}
-
 	if (index == 0)
 		offset = sizeof(u32) * io_ctl->num_pages;
 
@@ -625,42 +656,42 @@ static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl,
 	return 0;
 }
 
-/*
- * Since we attach pinned extents after the fact we can have contiguous sections
- * of free space that are split up in entries. This poses a problem with the
- * tree logging stuff since it could have allocated across what appears to be 2
- * entries since we would have merged the entries when adding the pinned extents
- * back to the free space cache. So run through the space cache that we just
- * loaded and merge contiguous entries. This will make the log replay stuff not
- * blow up and it will make for nicer allocator behavior.
- */
-static void merge_space_tree(struct btrfs_free_space_ctl *ctl)
+static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
 {
-	struct btrfs_free_space *e, *prev = NULL;
-	struct rb_node *n;
-
-again:
-	spin_lock(&ctl->tree_lock);
-	for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
-		e = rb_entry(n, struct btrfs_free_space, offset_index);
-		if (!prev)
-			goto next;
-		if (e->bitmap || prev->bitmap)
-			goto next;
-		if (prev->offset + prev->bytes == e->offset) {
-			unlink_free_space(ctl, prev);
-			unlink_free_space(ctl, e);
-			prev->bytes += e->bytes;
-			kmem_cache_free(btrfs_free_space_cachep, e);
-			link_free_space(ctl, prev);
-			prev = NULL;
-			spin_unlock(&ctl->tree_lock);
-			goto again;
-		}
-next:
-		prev = e;
-	}
-	spin_unlock(&ctl->tree_lock);
+	struct btrfs_block_group *block_group = ctl->private;
+	u64 max_bytes;
+	u64 bitmap_bytes;
+	u64 extent_bytes;
+	u64 size = block_group->length;
+	u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
+	u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
+
+	max_bitmaps = max_t(u64, max_bitmaps, 1);
+
+	ASSERT(ctl->total_bitmaps <= max_bitmaps);
+
+	/*
+	 * We are trying to keep the total amount of memory used per 1GiB of
+	 * space to be MAX_CACHE_BYTES_PER_GIG. However, with a reclamation
+	 * mechanism of pulling extents >= FORCE_EXTENT_THRESHOLD out of
+	 * bitmaps, we may end up using more memory than this.
+	 */
+	if (size < SZ_1G)
+		max_bytes = MAX_CACHE_BYTES_PER_GIG;
+	else
+		max_bytes = MAX_CACHE_BYTES_PER_GIG * div_u64(size, SZ_1G);
+
+	bitmap_bytes = ctl->total_bitmaps * ctl->unit;
+
+	/*
+	 * we want the extent entry threshold to always be at most 1/2 the max
+	 * bytes we can have, or whatever is less than that.
+	 */
+	extent_bytes = max_bytes - bitmap_bytes;
+	extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1);
+
+	ctl->extents_thresh =
+		div_u64(extent_bytes, sizeof(struct btrfs_free_space));
 }
 
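recalculate_thresholds() now lives here and caps extent entries at half the memory budget. A worked example of the formula as a standalone program; MAX_CACHE_BYTES_PER_GIG and the entry size below are assumptions, not values taken from this diff:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* All constants below are assumptions for illustration. */
	uint64_t max_cache_bytes_per_gig = 64 * 1024;
	uint64_t entry_size = 72;	/* assumed sizeof(struct btrfs_free_space) */
	uint64_t size = 2ULL << 30;	/* 2GiB block group */
	uint64_t unit = 4096;		/* ctl->unit == sectorsize */
	uint64_t total_bitmaps = 3;

	uint64_t max_bytes = max_cache_bytes_per_gig * (size >> 30);
	uint64_t bitmap_bytes = total_bitmaps * unit;
	uint64_t extent_bytes = max_bytes - bitmap_bytes;

	if (extent_bytes > max_bytes / 2)	/* at most half the budget */
		extent_bytes = max_bytes / 2;

	printf("extents_thresh = %llu entries\n",
	       (unsigned long long)(extent_bytes / entry_size));
	return 0;
}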
 static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
@@ -753,16 +784,6 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
 			goto free_cache;
 		}
 
-		/*
-		 * Sync discard ensures that the free space cache is always
-		 * trimmed. So when reading this in, the state should reflect
-		 * that. We also do this for async as a stop gap for lack of
-		 * persistence.
-		 */
-		if (btrfs_test_opt(fs_info, DISCARD_SYNC) ||
-		    btrfs_test_opt(fs_info, DISCARD_ASYNC))
-			e->trim_state = BTRFS_TRIM_STATE_TRIMMED;
-
 		if (!e->bytes) {
 			kmem_cache_free(btrfs_free_space_cachep, e);
 			goto free_cache;
@@ -791,7 +812,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
 			spin_lock(&ctl->tree_lock);
 			ret = link_free_space(ctl, e);
 			ctl->total_bitmaps++;
-			ctl->op->recalc_thresholds(ctl);
+			recalculate_thresholds(ctl);
 			spin_unlock(&ctl->tree_lock);
 			if (ret) {
 				btrfs_err(fs_info,
@@ -816,19 +837,11 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
 		ret = io_ctl_read_bitmap(&io_ctl, e);
 		if (ret)
 			goto free_cache;
-		e->bitmap_extents = count_bitmap_extents(ctl, e);
-		if (!btrfs_free_space_trimmed(e)) {
-			ctl->discardable_extents[BTRFS_STAT_CURR] +=
-				e->bitmap_extents;
-			ctl->discardable_bytes[BTRFS_STAT_CURR] += e->bytes;
-		}
 	}
 
 	io_ctl_drop_pages(&io_ctl);
-	merge_space_tree(ctl);
 	ret = 1;
 out:
-	btrfs_discard_update_discardable(ctl->private, ctl);
 	io_ctl_free(&io_ctl);
 	return ret;
 free_cache:
@@ -837,16 +850,59 @@ free_cache:
 	goto out;
 }
 
+static int copy_free_space_cache(struct btrfs_block_group *block_group,
+				 struct btrfs_free_space_ctl *ctl)
+{
+	struct btrfs_free_space *info;
+	struct rb_node *n;
+	int ret = 0;
+
+	while (!ret && (n = rb_first(&ctl->free_space_offset)) != NULL) {
+		info = rb_entry(n, struct btrfs_free_space, offset_index);
+		if (!info->bitmap) {
+			unlink_free_space(ctl, info);
+			ret = btrfs_add_free_space(block_group, info->offset,
+						   info->bytes);
+			kmem_cache_free(btrfs_free_space_cachep, info);
+		} else {
+			u64 offset = info->offset;
+			u64 bytes = ctl->unit;
+
+			while (search_bitmap(ctl, info, &offset, &bytes,
+					     false) == 0) {
+				ret = btrfs_add_free_space(block_group, offset,
+							   bytes);
+				if (ret)
+					break;
+				bitmap_clear_bits(ctl, info, offset, bytes);
+				offset = info->offset;
+				bytes = ctl->unit;
+			}
+			free_bitmap(ctl, info);
+		}
+		cond_resched();
+	}
+	return ret;
+}
+
 int load_free_space_cache(struct btrfs_block_group *block_group)
 {
 	struct btrfs_fs_info *fs_info = block_group->fs_info;
 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
+	struct btrfs_free_space_ctl tmp_ctl = {};
 	struct inode *inode;
 	struct btrfs_path *path;
 	int ret = 0;
 	bool matched;
 	u64 used = block_group->used;
 
+	/*
+	 * Because we could potentially discard our loaded free space, we want
+	 * to load everything into a temporary structure first, and then if it's
+	 * valid copy it all into the actual free space ctl.
+	 */
+	btrfs_init_free_space_ctl(block_group, &tmp_ctl);
+
 	/*
 	 * If this block group has been marked to be cleared for one reason or
 	 * another then we can't trust the on disk cache, so just return.
@@ -898,19 +954,25 @@ int load_free_space_cache(struct btrfs_block_group *block_group)
 	}
 	spin_unlock(&block_group->lock);
 
-	ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
+	ret = __load_free_space_cache(fs_info->tree_root, inode, &tmp_ctl,
 				      path, block_group->start);
 	btrfs_free_path(path);
 	if (ret <= 0)
 		goto out;
 
-	spin_lock(&ctl->tree_lock);
-	matched = (ctl->free_space == (block_group->length - used -
-				       block_group->bytes_super));
-	spin_unlock(&ctl->tree_lock);
+	matched = (tmp_ctl.free_space == (block_group->length - used -
+					  block_group->bytes_super));
 
-	if (!matched) {
-		__btrfs_remove_free_space_cache(ctl);
+	if (matched) {
+		ret = copy_free_space_cache(block_group, &tmp_ctl);
+		/*
+		 * ret == 1 means we successfully loaded the free space cache,
+		 * so we need to re-set it here.
+		 */
+		if (ret == 0)
+			ret = 1;
+	} else {
+		__btrfs_remove_free_space_cache(&tmp_ctl);
 		btrfs_warn(fs_info,
 			   "block group %llu has wrong amount of free space",
 			   block_group->start);
@@ -929,6 +991,9 @@ out:
 			   block_group->start);
 	}
 
+	spin_lock(&ctl->tree_lock);
+	btrfs_discard_update_discardable(block_group);
+	spin_unlock(&ctl->tree_lock);
 	iput(inode);
 	return ret;
 }
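Condensed, this is the new load flow: fill a throwaway ctl, sanity-check the totals, then either promote its entries into the block group or drop them. A sketch under those assumptions (not complete kernel code):

struct btrfs_free_space_ctl tmp_ctl = {};

btrfs_init_free_space_ctl(block_group, &tmp_ctl);
ret = __load_free_space_cache(fs_info->tree_root, inode, &tmp_ctl,
			      path, block_group->start);
if (ret > 0 && tmp_ctl.free_space ==
	       block_group->length - used - block_group->bytes_super)
	ret = copy_free_space_cache(block_group, &tmp_ctl);	/* promote */
else
	__btrfs_remove_free_space_cache(&tmp_ctl);		/* discard */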
@@ -1191,7 +1256,7 @@ out:
 			  "failed to write free space cache for block group %llu error %d",
 			  block_group->start, ret);
 	}
-	btrfs_update_inode(trans, root, inode);
+	btrfs_update_inode(trans, root, BTRFS_I(inode));
 
 	if (block_group) {
 		/* the dirty list is protected by the dirty_bgs_lock */
@@ -1220,14 +1285,6 @@ out:
 
 }
 
-static int btrfs_wait_cache_io_root(struct btrfs_root *root,
-				    struct btrfs_trans_handle *trans,
-				    struct btrfs_io_ctl *io_ctl,
-				    struct btrfs_path *path)
-{
-	return __btrfs_wait_cache_io(root, trans, NULL, io_ctl, path, 0);
-}
-
 int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
 			struct btrfs_block_group *block_group,
 			struct btrfs_path *path)
@@ -1332,7 +1389,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
 	/* Everything is written out, now we dirty the pages in the file. */
 	ret = btrfs_dirty_pages(BTRFS_I(inode), io_ctl->pages,
 				io_ctl->num_pages, 0, i_size_read(inode),
-				&cached_state);
+				&cached_state, false);
 	if (ret)
 		goto out_nospc;
 
@@ -1381,7 +1438,7 @@ out:
 		invalidate_inode_pages2(inode->i_mapping);
 		BTRFS_I(inode)->generation = 0;
 	}
-	btrfs_update_inode(trans, root, inode);
+	btrfs_update_inode(trans, root, BTRFS_I(inode));
 	if (must_iput)
 		iput(inode);
 	return ret;
@@ -1672,44 +1729,6 @@ static int link_free_space(struct btrfs_free_space_ctl *ctl,
 	return ret;
 }
 
-static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
-{
-	struct btrfs_block_group *block_group = ctl->private;
-	u64 max_bytes;
-	u64 bitmap_bytes;
-	u64 extent_bytes;
-	u64 size = block_group->length;
-	u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
-	u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
-
-	max_bitmaps = max_t(u64, max_bitmaps, 1);
-
-	ASSERT(ctl->total_bitmaps <= max_bitmaps);
-
-	/*
-	 * We are trying to keep the total amount of memory used per 1GiB of
-	 * space to be MAX_CACHE_BYTES_PER_GIG. However, with a reclamation
-	 * mechanism of pulling extents >= FORCE_EXTENT_THRESHOLD out of
-	 * bitmaps, we may end up using more memory than this.
-	 */
-	if (size < SZ_1G)
-		max_bytes = MAX_CACHE_BYTES_PER_GIG;
-	else
-		max_bytes = MAX_CACHE_BYTES_PER_GIG * div_u64(size, SZ_1G);
-
-	bitmap_bytes = ctl->total_bitmaps * ctl->unit;
-
-	/*
-	 * we want the extent entry threshold to always be at most 1/2 the max
-	 * bytes we can have, or whatever is less than that.
-	 */
-	extent_bytes = max_bytes - bitmap_bytes;
-	extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1);
-
-	ctl->extents_thresh =
-		div_u64(extent_bytes, sizeof(struct btrfs_free_space));
-}
-
 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
 				       struct btrfs_free_space *info,
 				       u64 offset, u64 bytes)
@@ -1912,29 +1931,6 @@ out:
 	return NULL;
 }
 
-static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl,
-				struct btrfs_free_space *bitmap_info)
-{
-	struct btrfs_block_group *block_group = ctl->private;
-	u64 bytes = bitmap_info->bytes;
-	unsigned int rs, re;
-	int count = 0;
-
-	if (!block_group || !bytes)
-		return count;
-
-	bitmap_for_each_set_region(bitmap_info->bitmap, rs, re, 0,
-				   BITS_PER_BITMAP) {
-		bytes -= (rs - re) * ctl->unit;
-		count++;
-
-		if (!bytes)
-			break;
-	}
-
-	return count;
-}
-
 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
 			   struct btrfs_free_space *info, u64 offset)
 {
@@ -1944,8 +1940,7 @@ static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
 	INIT_LIST_HEAD(&info->list);
 	link_free_space(ctl, info);
 	ctl->total_bitmaps++;
-	ctl->op->recalc_thresholds(ctl);
+	recalculate_thresholds(ctl);
 }
 
 static void free_bitmap(struct btrfs_free_space_ctl *ctl,
@@ -1967,7 +1962,7 @@ static void free_bitmap(struct btrfs_free_space_ctl *ctl,
 	kmem_cache_free(btrfs_free_space_bitmap_cachep, bitmap_info->bitmap);
 	kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
 	ctl->total_bitmaps--;
-	ctl->op->recalc_thresholds(ctl);
+	recalculate_thresholds(ctl);
 }
 
 static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
@@ -2134,7 +2129,6 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
 }
 
 static const struct btrfs_free_space_op free_space_op = {
-	.recalc_thresholds	= recalculate_thresholds,
 	.use_bitmap		= use_bitmap,
 };
 
@@ -2508,7 +2502,7 @@ link:
 	if (ret)
 		kmem_cache_free(btrfs_free_space_cachep, info);
 out:
-	btrfs_discard_update_discardable(block_group, ctl);
+	btrfs_discard_update_discardable(block_group);
 	spin_unlock(&ctl->tree_lock);
 
 	if (ret) {
@@ -2643,7 +2637,7 @@ again:
 		goto again;
 	}
 out_lock:
-	btrfs_discard_update_discardable(block_group, ctl);
+	btrfs_discard_update_discardable(block_group);
 	spin_unlock(&ctl->tree_lock);
 out:
 	return ret;
@@ -2674,10 +2668,10 @@ void btrfs_dump_free_space(struct btrfs_block_group *block_group,
 		   "%d blocks of free space at or bigger than bytes is", count);
 }
 
-void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group)
+void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group,
+			       struct btrfs_free_space_ctl *ctl)
 {
 	struct btrfs_fs_info *fs_info = block_group->fs_info;
-	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
 
 	spin_lock_init(&ctl->tree_lock);
 	ctl->unit = fs_info->sectorsize;
@@ -2779,7 +2773,7 @@ void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
 	spin_lock(&ctl->tree_lock);
 	__btrfs_remove_free_space_cache_locked(ctl);
 	if (ctl->private)
-		btrfs_discard_update_discardable(ctl->private, ctl);
+		btrfs_discard_update_discardable(ctl->private);
 	spin_unlock(&ctl->tree_lock);
 }
 
@@ -2801,7 +2795,7 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group)
 		cond_resched_lock(&ctl->tree_lock);
 	}
 	__btrfs_remove_free_space_cache_locked(ctl);
-	btrfs_discard_update_discardable(block_group, ctl);
+	btrfs_discard_update_discardable(block_group);
 	spin_unlock(&ctl->tree_lock);
 
 }
@@ -2885,7 +2879,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
 		link_free_space(ctl, entry);
 	}
 out:
-	btrfs_discard_update_discardable(block_group, ctl);
+	btrfs_discard_update_discardable(block_group);
 	spin_unlock(&ctl->tree_lock);
 
 	if (align_gap_len)
@@ -3054,7 +3048,7 @@ out:
 			kmem_cache_free(btrfs_free_space_bitmap_cachep,
 					entry->bitmap);
 			ctl->total_bitmaps--;
-			ctl->op->recalc_thresholds(ctl);
+			recalculate_thresholds(ctl);
 		} else if (!btrfs_free_space_trimmed(entry)) {
 			ctl->discardable_extents[BTRFS_STAT_CURR]--;
 		}
@@ -3828,166 +3822,62 @@ int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
 	return ret;
 }
 
-/*
- * Find the left-most item in the cache tree, and then return the
- * smallest inode number in the item.
- *
- * Note: the returned inode number may not be the smallest one in
- * the tree, if the left-most item is a bitmap.
- */
-u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
+bool btrfs_free_space_cache_v1_active(struct btrfs_fs_info *fs_info)
 {
-	struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
-	struct btrfs_free_space *entry = NULL;
-	u64 ino = 0;
-
-	spin_lock(&ctl->tree_lock);
-
-	if (RB_EMPTY_ROOT(&ctl->free_space_offset))
-		goto out;
-
-	entry = rb_entry(rb_first(&ctl->free_space_offset),
-			 struct btrfs_free_space, offset_index);
-
-	if (!entry->bitmap) {
-		ino = entry->offset;
-
-		unlink_free_space(ctl, entry);
-		entry->offset++;
-		entry->bytes--;
-		if (!entry->bytes)
-			kmem_cache_free(btrfs_free_space_cachep, entry);
-		else
-			link_free_space(ctl, entry);
-	} else {
-		u64 offset = 0;
-		u64 count = 1;
-		int ret;
-
-		ret = search_bitmap(ctl, entry, &offset, &count, true);
-		/* Logic error; Should be empty if it can't find anything */
-		ASSERT(!ret);
-
-		ino = offset;
-		bitmap_clear_bits(ctl, entry, offset, 1);
-		if (entry->bytes == 0)
-			free_bitmap(ctl, entry);
+	return btrfs_super_cache_generation(fs_info->super_copy);
+}
+
+static int cleanup_free_space_cache_v1(struct btrfs_fs_info *fs_info,
+				       struct btrfs_trans_handle *trans)
+{
+	struct btrfs_block_group *block_group;
+	struct rb_node *node;
+	int ret;
+
+	btrfs_info(fs_info, "cleaning free space cache v1");
+
+	node = rb_first(&fs_info->block_group_cache_tree);
+	while (node) {
+		block_group = rb_entry(node, struct btrfs_block_group, cache_node);
+		ret = btrfs_remove_free_space_inode(trans, NULL, block_group);
+		if (ret)
+			goto out;
+		node = rb_next(node);
 	}
 out:
-	spin_unlock(&ctl->tree_lock);
-
-	return ino;
-}
-
-struct inode *lookup_free_ino_inode(struct btrfs_root *root,
-				    struct btrfs_path *path)
-{
-	struct inode *inode = NULL;
-
-	spin_lock(&root->ino_cache_lock);
-	if (root->ino_cache_inode)
-		inode = igrab(root->ino_cache_inode);
-	spin_unlock(&root->ino_cache_lock);
-	if (inode)
-		return inode;
-
-	inode = __lookup_free_space_inode(root, path, 0);
-	if (IS_ERR(inode))
-		return inode;
-
-	spin_lock(&root->ino_cache_lock);
-	if (!btrfs_fs_closing(root->fs_info))
-		root->ino_cache_inode = igrab(inode);
-	spin_unlock(&root->ino_cache_lock);
-
-	return inode;
-}
-
-int create_free_ino_inode(struct btrfs_root *root,
-			  struct btrfs_trans_handle *trans,
-			  struct btrfs_path *path)
-{
-	return __create_free_space_inode(root, trans, path,
-					 BTRFS_FREE_INO_OBJECTID, 0);
-}
-
-int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
-{
-	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
-	struct btrfs_path *path;
-	struct inode *inode;
-	int ret = 0;
-	u64 root_gen = btrfs_root_generation(&root->root_item);
-
-	if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
-		return 0;
-
-	/*
-	 * If we're unmounting then just return, since this does a search on the
-	 * normal root and not the commit root and we could deadlock.
-	 */
-	if (btrfs_fs_closing(fs_info))
-		return 0;
-
-	path = btrfs_alloc_path();
-	if (!path)
-		return 0;
-
-	inode = lookup_free_ino_inode(root, path);
-	if (IS_ERR(inode))
-		goto out;
-
-	if (root_gen != BTRFS_I(inode)->generation)
-		goto out_put;
-
-	ret = __load_free_space_cache(root, inode, ctl, path, 0);
-
-	if (ret < 0)
-		btrfs_err(fs_info,
-			"failed to load free ino cache for root %llu",
-			root->root_key.objectid);
-out_put:
-	iput(inode);
-out:
-	btrfs_free_path(path);
 	return ret;
 }
 
-int btrfs_write_out_ino_cache(struct btrfs_root *root,
-			      struct btrfs_trans_handle *trans,
-			      struct btrfs_path *path,
-			      struct inode *inode)
+int btrfs_set_free_space_cache_v1_active(struct btrfs_fs_info *fs_info, bool active)
 {
-	struct btrfs_fs_info *fs_info = root->fs_info;
-	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
+	struct btrfs_trans_handle *trans;
 	int ret;
-	struct btrfs_io_ctl io_ctl;
-	bool release_metadata = true;
 
-	if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
-		return 0;
+	/*
+	 * update_super_roots will appropriately set or unset
+	 * super_copy->cache_generation based on SPACE_CACHE and
+	 * BTRFS_FS_CLEANUP_SPACE_CACHE_V1. For this reason, we need a
+	 * transaction commit whether we are enabling space cache v1 and don't
+	 * have any other work to do, or are disabling it and removing free
+	 * space inodes.
+	 */
+	trans = btrfs_start_transaction(fs_info->tree_root, 0);
+	if (IS_ERR(trans))
+		return PTR_ERR(trans);
 
-	memset(&io_ctl, 0, sizeof(io_ctl));
-	ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl, trans);
-	if (!ret) {
-		/*
-		 * At this point writepages() didn't error out, so our metadata
-		 * reservation is released when the writeback finishes, at
-		 * inode.c:btrfs_finish_ordered_io(), regardless of it finishing
-		 * with or without an error.
-		 */
-		release_metadata = false;
-		ret = btrfs_wait_cache_io_root(root, trans, &io_ctl, path);
+	if (!active) {
+		set_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags);
+		ret = cleanup_free_space_cache_v1(fs_info, trans);
+		if (ret) {
+			btrfs_abort_transaction(trans, ret);
+			btrfs_end_transaction(trans);
+			goto out;
+		}
 	}
 
-	if (ret) {
-		if (release_metadata)
-			btrfs_delalloc_release_metadata(BTRFS_I(inode),
-					inode->i_size, true);
-		btrfs_debug(fs_info,
-			    "failed to write free ino cache for root %llu error %d",
-			    root->root_key.objectid, ret);
-	}
+	ret = btrfs_commit_transaction(trans);
+out:
+	clear_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags);
 
 	return ret;
 }
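The caller of this pair lives in the mount/remount code, which is not part of the hunks shown here, so the following is only the intended shape of its use; treat the surrounding context as illustrative:

/* Sketch: flip the v1 cache when the mount option changed. */
bool active = btrfs_test_opt(fs_info, SPACE_CACHE);

if (active != btrfs_free_space_cache_v1_active(fs_info)) {
	ret = btrfs_set_free_space_cache_v1_active(fs_info, active);
	if (ret)
		return ret;
}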
fs/btrfs/free-space-cache.h

@@ -60,7 +60,6 @@ struct btrfs_free_space_ctl {
 };
 
 struct btrfs_free_space_op {
-	void (*recalc_thresholds)(struct btrfs_free_space_ctl *ctl);
 	bool (*use_bitmap)(struct btrfs_free_space_ctl *ctl,
 			   struct btrfs_free_space *info);
 };
@@ -76,7 +75,6 @@ struct btrfs_io_ctl {
 	int num_pages;
 	int entries;
 	int bitmaps;
-	unsigned check_crcs:1;
 };
 
 struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group,
|
@ -84,6 +82,9 @@ struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group,
|
||||||
int create_free_space_inode(struct btrfs_trans_handle *trans,
|
int create_free_space_inode(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_block_group *block_group,
|
struct btrfs_block_group *block_group,
|
||||||
struct btrfs_path *path);
|
struct btrfs_path *path);
|
||||||
|
int btrfs_remove_free_space_inode(struct btrfs_trans_handle *trans,
|
||||||
|
struct inode *inode,
|
||||||
|
struct btrfs_block_group *block_group);
|
||||||
|
|
||||||
int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
|
int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
|
||||||
struct btrfs_block_rsv *rsv);
|
struct btrfs_block_rsv *rsv);
|
||||||
|
@ -97,19 +98,9 @@ int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
|
||||||
int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
|
int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_block_group *block_group,
|
struct btrfs_block_group *block_group,
|
||||||
struct btrfs_path *path);
|
struct btrfs_path *path);
|
||||||
struct inode *lookup_free_ino_inode(struct btrfs_root *root,
|
|
||||||
struct btrfs_path *path);
|
|
||||||
int create_free_ino_inode(struct btrfs_root *root,
|
|
||||||
struct btrfs_trans_handle *trans,
|
|
||||||
struct btrfs_path *path);
|
|
||||||
int load_free_ino_cache(struct btrfs_fs_info *fs_info,
|
|
||||||
struct btrfs_root *root);
|
|
||||||
int btrfs_write_out_ino_cache(struct btrfs_root *root,
|
|
||||||
struct btrfs_trans_handle *trans,
|
|
||||||
struct btrfs_path *path,
|
|
||||||
struct inode *inode);
|
|
||||||
|
|
||||||
void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group);
|
void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group,
|
||||||
|
struct btrfs_free_space_ctl *ctl);
|
||||||
int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
|
int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
|
||||||
struct btrfs_free_space_ctl *ctl,
|
struct btrfs_free_space_ctl *ctl,
|
||||||
u64 bytenr, u64 size,
|
u64 bytenr, u64 size,
|
||||||
|
@ -126,7 +117,6 @@ bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group);
|
||||||
u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
|
u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
|
||||||
u64 offset, u64 bytes, u64 empty_size,
|
u64 offset, u64 bytes, u64 empty_size,
|
||||||
u64 *max_extent_size);
|
u64 *max_extent_size);
|
||||||
u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root);
|
|
||||||
void btrfs_dump_free_space(struct btrfs_block_group *block_group,
|
void btrfs_dump_free_space(struct btrfs_block_group *block_group,
|
||||||
u64 bytes);
|
u64 bytes);
|
||||||
int btrfs_find_space_cluster(struct btrfs_block_group *block_group,
|
int btrfs_find_space_cluster(struct btrfs_block_group *block_group,
|
||||||
|
@ -148,6 +138,8 @@ int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
|
||||||
u64 *trimmed, u64 start, u64 end, u64 minlen,
|
u64 *trimmed, u64 start, u64 end, u64 minlen,
|
||||||
u64 maxlen, bool async);
|
u64 maxlen, bool async);
|
||||||
|
|
||||||
|
bool btrfs_free_space_cache_v1_active(struct btrfs_fs_info *fs_info);
|
||||||
|
int btrfs_set_free_space_cache_v1_active(struct btrfs_fs_info *fs_info, bool active);
|
||||||
/* Support functions for running our sanity tests */
|
/* Support functions for running our sanity tests */
|
||||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||||
int test_add_free_space_entry(struct btrfs_block_group *cache,
|
int test_add_free_space_entry(struct btrfs_block_group *cache,
|
||||||
|
|
|
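The two declarations added above are the runtime interface for the v1 free space cache: a query for whether it is active and a setter that may fail. A minimal sketch of a caller, assuming only what the header shows (the wrapper function name below is hypothetical, not part of the series):

	/* Sketch: switch the v1 cache off if it is currently active. */
	static int maybe_disable_space_cache_v1(struct btrfs_fs_info *fs_info)
	{
		int ret = 0;

		if (btrfs_free_space_cache_v1_active(fs_info))
			ret = btrfs_set_free_space_cache_v1_active(fs_info, false);

		return ret;
	}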
@@ -136,9 +136,10 @@ static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
-static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
+static inline u32 free_space_bitmap_size(const struct btrfs_fs_info *fs_info,
+					 u64 size)
 {
-	return DIV_ROUND_UP((u32)div_u64(size, sectorsize), BITS_PER_BYTE);
+	return DIV_ROUND_UP(size >> fs_info->sectorsize_bits, BITS_PER_BYTE);
 }
 
 static unsigned long *alloc_bitmap(u32 bitmap_size)
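This hunk and the ones that follow replace 64-bit divisions by the sector size with right shifts by sectorsize_bits. The two are interchangeable because the sector size is always a power of two and sectorsize_bits is its base-2 logarithm. A small self-contained check of the identity (a userspace sketch; the values are arbitrary illustrations):

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t size = 129 * 1024 * 1024 + 4096;	/* arbitrary byte count */
		uint32_t sectorsize = 4096;			/* power of two */
		uint32_t sectorsize_bits = 12;			/* ilog2(4096) */

		/* Division and shift agree for any power-of-two sector size. */
		assert(size / sectorsize == size >> sectorsize_bits);
		return 0;
	}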
@@ -200,8 +201,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
 	int done = 0, nr;
 	int ret;
 
-	bitmap_size = free_space_bitmap_size(block_group->length,
-					     fs_info->sectorsize);
+	bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
 	bitmap = alloc_bitmap(bitmap_size);
 	if (!bitmap) {
 		ret = -ENOMEM;
@@ -290,8 +290,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
 		u32 data_size;
 
 		extent_size = min(end - i, bitmap_range);
-		data_size = free_space_bitmap_size(extent_size,
-						   fs_info->sectorsize);
+		data_size = free_space_bitmap_size(fs_info, extent_size);
 
 		key.objectid = i;
 		key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
@@ -339,8 +338,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
 	int done = 0, nr;
 	int ret;
 
-	bitmap_size = free_space_bitmap_size(block_group->length,
-					     fs_info->sectorsize);
+	bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
 	bitmap = alloc_bitmap(bitmap_size);
 	if (!bitmap) {
 		ret = -ENOMEM;
@@ -383,8 +381,8 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
 						fs_info->sectorsize *
 						BITS_PER_BYTE);
 			bitmap_cursor = ((char *)bitmap) + bitmap_pos;
-			data_size = free_space_bitmap_size(found_key.offset,
-							   fs_info->sectorsize);
+			data_size = free_space_bitmap_size(fs_info,
+							   found_key.offset);
 
 			ptr = btrfs_item_ptr_offset(leaf, path->slots[0] - 1);
 			read_extent_buffer(leaf, bitmap_cursor, ptr,
@@ -416,7 +414,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(leaf);
 	btrfs_release_path(path);
 
-	nrbits = div_u64(block_group->length, block_group->fs_info->sectorsize);
+	nrbits = block_group->length >> block_group->fs_info->sectorsize_bits;
 	start_bit = find_next_bit_le(bitmap, nrbits, 0);
 
 	while (start_bit < nrbits) {
@@ -540,8 +538,8 @@ static void free_space_set_bits(struct btrfs_block_group *block_group,
 		end = found_end;
 
 	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
-	first = div_u64(*start - found_start, fs_info->sectorsize);
-	last = div_u64(end - found_start, fs_info->sectorsize);
+	first = (*start - found_start) >> fs_info->sectorsize_bits;
+	last = (end - found_start) >> fs_info->sectorsize_bits;
 	if (bit)
 		extent_buffer_bitmap_set(leaf, ptr, first, last - first);
 	else
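Here first and last map a byte range inside a bitmap item to sector-granular bit indices; the bitmap helpers then flip bits [first, last). A worked example with hypothetical numbers, chosen only to make the arithmetic visible:

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		/* Hypothetical values, for illustration only. */
		uint64_t found_start = 1024 * 1024;		/* bitmap item starts at 1 MiB */
		uint64_t start = found_start + 8 * 4096;	/* range begins 8 sectors in */
		uint64_t end = found_start + 24 * 4096;		/* ... and spans 16 sectors */
		uint32_t sectorsize_bits = 12;			/* 4 KiB sectors */

		uint64_t first = (start - found_start) >> sectorsize_bits;
		uint64_t last = (end - found_start) >> sectorsize_bits;

		/* extent_buffer_bitmap_set/_clear then acts on last - first bits. */
		assert(first == 8 && last == 24 && last - first == 16);
		return 0;
	}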
@@ -1195,8 +1193,6 @@ static int clear_free_space_tree(struct btrfs_trans_handle *trans,
 	if (!path)
 		return -ENOMEM;
 
-	path->leave_spinning = 1;
-
 	key.objectid = 0;
 	key.type = 0;
 	key.offset = 0;
@@ -119,8 +119,6 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
 	if (!path)
 		return -ENOMEM;
 
-	path->leave_spinning = 1;
-
 	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
 	if (ret > 0)
 		ret = -ENOENT;
@@ -193,8 +191,6 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
 	if (!path)
 		return -ENOMEM;
 
-	path->leave_spinning = 1;
-
 	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
 	if (ret > 0) {
 		ret = -ENOENT;
@@ -270,7 +266,6 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
 	if (!path)
 		return -ENOMEM;
 
-	path->leave_spinning = 1;
 	ret = btrfs_insert_empty_item(trans, root, path, &key,
 				      ins_len);
 	if (ret == -EEXIST) {
@@ -327,7 +322,6 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
 	if (!path)
 		return -ENOMEM;
 
-	path->leave_spinning = 1;
 	path->skip_release_on_error = 1;
 	ret = btrfs_insert_empty_item(trans, root, path, &key,
 				      ins_len);
@@ -1,582 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2007 Oracle.  All rights reserved.
- */
-
-#include <linux/kthread.h>
-#include <linux/pagemap.h>
-
-#include "ctree.h"
-#include "disk-io.h"
-#include "free-space-cache.h"
-#include "inode-map.h"
-#include "transaction.h"
-#include "delalloc-space.h"
-
-static void fail_caching_thread(struct btrfs_root *root)
-{
-	struct btrfs_fs_info *fs_info = root->fs_info;
-
-	btrfs_warn(fs_info, "failed to start inode caching task");
-	btrfs_clear_pending_and_info(fs_info, INODE_MAP_CACHE,
-				     "disabling inode map caching");
-	spin_lock(&root->ino_cache_lock);
-	root->ino_cache_state = BTRFS_CACHE_ERROR;
-	spin_unlock(&root->ino_cache_lock);
-	wake_up(&root->ino_cache_wait);
-}
-
-static int caching_kthread(void *data)
-{
-	struct btrfs_root *root = data;
-	struct btrfs_fs_info *fs_info = root->fs_info;
-	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
-	struct btrfs_key key;
-	struct btrfs_path *path;
-	struct extent_buffer *leaf;
-	u64 last = (u64)-1;
-	int slot;
-	int ret;
-
-	if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
-		return 0;
-
-	path = btrfs_alloc_path();
-	if (!path) {
-		fail_caching_thread(root);
-		return -ENOMEM;
-	}
-
-	/* Since the commit root is read-only, we can safely skip locking. */
-	path->skip_locking = 1;
-	path->search_commit_root = 1;
-	path->reada = READA_FORWARD;
-
-	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
-	key.offset = 0;
-	key.type = BTRFS_INODE_ITEM_KEY;
-again:
-	/* need to make sure the commit_root doesn't disappear */
-	down_read(&fs_info->commit_root_sem);
-
-	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-	if (ret < 0)
-		goto out;
-
-	while (1) {
-		if (btrfs_fs_closing(fs_info))
-			goto out;
-
-		leaf = path->nodes[0];
-		slot = path->slots[0];
-		if (slot >= btrfs_header_nritems(leaf)) {
-			ret = btrfs_next_leaf(root, path);
-			if (ret < 0)
-				goto out;
-			else if (ret > 0)
-				break;
-
-			if (need_resched() ||
-			    btrfs_transaction_in_commit(fs_info)) {
-				leaf = path->nodes[0];
-
-				if (WARN_ON(btrfs_header_nritems(leaf) == 0))
-					break;
-
-				/*
-				 * Save the key so we can advances forward
-				 * in the next search.
-				 */
-				btrfs_item_key_to_cpu(leaf, &key, 0);
-				btrfs_release_path(path);
-				root->ino_cache_progress = last;
-				up_read(&fs_info->commit_root_sem);
-				schedule_timeout(1);
-				goto again;
-			} else
-				continue;
-		}
-
-		btrfs_item_key_to_cpu(leaf, &key, slot);
-
-		if (key.type != BTRFS_INODE_ITEM_KEY)
-			goto next;
-
-		if (key.objectid >= root->highest_objectid)
-			break;
-
-		if (last != (u64)-1 && last + 1 != key.objectid) {
-			__btrfs_add_free_space(fs_info, ctl, last + 1,
-					       key.objectid - last - 1, 0);
-			wake_up(&root->ino_cache_wait);
-		}
-
-		last = key.objectid;
-next:
-		path->slots[0]++;
-	}
-
-	if (last < root->highest_objectid - 1) {
-		__btrfs_add_free_space(fs_info, ctl, last + 1,
-				       root->highest_objectid - last - 1, 0);
-	}
-
-	spin_lock(&root->ino_cache_lock);
-	root->ino_cache_state = BTRFS_CACHE_FINISHED;
-	spin_unlock(&root->ino_cache_lock);
-
-	root->ino_cache_progress = (u64)-1;
-	btrfs_unpin_free_ino(root);
-out:
-	wake_up(&root->ino_cache_wait);
-	up_read(&fs_info->commit_root_sem);
-
-	btrfs_free_path(path);
-
-	return ret;
-}
-
-static void start_caching(struct btrfs_root *root)
-{
-	struct btrfs_fs_info *fs_info = root->fs_info;
-	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
-	struct task_struct *tsk;
-	int ret;
-	u64 objectid;
-
-	if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
-		return;
-
-	spin_lock(&root->ino_cache_lock);
-	if (root->ino_cache_state != BTRFS_CACHE_NO) {
-		spin_unlock(&root->ino_cache_lock);
-		return;
-	}
-
-	root->ino_cache_state = BTRFS_CACHE_STARTED;
-	spin_unlock(&root->ino_cache_lock);
-
-	ret = load_free_ino_cache(fs_info, root);
-	if (ret == 1) {
-		spin_lock(&root->ino_cache_lock);
-		root->ino_cache_state = BTRFS_CACHE_FINISHED;
-		spin_unlock(&root->ino_cache_lock);
-		wake_up(&root->ino_cache_wait);
-		return;
-	}
-
-	/*
-	 * It can be quite time-consuming to fill the cache by searching
-	 * through the extent tree, and this can keep ino allocation path
-	 * waiting. Therefore at start we quickly find out the highest
-	 * inode number and we know we can use inode numbers which fall in
-	 * [highest_ino + 1, BTRFS_LAST_FREE_OBJECTID].
-	 */
-	ret = btrfs_find_free_objectid(root, &objectid);
-	if (!ret && objectid <= BTRFS_LAST_FREE_OBJECTID) {
-		__btrfs_add_free_space(fs_info, ctl, objectid,
-				       BTRFS_LAST_FREE_OBJECTID - objectid + 1,
-				       0);
-		wake_up(&root->ino_cache_wait);
-	}
-
-	tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu",
-			  root->root_key.objectid);
-	if (IS_ERR(tsk))
-		fail_caching_thread(root);
-}
-
-int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid)
-{
-	if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
-		return btrfs_find_free_objectid(root, objectid);
-
-again:
-	*objectid = btrfs_find_ino_for_alloc(root);
-
-	if (*objectid != 0)
-		return 0;
-
-	start_caching(root);
-
-	wait_event(root->ino_cache_wait,
-		   root->ino_cache_state == BTRFS_CACHE_FINISHED ||
-		   root->ino_cache_state == BTRFS_CACHE_ERROR ||
-		   root->free_ino_ctl->free_space > 0);
-
-	if (root->ino_cache_state == BTRFS_CACHE_FINISHED &&
-	    root->free_ino_ctl->free_space == 0)
-		return -ENOSPC;
-	else if (root->ino_cache_state == BTRFS_CACHE_ERROR)
-		return btrfs_find_free_objectid(root, objectid);
-	else
-		goto again;
-}
-
-void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
-{
-	struct btrfs_fs_info *fs_info = root->fs_info;
-	struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;
-
-	if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
-		return;
-again:
-	if (root->ino_cache_state == BTRFS_CACHE_FINISHED) {
-		__btrfs_add_free_space(fs_info, pinned, objectid, 1, 0);
-	} else {
-		down_write(&fs_info->commit_root_sem);
-		spin_lock(&root->ino_cache_lock);
-		if (root->ino_cache_state == BTRFS_CACHE_FINISHED) {
-			spin_unlock(&root->ino_cache_lock);
-			up_write(&fs_info->commit_root_sem);
-			goto again;
-		}
-		spin_unlock(&root->ino_cache_lock);
-
-		start_caching(root);
-
-		__btrfs_add_free_space(fs_info, pinned, objectid, 1, 0);
-
-		up_write(&fs_info->commit_root_sem);
-	}
-}
-
-/*
- * When a transaction is committed, we'll move those inode numbers which are
- * smaller than root->ino_cache_progress from pinned tree to free_ino tree, and
- * others will just be dropped, because the commit root we were searching has
- * changed.
- *
- * Must be called with root->fs_info->commit_root_sem held
- */
-void btrfs_unpin_free_ino(struct btrfs_root *root)
-{
-	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
-	struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset;
-	spinlock_t *rbroot_lock = &root->free_ino_pinned->tree_lock;
-	struct btrfs_free_space *info;
-	struct rb_node *n;
-	u64 count;
-
-	if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
-		return;
-
-	while (1) {
-		spin_lock(rbroot_lock);
-		n = rb_first(rbroot);
-		if (!n) {
-			spin_unlock(rbroot_lock);
-			break;
-		}
-
-		info = rb_entry(n, struct btrfs_free_space, offset_index);
-		BUG_ON(info->bitmap); /* Logic error */
-
-		if (info->offset > root->ino_cache_progress)
-			count = 0;
-		else
-			count = min(root->ino_cache_progress - info->offset + 1,
-				    info->bytes);
-
-		rb_erase(&info->offset_index, rbroot);
-		spin_unlock(rbroot_lock);
-		if (count)
-			__btrfs_add_free_space(root->fs_info, ctl,
-					       info->offset, count, 0);
-		kmem_cache_free(btrfs_free_space_cachep, info);
-	}
-}
-
-#define INIT_THRESHOLD	((SZ_32K / 2) / sizeof(struct btrfs_free_space))
-#define INODES_PER_BITMAP (PAGE_SIZE * 8)
-
-/*
- * The goal is to keep the memory used by the free_ino tree won't
- * exceed the memory if we use bitmaps only.
- */
-static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
-{
-	struct btrfs_free_space *info;
-	struct rb_node *n;
-	int max_ino;
-	int max_bitmaps;
-
-	n = rb_last(&ctl->free_space_offset);
-	if (!n) {
-		ctl->extents_thresh = INIT_THRESHOLD;
-		return;
-	}
-	info = rb_entry(n, struct btrfs_free_space, offset_index);
-
-	/*
-	 * Find the maximum inode number in the filesystem. Note we
-	 * ignore the fact that this can be a bitmap, because we are
-	 * not doing precise calculation.
-	 */
-	max_ino = info->bytes - 1;
-
-	max_bitmaps = ALIGN(max_ino, INODES_PER_BITMAP) / INODES_PER_BITMAP;
-	if (max_bitmaps <= ctl->total_bitmaps) {
-		ctl->extents_thresh = 0;
-		return;
-	}
-
-	ctl->extents_thresh = (max_bitmaps - ctl->total_bitmaps) *
-				PAGE_SIZE / sizeof(*info);
-}
-
-/*
- * We don't fall back to bitmap, if we are below the extents threshold
- * or this chunk of inode numbers is a big one.
- */
-static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
-		       struct btrfs_free_space *info)
-{
-	if (ctl->free_extents < ctl->extents_thresh ||
-	    info->bytes > INODES_PER_BITMAP / 10)
-		return false;
-
-	return true;
-}
-
-static const struct btrfs_free_space_op free_ino_op = {
-	.recalc_thresholds	= recalculate_thresholds,
-	.use_bitmap		= use_bitmap,
-};
-
-static void pinned_recalc_thresholds(struct btrfs_free_space_ctl *ctl)
-{
-}
-
-static bool pinned_use_bitmap(struct btrfs_free_space_ctl *ctl,
-			      struct btrfs_free_space *info)
-{
-	/*
-	 * We always use extents for two reasons:
-	 *
-	 * - The pinned tree is only used during the process of caching
-	 *   work.
-	 * - Make code simpler. See btrfs_unpin_free_ino().
-	 */
-	return false;
-}
-
-static const struct btrfs_free_space_op pinned_free_ino_op = {
-	.recalc_thresholds	= pinned_recalc_thresholds,
-	.use_bitmap		= pinned_use_bitmap,
-};
-
-void btrfs_init_free_ino_ctl(struct btrfs_root *root)
-{
-	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
-	struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;
-
-	spin_lock_init(&ctl->tree_lock);
-	ctl->unit = 1;
-	ctl->start = 0;
-	ctl->private = NULL;
-	ctl->op = &free_ino_op;
-	INIT_LIST_HEAD(&ctl->trimming_ranges);
-	mutex_init(&ctl->cache_writeout_mutex);
-
-	/*
-	 * Initially we allow to use 16K of ram to cache chunks of
-	 * inode numbers before we resort to bitmaps. This is somewhat
-	 * arbitrary, but it will be adjusted in runtime.
-	 */
-	ctl->extents_thresh = INIT_THRESHOLD;
-
-	spin_lock_init(&pinned->tree_lock);
-	pinned->unit = 1;
-	pinned->start = 0;
-	pinned->private = NULL;
-	pinned->extents_thresh = 0;
-	pinned->op = &pinned_free_ino_op;
-}
-
-int btrfs_save_ino_cache(struct btrfs_root *root,
-			 struct btrfs_trans_handle *trans)
-{
-	struct btrfs_fs_info *fs_info = root->fs_info;
-	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
-	struct btrfs_path *path;
-	struct inode *inode;
-	struct btrfs_block_rsv *rsv;
-	struct extent_changeset *data_reserved = NULL;
-	u64 num_bytes;
-	u64 alloc_hint = 0;
-	int ret;
-	int prealloc;
-	bool retry = false;
-
-	/* only fs tree and subvol/snap needs ino cache */
-	if (root->root_key.objectid != BTRFS_FS_TREE_OBJECTID &&
-	    (root->root_key.objectid < BTRFS_FIRST_FREE_OBJECTID ||
-	     root->root_key.objectid > BTRFS_LAST_FREE_OBJECTID))
-		return 0;
-
-	/* Don't save inode cache if we are deleting this root */
-	if (btrfs_root_refs(&root->root_item) == 0)
-		return 0;
-
-	if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
-		return 0;
-
-	path = btrfs_alloc_path();
-	if (!path)
-		return -ENOMEM;
-
-	rsv = trans->block_rsv;
-	trans->block_rsv = &fs_info->trans_block_rsv;
-
-	num_bytes = trans->bytes_reserved;
-	/*
-	 * 1 item for inode item insertion if need
-	 * 4 items for inode item update (in the worst case)
-	 * 1 items for slack space if we need do truncation
-	 * 1 item for free space object
-	 * 3 items for pre-allocation
-	 */
-	trans->bytes_reserved = btrfs_calc_insert_metadata_size(fs_info, 10);
-	ret = btrfs_block_rsv_add(root, trans->block_rsv,
-				  trans->bytes_reserved,
-				  BTRFS_RESERVE_NO_FLUSH);
-	if (ret)
-		goto out;
-	trace_btrfs_space_reservation(fs_info, "ino_cache", trans->transid,
-				      trans->bytes_reserved, 1);
-again:
-	inode = lookup_free_ino_inode(root, path);
-	if (IS_ERR(inode) && (PTR_ERR(inode) != -ENOENT || retry)) {
-		ret = PTR_ERR(inode);
-		goto out_release;
-	}
-
-	if (IS_ERR(inode)) {
-		BUG_ON(retry); /* Logic error */
-		retry = true;
-
-		ret = create_free_ino_inode(root, trans, path);
-		if (ret)
-			goto out_release;
-		goto again;
-	}
-
-	BTRFS_I(inode)->generation = 0;
-	ret = btrfs_update_inode(trans, root, inode);
-	if (ret) {
-		btrfs_abort_transaction(trans, ret);
-		goto out_put;
-	}
-
-	if (i_size_read(inode) > 0) {
-		ret = btrfs_truncate_free_space_cache(trans, NULL, inode);
-		if (ret) {
-			if (ret != -ENOSPC)
-				btrfs_abort_transaction(trans, ret);
-			goto out_put;
-		}
-	}
-
-	spin_lock(&root->ino_cache_lock);
-	if (root->ino_cache_state != BTRFS_CACHE_FINISHED) {
-		ret = -1;
-		spin_unlock(&root->ino_cache_lock);
-		goto out_put;
-	}
-	spin_unlock(&root->ino_cache_lock);
-
-	spin_lock(&ctl->tree_lock);
-	prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents;
-	prealloc = ALIGN(prealloc, PAGE_SIZE);
-	prealloc += ctl->total_bitmaps * PAGE_SIZE;
-	spin_unlock(&ctl->tree_lock);
-
-	/* Just to make sure we have enough space */
-	prealloc += 8 * PAGE_SIZE;
-
-	ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved, 0,
-					   prealloc);
-	if (ret)
-		goto out_put;
-
-	ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
-					      prealloc, prealloc, &alloc_hint);
-	if (ret) {
-		btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
-		btrfs_delalloc_release_metadata(BTRFS_I(inode), prealloc, true);
-		goto out_put;
-	}
-
-	ret = btrfs_write_out_ino_cache(root, trans, path, inode);
-	btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
-out_put:
-	iput(inode);
-out_release:
-	trace_btrfs_space_reservation(fs_info, "ino_cache", trans->transid,
-				      trans->bytes_reserved, 0);
-	btrfs_block_rsv_release(fs_info, trans->block_rsv,
-				trans->bytes_reserved, NULL);
-out:
-	trans->block_rsv = rsv;
-	trans->bytes_reserved = num_bytes;
-
-	btrfs_free_path(path);
-	extent_changeset_free(data_reserved);
-	return ret;
-}
-
-int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
-{
-	struct btrfs_path *path;
-	int ret;
-	struct extent_buffer *l;
-	struct btrfs_key search_key;
-	struct btrfs_key found_key;
-	int slot;
-
-	path = btrfs_alloc_path();
-	if (!path)
-		return -ENOMEM;
-
-	search_key.objectid = BTRFS_LAST_FREE_OBJECTID;
-	search_key.type = -1;
-	search_key.offset = (u64)-1;
-	ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
-	if (ret < 0)
-		goto error;
-	BUG_ON(ret == 0); /* Corruption */
-	if (path->slots[0] > 0) {
-		slot = path->slots[0] - 1;
-		l = path->nodes[0];
-		btrfs_item_key_to_cpu(l, &found_key, slot);
-		*objectid = max_t(u64, found_key.objectid,
-				  BTRFS_FIRST_FREE_OBJECTID - 1);
-	} else {
-		*objectid = BTRFS_FIRST_FREE_OBJECTID - 1;
-	}
-	ret = 0;
-error:
-	btrfs_free_path(path);
-	return ret;
-}
-
-int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid)
-{
-	int ret;
-	mutex_lock(&root->objectid_mutex);
-
-	if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) {
-		btrfs_warn(root->fs_info,
-			   "the objectid of root %llu reaches its highest value",
-			   root->root_key.objectid);
-		ret = -ENOSPC;
-		goto out;
-	}
-
-	*objectid = ++root->highest_objectid;
-	ret = 0;
-out:
-	mutex_unlock(&root->objectid_mutex);
-	return ret;
-}
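With inode-map.c deleted, freed inode numbers are no longer cached and reused; allocation reduces to taking the next objectid under root->objectid_mutex, which is what btrfs_find_free_objectid() (its body is visible in the removed file above, and the function itself is kept, relocated out of this file) already does. A minimal sketch of the calling pattern that replaces btrfs_find_free_ino(), shown only as an illustration of the simpler path:

	/* Sketch: inode number allocation without the ino cache. */
	u64 objectid;
	int ret;

	ret = btrfs_find_free_objectid(root, &objectid);
	if (ret)	/* -ENOSPC once highest_objectid hits BTRFS_LAST_FREE_OBJECTID */
		return ret;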
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#ifndef BTRFS_INODE_MAP_H
-#define BTRFS_INODE_MAP_H
-
-void btrfs_init_free_ino_ctl(struct btrfs_root *root);
-void btrfs_unpin_free_ino(struct btrfs_root *root);
-void btrfs_return_ino(struct btrfs_root *root, u64 objectid);
-int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid);
-int btrfs_save_ino_cache(struct btrfs_root *root,
-			 struct btrfs_trans_handle *trans);
-
-int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid);
-int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid);
-
-#endif
fs/btrfs/inode.c: 815 changes, diff not shown because of its size.
@@ -34,7 +34,6 @@
 #include "print-tree.h"
 #include "volumes.h"
 #include "locking.h"
-#include "inode-map.h"
 #include "backref.h"
 #include "rcu-string.h"
 #include "send.h"
@@ -193,6 +192,15 @@ static int check_fsflags(unsigned int old_flags, unsigned int flags)
 	return 0;
 }
 
+static int check_fsflags_compatible(struct btrfs_fs_info *fs_info,
+				    unsigned int flags)
+{
+	if (btrfs_is_zoned(fs_info) && (flags & FS_NOCOW_FL))
+		return -EPERM;
+
+	return 0;
+}
+
 static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
 {
 	struct inode *inode = file_inode(file);
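The effect of the new check is visible from user space: on a zoned filesystem, requesting the NOCOW attribute now fails with EPERM instead of setting a flag that cannot be honoured. A small stand-alone demonstration (the path is illustrative; it assumes the file lives on a zoned btrfs):

	#include <fcntl.h>
	#include <linux/fs.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/mnt/scratch/file", O_RDONLY);
		unsigned int flags;

		if (fd < 0 || ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0) {
			perror("open/getflags");
			return 1;
		}
		flags |= FS_NOCOW_FL;
		/* On zoned btrfs this setflags call now fails with EPERM. */
		if (ioctl(fd, FS_IOC_SETFLAGS, &flags) < 0)
			perror("setflags");
		close(fd);
		return 0;
	}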
@@ -230,6 +238,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
 	if (ret)
 		goto out_unlock;
 
+	ret = check_fsflags_compatible(fs_info, fsflags);
+	if (ret)
+		goto out_unlock;
+
 	binode_flags = binode->flags;
 	if (fsflags & FS_SYNC_FL)
 		binode_flags |= BTRFS_INODE_SYNC;
@@ -336,7 +348,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
 	btrfs_sync_inode_flags_to_i_flags(inode);
 	inode_inc_iversion(inode);
 	inode->i_ctime = current_time(inode);
-	ret = btrfs_update_inode(trans, root, inode);
+	ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
 
 out_end_trans:
 	btrfs_end_transaction(trans);
@@ -479,7 +491,7 @@ static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg)
 	btrfs_sync_inode_flags_to_i_flags(inode);
 	inode_inc_iversion(inode);
 	inode->i_ctime = current_time(inode);
-	ret = btrfs_update_inode(trans, root, inode);
+	ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
 
 	btrfs_end_transaction(trans);
 
@@ -733,7 +745,7 @@ static noinline int create_subvol(struct inode *dir,
 	}
 
 	btrfs_i_size_write(BTRFS_I(dir), dir->i_size + namelen * 2);
-	ret = btrfs_update_inode(trans, root, dir);
+	ret = btrfs_update_inode(trans, root, BTRFS_I(dir));
 	if (ret) {
 		btrfs_abort_transaction(trans, ret);
 		goto fail;
@@ -1275,6 +1287,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
 	u64 page_end;
 	u64 page_cnt;
 	u64 start = (u64)start_index << PAGE_SHIFT;
+	u64 search_start;
 	int ret;
 	int i;
 	int i_done;
@@ -1371,6 +1384,40 @@ again:
 
 	lock_extent_bits(&BTRFS_I(inode)->io_tree,
 			 page_start, page_end - 1, &cached_state);
+
+	/*
+	 * When defragmenting we skip ranges that have holes or inline extents,
+	 * (check should_defrag_range()), to avoid unnecessary IO and wasting
+	 * space. At btrfs_defrag_file(), we check if a range should be defragged
+	 * before locking the inode and then, if it should, we trigger a sync
+	 * page cache readahead - we lock the inode only after that to avoid
+	 * blocking for too long other tasks that possibly want to operate on
+	 * other file ranges. But before we were able to get the inode lock,
+	 * some other task may have punched a hole in the range, or we may have
+	 * now an inline extent, in which case we should not defrag. So check
+	 * for that here, where we have the inode and the range locked, and bail
+	 * out if that happened.
+	 */
+	search_start = page_start;
+	while (search_start < page_end) {
+		struct extent_map *em;
+
+		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, search_start,
+				      page_end - search_start);
+		if (IS_ERR(em)) {
+			ret = PTR_ERR(em);
+			goto out_unlock_range;
+		}
+		if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
+			free_extent_map(em);
+			/* Ok, 0 means we did not defrag anything */
+			ret = 0;
+			goto out_unlock_range;
+		}
+		search_start = extent_map_end(em);
+		free_extent_map(em);
+	}
+
 	clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
 			 page_end - 1, EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
 			 EXTENT_DEFRAG, 0, 0, &cached_state);
@@ -1401,6 +1448,10 @@ again:
 	btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
 	extent_changeset_free(data_reserved);
 	return i_done;
+
+out_unlock_range:
+	unlock_extent_cached(&BTRFS_I(inode)->io_tree,
+			     page_start, page_end - 1, &cached_state);
 out:
 	for (i = 0; i < i_done; i++) {
 		unlock_page(pages[i]);
@@ -1678,7 +1729,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
 		btrfs_info(fs_info, "resizing devid %llu", devid);
 	}
 
-	device = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
+	device = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL);
 	if (!device) {
 		btrfs_info(fs_info, "resizer unable to find device %llu",
 			   devid);
@@ -3321,7 +3372,7 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info,
 
 	rcu_read_lock();
 	dev = btrfs_find_device(fs_info->fs_devices, di_args->devid, s_uuid,
-				NULL, true);
+				NULL);
 
 	if (!dev) {
 		ret = -ENODEV;
@@ -3393,7 +3444,6 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
 		ret = -ENOMEM;
 		goto out_free;
 	}
-	path->leave_spinning = 1;
 
 	trans = btrfs_start_transaction(root, 1);
 	if (IS_ERR(trans)) {
@@ -17,404 +17,89 @@
  * Extent buffer locking
  * =====================
  *
- * The locks use a custom scheme that allows to do more operations than are
- * available fromt current locking primitives. The building blocks are still
- * rwlock and wait queues.
- *
- * Required semantics:
+ * We use a rw_semaphore for tree locking, and the semantics are exactly the
+ * same:
  *
  * - reader/writer exclusion
  * - writer/writer exclusion
  * - reader/reader sharing
- * - spinning lock semantics
- * - blocking lock semantics
  * - try-lock semantics for readers and writers
- * - one level nesting, allowing read lock to be taken by the same thread that
- *   already has write lock
- *
- * The extent buffer locks (also called tree locks) manage access to eb data
- * related to the storage in the b-tree (keys, items, but not the individual
- * members of eb).
- * We want concurrency of many readers and safe updates. The underlying locking
- * is done by read-write spinlock and the blocking part is implemented using
- * counters and wait queues.
- *
- * spinning semantics - the low-level rwlock is held so all other threads that
- *                      want to take it are spinning on it.
- *
- * blocking semantics - the low-level rwlock is not held but the counter
- *                      denotes how many times the blocking lock was held;
- *                      sleeping is possible
- *
- * Write lock always allows only one thread to access the data.
- *
- *
- * Debugging
- * ---------
- *
- * There are additional state counters that are asserted in various contexts,
- * removed from non-debug build to reduce extent_buffer size and for
- * performance reasons.
- *
- *
- * Lock recursion
- * --------------
- *
- * A write operation on a tree might indirectly start a look up on the same
- * tree. This can happen when btrfs_cow_block locks the tree and needs to
- * lookup free extents.
- *
- * btrfs_cow_block
- *   ..
- *   alloc_tree_block_no_bg_flush
- *     btrfs_alloc_tree_block
- *       btrfs_reserve_extent
- *         ..
- *         load_free_space_cache
- *           ..
- *           btrfs_lookup_file_extent
- *             btrfs_search_slot
- *
- *
- * Locking pattern - spinning
- * --------------------------
- *
- * The simple locking scenario, the +--+ denotes the spinning section.
- *
- * +- btrfs_tree_lock
- * | - extent_buffer::rwlock is held
- * | - no heavy operations should happen, eg. IO, memory allocations, large
- * |   structure traversals
- * +- btrfs_tree_unock
- *
- *
- * Locking pattern - blocking
- * --------------------------
- *
- * The blocking write uses the following scheme. The +--+ denotes the spinning
- * section.
- *
- * +- btrfs_tree_lock
- * |
- * +- btrfs_set_lock_blocking_write
- *
- * - allowed: IO, memory allocations, etc.
- *
- * -- btrfs_tree_unlock - note, no explicit unblocking necessary
- *
- *
- * Blocking read is similar.
- *
- * +- btrfs_tree_read_lock
- * |
- * +- btrfs_set_lock_blocking_read
- *
- * - heavy operations allowed
- *
- * +- btrfs_tree_read_unlock_blocking
- * |
- * +- btrfs_tree_read_unlock
  *
+ * The rwsem implementation does opportunistic spinning which reduces number of
+ * times the locking task needs to sleep.
  */
 
-#ifdef CONFIG_BTRFS_DEBUG
-static inline void btrfs_assert_spinning_writers_get(struct extent_buffer *eb)
-{
-	WARN_ON(eb->spinning_writers);
-	eb->spinning_writers++;
-}
-
-static inline void btrfs_assert_spinning_writers_put(struct extent_buffer *eb)
-{
-	WARN_ON(eb->spinning_writers != 1);
-	eb->spinning_writers--;
-}
-
-static inline void btrfs_assert_no_spinning_writers(struct extent_buffer *eb)
-{
-	WARN_ON(eb->spinning_writers);
-}
-
-static inline void btrfs_assert_spinning_readers_get(struct extent_buffer *eb)
-{
-	atomic_inc(&eb->spinning_readers);
-}
-
-static inline void btrfs_assert_spinning_readers_put(struct extent_buffer *eb)
-{
-	WARN_ON(atomic_read(&eb->spinning_readers) == 0);
-	atomic_dec(&eb->spinning_readers);
-}
-
-static inline void btrfs_assert_tree_read_locks_get(struct extent_buffer *eb)
-{
-	atomic_inc(&eb->read_locks);
-}
-
-static inline void btrfs_assert_tree_read_locks_put(struct extent_buffer *eb)
-{
-	atomic_dec(&eb->read_locks);
-}
-
-static inline void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
-{
-	BUG_ON(!atomic_read(&eb->read_locks));
-}
-
-static inline void btrfs_assert_tree_write_locks_get(struct extent_buffer *eb)
-{
-	eb->write_locks++;
-}
-
-static inline void btrfs_assert_tree_write_locks_put(struct extent_buffer *eb)
-{
-	eb->write_locks--;
-}
-
-#else
-static void btrfs_assert_spinning_writers_get(struct extent_buffer *eb) { }
-static void btrfs_assert_spinning_writers_put(struct extent_buffer *eb) { }
-static void btrfs_assert_no_spinning_writers(struct extent_buffer *eb) { }
-static void btrfs_assert_spinning_readers_put(struct extent_buffer *eb) { }
-static void btrfs_assert_spinning_readers_get(struct extent_buffer *eb) { }
-static void btrfs_assert_tree_read_locked(struct extent_buffer *eb) { }
-static void btrfs_assert_tree_read_locks_get(struct extent_buffer *eb) { }
-static void btrfs_assert_tree_read_locks_put(struct extent_buffer *eb) { }
-static void btrfs_assert_tree_write_locks_get(struct extent_buffer *eb) { }
-static void btrfs_assert_tree_write_locks_put(struct extent_buffer *eb) { }
-#endif
-
 /*
- * Mark already held read lock as blocking. Can be nested in write lock by the
- * same thread.
+ * __btrfs_tree_read_lock - lock extent buffer for read
+ * @eb:		the eb to be locked
+ * @nest:	the nesting level to be used for lockdep
  *
- * Use when there are potentially long operations ahead so other thread waiting
- * on the lock will not actively spin but sleep instead.
- *
- * The rwlock is released and blocking reader counter is increased.
+ * This takes the read lock on the extent buffer, using the specified nesting
+ * level for lockdep purposes.
  */
-void btrfs_set_lock_blocking_read(struct extent_buffer *eb)
-{
-	trace_btrfs_set_lock_blocking_read(eb);
-	/*
-	 * No lock is required. The lock owner may change if we have a read
-	 * lock, but it won't change to or away from us. If we have the write
-	 * lock, we are the owner and it'll never change.
-	 */
-	if (eb->lock_recursed && current->pid == eb->lock_owner)
-		return;
-	btrfs_assert_tree_read_locked(eb);
-	atomic_inc(&eb->blocking_readers);
-	btrfs_assert_spinning_readers_put(eb);
-	read_unlock(&eb->lock);
-}
-
-/*
- * Mark already held write lock as blocking.
- *
- * Use when there are potentially long operations ahead so other threads
- * waiting on the lock will not actively spin but sleep instead.
- *
- * The rwlock is released and blocking writers is set.
- */
-void btrfs_set_lock_blocking_write(struct extent_buffer *eb)
-{
-	trace_btrfs_set_lock_blocking_write(eb);
-	/*
-	 * No lock is required. The lock owner may change if we have a read
-	 * lock, but it won't change to or away from us. If we have the write
-	 * lock, we are the owner and it'll never change.
-	 */
-	if (eb->lock_recursed && current->pid == eb->lock_owner)
-		return;
-	if (eb->blocking_writers == 0) {
-		btrfs_assert_spinning_writers_put(eb);
-		btrfs_assert_tree_locked(eb);
-		WRITE_ONCE(eb->blocking_writers, 1);
-		write_unlock(&eb->lock);
-	}
-}
-
-/*
- * Lock the extent buffer for read. Wait for any writers (spinning or blocking).
- * Can be nested in write lock by the same thread.
- *
- * Use when the locked section does only lightweight actions and busy waiting
- * would be cheaper than making other threads do the wait/wake loop.
- *
- * The rwlock is held upon exit.
- */
-void __btrfs_tree_read_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest,
-			    bool recurse)
+void __btrfs_tree_read_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
 {
 	u64 start_ns = 0;
 
 	if (trace_btrfs_tree_read_lock_enabled())
 		start_ns = ktime_get_ns();
-again:
-	read_lock(&eb->lock);
-	BUG_ON(eb->blocking_writers == 0 &&
-	       current->pid == eb->lock_owner);
-	if (eb->blocking_writers) {
-		if (current->pid == eb->lock_owner) {
-			/*
-			 * This extent is already write-locked by our thread.
-			 * We allow an additional read lock to be added because
-			 * it's for the same thread. btrfs_find_all_roots()
-			 * depends on this as it may be called on a partly
-			 * (write-)locked tree.
-			 */
-			WARN_ON(!recurse);
-			BUG_ON(eb->lock_recursed);
-			eb->lock_recursed = true;
-			read_unlock(&eb->lock);
-			trace_btrfs_tree_read_lock(eb, start_ns);
-			return;
-		}
-		read_unlock(&eb->lock);
-		wait_event(eb->write_lock_wq,
-			   READ_ONCE(eb->blocking_writers) == 0);
-		goto again;
-	}
-	btrfs_assert_tree_read_locks_get(eb);
-	btrfs_assert_spinning_readers_get(eb);
+
+	down_read_nested(&eb->lock, nest);
+	eb->lock_owner = current->pid;
 	trace_btrfs_tree_read_lock(eb, start_ns);
 }
 
 void btrfs_tree_read_lock(struct extent_buffer *eb)
 {
-	__btrfs_tree_read_lock(eb, BTRFS_NESTING_NORMAL, false);
+	__btrfs_tree_read_lock(eb, BTRFS_NESTING_NORMAL);
 }
 
 /*
- * Lock extent buffer for read, optimistically expecting that there are no
- * contending blocking writers. If there are, don't wait.
- *
- * Return 1 if the rwlock has been taken, 0 otherwise
- */
-int btrfs_tree_read_lock_atomic(struct extent_buffer *eb)
-{
-	if (READ_ONCE(eb->blocking_writers))
-		return 0;
-
-	read_lock(&eb->lock);
-	/* Refetch value after lock */
-	if (READ_ONCE(eb->blocking_writers)) {
-		read_unlock(&eb->lock);
-		return 0;
-	}
-	btrfs_assert_tree_read_locks_get(eb);
-	btrfs_assert_spinning_readers_get(eb);
-	trace_btrfs_tree_read_lock_atomic(eb);
-	return 1;
-}
-
-/*
- * Try-lock for read. Don't block or wait for contending writers.
+ * Try-lock for read.
  *
  * Retrun 1 if the rwlock has been taken, 0 otherwise
  */
 int btrfs_try_tree_read_lock(struct extent_buffer *eb)
 {
-	if (READ_ONCE(eb->blocking_writers))
-		return 0;
-
-	if (!read_trylock(&eb->lock))
-		return 0;
-
-	/* Refetch value after lock */
-	if (READ_ONCE(eb->blocking_writers)) {
-		read_unlock(&eb->lock);
-		return 0;
+	if (down_read_trylock(&eb->lock)) {
+		eb->lock_owner = current->pid;
+		trace_btrfs_try_tree_read_lock(eb);
+		return 1;
 	}
-	btrfs_assert_tree_read_locks_get(eb);
-	btrfs_assert_spinning_readers_get(eb);
-	trace_btrfs_try_tree_read_lock(eb);
-	return 1;
+	return 0;
 }
 
 /*
- * Try-lock for write. May block until the lock is uncontended, but does not
- * wait until it is free.
+ * Try-lock for write.
  *
  * Retrun 1 if the rwlock has been taken, 0 otherwise
  */
 int btrfs_try_tree_write_lock(struct extent_buffer *eb)
 {
-	if (READ_ONCE(eb->blocking_writers) || atomic_read(&eb->blocking_readers))
-		return 0;
-
-	write_lock(&eb->lock);
-	/* Refetch value after lock */
-	if (READ_ONCE(eb->blocking_writers) || atomic_read(&eb->blocking_readers)) {
-		write_unlock(&eb->lock);
-		return 0;
+	if (down_write_trylock(&eb->lock)) {
+		eb->lock_owner = current->pid;
+		trace_btrfs_try_tree_write_lock(eb);
+		return 1;
 	}
-	btrfs_assert_tree_write_locks_get(eb);
-	btrfs_assert_spinning_writers_get(eb);
-	eb->lock_owner = current->pid;
-	trace_btrfs_try_tree_write_lock(eb);
-	return 1;
+	return 0;
 }
 
 /*
- * Release read lock. Must be used only if the lock is in spinning mode. If
- * the read lock is nested, must pair with read lock before the write unlock.
- *
- * The rwlock is not held upon exit.
+ * Release read lock.
  */
 void btrfs_tree_read_unlock(struct extent_buffer *eb)
 {
 	trace_btrfs_tree_read_unlock(eb);
-	/*
-	 * if we're nested, we have the write lock. No new locking
-	 * is needed as long as we are the lock owner.
-	 * The write unlock will do a barrier for us, and the lock_recursed
-	 * field only matters to the lock owner.
-	 */
-	if (eb->lock_recursed && current->pid == eb->lock_owner) {
-		eb->lock_recursed = false;
-		return;
-	}
-	btrfs_assert_tree_read_locked(eb);
-	btrfs_assert_spinning_readers_put(eb);
-	btrfs_assert_tree_read_locks_put(eb);
-	read_unlock(&eb->lock);
+	eb->lock_owner = 0;
+	up_read(&eb->lock);
 }
 
 /*
- * Release read lock, previously set to blocking by a pairing call to
- * btrfs_set_lock_blocking_read(). Can be nested in write lock by the same
- * thread.
- *
- * State of rwlock is unchanged, last reader wakes waiting threads.
- */
-void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
-{
-	trace_btrfs_tree_read_unlock_blocking(eb);
-	/*
-	 * if we're nested, we have the write lock. No new locking
-	 * is needed as long as we are the lock owner.
-	 * The write unlock will do a barrier for us, and the lock_recursed
-	 * field only matters to the lock owner.
-	 */
-	if (eb->lock_recursed && current->pid == eb->lock_owner) {
-		eb->lock_recursed = false;
-		return;
-	}
-	btrfs_assert_tree_read_locked(eb);
-	WARN_ON(atomic_read(&eb->blocking_readers) == 0);
-	/* atomic_dec_and_test implies a barrier */
-	if (atomic_dec_and_test(&eb->blocking_readers))
-		cond_wake_up_nomb(&eb->read_lock_wq);
-	btrfs_assert_tree_read_locks_put(eb);
-}
-
-/*
- * Lock for write. Wait for all blocking and spinning readers and writers. This
- * starts context where reader lock could be nested by the same thread.
+ * __btrfs_tree_lock - lock eb for write
+ * @eb:		the eb to lock
+ * @nest:	the nesting to use for the lock
  *
- * The rwlock is held for write upon exit.
+ * Returns with the eb->lock write locked.
  */
 void __btrfs_tree_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
 	__acquires(&eb->lock)
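Since the blocking/spinning distinction is gone, the calling pattern collapses to a plain lock/unlock pair and heavy work is allowed anywhere inside the critical section. A minimal sketch of the post-conversion usage, using only the functions visible in this diff (eb stands for any extent buffer the caller holds a reference to):

	/* Writer side: IO and memory allocation are fine inside the section. */
	btrfs_tree_lock(eb);
	/* ... modify keys/items in eb ... */
	btrfs_tree_unlock(eb);

	/* Reader side, including the try-lock variant. */
	btrfs_tree_read_lock(eb);
	/* ... read keys/items ... */
	btrfs_tree_read_unlock(eb);

	if (btrfs_try_tree_read_lock(eb)) {
		/* ... opportunistic read ... */
		btrfs_tree_read_unlock(eb);
	}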
@@ -424,19 +109,7 @@ void __btrfs_tree_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
 	if (trace_btrfs_tree_lock_enabled())
 		start_ns = ktime_get_ns();
 
-	WARN_ON(eb->lock_owner == current->pid);
-again:
-	wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
-	wait_event(eb->write_lock_wq, READ_ONCE(eb->blocking_writers) == 0);
-	write_lock(&eb->lock);
-	/* Refetch value after lock */
-	if (atomic_read(&eb->blocking_readers) ||
-	    READ_ONCE(eb->blocking_writers)) {
-		write_unlock(&eb->lock);
-		goto again;
-	}
-	btrfs_assert_spinning_writers_get(eb);
-	btrfs_assert_tree_write_locks_get(eb);
+	down_write_nested(&eb->lock, nest);
 	eb->lock_owner = current->pid;
 	trace_btrfs_tree_lock(eb, start_ns);
 }
@@ -447,68 +120,13 @@ void btrfs_tree_lock(struct extent_buffer *eb)
 }
 
 /*
- * Release the write lock, either blocking or spinning (ie. there's no need
- * for an explicit blocking unlock, like btrfs_tree_read_unlock_blocking).
- * This also ends the context for nesting, the read lock must have been
- * released already.
- *
- * Tasks blocked and waiting are woken, rwlock is not held upon exit.
+ * Release the write lock.
  */
 void btrfs_tree_unlock(struct extent_buffer *eb)
 {
-	/*
-	 * This is read both locked and unlocked but always by the same thread
-	 * that already owns the lock so we don't need to use READ_ONCE
-	 */
-	int blockers = eb->blocking_writers;
-
-	BUG_ON(blockers > 1);
-
-	btrfs_assert_tree_locked(eb);
 	trace_btrfs_tree_unlock(eb);
 	eb->lock_owner = 0;
-	btrfs_assert_tree_write_locks_put(eb);
-
-	if (blockers) {
-		btrfs_assert_no_spinning_writers(eb);
-		/* Unlocked write */
-		WRITE_ONCE(eb->blocking_writers, 0);
-		/*
-		 * We need to order modifying blocking_writers above with
-		 * actually waking up the sleepers to ensure they see the
-		 * updated value of blocking_writers
-		 */
-		cond_wake_up(&eb->write_lock_wq);
-	} else {
-		btrfs_assert_spinning_writers_put(eb);
-		write_unlock(&eb->lock);
-	}
-}
-
-/*
- * Set all locked nodes in the path to blocking locks. This should be done
- * before scheduling
- */
-void btrfs_set_path_blocking(struct btrfs_path *p)
-{
-	int i;
-
-	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
-		if (!p->nodes[i] || !p->locks[i])
-			continue;
-		/*
-		 * If we currently have a spinning reader or writer lock this
-		 * will bump the count of blocking holders and drop the
-		 * spinlock.
-		 */
-		if (p->locks[i] == BTRFS_READ_LOCK) {
-			btrfs_set_lock_blocking_read(p->nodes[i]);
-			p->locks[i] = BTRFS_READ_LOCK_BLOCKING;
-		} else if (p->locks[i] == BTRFS_WRITE_LOCK) {
-			btrfs_set_lock_blocking_write(p->nodes[i]);
-			p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;
-		}
-	}
+	up_write(&eb->lock);
 }
 
 /*
@ -564,14 +182,13 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
|
||||||
*
|
*
|
||||||
* Return: root extent buffer with read lock held
|
* Return: root extent buffer with read lock held
|
||||||
*/
|
*/
|
||||||
struct extent_buffer *__btrfs_read_lock_root_node(struct btrfs_root *root,
|
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
|
||||||
bool recurse)
|
|
||||||
{
|
{
|
||||||
struct extent_buffer *eb;
|
struct extent_buffer *eb;
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
eb = btrfs_root_node(root);
|
eb = btrfs_root_node(root);
|
||||||
__btrfs_tree_read_lock(eb, BTRFS_NESTING_NORMAL, recurse);
|
btrfs_tree_read_lock(eb);
|
||||||
if (eb == root->node)
|
if (eb == root->node)
|
||||||
break;
|
break;
|
||||||
btrfs_tree_read_unlock(eb);
|
btrfs_tree_read_unlock(eb);
|
||||||
|
|
|
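
The net effect of the hunks above is that the extent buffer lock is now a plain
rw_semaphore, so the blocking/spinning distinction disappears from callers
entirely. A minimal sketch of the resulting usage, assuming the usual btrfs
headers are in scope (walk_one_node is an illustrative name, not a kernel
function):

    /* Sketch only: the read side maps onto down_read()/up_read() underneath. */
    static void walk_one_node(struct extent_buffer *eb)
    {
        btrfs_tree_read_lock(eb);       /* down_read(&eb->lock) */
        /* ... inspect keys and items in eb ... */
        btrfs_tree_read_unlock(eb);     /* up_read(&eb->lock) */
    }

Because rwsems already implement adaptive spinning before sleeping, the
hand-rolled wait_event()/write_lock() retry loop that used to live in
__btrfs_tree_lock() has no replacement; it is simply gone.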
fs/btrfs/locking.h:

@@ -13,8 +13,6 @@

 #define BTRFS_WRITE_LOCK 1
 #define BTRFS_READ_LOCK 2
-#define BTRFS_WRITE_LOCK_BLOCKING 3
-#define BTRFS_READ_LOCK_BLOCKING 4

 /*
  * We are limited in number of subclasses by MAX_LOCKDEP_SUBCLASSES, which at

@@ -89,42 +87,28 @@ void __btrfs_tree_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest);
 void btrfs_tree_lock(struct extent_buffer *eb);
 void btrfs_tree_unlock(struct extent_buffer *eb);

-void __btrfs_tree_read_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest,
-                            bool recurse);
+void __btrfs_tree_read_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest);
 void btrfs_tree_read_lock(struct extent_buffer *eb);
 void btrfs_tree_read_unlock(struct extent_buffer *eb);
-void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb);
-void btrfs_set_lock_blocking_read(struct extent_buffer *eb);
-void btrfs_set_lock_blocking_write(struct extent_buffer *eb);
 int btrfs_try_tree_read_lock(struct extent_buffer *eb);
 int btrfs_try_tree_write_lock(struct extent_buffer *eb);
-int btrfs_tree_read_lock_atomic(struct extent_buffer *eb);
 struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
-struct extent_buffer *__btrfs_read_lock_root_node(struct btrfs_root *root,
-                                                  bool recurse);
-
-static inline struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
-{
-    return __btrfs_read_lock_root_node(root, false);
-}
+struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root);

 #ifdef CONFIG_BTRFS_DEBUG
 static inline void btrfs_assert_tree_locked(struct extent_buffer *eb) {
-    BUG_ON(!eb->write_locks);
+    lockdep_assert_held(&eb->lock);
 }
 #else
 static inline void btrfs_assert_tree_locked(struct extent_buffer *eb) { }
 #endif

-void btrfs_set_path_blocking(struct btrfs_path *p);
 void btrfs_unlock_up_safe(struct btrfs_path *path, int level);

 static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw)
 {
-    if (rw == BTRFS_WRITE_LOCK || rw == BTRFS_WRITE_LOCK_BLOCKING)
+    if (rw == BTRFS_WRITE_LOCK)
         btrfs_tree_unlock(eb);
-    else if (rw == BTRFS_READ_LOCK_BLOCKING)
-        btrfs_tree_read_unlock_blocking(eb);
     else if (rw == BTRFS_READ_LOCK)
         btrfs_tree_read_unlock(eb);
     else
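
With the BLOCKING variants removed, a path's locks[] slot can only hold
BTRFS_WRITE_LOCK, BTRFS_READ_LOCK or 0, and btrfs_tree_unlock_rw() collapses to
the two-way branch above. A hedged sketch of how one level of a path gets
released under this scheme (drop_path_level is a hypothetical wrapper, written
here only to show the bookkeeping; the kernel does the equivalent inline):

    static void drop_path_level(struct btrfs_path *path, int level)
    {
        if (!path->locks[level])
            return;
        btrfs_tree_unlock_rw(path->nodes[level], path->locks[level]);
        path->locks[level] = 0;
    }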
fs/btrfs/ordered-data.c:

@@ -854,51 +854,6 @@ out:
     return entry;
 }

-/*
- * search the ordered extents for one corresponding to 'offset' and
- * try to find a checksum.  This is used because we allow pages to
- * be reclaimed before their checksum is actually put into the btree
- */
-int btrfs_find_ordered_sum(struct btrfs_inode *inode, u64 offset,
-                           u64 disk_bytenr, u8 *sum, int len)
-{
-    struct btrfs_fs_info *fs_info = inode->root->fs_info;
-    struct btrfs_ordered_sum *ordered_sum;
-    struct btrfs_ordered_extent *ordered;
-    struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
-    unsigned long num_sectors;
-    unsigned long i;
-    u32 sectorsize = btrfs_inode_sectorsize(inode);
-    const u8 blocksize_bits = inode->vfs_inode.i_sb->s_blocksize_bits;
-    const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
-    int index = 0;
-
-    ordered = btrfs_lookup_ordered_extent(inode, offset);
-    if (!ordered)
-        return 0;
-
-    spin_lock_irq(&tree->lock);
-    list_for_each_entry_reverse(ordered_sum, &ordered->list, list) {
-        if (disk_bytenr >= ordered_sum->bytenr &&
-            disk_bytenr < ordered_sum->bytenr + ordered_sum->len) {
-            i = (disk_bytenr - ordered_sum->bytenr) >> blocksize_bits;
-            num_sectors = ordered_sum->len >> blocksize_bits;
-            num_sectors = min_t(int, len - index, num_sectors - i);
-            memcpy(sum + index, ordered_sum->sums + i * csum_size,
-                   num_sectors * csum_size);
-
-            index += (int)num_sectors * csum_size;
-            if (index == len)
-                goto out;
-            disk_bytenr += num_sectors * sectorsize;
-        }
-    }
-out:
-    spin_unlock_irq(&tree->lock);
-    btrfs_put_ordered_extent(ordered);
-    return index;
-}
-
 /*
  * btrfs_flush_ordered_range - Lock the passed range and ensures all pending
  * ordered extents in it are run to completion.
fs/btrfs/ordered-data.h:

@@ -137,9 +137,8 @@ static inline int btrfs_ordered_sum_size(struct btrfs_fs_info *fs_info,
                                          unsigned long bytes)
 {
     int num_sectors = (int)DIV_ROUND_UP(bytes, fs_info->sectorsize);
-    int csum_size = btrfs_super_csum_size(fs_info->super_copy);

-    return sizeof(struct btrfs_ordered_sum) + num_sectors * csum_size;
+    return sizeof(struct btrfs_ordered_sum) + num_sectors * fs_info->csum_size;
 }

 static inline void

@@ -184,8 +183,6 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(
             u64 len);
 void btrfs_get_ordered_extents_for_logging(struct btrfs_inode *inode,
                                            struct list_head *list);
-int btrfs_find_ordered_sum(struct btrfs_inode *inode, u64 offset,
-                           u64 disk_bytenr, u8 *sum, int len);
 u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
                                const u64 range_start, const u64 range_len);
 void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
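
These two hunks lean on the checksum size now being cached in btrfs_fs_info
(fs_info->csum_size) rather than re-read from the superblock at every call,
while btrfs_find_ordered_sum() becomes dead code once checksum lookup handles
out-of-order bvecs directly. The sizing rule itself does not change: one
checksum of csum_size bytes per sector, plus the struct header. Restated
informally (this is a paraphrase of the inline helper above, not new kernel
code):

    /* bytes of memory needed to hold checksums for a byte range */
    size = sizeof(struct btrfs_ordered_sum) +
           DIV_ROUND_UP(bytes, fs_info->sectorsize) * fs_info->csum_size;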
fs/btrfs/print-tree.c:

@@ -177,8 +177,7 @@ static void print_uuid_item(struct extent_buffer *l, unsigned long offset,
         __le64 subvol_id;

         read_extent_buffer(l, &subvol_id, offset, sizeof(subvol_id));
-        pr_info("\t\tsubvol_id %llu\n",
-                (unsigned long long)le64_to_cpu(subvol_id));
+        pr_info("\t\tsubvol_id %llu\n", le64_to_cpu(subvol_id));
         item_size -= sizeof(u64);
         offset += sizeof(u64);
     }

@@ -191,15 +190,8 @@
 static void print_eb_refs_lock(struct extent_buffer *eb)
 {
 #ifdef CONFIG_BTRFS_DEBUG
-    btrfs_info(eb->fs_info,
-"refs %u lock (w:%d r:%d bw:%d br:%d sw:%d sr:%d) lock_owner %u current %u",
-               atomic_read(&eb->refs), eb->write_locks,
-               atomic_read(&eb->read_locks),
-               eb->blocking_writers,
-               atomic_read(&eb->blocking_readers),
-               eb->spinning_writers,
-               atomic_read(&eb->spinning_readers),
-               eb->lock_owner, current->pid);
+    btrfs_info(eb->fs_info, "refs %u lock_owner %u current %u",
+               atomic_read(&eb->refs), eb->lock_owner, current->pid);
 #endif
 }

@@ -398,6 +390,7 @@ void btrfs_print_tree(struct extent_buffer *c, bool follow)

         btrfs_node_key_to_cpu(c, &first_key, i);
         next = read_tree_block(fs_info, btrfs_node_blockptr(c, i),
+                               btrfs_header_owner(c),
                                btrfs_node_ptr_generation(c, i),
                                level - 1, &first_key);
         if (IS_ERR(next)) {
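
The dropped (unsigned long long) casts are safe because in the kernel u64 is
typedef'd to unsigned long long on every architecture, so a u64 matches the
%llu format specifier directly. A minimal illustration (the variable name is
made up):

    u64 subvol_id = le64_to_cpu(disk_id);
    pr_info("subvol_id %llu\n", subvol_id);   /* no cast required */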
fs/btrfs/qgroup.c:

@@ -894,8 +894,6 @@ static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
     if (!path)
         return -ENOMEM;

-    path->leave_spinning = 1;
-
     key.objectid = 0;
     key.offset = 0;
     key.type = 0;

@@ -1944,34 +1942,22 @@ static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans,
     struct btrfs_key dst_key;

     if (src_path->nodes[cur_level] == NULL) {
-        struct btrfs_key first_key;
         struct extent_buffer *eb;
         int parent_slot;
-        u64 child_gen;
-        u64 child_bytenr;

         eb = src_path->nodes[cur_level + 1];
         parent_slot = src_path->slots[cur_level + 1];
-        child_bytenr = btrfs_node_blockptr(eb, parent_slot);
-        child_gen = btrfs_node_ptr_generation(eb, parent_slot);
-        btrfs_node_key_to_cpu(eb, &first_key, parent_slot);

-        eb = read_tree_block(fs_info, child_bytenr, child_gen,
-                             cur_level, &first_key);
+        eb = btrfs_read_node_slot(eb, parent_slot);
         if (IS_ERR(eb)) {
             ret = PTR_ERR(eb);
             goto out;
-        } else if (!extent_buffer_uptodate(eb)) {
-            free_extent_buffer(eb);
-            ret = -EIO;
-            goto out;
         }

         src_path->nodes[cur_level] = eb;

         btrfs_tree_read_lock(eb);
-        btrfs_set_lock_blocking_read(eb);
-        src_path->locks[cur_level] = BTRFS_READ_LOCK_BLOCKING;
+        src_path->locks[cur_level] = BTRFS_READ_LOCK;
     }

     src_path->slots[cur_level] = dst_path->slots[cur_level];

@@ -2066,10 +2052,8 @@ static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle* trans,

     /* Read the tree block if needed */
     if (dst_path->nodes[cur_level] == NULL) {
-        struct btrfs_key first_key;
         int parent_slot;
         u64 child_gen;
-        u64 child_bytenr;

         /*
          * dst_path->nodes[root_level] must be initialized before

@@ -2088,31 +2072,23 @@ static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle* trans,
          */
         eb = dst_path->nodes[cur_level + 1];
         parent_slot = dst_path->slots[cur_level + 1];
-        child_bytenr = btrfs_node_blockptr(eb, parent_slot);
         child_gen = btrfs_node_ptr_generation(eb, parent_slot);
-        btrfs_node_key_to_cpu(eb, &first_key, parent_slot);

         /* This node is old, no need to trace */
         if (child_gen < last_snapshot)
             goto out;

-        eb = read_tree_block(fs_info, child_bytenr, child_gen,
-                             cur_level, &first_key);
+        eb = btrfs_read_node_slot(eb, parent_slot);
         if (IS_ERR(eb)) {
             ret = PTR_ERR(eb);
             goto out;
-        } else if (!extent_buffer_uptodate(eb)) {
-            free_extent_buffer(eb);
-            ret = -EIO;
-            goto out;
         }

         dst_path->nodes[cur_level] = eb;
         dst_path->slots[cur_level] = 0;

         btrfs_tree_read_lock(eb);
-        btrfs_set_lock_blocking_read(eb);
-        dst_path->locks[cur_level] = BTRFS_READ_LOCK_BLOCKING;
+        dst_path->locks[cur_level] = BTRFS_READ_LOCK;
         need_cleanup = true;
     }

@@ -2256,38 +2232,28 @@ walk_down:
     level = root_level;
     while (level >= 0) {
         if (path->nodes[level] == NULL) {
-            struct btrfs_key first_key;
             int parent_slot;
-            u64 child_gen;
             u64 child_bytenr;

             /*
-             * We need to get child blockptr/gen from parent before
-             * we can read it.
+             * We need to get child blockptr from parent before we
+             * can read it.
              */
             eb = path->nodes[level + 1];
             parent_slot = path->slots[level + 1];
             child_bytenr = btrfs_node_blockptr(eb, parent_slot);
-            child_gen = btrfs_node_ptr_generation(eb, parent_slot);
-            btrfs_node_key_to_cpu(eb, &first_key, parent_slot);

-            eb = read_tree_block(fs_info, child_bytenr, child_gen,
-                                 level, &first_key);
+            eb = btrfs_read_node_slot(eb, parent_slot);
             if (IS_ERR(eb)) {
                 ret = PTR_ERR(eb);
                 goto out;
-            } else if (!extent_buffer_uptodate(eb)) {
-                free_extent_buffer(eb);
-                ret = -EIO;
-                goto out;
             }

             path->nodes[level] = eb;
             path->slots[level] = 0;

             btrfs_tree_read_lock(eb);
-            btrfs_set_lock_blocking_read(eb);
-            path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
+            path->locks[level] = BTRFS_READ_LOCK;

             ret = btrfs_qgroup_trace_extent(trans, child_bytenr,
                                             fs_info->nodesize,

@@ -4242,7 +4208,7 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
     spin_unlock(&blocks->lock);

     /* Read out reloc subtree root */
-    reloc_eb = read_tree_block(fs_info, block->reloc_bytenr,
+    reloc_eb = read_tree_block(fs_info, block->reloc_bytenr, 0,
                                block->reloc_generation, block->level,
                                &block->first_key);
     if (IS_ERR(reloc_eb)) {
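
Each of these qgroup call sites previously open-coded the same sequence: fetch
the child blockptr, generation and first key from the parent, call
read_tree_block(), then check extent_buffer_uptodate() and free the buffer on
failure. btrfs_read_node_slot() folds all of that behind a single call that
returns an ERR_PTR on any failure, so every caller shrinks to:

    eb = btrfs_read_node_slot(parent, slot);
    if (IS_ERR(eb)) {
        ret = PTR_ERR(eb);      /* covers both read errors and stale buffers */
        goto out;
    }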
fs/btrfs/raid56.c:

@@ -1097,7 +1097,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,

     /* see if we can add this page onto our existing bio */
     if (last) {
-        u64 last_end = (u64)last->bi_iter.bi_sector << 9;
+        u64 last_end = last->bi_iter.bi_sector << 9;
         last_end += last->bi_iter.bi_size;

         /*

@@ -1163,7 +1163,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio)
     struct bvec_iter iter;
     int i = 0;

-    start = (u64)bio->bi_iter.bi_sector << 9;
+    start = bio->bi_iter.bi_sector << 9;
     stripe_offset = start - rbio->bbio->raid_map[0];
     page_index = stripe_offset >> PAGE_SHIFT;

@@ -1374,7 +1374,7 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
 static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
                                    struct bio *bio)
 {
-    u64 logical = (u64)bio->bi_iter.bi_sector << 9;
+    u64 logical = bio->bi_iter.bi_sector << 9;
     int i;

     for (i = 0; i < rbio->nr_data; i++) {

@@ -2150,7 +2150,7 @@ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,
     if (rbio->faila == -1) {
         btrfs_warn(fs_info,
"%s could not find the bad stripe in raid56 so that we cannot recover any more (bio has logical %llu len %llu, bbio has map_type %llu)",
                    __func__, bio->bi_iter.bi_sector << 9,
                    (u64)bio->bi_iter.bi_size, bbio->map_type);
         if (generic_io)
             btrfs_put_bbio(bbio);
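
These raid56.c hunks only remove (u64) casts in front of bi_sector << 9.
bi_iter.bi_sector is a sector_t, which has been unconditionally 64-bit in the
kernel for several releases, so the shift already happens in 64-bit arithmetic
and the widening cast is dead weight:

    u64 logical = bio->bi_iter.bi_sector << 9;  /* sector_t is 64-bit, no cast */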
fs/btrfs/reada.c:

@@ -52,6 +52,7 @@ struct reada_extctl {

 struct reada_extent {
     u64                 logical;
+    u64                 owner_root;
     struct btrfs_key    top;
     struct list_head    extctl;
     int                 refcnt;

@@ -59,6 +60,7 @@ struct reada_extent {
     struct reada_zone   *zones[BTRFS_MAX_MIRRORS];
     int                 nzones;
     int                 scheduled;
+    int                 level;
 };

 struct reada_zone {

@@ -87,7 +89,8 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info);
 static void __reada_start_machine(struct btrfs_fs_info *fs_info);

 static int reada_add_block(struct reada_control *rc, u64 logical,
-                           struct btrfs_key *top, u64 generation);
+                           struct btrfs_key *top, u64 owner_root,
+                           u64 generation, int level);

 /* recurses */
 /* in case of err, eb might be NULL */

@@ -165,7 +168,9 @@ static void __readahead_hook(struct btrfs_fs_info *fs_info,
         if (rec->generation == generation &&
             btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 &&
             btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0)
-            reada_add_block(rc, bytenr, &next_key, n_gen);
+            reada_add_block(rc, bytenr, &next_key,
+                            btrfs_header_owner(eb), n_gen,
+                            btrfs_header_level(eb) - 1);
     }
 }

@@ -298,7 +303,8 @@ static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical,

 static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
                                               u64 logical,
-                                              struct btrfs_key *top)
+                                              struct btrfs_key *top,
+                                              u64 owner_root, int level)
 {
     int ret;
     struct reada_extent *re = NULL;

@@ -331,6 +337,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
     INIT_LIST_HEAD(&re->extctl);
     spin_lock_init(&re->lock);
     re->refcnt = 1;
+    re->owner_root = owner_root;
+    re->level = level;

     /*
      * map block

@@ -531,6 +539,8 @@ static void reada_zone_release(struct kref *kref)
 {
     struct reada_zone *zone = container_of(kref, struct reada_zone, refcnt);

+    lockdep_assert_held(&zone->device->fs_info->reada_lock);
+
     radix_tree_delete(&zone->device->reada_zones,
                       zone->end >> PAGE_SHIFT);

@@ -546,14 +556,15 @@ static void reada_control_release(struct kref *kref)
 }

 static int reada_add_block(struct reada_control *rc, u64 logical,
-                           struct btrfs_key *top, u64 generation)
+                           struct btrfs_key *top, u64 owner_root,
+                           u64 generation, int level)
 {
     struct btrfs_fs_info *fs_info = rc->fs_info;
     struct reada_extent *re;
     struct reada_extctl *rec;

     /* takes one ref */
-    re = reada_find_extent(fs_info, logical, top);
+    re = reada_find_extent(fs_info, logical, top, owner_root, level);
     if (!re)
         return -1;

@@ -645,12 +656,13 @@ static int reada_pick_zone(struct btrfs_device *dev)
 }

 static int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
-                                    int mirror_num, struct extent_buffer **eb)
+                                    u64 owner_root, int level, int mirror_num,
+                                    struct extent_buffer **eb)
 {
     struct extent_buffer *buf = NULL;
     int ret;

-    buf = btrfs_find_create_tree_block(fs_info, bytenr);
+    buf = btrfs_find_create_tree_block(fs_info, bytenr, owner_root, level);
     if (IS_ERR(buf))
         return 0;

@@ -738,7 +750,8 @@ static int reada_start_machine_dev(struct btrfs_device *dev)
     logical = re->logical;

     atomic_inc(&dev->reada_in_flight);
-    ret = reada_tree_block_flagged(fs_info, logical, mirror_num, &eb);
+    ret = reada_tree_block_flagged(fs_info, logical, re->owner_root,
+                                   re->level, mirror_num, &eb);
     if (ret)
         __readahead_hook(fs_info, re, NULL, ret);
     else if (eb)

@@ -945,6 +958,7 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
     u64 start;
     u64 generation;
     int ret;
+    int level;
     struct extent_buffer *node;
     static struct btrfs_key max_key = {
         .objectid = (u64)-1,

@@ -967,9 +981,11 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
     node = btrfs_root_node(root);
     start = node->start;
     generation = btrfs_header_generation(node);
+    level = btrfs_header_level(node);
     free_extent_buffer(node);

-    ret = reada_add_block(rc, start, &max_key, generation);
+    ret = reada_add_block(rc, start, &max_key, root->root_key.objectid,
+                          generation, level);
     if (ret) {
         kfree(rc);
         return ERR_PTR(ret);
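
The readahead changes are plumbing: every reada_extent now remembers which root
owns the block and at which level it sits, so the eventual read can create the
extent buffer with the correct owner and level. Condensed from the hunks above,
the data flows like this (a sketch of the two ends of the chain, not a verbatim
excerpt):

    /* captured when the block is queued ... */
    re->owner_root = owner_root;
    re->level = level;

    /* ... and replayed when the IO is finally issued */
    buf = btrfs_find_create_tree_block(fs_info, bytenr,
                                       re->owner_root, re->level);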
fs/btrfs/ref-verify.c:

@@ -551,34 +551,19 @@ static int process_leaf(struct btrfs_root *root,
 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
                           int level, u64 *bytenr, u64 *num_bytes)
 {
-    struct btrfs_fs_info *fs_info = root->fs_info;
     struct extent_buffer *eb;
-    u64 block_bytenr, gen;
     int ret = 0;

     while (level >= 0) {
         if (level) {
-            struct btrfs_key first_key;
-
-            block_bytenr = btrfs_node_blockptr(path->nodes[level],
-                                               path->slots[level]);
-            gen = btrfs_node_ptr_generation(path->nodes[level],
-                                            path->slots[level]);
-            btrfs_node_key_to_cpu(path->nodes[level], &first_key,
-                                  path->slots[level]);
-            eb = read_tree_block(fs_info, block_bytenr, gen,
-                                 level - 1, &first_key);
+            eb = btrfs_read_node_slot(path->nodes[level],
+                                      path->slots[level]);
             if (IS_ERR(eb))
                 return PTR_ERR(eb);
-            if (!extent_buffer_uptodate(eb)) {
-                free_extent_buffer(eb);
-                return -EIO;
-            }
             btrfs_tree_read_lock(eb);
-            btrfs_set_lock_blocking_read(eb);
             path->nodes[level-1] = eb;
             path->slots[level-1] = 0;
-            path->locks[level-1] = BTRFS_READ_LOCK_BLOCKING;
+            path->locks[level-1] = BTRFS_READ_LOCK;
         } else {
             ret = process_leaf(root, path, bytenr, num_bytes);
             if (ret)

@@ -799,8 +784,7 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
     if (!be) {
         btrfs_err(fs_info,
"trying to do action %d to bytenr %llu num_bytes %llu but there is no existing entry!",
-                  action, (unsigned long long)bytenr,
-                  (unsigned long long)num_bytes);
+                  action, bytenr, num_bytes);
         dump_ref_action(fs_info, ra);
         kfree(ref);
         kfree(ra);

@@ -1001,11 +985,10 @@ int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
         return -ENOMEM;

     eb = btrfs_read_lock_root_node(fs_info->extent_root);
-    btrfs_set_lock_blocking_read(eb);
     level = btrfs_header_level(eb);
     path->nodes[level] = eb;
     path->slots[level] = 0;
-    path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
+    path->locks[level] = BTRFS_READ_LOCK;

     while (1) {
         /*
fs/btrfs/reflink.c:

@@ -31,10 +31,10 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
     endoff = destoff + olen;
     if (endoff > inode->i_size) {
         i_size_write(inode, endoff);
-        btrfs_inode_safe_disk_i_size_write(inode, 0);
+        btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), 0);
     }

-    ret = btrfs_update_inode(trans, root, inode);
+    ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
     if (ret) {
         btrfs_abort_transaction(trans, ret);
         btrfs_end_transaction(trans);

@@ -163,6 +163,7 @@ static int clone_copy_inline_extent(struct inode *dst,
     const u64 aligned_end = ALIGN(new_key->offset + datal,
                                   fs_info->sectorsize);
     struct btrfs_trans_handle *trans = NULL;
+    struct btrfs_drop_extents_args drop_args = { 0 };
     int ret;
     struct btrfs_key key;

@@ -252,7 +253,11 @@ copy_inline_extent:
         trans = NULL;
         goto out;
     }
-    ret = btrfs_drop_extents(trans, root, dst, drop_start, aligned_end, 1);
+    drop_args.path = path;
+    drop_args.start = drop_start;
+    drop_args.end = aligned_end;
+    drop_args.drop_cache = true;
+    ret = btrfs_drop_extents(trans, root, BTRFS_I(dst), &drop_args);
     if (ret)
         goto out;
     ret = btrfs_insert_empty_item(trans, root, path, new_key, size);

@@ -263,7 +268,7 @@ copy_inline_extent:
                 btrfs_item_ptr_offset(path->nodes[0],
                                       path->slots[0]),
                 size);
-    inode_add_bytes(dst, datal);
+    btrfs_update_inode_bytes(BTRFS_I(dst), datal, drop_args.bytes_found);
     set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(dst)->runtime_flags);
     ret = btrfs_inode_set_file_extent_range(BTRFS_I(dst), 0, aligned_end);
 out:

@@ -347,7 +352,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
     u64 drop_start;

     /* Note the key will change type as we walk through the tree */
-    path->leave_spinning = 1;
     ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path,
                             0, 0);
     if (ret < 0)

@@ -417,7 +421,6 @@ process_slot:
                                size);

             btrfs_release_path(path);
-            path->leave_spinning = 0;

             memcpy(&new_key, &key, sizeof(new_key));
             new_key.objectid = btrfs_ino(BTRFS_I(inode));

@@ -533,7 +536,6 @@ process_slot:
          * mixing buffered and direct IO writes against this file.
          */
         btrfs_release_path(path);
-        path->leave_spinning = 0;

         ret = btrfs_replace_file_extents(inode, path, last_dest_end,
                 destoff + len - 1, NULL, &trans);

@@ -652,7 +654,7 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
     if (destoff > inode->i_size) {
         const u64 wb_start = ALIGN_DOWN(inode->i_size, bs);

-        ret = btrfs_cont_expand(inode, inode->i_size, destoff);
+        ret = btrfs_cont_expand(BTRFS_I(inode), inode->i_size, destoff);
         if (ret)
             return ret;
         /*
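
Two themes in the reflink.c hunks: VFS inodes are converted to btrfs_inode at
the call boundary (the btrfs_inode conversions mentioned in the summary), and
btrfs_drop_extents() now takes a single btrfs_drop_extents_args instead of a
list of positional parameters, reporting back how many bytes of existing
extents it dropped. The shape of a call, taken directly from the inline-extent
clone path above:

    struct btrfs_drop_extents_args drop_args = { 0 };

    drop_args.path = path;          /* reuse the caller's path */
    drop_args.start = drop_start;
    drop_args.end = aligned_end;
    drop_args.drop_cache = true;
    ret = btrfs_drop_extents(trans, root, BTRFS_I(dst), &drop_args);
    if (!ret)
        btrfs_update_inode_bytes(BTRFS_I(dst), datal, drop_args.bytes_found);

The bytes_found output is what lets the caller update the inode byte counter
atomically instead of calling inode_add_bytes() with a possibly stale value.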
fs/btrfs/relocation.c:

@@ -18,7 +18,6 @@
 #include "btrfs_inode.h"
 #include "async-thread.h"
 #include "free-space-cache.h"
-#include "inode-map.h"
 #include "qgroup.h"
 #include "print-tree.h"
 #include "delalloc-space.h"

@@ -783,7 +782,7 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
         btrfs_set_root_refs(root_item, 0);
         memset(&root_item->drop_progress, 0,
                sizeof(struct btrfs_disk_key));
-        root_item->drop_level = 0;
+        btrfs_set_root_drop_level(root_item, 0);
     }

     btrfs_tree_unlock(eb);

@@ -1196,7 +1195,6 @@ again:
     btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot);

     eb = btrfs_lock_root_node(dest);
-    btrfs_set_lock_blocking_write(eb);
     level = btrfs_header_level(eb);

     if (level < lowest_level) {

@@ -1210,7 +1208,6 @@ again:
                               BTRFS_NESTING_COW);
         BUG_ON(ret);
     }
-    btrfs_set_lock_blocking_write(eb);

     if (next_key) {
         next_key->objectid = (u64)-1;

@@ -1220,8 +1217,6 @@ again:

     parent = eb;
     while (1) {
-        struct btrfs_key first_key;
-
         level = btrfs_header_level(parent);
         BUG_ON(level < lowest_level);

@@ -1237,7 +1232,6 @@ again:
         old_bytenr = btrfs_node_blockptr(parent, slot);
         blocksize = fs_info->nodesize;
         old_ptr_gen = btrfs_node_ptr_generation(parent, slot);
-        btrfs_node_key_to_cpu(parent, &first_key, slot);

         if (level <= max_level) {
             eb = path->nodes[level];

@@ -1262,15 +1256,10 @@ again:
                 break;
             }

-            eb = read_tree_block(fs_info, old_bytenr, old_ptr_gen,
-                                 level - 1, &first_key);
+            eb = btrfs_read_node_slot(parent, slot);
             if (IS_ERR(eb)) {
                 ret = PTR_ERR(eb);
                 break;
-            } else if (!extent_buffer_uptodate(eb)) {
-                ret = -EIO;
-                free_extent_buffer(eb);
-                break;
             }
             btrfs_tree_lock(eb);
             if (cow) {

@@ -1279,7 +1268,6 @@ again:
                                       BTRFS_NESTING_COW);
                 BUG_ON(ret);
             }
-            btrfs_set_lock_blocking_write(eb);

             btrfs_tree_unlock(parent);
             free_extent_buffer(parent);

@@ -1418,10 +1406,8 @@ static noinline_for_stack
 int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
                          int *level)
 {
-    struct btrfs_fs_info *fs_info = root->fs_info;
     struct extent_buffer *eb = NULL;
     int i;
-    u64 bytenr;
     u64 ptr_gen = 0;
     u64 last_snapshot;
     u32 nritems;

@@ -1429,8 +1415,6 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
     last_snapshot = btrfs_root_last_snapshot(&root->root_item);

     for (i = *level; i > 0; i--) {
-        struct btrfs_key first_key;
-
         eb = path->nodes[i];
         nritems = btrfs_header_nritems(eb);
         while (path->slots[i] < nritems) {

@@ -1450,16 +1434,9 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
             return 0;
         }

-        bytenr = btrfs_node_blockptr(eb, path->slots[i]);
-        btrfs_node_key_to_cpu(eb, &first_key, path->slots[i]);
-        eb = read_tree_block(fs_info, bytenr, ptr_gen, i - 1,
-                             &first_key);
-        if (IS_ERR(eb)) {
+        eb = btrfs_read_node_slot(eb, path->slots[i]);
+        if (IS_ERR(eb))
             return PTR_ERR(eb);
-        } else if (!extent_buffer_uptodate(eb)) {
-            free_extent_buffer(eb);
-            return -EIO;
-        }
         BUG_ON(btrfs_header_level(eb) != i - 1);
         path->nodes[i - 1] = eb;
         path->slots[i - 1] = 0;

@@ -1575,7 +1552,7 @@ static void insert_dirty_subvol(struct btrfs_trans_handle *trans,
     reloc_root_item = &reloc_root->root_item;
     memset(&reloc_root_item->drop_progress, 0,
            sizeof(reloc_root_item->drop_progress));
-    reloc_root_item->drop_level = 0;
+    btrfs_set_root_drop_level(reloc_root_item, 0);
     btrfs_set_root_refs(reloc_root_item, 0);
     btrfs_update_reloc_root(trans, root);

@@ -1652,8 +1629,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
     int level;
     int max_level;
     int replaced = 0;
-    int ret;
-    int err = 0;
+    int ret = 0;
     u32 min_reserved;

     path = btrfs_alloc_path();

@@ -1672,7 +1648,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
     } else {
         btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);

-        level = root_item->drop_level;
+        level = btrfs_root_drop_level(root_item);
         BUG_ON(level == 0);
         path->lowest_level = level;
         ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);

@@ -1704,13 +1680,11 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
     while (1) {
         ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved,
                                      BTRFS_RESERVE_FLUSH_LIMIT);
-        if (ret) {
-            err = ret;
+        if (ret)
             goto out;
-        }
         trans = btrfs_start_transaction(root, 0);
         if (IS_ERR(trans)) {
-            err = PTR_ERR(trans);
+            ret = PTR_ERR(trans);
             trans = NULL;
             goto out;
         }

@@ -1732,10 +1706,8 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
         max_level = level;

         ret = walk_down_reloc_tree(reloc_root, path, &level);
-        if (ret < 0) {
-            err = ret;
+        if (ret < 0)
             goto out;
-        }
         if (ret > 0)
             break;

@@ -1746,11 +1718,8 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
             ret = replace_path(trans, rc, root, reloc_root, path,
                                &next_key, level, max_level);
         }
-        if (ret < 0) {
-            err = ret;
+        if (ret < 0)
             goto out;
-        }
-
         if (ret > 0) {
             level = ret;
             btrfs_node_key_to_cpu(path->nodes[level], &key,

@@ -1769,7 +1738,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
          */
         btrfs_node_key(path->nodes[level], &root_item->drop_progress,
                        path->slots[level]);
-        root_item->drop_level = level;
+        btrfs_set_root_drop_level(root_item, level);

         btrfs_end_transaction_throttle(trans);
         trans = NULL;

@@ -1789,12 +1758,10 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
                               BTRFS_NESTING_COW);
         btrfs_tree_unlock(leaf);
         free_extent_buffer(leaf);
-        if (ret < 0)
-            err = ret;
 out:
     btrfs_free_path(path);

-    if (err == 0)
+    if (ret == 0)
         insert_dirty_subvol(trans, rc, root);

     if (trans)

@@ -1805,7 +1772,7 @@ out:
     if (replaced && rc->stage == UPDATE_DATA_PTRS)
         invalidate_extent_cache(root, &key, &next_key);

-    return err;
+    return ret;
 }

 static noinline_for_stack

@@ -2205,7 +2172,6 @@ static int do_relocation(struct btrfs_trans_handle *trans,
                          struct btrfs_key *key,
                          struct btrfs_path *path, int lowest)
 {
-    struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
     struct btrfs_backref_node *upper;
     struct btrfs_backref_edge *edge;
     struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];

@@ -2213,17 +2179,14 @@ static int do_relocation(struct btrfs_trans_handle *trans,
     struct extent_buffer *eb;
     u32 blocksize;
     u64 bytenr;
-    u64 generation;
     int slot;
-    int ret;
-    int err = 0;
+    int ret = 0;

     BUG_ON(lowest && node->eb);

     path->lowest_level = node->level + 1;
     rc->backref_cache.path[node->level] = node;
     list_for_each_entry(edge, &node->upper, list[LOWER]) {
-        struct btrfs_key first_key;
         struct btrfs_ref ref = { 0 };

         cond_resched();

@@ -2235,10 +2198,8 @@ static int do_relocation(struct btrfs_trans_handle *trans,
         if (upper->eb && !upper->locked) {
             if (!lowest) {
                 ret = btrfs_bin_search(upper->eb, key, &slot);
-                if (ret < 0) {
-                    err = ret;
+                if (ret < 0)
                     goto next;
-                }
                 BUG_ON(ret);
                 bytenr = btrfs_node_blockptr(upper->eb, slot);
                 if (node->eb->start == bytenr)

@@ -2250,10 +2211,8 @@ static int do_relocation(struct btrfs_trans_handle *trans,
         if (!upper->eb) {
             ret = btrfs_search_slot(trans, root, key, path, 0, 1);
             if (ret) {
-                if (ret < 0)
-                    err = ret;
-                else
-                    err = -ENOENT;
+                if (ret > 0)
+                    ret = -ENOENT;

                 btrfs_release_path(path);
                 break;

@@ -2273,10 +2232,8 @@ static int do_relocation(struct btrfs_trans_handle *trans,
             btrfs_release_path(path);
         } else {
             ret = btrfs_bin_search(upper->eb, key, &slot);
-            if (ret < 0) {
-                err = ret;
+            if (ret < 0)
                 goto next;
-            }
             BUG_ON(ret);
         }

@@ -2287,7 +2244,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
"lowest leaf/node mismatch: bytenr %llu node->bytenr %llu slot %d upper %llu",
                           bytenr, node->bytenr, slot,
                           upper->eb->start);
-                err = -EIO;
+                ret = -EIO;
                 goto next;
             }
         } else {

@@ -2296,30 +2253,20 @@ static int do_relocation(struct btrfs_trans_handle *trans,
         }

         blocksize = root->fs_info->nodesize;
-        generation = btrfs_node_ptr_generation(upper->eb, slot);
-        btrfs_node_key_to_cpu(upper->eb, &first_key, slot);
-        eb = read_tree_block(fs_info, bytenr, generation,
-                             upper->level - 1, &first_key);
+        eb = btrfs_read_node_slot(upper->eb, slot);
         if (IS_ERR(eb)) {
-            err = PTR_ERR(eb);
-            goto next;
-        } else if (!extent_buffer_uptodate(eb)) {
-            free_extent_buffer(eb);
-            err = -EIO;
+            ret = PTR_ERR(eb);
             goto next;
         }
         btrfs_tree_lock(eb);
-        btrfs_set_lock_blocking_write(eb);

         if (!node->eb) {
             ret = btrfs_cow_block(trans, root, eb, upper->eb,
                                   slot, &eb, BTRFS_NESTING_COW);
             btrfs_tree_unlock(eb);
             free_extent_buffer(eb);
-            if (ret < 0) {
-                err = ret;
+            if (ret < 0)
                 goto next;
-            }
             BUG_ON(node->eb != eb);
         } else {
             btrfs_set_node_blockptr(upper->eb, slot,

@@ -2345,19 +2292,19 @@ next:
             btrfs_backref_drop_node_buffer(upper);
         else
             btrfs_backref_unlock_node_buffer(upper);
-        if (err)
+        if (ret)
             break;
     }

-    if (!err && node->pending) {
+    if (!ret && node->pending) {
         btrfs_backref_drop_node_buffer(node);
         list_move_tail(&node->list, &rc->backref_cache.changed);
         node->pending = 0;
     }

     path->lowest_level = 0;
-    BUG_ON(err == -ENOSPC);
-    return err;
+    BUG_ON(ret == -ENOSPC);
+    return ret;
 }

 static int link_to_upper(struct btrfs_trans_handle *trans,

@@ -2446,7 +2393,7 @@ static int get_tree_block_key(struct btrfs_fs_info *fs_info,
 {
     struct extent_buffer *eb;

-    eb = read_tree_block(fs_info, block->bytenr, block->key.offset,
+    eb = read_tree_block(fs_info, block->bytenr, 0, block->key.offset,
                          block->level, NULL);
     if (IS_ERR(eb)) {
         return PTR_ERR(eb);

@@ -2546,7 +2493,8 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
     /* Kick in readahead for tree blocks with missing keys */
     rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
         if (!block->key_ready)
-            readahead_tree_block(fs_info, block->bytenr);
+            btrfs_readahead_tree_block(fs_info, block->bytenr, 0, 0,
+                                       block->level);
     }

     /* Get first keys */

@@ -3071,7 +3019,7 @@ int add_data_references(struct reloc_control *rc,
     while ((ref_node = ulist_next(leaves, &leaf_uiter))) {
         struct extent_buffer *eb;

-        eb = read_tree_block(fs_info, ref_node->val, 0, 0, NULL);
+        eb = read_tree_block(fs_info, ref_node->val, 0, 0, 0, NULL);
         if (IS_ERR(eb)) {
             ret = PTR_ERR(eb);
             break;

@@ -3694,7 +3642,7 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)

     memset(&root->root_item.drop_progress, 0,
            sizeof(root->root_item.drop_progress));
-    root->root_item.drop_level = 0;
+    btrfs_set_root_drop_level(&root->root_item, 0);
     btrfs_set_root_refs(&root->root_item, 0);
     ret = btrfs_update_root(trans, fs_info->tree_root,
                             &root->root_key, &root->root_item);
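
A recurring cleanup across relocation.c above is collapsing the ret/err pair
into a single error variable (one of the error variable cleanups from the
summary): instead of copying every failure into a shadow err before jumping to
the exit label, the functions initialize ret once and return it directly. The
pattern, reduced to its skeleton (relocate_step and do_step are stand-in names
for illustration):

    static int relocate_step(void)
    {
        int ret = 0;

        ret = do_step();            /* hypothetical callee */
        if (ret < 0)
            goto out;
        /* ... further steps reuse ret ... */
    out:
        return ret;                 /* no shadow 'err' to keep in sync */
    }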
342
fs/btrfs/scrub.c
342
fs/btrfs/scrub.c
@@ -20,6 +20,7 @@
 #include "rcu-string.h"
 #include "raid56.h"
 #include "block-group.h"
+#include "zoned.h"

 /*
  * This is only the first step towards a full-features scrub. It reads all
@@ -71,11 +72,9 @@ struct scrub_page {
 	u64			physical;
 	u64			physical_for_dev_replace;
 	atomic_t		refs;
-	struct {
-		unsigned int	mirror_num:8;
-		unsigned int	have_csum:1;
-		unsigned int	io_error:1;
-	};
+	u8			mirror_num;
+	int			have_csum:1;
+	int			io_error:1;
 	u8			csum[BTRFS_CSUM_SIZE];

 	struct scrub_recover	*recover;
@@ -131,7 +130,7 @@ struct scrub_parity {

 	int			nsectors;

-	u64			stripe_len;
+	u32			stripe_len;

 	refcount_t		refs;

@@ -161,7 +160,6 @@ struct scrub_ctx {
 	atomic_t		workers_pending;
 	spinlock_t		list_lock;
 	wait_queue_head_t	list_wait;
-	u16			csum_size;
 	struct list_head	csum_list;
 	atomic_t		cancel_req;
 	int			readonly;
@@ -235,15 +233,15 @@ static void scrub_parity_get(struct scrub_parity *sparity);
 static void scrub_parity_put(struct scrub_parity *sparity);
 static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
 				    struct scrub_page *spage);
-static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
+static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u32 len,
 		       u64 physical, struct btrfs_device *dev, u64 flags,
-		       u64 gen, int mirror_num, u8 *csum, int force,
+		       u64 gen, int mirror_num, u8 *csum,
 		       u64 physical_for_dev_replace);
 static void scrub_bio_end_io(struct bio *bio);
 static void scrub_bio_end_io_worker(struct btrfs_work *work);
 static void scrub_block_complete(struct scrub_block *sblock);
 static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
-			       u64 extent_logical, u64 extent_len,
+			       u64 extent_logical, u32 extent_len,
 			       u64 *extent_physical,
 			       struct btrfs_device **extent_dev,
 			       int *extent_mirror_num);
@@ -256,10 +254,10 @@ static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
 static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
 static void scrub_put_ctx(struct scrub_ctx *sctx);

-static inline int scrub_is_page_on_raid56(struct scrub_page *page)
+static inline int scrub_is_page_on_raid56(struct scrub_page *spage)
 {
-	return page->recover &&
-	       (page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
+	return spage->recover &&
+	       (spage->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
 }

 static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
@@ -610,7 +608,6 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
 	atomic_set(&sctx->bios_in_flight, 0);
 	atomic_set(&sctx->workers_pending, 0);
 	atomic_set(&sctx->cancel_req, 0);
-	sctx->csum_size = btrfs_super_csum_size(fs_info->super_copy);

 	spin_lock_init(&sctx->list_lock);
 	spin_lock_init(&sctx->stat_lock);
@@ -1092,11 +1089,11 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 	success = 1;
 	for (page_num = 0; page_num < sblock_bad->page_count;
 	     page_num++) {
-		struct scrub_page *page_bad = sblock_bad->pagev[page_num];
+		struct scrub_page *spage_bad = sblock_bad->pagev[page_num];
 		struct scrub_block *sblock_other = NULL;

 		/* skip no-io-error page in scrub */
-		if (!page_bad->io_error && !sctx->is_dev_replace)
+		if (!spage_bad->io_error && !sctx->is_dev_replace)
 			continue;

 		if (scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
@@ -1108,7 +1105,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 			 * sblock_for_recheck array to target device.
 			 */
 			sblock_other = NULL;
-		} else if (page_bad->io_error) {
+		} else if (spage_bad->io_error) {
 			/* try to find no-io-error page in mirrors */
 			for (mirror_index = 0;
 			     mirror_index < BTRFS_MAX_MIRRORS &&
@@ -1147,7 +1144,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 							 sblock_other,
 							 page_num, 0);
 			if (0 == ret)
-				page_bad->io_error = 0;
+				spage_bad->io_error = 0;
 			else
 				success = 0;
 		}
@@ -1325,13 +1322,13 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
 		for (mirror_index = 0; mirror_index < nmirrors;
 		     mirror_index++) {
 			struct scrub_block *sblock;
-			struct scrub_page *page;
+			struct scrub_page *spage;

 			sblock = sblocks_for_recheck + mirror_index;
 			sblock->sctx = sctx;

-			page = kzalloc(sizeof(*page), GFP_NOFS);
-			if (!page) {
+			spage = kzalloc(sizeof(*spage), GFP_NOFS);
+			if (!spage) {
leave_nomem:
 				spin_lock(&sctx->stat_lock);
 				sctx->stat.malloc_errors++;
@@ -1339,17 +1336,17 @@ leave_nomem:
 				scrub_put_recover(fs_info, recover);
 				return -ENOMEM;
 			}
-			scrub_page_get(page);
-			sblock->pagev[page_index] = page;
-			page->sblock = sblock;
-			page->flags = flags;
-			page->generation = generation;
-			page->logical = logical;
-			page->have_csum = have_csum;
+			scrub_page_get(spage);
+			sblock->pagev[page_index] = spage;
+			spage->sblock = sblock;
+			spage->flags = flags;
+			spage->generation = generation;
+			spage->logical = logical;
+			spage->have_csum = have_csum;
 			if (have_csum)
-				memcpy(page->csum,
+				memcpy(spage->csum,
 				       original_sblock->pagev[0]->csum,
-				       sctx->csum_size);
+				       sctx->fs_info->csum_size);

 			scrub_stripe_index_and_offset(logical,
 						      bbio->map_type,
@@ -1360,23 +1357,23 @@ leave_nomem:
 						      mirror_index,
 						      &stripe_index,
 						      &stripe_offset);
-			page->physical = bbio->stripes[stripe_index].physical +
+			spage->physical = bbio->stripes[stripe_index].physical +
					 stripe_offset;
-			page->dev = bbio->stripes[stripe_index].dev;
+			spage->dev = bbio->stripes[stripe_index].dev;

 			BUG_ON(page_index >= original_sblock->page_count);
-			page->physical_for_dev_replace =
+			spage->physical_for_dev_replace =
 				original_sblock->pagev[page_index]->
 				physical_for_dev_replace;
 			/* for missing devices, dev->bdev is NULL */
-			page->mirror_num = mirror_index + 1;
+			spage->mirror_num = mirror_index + 1;
 			sblock->page_count++;
-			page->page = alloc_page(GFP_NOFS);
-			if (!page->page)
+			spage->page = alloc_page(GFP_NOFS);
+			if (!spage->page)
 				goto leave_nomem;

 			scrub_get_recover(recover);
-			page->recover = recover;
+			spage->recover = recover;
 		}
 		scrub_put_recover(fs_info, recover);
 		length -= sublen;
@@ -1394,19 +1391,19 @@ static void scrub_bio_wait_endio(struct bio *bio)

 static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
 					struct bio *bio,
-					struct scrub_page *page)
+					struct scrub_page *spage)
 {
 	DECLARE_COMPLETION_ONSTACK(done);
 	int ret;
 	int mirror_num;

-	bio->bi_iter.bi_sector = page->logical >> 9;
+	bio->bi_iter.bi_sector = spage->logical >> 9;
 	bio->bi_private = &done;
 	bio->bi_end_io = scrub_bio_wait_endio;

-	mirror_num = page->sblock->pagev[0]->mirror_num;
-	ret = raid56_parity_recover(fs_info, bio, page->recover->bbio,
-				    page->recover->map_length,
+	mirror_num = spage->sblock->pagev[0]->mirror_num;
+	ret = raid56_parity_recover(fs_info, bio, spage->recover->bbio,
+				    spage->recover->map_length,
 				    mirror_num, 0);
 	if (ret)
 		return ret;
@@ -1431,10 +1428,10 @@ static void scrub_recheck_block_on_raid56(struct btrfs_fs_info *fs_info,
 	bio_set_dev(bio, first_page->dev->bdev);

 	for (page_num = 0; page_num < sblock->page_count; page_num++) {
-		struct scrub_page *page = sblock->pagev[page_num];
+		struct scrub_page *spage = sblock->pagev[page_num];

-		WARN_ON(!page->page);
-		bio_add_page(bio, page->page, PAGE_SIZE, 0);
+		WARN_ON(!spage->page);
+		bio_add_page(bio, spage->page, PAGE_SIZE, 0);
 	}

 	if (scrub_submit_raid56_bio_wait(fs_info, bio, first_page)) {
@@ -1475,24 +1472,24 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,

 	for (page_num = 0; page_num < sblock->page_count; page_num++) {
 		struct bio *bio;
-		struct scrub_page *page = sblock->pagev[page_num];
+		struct scrub_page *spage = sblock->pagev[page_num];

-		if (page->dev->bdev == NULL) {
-			page->io_error = 1;
+		if (spage->dev->bdev == NULL) {
+			spage->io_error = 1;
 			sblock->no_io_error_seen = 0;
 			continue;
 		}

-		WARN_ON(!page->page);
+		WARN_ON(!spage->page);
 		bio = btrfs_io_bio_alloc(1);
-		bio_set_dev(bio, page->dev->bdev);
+		bio_set_dev(bio, spage->dev->bdev);

-		bio_add_page(bio, page->page, PAGE_SIZE, 0);
-		bio->bi_iter.bi_sector = page->physical >> 9;
+		bio_add_page(bio, spage->page, PAGE_SIZE, 0);
+		bio->bi_iter.bi_sector = spage->physical >> 9;
 		bio->bi_opf = REQ_OP_READ;

 		if (btrfsic_submit_bio_wait(bio)) {
-			page->io_error = 1;
+			spage->io_error = 1;
 			sblock->no_io_error_seen = 0;
 		}

@@ -1548,36 +1545,36 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
 					    struct scrub_block *sblock_good,
 					    int page_num, int force_write)
 {
-	struct scrub_page *page_bad = sblock_bad->pagev[page_num];
-	struct scrub_page *page_good = sblock_good->pagev[page_num];
+	struct scrub_page *spage_bad = sblock_bad->pagev[page_num];
+	struct scrub_page *spage_good = sblock_good->pagev[page_num];
 	struct btrfs_fs_info *fs_info = sblock_bad->sctx->fs_info;

-	BUG_ON(page_bad->page == NULL);
-	BUG_ON(page_good->page == NULL);
+	BUG_ON(spage_bad->page == NULL);
+	BUG_ON(spage_good->page == NULL);
 	if (force_write || sblock_bad->header_error ||
-	    sblock_bad->checksum_error || page_bad->io_error) {
+	    sblock_bad->checksum_error || spage_bad->io_error) {
 		struct bio *bio;
 		int ret;

-		if (!page_bad->dev->bdev) {
+		if (!spage_bad->dev->bdev) {
 			btrfs_warn_rl(fs_info,
 				"scrub_repair_page_from_good_copy(bdev == NULL) is unexpected");
 			return -EIO;
 		}

 		bio = btrfs_io_bio_alloc(1);
-		bio_set_dev(bio, page_bad->dev->bdev);
-		bio->bi_iter.bi_sector = page_bad->physical >> 9;
+		bio_set_dev(bio, spage_bad->dev->bdev);
+		bio->bi_iter.bi_sector = spage_bad->physical >> 9;
 		bio->bi_opf = REQ_OP_WRITE;

-		ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
+		ret = bio_add_page(bio, spage_good->page, PAGE_SIZE, 0);
 		if (PAGE_SIZE != ret) {
 			bio_put(bio);
 			return -EIO;
 		}

 		if (btrfsic_submit_bio_wait(bio)) {
-			btrfs_dev_stat_inc_and_print(page_bad->dev,
+			btrfs_dev_stat_inc_and_print(spage_bad->dev,
 				BTRFS_DEV_STAT_WRITE_ERRS);
 			atomic64_inc(&fs_info->dev_replace.num_write_errors);
 			bio_put(bio);
@@ -1798,11 +1795,15 @@ static int scrub_checksum_data(struct scrub_block *sblock)

 	shash->tfm = fs_info->csum_shash;
 	crypto_shash_init(shash);
-	crypto_shash_digest(shash, kaddr, PAGE_SIZE, csum);

-	if (memcmp(csum, spage->csum, sctx->csum_size))
+	/*
+	 * In scrub_pages() and scrub_pages_for_parity() we ensure each spage
+	 * only contains one sector of data.
+	 */
+	crypto_shash_digest(shash, kaddr, fs_info->sectorsize, csum);
+
+	if (memcmp(csum, spage->csum, fs_info->csum_size))
 		sblock->checksum_error = 1;

 	return sblock->checksum_error;
 }

@@ -1814,16 +1815,26 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
 	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
 	u8 calculated_csum[BTRFS_CSUM_SIZE];
 	u8 on_disk_csum[BTRFS_CSUM_SIZE];
-	const int num_pages = sctx->fs_info->nodesize >> PAGE_SHIFT;
+	/*
+	 * This is done in sectorsize steps even for metadata as there's a
+	 * constraint for nodesize to be aligned to sectorsize. This will need
+	 * to change so we don't misuse data and metadata units like that.
+	 */
+	const u32 sectorsize = sctx->fs_info->sectorsize;
+	const int num_sectors = fs_info->nodesize >> fs_info->sectorsize_bits;
 	int i;
 	struct scrub_page *spage;
 	char *kaddr;

 	BUG_ON(sblock->page_count < 1);

+	/* Each member in pagev is just one block, not a full page */
+	ASSERT(sblock->page_count == num_sectors);
+
 	spage = sblock->pagev[0];
 	kaddr = page_address(spage->page);
 	h = (struct btrfs_header *)kaddr;
-	memcpy(on_disk_csum, h->csum, sctx->csum_size);
+	memcpy(on_disk_csum, h->csum, sctx->fs_info->csum_size);

 	/*
 	 * we don't use the getter functions here, as we
@@ -1848,15 +1859,15 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
 	shash->tfm = fs_info->csum_shash;
 	crypto_shash_init(shash);
 	crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE,
-			    PAGE_SIZE - BTRFS_CSUM_SIZE);
+			    sectorsize - BTRFS_CSUM_SIZE);

-	for (i = 1; i < num_pages; i++) {
+	for (i = 1; i < num_sectors; i++) {
 		kaddr = page_address(sblock->pagev[i]->page);
-		crypto_shash_update(shash, kaddr, PAGE_SIZE);
+		crypto_shash_update(shash, kaddr, sectorsize);
 	}

 	crypto_shash_final(shash, calculated_csum);
-	if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
+	if (memcmp(calculated_csum, on_disk_csum, sctx->fs_info->csum_size))
 		sblock->checksum_error = 1;

 	return sblock->header_error || sblock->checksum_error;
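
The tree block checksum now advances in sectorsize units rather than PAGE_SIZE units, which is valid because nodesize is constrained to be sectorsize aligned. A minimal userspace sketch of that walk, with the crypto call stubbed out and 4K sectors / 16K nodes assumed purely for illustration:

#include <stdint.h>
#include <stdio.h>

#define BTRFS_CSUM_SIZE 32

/* Stand-in for crypto_shash_update(); just reports what would be hashed */
static void shash_update(int sector, uint32_t bytes)
{
    printf("hash sector %d: %u bytes\n", sector, bytes);
}

int main(void)
{
    const uint32_t sectorsize = 4096;       /* assumed */
    const uint32_t sectorsize_bits = 12;    /* ilog2(sectorsize) */
    const uint32_t nodesize = 16384;        /* assumed */
    const int num_sectors = nodesize >> sectorsize_bits;

    /* The first sector skips the on-disk csum bytes at its front */
    shash_update(0, sectorsize - BTRFS_CSUM_SIZE);
    for (int i = 1; i < num_sectors; i++)
        shash_update(i, sectorsize);
    return 0;
}
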
@@ -1893,7 +1904,7 @@ static int scrub_checksum_super(struct scrub_block *sblock)
 	crypto_shash_digest(shash, kaddr + BTRFS_CSUM_SIZE,
 			BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, calculated_csum);

-	if (memcmp(calculated_csum, s->csum, sctx->csum_size))
+	if (memcmp(calculated_csum, s->csum, sctx->fs_info->csum_size))
 		++fail_cor;

 	if (fail_cor + fail_gen) {
@@ -2150,12 +2161,13 @@ bbio_out:
 		spin_unlock(&sctx->stat_lock);
 	}

-static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
+static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u32 len,
 		       u64 physical, struct btrfs_device *dev, u64 flags,
-		       u64 gen, int mirror_num, u8 *csum, int force,
+		       u64 gen, int mirror_num, u8 *csum,
 		       u64 physical_for_dev_replace)
 {
 	struct scrub_block *sblock;
+	const u32 sectorsize = sctx->fs_info->sectorsize;
 	int index;

 	sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
@@ -2174,7 +2186,12 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u32 len,

 	for (index = 0; len > 0; index++) {
 		struct scrub_page *spage;
-		u64 l = min_t(u64, len, PAGE_SIZE);
+		/*
+		 * Here we will allocate one page for one sector to scrub.
+		 * This is fine if PAGE_SIZE == sectorsize, but will cost
+		 * more memory for PAGE_SIZE > sectorsize case.
+		 */
+		u32 l = min(sectorsize, len);

 		spage = kzalloc(sizeof(*spage), GFP_KERNEL);
 		if (!spage) {
@@ -2198,7 +2215,7 @@ leave_nomem:
 		spage->mirror_num = mirror_num;
 		if (csum) {
 			spage->have_csum = 1;
-			memcpy(spage->csum, csum, sctx->csum_size);
+			memcpy(spage->csum, csum, sctx->fs_info->csum_size);
 		} else {
 			spage->have_csum = 0;
 		}
@@ -2231,7 +2248,7 @@ leave_nomem:
 		}
 	}

-	if (force)
+	if (flags & BTRFS_EXTENT_FLAG_SUPER)
 		scrub_submit(sctx);
 }
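
With the subpage preparation, scrub_pages() chops an extent into one scrub_page per sector instead of per page. A self-contained sketch of the split (values assumed for illustration; note the comment in the diff that each sector still costs a full page of memory when PAGE_SIZE > sectorsize):

#include <stdint.h>
#include <stdio.h>

static uint32_t min_u32(uint32_t a, uint32_t b)
{
    return a < b ? a : b;
}

int main(void)
{
    const uint32_t sectorsize = 4096;   /* assumed */
    uint64_t logical = 1048576;         /* arbitrary extent start */
    uint32_t len = 12288;               /* three sectors */

    for (int index = 0; len > 0; index++) {
        uint32_t l = min_u32(sectorsize, len);

        /* one scrub_page (and one backing page) per sector */
        printf("spage %d: logical=%llu len=%u\n",
               index, (unsigned long long)logical, l);
        len -= l;
        logical += l;
    }
    return 0;
}
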
@@ -2295,12 +2312,11 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work)

 static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
 				       unsigned long *bitmap,
-				       u64 start, u64 len)
+				       u64 start, u32 len)
 {
 	u64 offset;
-	u64 nsectors64;
 	u32 nsectors;
-	int sectorsize = sparity->sctx->fs_info->sectorsize;
+	u32 sectorsize_bits = sparity->sctx->fs_info->sectorsize_bits;

 	if (len >= sparity->stripe_len) {
 		bitmap_set(bitmap, 0, sparity->nsectors);
@@ -2309,11 +2325,8 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,

 	start -= sparity->logic_start;
 	start = div64_u64_rem(start, sparity->stripe_len, &offset);
-	offset = div_u64(offset, sectorsize);
-	nsectors64 = div_u64(len, sectorsize);
-
-	ASSERT(nsectors64 < UINT_MAX);
-	nsectors = (u32)nsectors64;
+	offset = offset >> sectorsize_bits;
+	nsectors = len >> sectorsize_bits;

 	if (offset + nsectors <= sparity->nsectors) {
 		bitmap_set(bitmap, offset, nsectors);
@@ -2325,13 +2338,13 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
 }

 static inline void scrub_parity_mark_sectors_error(struct scrub_parity *sparity,
-						   u64 start, u64 len)
+						   u64 start, u32 len)
 {
 	__scrub_mark_bitmap(sparity, sparity->ebitmap, start, len);
 }

 static inline void scrub_parity_mark_sectors_data(struct scrub_parity *sparity,
-						  u64 start, u64 len)
+						  u64 start, u32 len)
 {
 	__scrub_mark_bitmap(sparity, sparity->dbitmap, start, len);
 }
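
The div_u64() calls are replaced by right shifts through the new sectorsize_bits field; that is only valid because the sector size is always a power of two. A quick compilable check of the equivalence, with the sizes assumed:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    const uint32_t sectorsize = 4096;       /* always a power of two */
    const uint32_t sectorsize_bits = 12;    /* ilog2(sectorsize) */
    const uint64_t stripe_len = 65536;      /* assumed stripe length */
    uint64_t offset = 20480;                /* byte offset inside the stripe */
    uint32_t len = 8192;

    /* The shift and the division agree for power-of-two sector sizes */
    assert(offset / sectorsize == offset >> sectorsize_bits);

    printf("first sector bit: %llu, bits to set: %u of %llu\n",
           (unsigned long long)(offset >> sectorsize_bits),
           len >> sectorsize_bits,
           (unsigned long long)(stripe_len >> sectorsize_bits));
    return 0;
}
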
@@ -2359,48 +2372,77 @@ static void scrub_block_complete(struct scrub_block *sblock)
 		u64 end = sblock->pagev[sblock->page_count - 1]->logical +
 			  PAGE_SIZE;

+		ASSERT(end - start <= U32_MAX);
 		scrub_parity_mark_sectors_error(sblock->sparity,
 						start, end - start);
 	}
 }

+static void drop_csum_range(struct scrub_ctx *sctx, struct btrfs_ordered_sum *sum)
+{
+	sctx->stat.csum_discards += sum->len >> sctx->fs_info->sectorsize_bits;
+	list_del(&sum->list);
+	kfree(sum);
+}
+
+/*
+ * Find the desired csum for range [logical, logical + sectorsize), and store
+ * the csum into @csum.
+ *
+ * The search source is sctx->csum_list, which is a pre-populated list
+ * storing bytenr ordered csum ranges. We're responsible to cleanup any range
+ * that is before @logical.
+ *
+ * Return 0 if there is no csum for the range.
+ * Return 1 if there is csum for the range and copied to @csum.
+ */
 static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum)
 {
-	struct btrfs_ordered_sum *sum = NULL;
-	unsigned long index;
-	unsigned long num_sectors;
+	bool found = false;

 	while (!list_empty(&sctx->csum_list)) {
+		struct btrfs_ordered_sum *sum = NULL;
+		unsigned long index;
+		unsigned long num_sectors;
+
 		sum = list_first_entry(&sctx->csum_list,
 				       struct btrfs_ordered_sum, list);
+		/* The current csum range is beyond our range, no csum found */
 		if (sum->bytenr > logical)
-			return 0;
-		if (sum->bytenr + sum->len > logical)
 			break;

-		++sctx->stat.csum_discards;
-		list_del(&sum->list);
-		kfree(sum);
-		sum = NULL;
+		/*
+		 * The current sum is before our bytenr, since scrub is always
+		 * done in bytenr order, the csum will never be used anymore,
+		 * clean it up so that later calls won't bother with the range,
+		 * and continue search the next range.
+		 */
+		if (sum->bytenr + sum->len <= logical) {
+			drop_csum_range(sctx, sum);
+			continue;
+		}
+
+		/* Now the csum range covers our bytenr, copy the csum */
+		found = true;
+		index = (logical - sum->bytenr) >> sctx->fs_info->sectorsize_bits;
+		num_sectors = sum->len >> sctx->fs_info->sectorsize_bits;
+
+		memcpy(csum, sum->sums + index * sctx->fs_info->csum_size,
+		       sctx->fs_info->csum_size);
+
+		/* Cleanup the range if we're at the end of the csum range */
+		if (index == num_sectors - 1)
+			drop_csum_range(sctx, sum);
+		break;
 	}
-	if (!sum)
+	if (!found)
 		return 0;

-	index = div_u64(logical - sum->bytenr, sctx->fs_info->sectorsize);
-	ASSERT(index < UINT_MAX);
-
-	num_sectors = sum->len / sctx->fs_info->sectorsize;
-	memcpy(csum, sum->sums + index * sctx->csum_size, sctx->csum_size);
-	if (index == num_sectors - 1) {
-		list_del(&sum->list);
-		kfree(sum);
-	}
 	return 1;
 }

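
The refactored lookup works on a bytenr-ordered list: anything fully before the requested offset can never match again and is dropped, and the covering range (if any) yields the csum slot. A standalone userspace model of that control flow (list handling and freeing simplified; names here are illustrative):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct sum_range {
    uint64_t bytenr;            /* start of the csum'd range */
    uint64_t len;               /* length in bytes */
    struct sum_range *next;     /* stand-in for the kernel's list_head */
};

/* Returns true and sets *index to the sector slot inside the covering range */
static bool model_find_csum(struct sum_range **head, uint64_t logical,
                            uint32_t sectorsize_bits, unsigned long *index)
{
    while (*head) {
        struct sum_range *sum = *head;

        /* Current range starts beyond us: no csum for @logical */
        if (sum->bytenr > logical)
            return false;

        /*
         * Range ends before us: scrub proceeds in bytenr order, so
         * this range can never match again; drop it and move on.
         */
        if (sum->bytenr + sum->len <= logical) {
            *head = sum->next;  /* the kernel also frees it here */
            continue;
        }

        /* The range covers @logical: compute the csum slot */
        *index = (logical - sum->bytenr) >> sectorsize_bits;
        return true;
    }
    return false;
}

int main(void)
{
    struct sum_range r2 = { 8192, 4096, NULL };
    struct sum_range r1 = { 0, 4096, &r2 };
    struct sum_range *head = &r1;
    unsigned long index;

    /* logical 8192 drops the stale r1 and lands in r2 at slot 0 */
    return model_find_csum(&head, 8192, 12, &index) ? 0 : 1;
}
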
 /* scrub extent tries to collect up to 64 kB for each bio */
 static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
-			u64 logical, u64 len,
+			u64 logical, u32 len,
 			u64 physical, struct btrfs_device *dev, u64 flags,
 			u64 gen, int mirror_num, u64 physical_for_dev_replace)
 {
@@ -2432,7 +2474,7 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
 	}

 	while (len) {
-		u64 l = min_t(u64, len, blocksize);
+		u32 l = min(len, blocksize);
 		int have_csum = 0;

 		if (flags & BTRFS_EXTENT_FLAG_DATA) {
@@ -2442,7 +2484,7 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
 				++sctx->stat.no_csum;
 		}
 		ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
-				  mirror_num, have_csum ? csum : NULL, 0,
+				  mirror_num, have_csum ? csum : NULL,
 				  physical_for_dev_replace);
 		if (ret)
 			return ret;
@@ -2455,14 +2497,17 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
 }

 static int scrub_pages_for_parity(struct scrub_parity *sparity,
-				  u64 logical, u64 len,
+				  u64 logical, u32 len,
 				  u64 physical, struct btrfs_device *dev,
 				  u64 flags, u64 gen, int mirror_num, u8 *csum)
 {
 	struct scrub_ctx *sctx = sparity->sctx;
 	struct scrub_block *sblock;
+	const u32 sectorsize = sctx->fs_info->sectorsize;
 	int index;

+	ASSERT(IS_ALIGNED(len, sectorsize));
+
 	sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
 	if (!sblock) {
 		spin_lock(&sctx->stat_lock);
@@ -2481,7 +2526,6 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity,

 	for (index = 0; len > 0; index++) {
 		struct scrub_page *spage;
-		u64 l = min_t(u64, len, PAGE_SIZE);

 		spage = kzalloc(sizeof(*spage), GFP_KERNEL);
 		if (!spage) {
@@ -2508,7 +2552,7 @@ leave_nomem:
 		spage->mirror_num = mirror_num;
 		if (csum) {
 			spage->have_csum = 1;
-			memcpy(spage->csum, csum, sctx->csum_size);
+			memcpy(spage->csum, csum, sctx->fs_info->csum_size);
 		} else {
 			spage->have_csum = 0;
 		}
@@ -2516,9 +2560,12 @@ leave_nomem:
 		spage->page = alloc_page(GFP_KERNEL);
 		if (!spage->page)
 			goto leave_nomem;
-		len -= l;
-		logical += l;
-		physical += l;
+
+		/* Iterate over the stripe range in sectorsize steps */
+		len -= sectorsize;
+		logical += sectorsize;
+		physical += sectorsize;
 	}

 	WARN_ON(sblock->page_count == 0);
@@ -2539,7 +2586,7 @@ leave_nomem:
 }

 static int scrub_extent_for_parity(struct scrub_parity *sparity,
-				   u64 logical, u64 len,
+				   u64 logical, u32 len,
 				   u64 physical, struct btrfs_device *dev,
 				   u64 flags, u64 gen, int mirror_num)
 {
@@ -2563,7 +2610,7 @@ static int scrub_extent_for_parity(struct scrub_parity *sparity,
 	}

 	while (len) {
-		u64 l = min_t(u64, len, blocksize);
+		u32 l = min(len, blocksize);
 		int have_csum = 0;

 		if (flags & BTRFS_EXTENT_FLAG_DATA) {
@@ -2767,7 +2814,8 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
 	u64 generation;
 	u64 extent_logical;
 	u64 extent_physical;
-	u64 extent_len;
+	/* Check the comment in scrub_stripe() for why u32 is enough here */
+	u32 extent_len;
 	u64 mapped_length;
 	struct btrfs_device *extent_dev;
 	struct scrub_parity *sparity;
@@ -2776,7 +2824,8 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
 	int extent_mirror_num;
 	int stop_loop = 0;

-	nsectors = div_u64(map->stripe_len, fs_info->sectorsize);
+	ASSERT(map->stripe_len <= U32_MAX);
+	nsectors = map->stripe_len >> fs_info->sectorsize_bits;
 	bitmap_len = scrub_calc_parity_bitmap_len(nsectors);
 	sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len,
 			  GFP_NOFS);
@@ -2787,6 +2836,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
 		return -ENOMEM;
 	}

+	ASSERT(map->stripe_len <= U32_MAX);
 	sparity->stripe_len = map->stripe_len;
 	sparity->nsectors = nsectors;
 	sparity->sctx = sctx;
@@ -2881,6 +2931,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
 			}
again:
 			extent_logical = key.objectid;
+			ASSERT(bytes <= U32_MAX);
 			extent_len = bytes;

 			if (extent_logical < logic_start) {
@@ -2959,9 +3010,11 @@ next:
 		logic_start += map->stripe_len;
 	}
out:
-	if (ret < 0)
+	if (ret < 0) {
+		ASSERT(logic_end - logic_start <= U32_MAX);
 		scrub_parity_mark_sectors_error(sparity, logic_start,
 						logic_end - logic_start);
+	}
 	scrub_parity_put(sparity);
 	scrub_submit(sctx);
 	mutex_lock(&sctx->wr_lock);
@@ -3003,7 +3056,11 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	u64 offset;
 	u64 extent_logical;
 	u64 extent_physical;
-	u64 extent_len;
+	/*
+	 * Unlike chunk length, extent length should never go beyond
+	 * BTRFS_MAX_EXTENT_SIZE, thus u32 is enough here.
+	 */
+	u32 extent_len;
 	u64 stripe_logical;
 	u64 stripe_end;
 	struct btrfs_device *extent_dev;
@@ -3084,17 +3141,21 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	key_end.offset = (u64)-1;
 	reada1 = btrfs_reada_add(root, &key, &key_end);

-	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
-	key.type = BTRFS_EXTENT_CSUM_KEY;
-	key.offset = logical;
-	key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
-	key_end.type = BTRFS_EXTENT_CSUM_KEY;
-	key_end.offset = logic_end;
-	reada2 = btrfs_reada_add(csum_root, &key, &key_end);
+	if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
+		key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
+		key.type = BTRFS_EXTENT_CSUM_KEY;
+		key.offset = logical;
+		key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
+		key_end.type = BTRFS_EXTENT_CSUM_KEY;
+		key_end.offset = logic_end;
+		reada2 = btrfs_reada_add(csum_root, &key, &key_end);
+	} else {
+		reada2 = NULL;
+	}

 	if (!IS_ERR(reada1))
 		btrfs_reada_wait(reada1);
-	if (!IS_ERR(reada2))
+	if (!IS_ERR_OR_NULL(reada2))
 		btrfs_reada_wait(reada2);

@@ -3248,6 +3309,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,

again:
 		extent_logical = key.objectid;
+		ASSERT(bytes <= U32_MAX);
 		extent_len = bytes;

 		/*
@@ -3704,10 +3766,12 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
 		if (bytenr + BTRFS_SUPER_INFO_SIZE >
 		    scrub_dev->commit_total_bytes)
 			break;
+		if (!btrfs_check_super_location(scrub_dev, bytenr))
+			continue;

 		ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
 				  scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i,
-				  NULL, 1, bytenr);
+				  NULL, bytenr);
 		if (ret)
 			return ret;
 	}
@@ -3821,14 +3885,6 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
 		return -EINVAL;
 	}

-	if (fs_info->sectorsize != PAGE_SIZE) {
-		/* not supported for data w/o checksums */
-		btrfs_err_rl(fs_info,
-			   "scrub: size assumption sectorsize != PAGE_SIZE (%d != %lu) fails",
-			   fs_info->sectorsize, PAGE_SIZE);
-		return -EINVAL;
-	}
-
 	if (fs_info->nodesize >
 	    PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK ||
 	    fs_info->sectorsize > PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK) {
@@ -3855,7 +3911,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
 		goto out_free_ctx;

 	mutex_lock(&fs_info->fs_devices->device_list_mutex);
-	dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
+	dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL);
 	if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) &&
 		     !is_dev_replace)) {
 		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
@@ -4032,7 +4088,7 @@ int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
 	struct scrub_ctx *sctx = NULL;

 	mutex_lock(&fs_info->fs_devices->device_list_mutex);
-	dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
+	dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL);
 	if (dev)
 		sctx = dev->scrub_ctx;
 	if (sctx)
@@ -4043,7 +4099,7 @@ int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
 	}

 static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
-			       u64 extent_logical, u64 extent_len,
+			       u64 extent_logical, u32 extent_len,
 			       u64 *extent_physical,
 			       struct btrfs_device **extent_dev,
 			       int *extent_mirror_num)
fs/btrfs/send.c

@@ -2410,7 +2410,7 @@ static int send_subvol_begin(struct send_ctx *sctx)
 		    sctx->send_root->root_item.uuid);

 	TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
-		    le64_to_cpu(sctx->send_root->root_item.ctransid));
+		    btrfs_root_ctransid(&sctx->send_root->root_item));
 	if (parent_root) {
 		if (!btrfs_is_empty_uuid(parent_root->root_item.received_uuid))
 			TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
@@ -2419,7 +2419,7 @@ static int send_subvol_begin(struct send_ctx *sctx)
 			TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
 				     parent_root->root_item.uuid);
 		TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
-			    le64_to_cpu(sctx->parent_root->root_item.ctransid));
+			    btrfs_root_ctransid(&sctx->parent_root->root_item));
 	}

 	ret = send_cmd(sctx);
@@ -5101,7 +5101,7 @@ static int send_clone(struct send_ctx *sctx,
 	TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
 		     clone_root->root->root_item.uuid);
 	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
-		    le64_to_cpu(clone_root->root->root_item.ctransid));
+		    btrfs_root_ctransid(&clone_root->root->root_item));
 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
 	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET,
 		    clone_root->offset);
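
The open-coded le64_to_cpu() reads of root_item.ctransid give way to the btrfs_root_ctransid() accessor, keeping the on-disk little-endian conversion in one place. A self-contained model of such an accessor (the struct and function names below are illustrative, not the kernel's):

#include <stdint.h>

/* Read a little-endian u64 from disk bytes, portable across host endianness */
static uint64_t get_le64(const uint8_t *src)
{
    uint64_t v = 0;

    for (int i = 0; i < 8; i++)
        v |= (uint64_t)src[i] << (8 * i);
    return v;
}

struct model_root_item {
    uint8_t ctransid[8];    /* stored little-endian on disk */
};

static uint64_t model_root_ctransid(const struct model_root_item *item)
{
    return get_le64(item->ctransid);
}

int main(void)
{
    struct model_root_item item = { { 0x2a, 0, 0, 0, 0, 0, 0, 0 } };

    return model_root_ctransid(&item) == 42 ? 0 : 1;
}
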
fs/btrfs/struct-funcs.c

@@ -57,8 +57,9 @@ u##bits btrfs_get_token_##bits(struct btrfs_map_token *token, \
 			       const void *ptr, unsigned long off) \
 { \
 	const unsigned long member_offset = (unsigned long)ptr + off; \
-	const unsigned long idx = member_offset >> PAGE_SHIFT; \
-	const unsigned long oip = offset_in_page(member_offset); \
+	const unsigned long idx = get_eb_page_index(member_offset); \
+	const unsigned long oip = get_eb_offset_in_page(token->eb, \
+							member_offset); \
 	const int size = sizeof(u##bits); \
 	u8 lebytes[sizeof(u##bits)]; \
 	const int part = PAGE_SIZE - oip; \
@@ -85,8 +86,8 @@ u##bits btrfs_get_##bits(const struct extent_buffer *eb, \
 			 const void *ptr, unsigned long off) \
 { \
 	const unsigned long member_offset = (unsigned long)ptr + off; \
-	const unsigned long oip = offset_in_page(member_offset); \
-	const unsigned long idx = member_offset >> PAGE_SHIFT; \
+	const unsigned long oip = get_eb_offset_in_page(eb, member_offset); \
+	const unsigned long idx = get_eb_page_index(member_offset); \
 	char *kaddr = page_address(eb->pages[idx]); \
 	const int size = sizeof(u##bits); \
 	const int part = PAGE_SIZE - oip; \
@@ -106,8 +107,9 @@ void btrfs_set_token_##bits(struct btrfs_map_token *token, \
 			    u##bits val) \
 { \
 	const unsigned long member_offset = (unsigned long)ptr + off; \
-	const unsigned long idx = member_offset >> PAGE_SHIFT; \
-	const unsigned long oip = offset_in_page(member_offset); \
+	const unsigned long idx = get_eb_page_index(member_offset); \
+	const unsigned long oip = get_eb_offset_in_page(token->eb, \
+							member_offset); \
 	const int size = sizeof(u##bits); \
 	u8 lebytes[sizeof(u##bits)]; \
 	const int part = PAGE_SIZE - oip; \
@@ -136,8 +138,8 @@ void btrfs_set_##bits(const struct extent_buffer *eb, void *ptr, \
 		      unsigned long off, u##bits val) \
 { \
 	const unsigned long member_offset = (unsigned long)ptr + off; \
-	const unsigned long oip = offset_in_page(member_offset); \
-	const unsigned long idx = member_offset >> PAGE_SHIFT; \
+	const unsigned long oip = get_eb_offset_in_page(eb, member_offset); \
+	const unsigned long idx = get_eb_page_index(member_offset); \
 	char *kaddr = page_address(eb->pages[idx]); \
 	const int size = sizeof(u##bits); \
 	const int part = PAGE_SIZE - oip; \
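
The open-coded PAGE_SHIFT and offset_in_page() math is replaced with the get_eb_page_index()/get_eb_offset_in_page() helpers so the same accessors keep working when several tree blocks share one page (sectorsize < PAGE_SIZE). The sketch below captures the intent only; it is a guess at the shape of the helpers, which live elsewhere in the tree and fold in eb->start solely for the subpage case:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 16                   /* assumed 64K pages, the subpage target */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

static unsigned long sketch_eb_page_index(unsigned long member_offset)
{
    return member_offset >> PAGE_SHIFT;
}

static unsigned long sketch_eb_offset_in_page(uint64_t eb_start,
                                              unsigned long member_offset,
                                              int subpage)
{
    /*
     * With sectorsize == PAGE_SIZE, eb_start is page aligned and adding it
     * changes nothing; with subpage tree blocks it selects the right slice
     * of the shared page.
     */
    if (subpage)
        return (eb_start + member_offset) & (PAGE_SIZE - 1);
    return member_offset & (PAGE_SIZE - 1);
}

int main(void)
{
    /* A tree block starting at 36K inside a 64K page (subpage case) */
    printf("index=%lu offset=%lu\n",
           sketch_eb_page_index(100),
           sketch_eb_offset_in_page(36864, 100, 1));
    return 0;
}
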
fs/btrfs/super.c | 179 lines changed

@@ -44,6 +44,7 @@
 #include "backref.h"
 #include "space-info.h"
 #include "sysfs.h"
+#include "zoned.h"
 #include "tests/btrfs-tests.h"
 #include "block-group.h"
 #include "discard.h"
@@ -240,9 +241,13 @@ void __cold btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
 	vaf.fmt = fmt;
 	vaf.va = &args;

-	if (__ratelimit(ratelimit))
-		printk("%sBTRFS %s (device %s): %pV\n", lvl, type,
-			fs_info ? fs_info->sb->s_id : "<unknown>", &vaf);
+	if (__ratelimit(ratelimit)) {
+		if (fs_info)
+			printk("%sBTRFS %s (device %s): %pV\n", lvl, type,
+				fs_info->sb->s_id, &vaf);
+		else
+			printk("%sBTRFS %s: %pV\n", lvl, type, &vaf);
+	}

 	va_end(args);
 }
@@ -333,7 +338,6 @@ enum {
 	Opt_device,
 	Opt_fatal_errors,
 	Opt_flushoncommit, Opt_noflushoncommit,
-	Opt_inode_cache, Opt_noinode_cache,
 	Opt_max_inline,
 	Opt_barrier, Opt_nobarrier,
 	Opt_datacow, Opt_nodatacow,
@@ -360,9 +364,13 @@ enum {
 	Opt_rescue,
 	Opt_usebackuproot,
 	Opt_nologreplay,
+	Opt_ignorebadroots,
+	Opt_ignoredatacsums,
+	Opt_rescue_all,

 	/* Deprecated options */
 	Opt_recovery,
+	Opt_inode_cache, Opt_noinode_cache,

 	/* Debugging options */
 	Opt_check_integrity,
@@ -455,9 +463,25 @@ static const match_table_t tokens = {
 static const match_table_t rescue_tokens = {
 	{Opt_usebackuproot, "usebackuproot"},
 	{Opt_nologreplay, "nologreplay"},
+	{Opt_ignorebadroots, "ignorebadroots"},
+	{Opt_ignorebadroots, "ibadroots"},
+	{Opt_ignoredatacsums, "ignoredatacsums"},
+	{Opt_ignoredatacsums, "idatacsums"},
+	{Opt_rescue_all, "all"},
 	{Opt_err, NULL},
 };

+static bool check_ro_option(struct btrfs_fs_info *fs_info, unsigned long opt,
+			    const char *opt_name)
+{
+	if (fs_info->mount_opt & opt) {
+		btrfs_err(fs_info, "%s must be used with ro mount option",
+			  opt_name);
+		return true;
+	}
+	return false;
+}
+
 static int parse_rescue_options(struct btrfs_fs_info *info, const char *options)
 {
 	char *opts;
@@ -487,6 +511,23 @@ static int parse_rescue_options(struct btrfs_fs_info *info, const char *options)
 			btrfs_set_and_info(info, NOLOGREPLAY,
 					   "disabling log replay at mount time");
 			break;
+		case Opt_ignorebadroots:
+			btrfs_set_and_info(info, IGNOREBADROOTS,
+					   "ignoring bad roots");
+			break;
+		case Opt_ignoredatacsums:
+			btrfs_set_and_info(info, IGNOREDATACSUMS,
+					   "ignoring data csums");
+			break;
+		case Opt_rescue_all:
+			btrfs_info(info, "enabling all of the rescue options");
+			btrfs_set_and_info(info, IGNOREDATACSUMS,
+					   "ignoring data csums");
+			btrfs_set_and_info(info, IGNOREBADROOTS,
+					   "ignoring bad roots");
+			btrfs_set_and_info(info, NOLOGREPLAY,
+					   "disabling log replay at mount time");
+			break;
 		case Opt_err:
 			btrfs_info(info, "unrecognized rescue option '%s'", p);
 			ret = -EINVAL;
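
As described in the summary, rescue= takes colon-separated values, for example mounting with ro,rescue=ignorebadroots:idatacsums. A userspace model of the token handling (the kernel uses its match_token() tables; strtok_r here is a deliberate simplification):

#include <stdio.h>
#include <string.h>

int main(void)
{
    char opts[] = "ignorebadroots:idatacsums:nologreplay";
    char *save = NULL;

    for (char *p = strtok_r(opts, ":", &save); p;
         p = strtok_r(NULL, ":", &save)) {
        if (!strcmp(p, "ignorebadroots") || !strcmp(p, "ibadroots"))
            puts("ignoring bad roots");
        else if (!strcmp(p, "ignoredatacsums") || !strcmp(p, "idatacsums"))
            puts("ignoring data csums");
        else if (!strcmp(p, "nologreplay"))
            puts("disabling log replay at mount time");
        else if (!strcmp(p, "all"))
            puts("enabling all of the rescue options");
        else
            printf("unrecognized rescue option '%s'\n", p);
    }
    return 0;
}
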
@@ -511,7 +552,6 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 {
 	substring_t args[MAX_OPT_ARGS];
 	char *p, *num;
-	u64 cache_gen;
 	int intarg;
 	int ret = 0;
 	char *compress_type;
@@ -521,11 +561,17 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 	bool saved_compress_force;
 	int no_compress = 0;

-	cache_gen = btrfs_super_cache_generation(info->super_copy);
 	if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
 		btrfs_set_opt(info->mount_opt, FREE_SPACE_TREE);
-	else if (cache_gen)
-		btrfs_set_opt(info->mount_opt, SPACE_CACHE);
+	else if (btrfs_free_space_cache_v1_active(info)) {
+		if (btrfs_is_zoned(info)) {
+			btrfs_info(info,
+			"zoned: clearing existing space cache");
+			btrfs_set_super_cache_generation(info->super_copy, 0);
+		} else {
+			btrfs_set_opt(info->mount_opt, SPACE_CACHE);
+		}
+	}

 	/*
 	 * Even the options are empty, we still need to do extra check
@@ -832,14 +878,9 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 			}
 			break;
 		case Opt_inode_cache:
-			btrfs_warn(info,
-	"the 'inode_cache' option is deprecated and will have no effect from 5.11");
-			btrfs_set_pending_and_info(info, INODE_MAP_CACHE,
-					   "enabling inode map caching");
-			break;
 		case Opt_noinode_cache:
-			btrfs_clear_pending_and_info(info, INODE_MAP_CACHE,
-					     "disabling inode map caching");
+			btrfs_warn(info,
+	"the 'inode_cache' option is deprecated and has no effect since 5.11");
 			break;
 		case Opt_clear_cache:
 			btrfs_set_and_info(info, CLEAR_CACHE,
@@ -968,14 +1009,14 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 		}
 	}
check:
-	/*
-	 * Extra check for current option against current flag
-	 */
-	if (btrfs_test_opt(info, NOLOGREPLAY) && !(new_flags & SB_RDONLY)) {
-		btrfs_err(info,
-			  "nologreplay must be used with ro mount option");
+	/* We're read-only, don't have to check. */
+	if (new_flags & SB_RDONLY)
+		goto out;
+
+	if (check_ro_option(info, BTRFS_MOUNT_NOLOGREPLAY, "nologreplay") ||
+	    check_ro_option(info, BTRFS_MOUNT_IGNOREBADROOTS, "ignorebadroots") ||
+	    check_ro_option(info, BTRFS_MOUNT_IGNOREDATACSUMS, "ignoredatacsums"))
 		ret = -EINVAL;
-	}
out:
 	if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE) &&
 	    !btrfs_test_opt(info, FREE_SPACE_TREE) &&
@@ -984,6 +1025,8 @@ out:
 		ret = -EINVAL;

 	}
+	if (!ret)
+		ret = btrfs_check_mountopts_zoned(info);
 	if (!ret && btrfs_test_opt(info, SPACE_CACHE))
 		btrfs_info(info, "disk space caching is enabled");
 	if (!ret && btrfs_test_opt(info, FREE_SPACE_TREE))
@@ -1127,7 +1170,6 @@ char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
 		ret = -ENOMEM;
 		goto err;
 	}
-	path->leave_spinning = 1;

 	name = kmalloc(PATH_MAX, GFP_KERNEL);
 	if (!name) {
@@ -1256,7 +1298,6 @@ static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objectid)
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
-	path->leave_spinning = 1;

 	/*
 	 * Find the "default" dir item which points to the root item that we
@@ -1383,11 +1424,18 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
 	return btrfs_commit_transaction(trans);
 }

+static void print_rescue_option(struct seq_file *seq, const char *s, bool *printed)
+{
+	seq_printf(seq, "%s%s", (*printed) ? ":" : ",rescue=", s);
+	*printed = true;
+}
+
 static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
 {
 	struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb);
 	const char *compress_type;
 	const char *subvol_name;
+	bool printed = false;

 	if (btrfs_test_opt(info, DEGRADED))
 		seq_puts(seq, ",degraded");
@@ -1420,7 +1468,13 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
 	if (btrfs_test_opt(info, NOTREELOG))
 		seq_puts(seq, ",notreelog");
 	if (btrfs_test_opt(info, NOLOGREPLAY))
-		seq_puts(seq, ",rescue=nologreplay");
+		print_rescue_option(seq, "nologreplay", &printed);
+	if (btrfs_test_opt(info, USEBACKUPROOT))
+		print_rescue_option(seq, "usebackuproot", &printed);
+	if (btrfs_test_opt(info, IGNOREBADROOTS))
+		print_rescue_option(seq, "ignorebadroots", &printed);
+	if (btrfs_test_opt(info, IGNOREDATACSUMS))
+		print_rescue_option(seq, "ignoredatacsums", &printed);
 	if (btrfs_test_opt(info, FLUSHONCOMMIT))
 		seq_puts(seq, ",flushoncommit");
 	if (btrfs_test_opt(info, DISCARD_SYNC))
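
print_rescue_option() collapses every active rescue option into a single rescue= group in the mount options output: the first option emits the ",rescue=" prefix, later ones are ':' separated. A compilable model of that formatting:

#include <stdbool.h>
#include <stdio.h>

static void print_rescue_option(const char *s, bool *printed)
{
    /* First option emits the ",rescue=" prefix, later ones a ':' */
    printf("%s%s", *printed ? ":" : ",rescue=", s);
    *printed = true;
}

int main(void)
{
    bool printed = false;

    print_rescue_option("nologreplay", &printed);
    print_rescue_option("ignorebadroots", &printed);
    print_rescue_option("ignoredatacsums", &printed);
    /* prints ",rescue=nologreplay:ignorebadroots:ignoredatacsums" */
    putchar('\n');
    return 0;
}
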
@ -1429,9 +1483,9 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
|
||||||
seq_puts(seq, ",discard=async");
|
seq_puts(seq, ",discard=async");
|
||||||
if (!(info->sb->s_flags & SB_POSIXACL))
|
if (!(info->sb->s_flags & SB_POSIXACL))
|
||||||
seq_puts(seq, ",noacl");
|
seq_puts(seq, ",noacl");
|
||||||
if (btrfs_test_opt(info, SPACE_CACHE))
|
if (btrfs_free_space_cache_v1_active(info))
|
||||||
seq_puts(seq, ",space_cache");
|
seq_puts(seq, ",space_cache");
|
||||||
else if (btrfs_test_opt(info, FREE_SPACE_TREE))
|
else if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
|
||||||
seq_puts(seq, ",space_cache=v2");
|
seq_puts(seq, ",space_cache=v2");
|
||||||
else
|
else
|
||||||
seq_puts(seq, ",nospace_cache");
|
seq_puts(seq, ",nospace_cache");
|
||||||
|
@ -1445,8 +1499,6 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
|
||||||
seq_puts(seq, ",enospc_debug");
|
seq_puts(seq, ",enospc_debug");
|
||||||
if (btrfs_test_opt(info, AUTO_DEFRAG))
|
if (btrfs_test_opt(info, AUTO_DEFRAG))
|
||||||
seq_puts(seq, ",autodefrag");
|
seq_puts(seq, ",autodefrag");
|
||||||
if (btrfs_test_opt(info, INODE_MAP_CACHE))
|
|
||||||
seq_puts(seq, ",inode_cache");
|
|
||||||
if (btrfs_test_opt(info, SKIP_BALANCE))
|
if (btrfs_test_opt(info, SKIP_BALANCE))
|
||||||
seq_puts(seq, ",skip_balance");
|
seq_puts(seq, ",skip_balance");
|
||||||
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
|
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
|
||||||
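
Note on the hunks above: print_rescue_option() folds every enabled rescue sub-option into a single mount option, emitting the ",rescue=" prefix once and joining later entries with ':'. A standalone userspace sketch of that formatting (plain printf in place of seq_printf; not part of the kernel diff):

#include <stdio.h>
#include <stdbool.h>

static void print_rescue_option(const char *s, bool *printed)
{
	printf("%s%s", *printed ? ":" : ",rescue=", s);
	*printed = true;
}

int main(void)
{
	bool printed = false;

	/* a filesystem mounted with nologreplay and usebackuproot */
	print_rescue_option("nologreplay", &printed);
	print_rescue_option("usebackuproot", &printed);
	putchar('\n');	/* prints ",rescue=nologreplay:usebackuproot" */
	return 0;
}
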
@@ -1810,6 +1862,8 @@ static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info,
 static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info,
 					 unsigned long old_opts)
 {
+	const bool cache_opt = btrfs_test_opt(fs_info, SPACE_CACHE);
+
 	/*
	 * We need to cleanup all defragable inodes if the autodefragment is
	 * close or the filesystem is read only.
@@ -1826,12 +1880,15 @@ static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info,
 	else if (btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) &&
		 !btrfs_test_opt(fs_info, DISCARD_ASYNC))
 		btrfs_discard_cleanup(fs_info);
+
+	/* If we toggled space cache */
+	if (cache_opt != btrfs_free_space_cache_v1_active(fs_info))
+		btrfs_set_free_space_cache_v1_active(fs_info, cache_opt);
 }
 
 static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
-	struct btrfs_root *root = fs_info->tree_root;
 	unsigned old_flags = sb->s_flags;
 	unsigned long old_opts = fs_info->mount_opt;
 	unsigned long old_compress_type = fs_info->compress_type;
@@ -1862,6 +1919,22 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 	btrfs_resize_thread_pool(fs_info,
 		fs_info->thread_pool_size, old_thread_pool_size);
 
+	if (btrfs_test_opt(fs_info, FREE_SPACE_TREE) !=
+	    btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
+	    (!sb_rdonly(sb) || (*flags & SB_RDONLY))) {
+		btrfs_warn(fs_info,
+		"remount supports changing free space tree only from ro to rw");
+		/* Make sure free space cache options match the state on disk */
+		if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
+			btrfs_set_opt(fs_info->mount_opt, FREE_SPACE_TREE);
+			btrfs_clear_opt(fs_info->mount_opt, SPACE_CACHE);
+		}
+		if (btrfs_free_space_cache_v1_active(fs_info)) {
+			btrfs_clear_opt(fs_info->mount_opt, FREE_SPACE_TREE);
+			btrfs_set_opt(fs_info->mount_opt, SPACE_CACHE);
+		}
+	}
+
 	if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))
 		goto out;
 
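The remount hunk above only warns when asked to switch the free space tree outside a ro->rw transition, then forces the in-memory mount options back to whatever is actually live on disk. A toy model of that reconciliation, with made-up flag names standing in for btrfs_set_opt()/btrfs_clear_opt() (illustrative only):

#include <stdbool.h>
#include <stdio.h>

#define OPT_SPACE_CACHE     (1u << 0)	/* free space cache v1 */
#define OPT_FREE_SPACE_TREE (1u << 1)	/* free space tree (v2) */

/* Force the requested options back to the state that is active on disk. */
static unsigned int reconcile(unsigned int opts, bool fst_on_disk, bool v1_active)
{
	if (fst_on_disk) {
		opts |= OPT_FREE_SPACE_TREE;
		opts &= ~OPT_SPACE_CACHE;
	}
	if (v1_active) {
		opts &= ~OPT_FREE_SPACE_TREE;
		opts |= OPT_SPACE_CACHE;
	}
	return opts;
}

int main(void)
{
	/* user asked for space_cache=v2 on a rw remount, fs still runs v1 */
	unsigned int opts = reconcile(OPT_FREE_SPACE_TREE, false, true);

	printf("v1 kept: %s, v2 kept: %s\n",
	       (opts & OPT_SPACE_CACHE) ? "yes" : "no",
	       (opts & OPT_FREE_SPACE_TREE) ? "yes" : "no");
	return 0;
}
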
@@ -1924,39 +1997,15 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 			goto restore;
 		}
 
-		ret = btrfs_cleanup_fs_roots(fs_info);
+		/*
+		 * NOTE: when remounting with a change that does writes, don't
+		 * put it anywhere above this point, as we are not sure to be
+		 * safe to write until we pass the above checks.
+		 */
+		ret = btrfs_start_pre_rw_mount(fs_info);
 		if (ret)
 			goto restore;
 
-		/* recover relocation */
-		mutex_lock(&fs_info->cleaner_mutex);
-		ret = btrfs_recover_relocation(root);
-		mutex_unlock(&fs_info->cleaner_mutex);
-		if (ret)
-			goto restore;
-
-		ret = btrfs_resume_balance_async(fs_info);
-		if (ret)
-			goto restore;
-
-		ret = btrfs_resume_dev_replace_async(fs_info);
-		if (ret) {
-			btrfs_warn(fs_info, "failed to resume dev_replace");
-			goto restore;
-		}
-
-		btrfs_qgroup_rescan_resume(fs_info);
-
-		if (!fs_info->uuid_root) {
-			btrfs_info(fs_info, "creating UUID tree");
-			ret = btrfs_create_uuid_tree(fs_info);
-			if (ret) {
-				btrfs_warn(fs_info,
-					   "failed to create the UUID tree %d",
-					   ret);
-				goto restore;
-			}
-		}
 		sb->s_flags &= ~SB_RDONLY;
 
 		set_bit(BTRFS_FS_OPEN, &fs_info->flags);
@@ -1970,6 +2019,7 @@ out:
 
 	wake_up_process(fs_info->transaction_kthread);
 	btrfs_remount_cleanup(fs_info, old_opts);
+	btrfs_clear_oneshot_options(fs_info);
 	clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
 
 	return 0;
@@ -2156,7 +2206,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	u64 total_used = 0;
 	u64 total_free_data = 0;
 	u64 total_free_meta = 0;
-	int bits = dentry->d_sb->s_blocksize_bits;
+	u32 bits = fs_info->sectorsize_bits;
 	__be32 *fsid = (__be32 *)fs_info->fs_devices->fsid;
 	unsigned factor = 1;
 	struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
@@ -2462,6 +2512,11 @@ static void __init btrfs_print_mod_info(void)
 #endif
 #ifdef CONFIG_BTRFS_FS_REF_VERIFY
 			", ref-verify=on"
+#endif
+#ifdef CONFIG_BLK_DEV_ZONED
+			", zoned=yes"
+#else
+			", zoned=no"
 #endif
 			;
 	pr_info("Btrfs loaded, crc32c=%s%s\n", crc32c_impl(), options);
@@ -2523,8 +2578,6 @@ static int __init init_btrfs_fs(void)
 	if (err)
 		goto free_end_io_wq;
 
-	btrfs_init_lockdep();
-
 	btrfs_print_mod_info();
 
 	err = btrfs_run_sanity_tests();
fs/btrfs/sysfs.c (117 changed lines):
@@ -263,6 +263,10 @@ BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
 BTRFS_FEAT_ATTR_INCOMPAT(metadata_uuid, METADATA_UUID);
 BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
 BTRFS_FEAT_ATTR_INCOMPAT(raid1c34, RAID1C34);
+/* Remove once support for zoned allocation is feature complete */
+#ifdef CONFIG_BTRFS_DEBUG
+BTRFS_FEAT_ATTR_INCOMPAT(zoned, ZONED);
+#endif
 
 static struct attribute *btrfs_supported_feature_attrs[] = {
 	BTRFS_FEAT_ATTR_PTR(mixed_backref),
@@ -278,6 +282,9 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
 	BTRFS_FEAT_ATTR_PTR(metadata_uuid),
 	BTRFS_FEAT_ATTR_PTR(free_space_tree),
 	BTRFS_FEAT_ATTR_PTR(raid1c34),
+#ifdef CONFIG_BTRFS_DEBUG
+	BTRFS_FEAT_ATTR_PTR(zoned),
+#endif
 	NULL
 };
 
@@ -329,10 +336,35 @@ static ssize_t send_stream_version_show(struct kobject *kobj,
 }
 BTRFS_ATTR(static_feature, send_stream_version, send_stream_version_show);
 
+static const char *rescue_opts[] = {
+	"usebackuproot",
+	"nologreplay",
+	"ignorebadroots",
+	"ignoredatacsums",
+	"all",
+};
+
+static ssize_t supported_rescue_options_show(struct kobject *kobj,
+					     struct kobj_attribute *a,
+					     char *buf)
+{
+	ssize_t ret = 0;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(rescue_opts); i++)
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s",
+				 (i ? " " : ""), rescue_opts[i]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+	return ret;
+}
+BTRFS_ATTR(static_feature, supported_rescue_options,
+	   supported_rescue_options_show);
+
 static struct attribute *btrfs_supported_static_feature_attrs[] = {
 	BTRFS_ATTR_PTR(static_feature, rmdir_subvol),
 	BTRFS_ATTR_PTR(static_feature, supported_checksums),
 	BTRFS_ATTR_PTR(static_feature, send_stream_version),
+	BTRFS_ATTR_PTR(static_feature, supported_rescue_options),
 	NULL
 };
 
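supported_rescue_options_show() renders the table as one space-separated line ending in '\n'. The same accumulation pattern, sketched in standalone C with snprintf standing in for the kernel's scnprintf (illustrative only, not part of the diff):

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

static const char *rescue_opts[] = {
	"usebackuproot", "nologreplay", "ignorebadroots",
	"ignoredatacsums", "all",
};

int main(void)
{
	char buf[4096];
	int ret = 0;
	size_t i;

	/* join entries with a single space, first entry without one */
	for (i = 0; i < ARRAY_SIZE(rescue_opts); i++)
		ret += snprintf(buf + ret, sizeof(buf) - ret, "%s%s",
				i ? " " : "", rescue_opts[i]);
	snprintf(buf + ret, sizeof(buf) - ret, "\n");
	fputs(buf, stdout);
	return 0;
}
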
@@ -433,7 +465,8 @@ static ssize_t btrfs_discard_iops_limit_store(struct kobject *kobj,
 		return -EINVAL;
 
 	WRITE_ONCE(discard_ctl->iops_limit, iops_limit);
-
+	btrfs_discard_calc_delay(discard_ctl);
+	btrfs_discard_schedule_work(discard_ctl, true);
 	return len;
 }
 BTRFS_ATTR_RW(discard, iops_limit, btrfs_discard_iops_limit_show,
@@ -463,7 +496,7 @@ static ssize_t btrfs_discard_kbps_limit_store(struct kobject *kobj,
 		return -EINVAL;
 
 	WRITE_ONCE(discard_ctl->kbps_limit, kbps_limit);
-
+	btrfs_discard_schedule_work(discard_ctl, true);
 	return len;
 }
 BTRFS_ATTR_RW(discard, kbps_limit, btrfs_discard_kbps_limit_show,
@@ -854,6 +887,82 @@ static ssize_t btrfs_exclusive_operation_show(struct kobject *kobj,
 }
 BTRFS_ATTR(, exclusive_operation, btrfs_exclusive_operation_show);
 
+static ssize_t btrfs_generation_show(struct kobject *kobj,
+				     struct kobj_attribute *a, char *buf)
+{
+	struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+
+	return scnprintf(buf, PAGE_SIZE, "%llu\n", fs_info->generation);
+}
+BTRFS_ATTR(, generation, btrfs_generation_show);
+
+/*
+ * Look for an exact string @string in @buffer with possible leading or
+ * trailing whitespace
+ */
+static bool strmatch(const char *buffer, const char *string)
+{
+	const size_t len = strlen(string);
+
+	/* Skip leading whitespace */
+	buffer = skip_spaces(buffer);
+
+	/* Match entire string, check if the rest is whitespace or empty */
+	if (strncmp(string, buffer, len) == 0 &&
+	    strlen(skip_spaces(buffer + len)) == 0)
+		return true;
+
+	return false;
+}
+
+static const char * const btrfs_read_policy_name[] = { "pid" };
+
+static ssize_t btrfs_read_policy_show(struct kobject *kobj,
+				      struct kobj_attribute *a, char *buf)
+{
+	struct btrfs_fs_devices *fs_devices = to_fs_devs(kobj);
+	ssize_t ret = 0;
+	int i;
+
+	for (i = 0; i < BTRFS_NR_READ_POLICY; i++) {
+		if (fs_devices->read_policy == i)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s[%s]",
+					 (ret == 0 ? "" : " "),
+					 btrfs_read_policy_name[i]);
+		else
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s",
					 (ret == 0 ? "" : " "),
+					 btrfs_read_policy_name[i]);
+	}
+
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+
+	return ret;
+}
+
+static ssize_t btrfs_read_policy_store(struct kobject *kobj,
+				       struct kobj_attribute *a,
+				       const char *buf, size_t len)
+{
+	struct btrfs_fs_devices *fs_devices = to_fs_devs(kobj);
+	int i;
+
+	for (i = 0; i < BTRFS_NR_READ_POLICY; i++) {
+		if (strmatch(buf, btrfs_read_policy_name[i])) {
+			if (i != fs_devices->read_policy) {
+				fs_devices->read_policy = i;
+				btrfs_info(fs_devices->fs_info,
+					   "read policy set to '%s'",
+					   btrfs_read_policy_name[i]);
+			}
+			return len;
+		}
+	}
+
+	return -EINVAL;
+}
+BTRFS_ATTR_RW(, read_policy, btrfs_read_policy_show, btrfs_read_policy_store);
+
 static const struct attribute *btrfs_attrs[] = {
 	BTRFS_ATTR_PTR(, label),
 	BTRFS_ATTR_PTR(, nodesize),
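strmatch() accepts a sysfs write like "pid\n" as an exact token while rejecting prefixes, which matters because userspace writes usually carry a trailing newline. A userspace check of those semantics; skip_spaces() is reimplemented here since the kernel helper is not available outside the tree:

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* userspace stand-in for the kernel's skip_spaces() */
static const char *skip_spaces(const char *s)
{
	while (isspace((unsigned char)*s))
		s++;
	return s;
}

static bool strmatch(const char *buffer, const char *string)
{
	const size_t len = strlen(string);

	buffer = skip_spaces(buffer);
	return strncmp(string, buffer, len) == 0 &&
	       strlen(skip_spaces(buffer + len)) == 0;
}

int main(void)
{
	printf("%d\n", strmatch("pid\n", "pid"));	/* 1: trailing newline ok */
	printf("%d\n", strmatch("  pid ", "pid"));	/* 1: whitespace tolerated */
	printf("%d\n", strmatch("pidfoo", "pid"));	/* 0: not an exact token */
	return 0;
}
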
@@ -863,6 +972,8 @@ static const struct attribute *btrfs_attrs[] = {
 	BTRFS_ATTR_PTR(, metadata_uuid),
 	BTRFS_ATTR_PTR(, checksum),
 	BTRFS_ATTR_PTR(, exclusive_operation),
+	BTRFS_ATTR_PTR(, generation),
+	BTRFS_ATTR_PTR(, read_policy),
 	NULL,
 };
 
@@ -1207,7 +1318,7 @@ static const char *alloc_name(u64 flags)
 	default:
 		WARN_ON(1);
 		return "invalid-combination";
-	};
+	}
 }
 
 /*

@@ -134,6 +134,7 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize)
 
 	fs_info->nodesize = nodesize;
 	fs_info->sectorsize = sectorsize;
+	fs_info->sectorsize_bits = ilog2(sectorsize);
 	set_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state);
 
 	test_mnt->mnt_sb->s_fs_info = fs_info;
@@ -224,7 +225,7 @@ btrfs_alloc_dummy_block_group(struct btrfs_fs_info *fs_info,
 	INIT_LIST_HEAD(&cache->list);
 	INIT_LIST_HEAD(&cache->cluster_list);
 	INIT_LIST_HEAD(&cache->bg_list);
-	btrfs_init_free_space_ctl(cache);
+	btrfs_init_free_space_ctl(cache, cache->free_space_ctl);
 	mutex_init(&cache->free_space_lock);
 
 	return cache;

@@ -379,54 +379,50 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
 static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
 {
 	struct btrfs_fs_info *fs_info;
-	unsigned long len;
 	unsigned long *bitmap = NULL;
 	struct extent_buffer *eb = NULL;
 	int ret;
 
 	test_msg("running extent buffer bitmap tests");
 
-	/*
-	 * In ppc64, sectorsize can be 64K, thus 4 * 64K will be larger than
-	 * BTRFS_MAX_METADATA_BLOCKSIZE.
-	 */
-	len = (sectorsize < BTRFS_MAX_METADATA_BLOCKSIZE)
-		? sectorsize * 4 : sectorsize;
-
-	fs_info = btrfs_alloc_dummy_fs_info(len, len);
+	fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
 	if (!fs_info) {
 		test_std_err(TEST_ALLOC_FS_INFO);
 		return -ENOMEM;
 	}
 
-	bitmap = kmalloc(len, GFP_KERNEL);
+	bitmap = kmalloc(nodesize, GFP_KERNEL);
 	if (!bitmap) {
 		test_err("couldn't allocate test bitmap");
 		ret = -ENOMEM;
 		goto out;
 	}
 
-	eb = __alloc_dummy_extent_buffer(fs_info, 0, len);
+	eb = __alloc_dummy_extent_buffer(fs_info, 0, nodesize);
 	if (!eb) {
 		test_std_err(TEST_ALLOC_ROOT);
 		ret = -ENOMEM;
 		goto out;
 	}
 
-	ret = __test_eb_bitmaps(bitmap, eb, len);
+	ret = __test_eb_bitmaps(bitmap, eb, nodesize);
 	if (ret)
 		goto out;
 
-	/* Do it over again with an extent buffer which isn't page-aligned. */
 	free_extent_buffer(eb);
-	eb = __alloc_dummy_extent_buffer(fs_info, nodesize / 2, len);
+
+	/*
+	 * Test again for case where the tree block is sectorsize aligned but
+	 * not nodesize aligned.
+	 */
+	eb = __alloc_dummy_extent_buffer(fs_info, sectorsize, nodesize);
 	if (!eb) {
 		test_std_err(TEST_ALLOC_ROOT);
 		ret = -ENOMEM;
 		goto out;
 	}
 
-	ret = __test_eb_bitmaps(bitmap, eb, len);
+	ret = __test_eb_bitmaps(bitmap, eb, nodesize);
 out:
 	free_extent_buffer(eb);
 	kfree(bitmap);

@@ -399,7 +399,6 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group *cache,
 	u64 offset;
 	u64 max_extent_size;
 	const struct btrfs_free_space_op test_free_space_ops = {
-		.recalc_thresholds = cache->free_space_ctl->op->recalc_thresholds,
 		.use_bitmap = test_use_bitmap,
 	};
 	const struct btrfs_free_space_op *orig_free_space_ops;

@@ -36,7 +36,6 @@ static int insert_normal_tree_ref(struct btrfs_root *root, u64 bytenr,
 		return -ENOMEM;
 	}
 
-	path->leave_spinning = 1;
 	ret = btrfs_insert_empty_item(&trans, root, path, &ins, size);
 	if (ret) {
 		test_err("couldn't insert ref %d", ret);
@@ -86,7 +85,6 @@ static int add_tree_ref(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
 		return -ENOMEM;
 	}
 
-	path->leave_spinning = 1;
 	ret = btrfs_search_slot(&trans, root, &key, path, 0, 1);
 	if (ret) {
 		test_err("couldn't find extent ref");
@@ -135,7 +133,6 @@ static int remove_extent_item(struct btrfs_root *root, u64 bytenr,
 		test_std_err(TEST_ALLOC_ROOT);
 		return -ENOMEM;
 	}
-	path->leave_spinning = 1;
 
 	ret = btrfs_search_slot(&trans, root, &key, path, -1, 1);
 	if (ret) {
@@ -170,7 +167,6 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr,
 		return -ENOMEM;
 	}
 
-	path->leave_spinning = 1;
 	ret = btrfs_search_slot(&trans, root, &key, path, 0, 1);
 	if (ret) {
 		test_err("couldn't find extent ref");

@@ -16,7 +16,6 @@
 #include "transaction.h"
 #include "locking.h"
 #include "tree-log.h"
-#include "inode-map.h"
 #include "volumes.h"
 #include "dev-replace.h"
 #include "qgroup.h"
@@ -155,6 +154,7 @@ static noinline void switch_commit_roots(struct btrfs_trans_handle *trans)
 	struct btrfs_transaction *cur_trans = trans->transaction;
 	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_root *root, *tmp;
+	struct btrfs_caching_control *caching_ctl, *next;
 
 	down_write(&fs_info->commit_root_sem);
 	list_for_each_entry_safe(root, tmp, &cur_trans->switch_commits,
@@ -162,8 +162,6 @@ static noinline void switch_commit_roots(struct btrfs_trans_handle *trans)
 		list_del_init(&root->dirty_list);
 		free_extent_buffer(root->commit_root);
 		root->commit_root = btrfs_root_node(root);
-		if (is_fstree(root->root_key.objectid))
-			btrfs_unpin_free_ino(root);
 		extent_io_tree_release(&root->dirty_log_pages);
 		btrfs_qgroup_clean_swapped_blocks(root);
 	}
@@ -180,6 +178,47 @@ static noinline void switch_commit_roots(struct btrfs_trans_handle *trans)
 		spin_lock(&cur_trans->dropped_roots_lock);
 	}
 	spin_unlock(&cur_trans->dropped_roots_lock);
 
+	/*
+	 * We have to update the last_byte_to_unpin under the commit_root_sem,
+	 * at the same time we swap out the commit roots.
+	 *
+	 * This is because we must have a real view of the last spot the caching
+	 * kthreads were while caching.  Consider the following views of the
+	 * extent tree for a block group
+	 *
+	 * commit root
+	 * +----+----+----+----+----+----+----+
+	 * |\\\\|    |\\\\|\\\\|    |\\\\|\\\\|
+	 * +----+----+----+----+----+----+----+
+	 * 0    1    2    3    4    5    6    7
+	 *
+	 * new commit root
+	 * +----+----+----+----+----+----+----+
+	 * |    |    |    |\\\\|    |    |\\\\|
+	 * +----+----+----+----+----+----+----+
+	 * 0    1    2    3    4    5    6    7
+	 *
+	 * If the cache_ctl->progress was at 3, then we are only allowed to
+	 * unpin [0,1) and [2,3], because the caching thread has already
+	 * processed those extents.  We are not allowed to unpin [5,6), because
+	 * the caching thread will re-start it's search from 3, and thus find
+	 * the hole from [4,6) to add to the free space cache.
+	 */
+	spin_lock(&fs_info->block_group_cache_lock);
+	list_for_each_entry_safe(caching_ctl, next,
+				 &fs_info->caching_block_groups, list) {
+		struct btrfs_block_group *cache = caching_ctl->block_group;
+
+		if (btrfs_block_group_done(cache)) {
+			cache->last_byte_to_unpin = (u64)-1;
+			list_del_init(&caching_ctl->list);
+			btrfs_put_caching_control(caching_ctl);
+		} else {
+			cache->last_byte_to_unpin = caching_ctl->progress;
+		}
+	}
+	spin_unlock(&fs_info->block_group_cache_lock);
+
 	up_write(&fs_info->commit_root_sem);
 }
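The last_byte_to_unpin rule from the comment above reduces to: an extent may be unpinned only if the caching thread has already scanned past it; anything beyond the thread's progress will be rediscovered and must stay pinned. A tiny illustration using the offsets from the diagram (an assumed simplification, not kernel code):

#include <stdbool.h>
#include <stdio.h>

static bool may_unpin(unsigned long long start, unsigned long long last_byte_to_unpin)
{
	return start < last_byte_to_unpin;
}

int main(void)
{
	unsigned long long progress = 3;	/* caching stopped at offset 3 */

	printf("unpin [0,1): %s\n", may_unpin(0, progress) ? "yes" : "no");
	printf("unpin [2,3): %s\n", may_unpin(2, progress) ? "yes" : "no");
	printf("unpin [5,6): %s\n", may_unpin(5, progress) ? "yes" : "no");
	return 0;
}
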
@@ -856,24 +895,24 @@ void btrfs_throttle(struct btrfs_fs_info *fs_info)
 	wait_current_trans(fs_info);
 }
 
-static int should_end_transaction(struct btrfs_trans_handle *trans)
+static bool should_end_transaction(struct btrfs_trans_handle *trans)
 {
 	struct btrfs_fs_info *fs_info = trans->fs_info;
 
 	if (btrfs_check_space_for_delayed_refs(fs_info))
-		return 1;
+		return true;
 
 	return !!btrfs_block_rsv_check(&fs_info->global_block_rsv, 5);
 }
 
-int btrfs_should_end_transaction(struct btrfs_trans_handle *trans)
+bool btrfs_should_end_transaction(struct btrfs_trans_handle *trans)
 {
 	struct btrfs_transaction *cur_trans = trans->transaction;
 
 	smp_mb();
 	if (cur_trans->state >= TRANS_STATE_COMMIT_START ||
 	    cur_trans->delayed_refs.flushing)
-		return 1;
+		return true;
 
 	return should_end_transaction(trans);
 }
@@ -1300,8 +1339,6 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
 			btrfs_free_log(trans, root);
 			btrfs_update_reloc_root(trans, root);
 
-			btrfs_save_ino_cache(root, trans);
-
 			/* see comments in should_cow_block() */
 			clear_bit(BTRFS_ROOT_FORCE_COW, &root->state);
 			smp_mb__after_atomic();
@@ -1598,8 +1635,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 		goto fail;
 	}
 
-	btrfs_set_lock_blocking_write(old);
-
 	ret = btrfs_copy_root(trans, root, old, &tmp, objectid);
 	/* clean up in any case */
 	btrfs_tree_unlock(old);
@@ -1681,7 +1716,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 					 dentry->d_name.len * 2);
 	parent_inode->i_mtime = parent_inode->i_ctime =
 		current_time(parent_inode);
-	ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode);
+	ret = btrfs_update_inode_fallback(trans, parent_root, BTRFS_I(parent_inode));
 	if (ret) {
 		btrfs_abort_transaction(trans, ret);
 		goto fail;
@@ -1761,6 +1796,8 @@ static void update_super_roots(struct btrfs_fs_info *fs_info)
 	super->root_level = root_item->level;
 	if (btrfs_test_opt(fs_info, SPACE_CACHE))
 		super->cache_generation = root_item->generation;
+	else if (test_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags))
+		super->cache_generation = 0;
 	if (test_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags))
 		super->uuid_tree_generation = root_item->generation;
 }
@@ -1956,10 +1993,8 @@ static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
 	}
 }
 
-static inline int btrfs_start_delalloc_flush(struct btrfs_trans_handle *trans)
+static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
 {
-	struct btrfs_fs_info *fs_info = trans->fs_info;
-
 	/*
	 * We use writeback_inodes_sb here because if we used
	 * btrfs_start_delalloc_roots we would deadlock with fs freeze.
@@ -1969,50 +2004,15 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
 	 * from already being in a transaction and our join_transaction doesn't
 	 * have to re-take the fs freeze lock.
 	 */
-	if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) {
+	if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
 		writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC);
-	} else {
-		struct btrfs_pending_snapshot *pending;
-		struct list_head *head = &trans->transaction->pending_snapshots;
-
-		/*
-		 * Flush dellaloc for any root that is going to be snapshotted.
-		 * This is done to avoid a corrupted version of files, in the
-		 * snapshots, that had both buffered and direct IO writes (even
-		 * if they were done sequentially) due to an unordered update of
-		 * the inode's size on disk.
-		 */
-		list_for_each_entry(pending, head, list) {
-			int ret;
-
-			ret = btrfs_start_delalloc_snapshot(pending->root);
-			if (ret)
-				return ret;
-		}
-	}
 	return 0;
 }
 
-static inline void btrfs_wait_delalloc_flush(struct btrfs_trans_handle *trans)
+static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
 {
-	struct btrfs_fs_info *fs_info = trans->fs_info;
-
-	if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) {
+	if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
 		btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
-	} else {
-		struct btrfs_pending_snapshot *pending;
-		struct list_head *head = &trans->transaction->pending_snapshots;
-
-		/*
-		 * Wait for any dellaloc that we started previously for the roots
-		 * that are going to be snapshotted. This is to avoid a corrupted
-		 * version of files in the snapshots that had both buffered and
-		 * direct IO writes (even if they were done sequentially).
-		 */
-		list_for_each_entry(pending, head, list)
-			btrfs_wait_ordered_extents(pending->root,
-						   U64_MAX, 0, U64_MAX);
-	}
 }
 
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
@@ -2150,7 +2150,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 
 	extwriter_counter_dec(cur_trans, trans->type);
 
-	ret = btrfs_start_delalloc_flush(trans);
+	ret = btrfs_start_delalloc_flush(fs_info);
 	if (ret)
 		goto cleanup_transaction;
 
@@ -2166,7 +2166,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 	if (ret)
 		goto cleanup_transaction;
 
-	btrfs_wait_delalloc_flush(trans);
+	btrfs_wait_delalloc_flush(fs_info);
 
 	/*
	 * Wait for all ordered extents started by a fast fsync that joined this
@@ -2293,8 +2293,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 		goto unlock_tree_log;
 	}
 
-	btrfs_prepare_extent_commit(fs_info);
-
 	cur_trans = fs_info->running_transaction;
 
 	btrfs_set_root_node(&fs_info->tree_root->root_item,
@@ -2435,10 +2433,6 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
 	btrfs_debug(fs_info, "cleaner removing %llu", root->root_key.objectid);
 
 	btrfs_kill_all_delayed_nodes(root);
-	if (root->ino_cache_inode) {
-		iput(root->ino_cache_inode);
-		root->ino_cache_inode = NULL;
-	}
 
 	if (btrfs_header_backref_rev(root->node) <
 			BTRFS_MIXED_BACKREF_REV)
@@ -2459,16 +2453,6 @@ void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info)
 	if (!prev)
 		return;
 
-	bit = 1 << BTRFS_PENDING_SET_INODE_MAP_CACHE;
-	if (prev & bit)
-		btrfs_set_opt(fs_info->mount_opt, INODE_MAP_CACHE);
-	prev &= ~bit;
-
-	bit = 1 << BTRFS_PENDING_CLEAR_INODE_MAP_CACHE;
-	if (prev & bit)
-		btrfs_clear_opt(fs_info->mount_opt, INODE_MAP_CACHE);
-	prev &= ~bit;
-
 	bit = 1 << BTRFS_PENDING_COMMIT;
 	if (prev & bit)
 		btrfs_debug(fs_info, "pending commit done");

@@ -112,7 +112,6 @@ struct btrfs_transaction {
 #define TRANS_EXTWRITERS	(__TRANS_START | __TRANS_ATTACH)
 
 #define BTRFS_SEND_TRANS_STUB	((void *)1)
-#define BTRFS_DIO_SYNC_STUB	((void *)2)
 
 struct btrfs_trans_handle {
 	u64 transid;
@@ -219,7 +218,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans);
 int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
 				   int wait_for_unblock);
 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans);
-int btrfs_should_end_transaction(struct btrfs_trans_handle *trans);
+bool btrfs_should_end_transaction(struct btrfs_trans_handle *trans);
 void btrfs_throttle(struct btrfs_fs_info *fs_info);
 int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root);
(Diff between files not shown because of its large size.)
@@ -52,7 +52,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
 	u32 nritems;
 
 	root_node = btrfs_lock_root_node(root);
-	btrfs_set_lock_blocking_write(root_node);
 	nritems = btrfs_header_nritems(root_node);
 	root->defrag_max.objectid = 0;
 	/* from above we know this is not a leaf */

@@ -17,7 +17,6 @@
 #include "backref.h"
 #include "compression.h"
 #include "qgroup.h"
-#include "inode-map.h"
 #include "block-group.h"
 #include "space-info.h"
 
@@ -139,8 +138,25 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
 			   struct btrfs_log_ctx *ctx)
 {
 	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_root *tree_root = fs_info->tree_root;
 	int ret = 0;
 
+	/*
+	 * First check if the log root tree was already created. If not, create
+	 * it before locking the root's log_mutex, just to keep lockdep happy.
+	 */
+	if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &tree_root->state)) {
+		mutex_lock(&tree_root->log_mutex);
+		if (!fs_info->log_root_tree) {
+			ret = btrfs_init_log_root_tree(trans, fs_info);
+			if (!ret)
+				set_bit(BTRFS_ROOT_HAS_LOG_TREE, &tree_root->state);
+		}
+		mutex_unlock(&tree_root->log_mutex);
+		if (ret)
+			return ret;
+	}
+
 	mutex_lock(&root->log_mutex);
 
 	if (root->log_root) {
@@ -156,13 +172,6 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
 			set_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state);
 		}
 	} else {
-		mutex_lock(&fs_info->tree_log_mutex);
-		if (!fs_info->log_root_tree)
-			ret = btrfs_init_log_root_tree(trans, fs_info);
-		mutex_unlock(&fs_info->tree_log_mutex);
-		if (ret)
-			goto out;
-
 		ret = btrfs_add_log_tree(trans, root);
 		if (ret)
 			goto out;
@@ -172,7 +181,6 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
 		root->log_start_pid = current->pid;
 	}
 
-	atomic_inc(&root->log_batch);
 	atomic_inc(&root->log_writers);
 	if (ctx && !ctx->logging_new_name) {
 		int index = root->log_transid % 2;
@@ -576,6 +584,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
 				      struct extent_buffer *eb, int slot,
 				      struct btrfs_key *key)
 {
+	struct btrfs_drop_extents_args drop_args = { 0 };
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	int found_type;
 	u64 extent_end;
@@ -653,7 +662,10 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
 	btrfs_release_path(path);
 
 	/* drop any overlapping extents */
-	ret = btrfs_drop_extents(trans, root, inode, start, extent_end, 1);
+	drop_args.start = start;
+	drop_args.end = extent_end;
+	drop_args.drop_cache = true;
+	ret = btrfs_drop_extents(trans, root, BTRFS_I(inode), &drop_args);
 	if (ret)
 		goto out;
 
@@ -828,9 +840,9 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
 	if (ret)
 		goto out;
 
-	inode_add_bytes(inode, nbytes);
 update_inode:
-	ret = btrfs_update_inode(trans, root, inode);
+	btrfs_update_inode_bytes(BTRFS_I(inode), nbytes, drop_args.bytes_found);
+	ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
 out:
 	if (inode)
 		iput(inode);
@@ -1529,7 +1541,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
 			if (ret)
 				goto out;
 
-			btrfs_update_inode(trans, root, inode);
+			btrfs_update_inode(trans, root, BTRFS_I(inode));
 		}
 
 		ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen;
@@ -1564,18 +1576,6 @@ out:
 	return ret;
 }
 
-static int insert_orphan_item(struct btrfs_trans_handle *trans,
-			      struct btrfs_root *root, u64 ino)
-{
-	int ret;
-
-	ret = btrfs_insert_orphan_item(trans, root, ino);
-	if (ret == -EEXIST)
-		ret = 0;
-
-	return ret;
-}
-
 static int count_inode_extrefs(struct btrfs_root *root,
 		struct btrfs_inode *inode, struct btrfs_path *path)
 {
@@ -1716,7 +1716,7 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
 
 	if (nlink != inode->i_nlink) {
 		set_nlink(inode, nlink);
-		btrfs_update_inode(trans, root, inode);
+		btrfs_update_inode(trans, root, BTRFS_I(inode));
 	}
 	BTRFS_I(inode)->index_cnt = (u64)-1;
 
@@ -1727,7 +1727,9 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
 			if (ret)
 				goto out;
 		}
-		ret = insert_orphan_item(trans, root, ino);
+		ret = btrfs_insert_orphan_item(trans, root, ino);
+		if (ret == -EEXIST)
+			ret = 0;
 	}
 
 out:
@@ -1820,7 +1822,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
 			set_nlink(inode, 1);
 		else
 			inc_nlink(inode);
-		ret = btrfs_update_inode(trans, root, inode);
+		ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
 	} else if (ret == -EEXIST) {
 		ret = 0;
 	} else {
@@ -1973,7 +1975,7 @@ out:
 	btrfs_release_path(path);
 	if (!ret && update_size) {
 		btrfs_i_size_write(BTRFS_I(dir), dir->i_size + name_len * 2);
-		ret = btrfs_update_inode(trans, root, dir);
+		ret = btrfs_update_inode(trans, root, BTRFS_I(dir));
 	}
 	kfree(name);
 	iput(dir);
@@ -2586,6 +2588,7 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
 			 * those prealloc extents just after replaying them.
 			 */
 			if (S_ISREG(mode)) {
+				struct btrfs_drop_extents_args drop_args = { 0 };
 				struct inode *inode;
 				u64 from;
 
@@ -2596,12 +2599,18 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
 				}
 				from = ALIGN(i_size_read(inode),
					     root->fs_info->sectorsize);
-				ret = btrfs_drop_extents(wc->trans, root, inode,
-							 from, (u64)-1, 1);
+				drop_args.start = from;
+				drop_args.end = (u64)-1;
+				drop_args.drop_cache = true;
+				ret = btrfs_drop_extents(wc->trans, root,
+							 BTRFS_I(inode),
+							 &drop_args);
 				if (!ret) {
+					inode_sub_bytes(inode,
+							drop_args.bytes_found);
 					/* Update the inode's nbytes. */
 					ret = btrfs_update_inode(wc->trans,
-								 root, inode);
+							root, BTRFS_I(inode));
 				}
 				iput(inode);
 				if (ret)
@@ -2709,7 +2718,9 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
 		btrfs_node_key_to_cpu(cur, &first_key, path->slots[*level]);
 		blocksize = fs_info->nodesize;
 
-		next = btrfs_find_create_tree_block(fs_info, bytenr);
+		next = btrfs_find_create_tree_block(fs_info, bytenr,
+						    btrfs_header_owner(cur),
+						    *level - 1);
 		if (IS_ERR(next))
 			return PTR_ERR(next);
 
@@ -2732,7 +2743,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
 
 				if (trans) {
 					btrfs_tree_lock(next);
-					btrfs_set_lock_blocking_write(next);
 					btrfs_clean_tree_block(next);
 					btrfs_wait_tree_block_writeback(next);
 					btrfs_tree_unlock(next);
@@ -2801,7 +2811,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
 
 			if (trans) {
 				btrfs_tree_lock(next);
-				btrfs_set_lock_blocking_write(next);
 				btrfs_clean_tree_block(next);
 				btrfs_wait_tree_block_writeback(next);
 				btrfs_tree_unlock(next);
@@ -2883,7 +2892,6 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
 
 		if (trans) {
 			btrfs_tree_lock(next);
-			btrfs_set_lock_blocking_write(next);
 			btrfs_clean_tree_block(next);
 			btrfs_wait_tree_block_writeback(next);
 			btrfs_tree_unlock(next);
@@ -3023,6 +3031,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 	int log_transid = 0;
 	struct btrfs_log_ctx root_log_ctx;
 	struct blk_plug plug;
+	u64 log_root_start;
+	u64 log_root_level;
 
 	mutex_lock(&root->log_mutex);
 	log_transid = ctx->log_transid;
@@ -3200,22 +3210,31 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 		goto out_wake_log_root;
 	}
 
-	btrfs_set_super_log_root(fs_info->super_for_commit,
-				 log_root_tree->node->start);
-	btrfs_set_super_log_root_level(fs_info->super_for_commit,
-				       btrfs_header_level(log_root_tree->node));
+	log_root_start = log_root_tree->node->start;
+	log_root_level = btrfs_header_level(log_root_tree->node);
 
 	log_root_tree->log_transid++;
 	mutex_unlock(&log_root_tree->log_mutex);
 
 	/*
-	 * Nobody else is going to jump in and write the ctree
-	 * super here because the log_commit atomic below is protecting
-	 * us. We must be called with a transaction handle pinning
-	 * the running transaction open, so a full commit can't hop
-	 * in and cause problems either.
+	 * Here we are guaranteed that nobody is going to write the superblock
+	 * for the current transaction before us and that neither we do write
+	 * our superblock before the previous transaction finishes its commit
+	 * and writes its superblock, because:
+	 *
+	 * 1) We are holding a handle on the current transaction, so no body
+	 *    can commit it until we release the handle;
+	 *
+	 * 2) Before writing our superblock we acquire the tree_log_mutex, so
+	 *    if the previous transaction is still committing, and hasn't yet
+	 *    written its superblock, we wait for it to do it, because a
+	 *    transaction commit acquires the tree_log_mutex when the commit
+	 *    begins and releases it only after writing its superblock.
	 */
+	mutex_lock(&fs_info->tree_log_mutex);
+	btrfs_set_super_log_root(fs_info->super_for_commit, log_root_start);
+	btrfs_set_super_log_root_level(fs_info->super_for_commit, log_root_level);
 	ret = write_all_supers(fs_info, 1);
+	mutex_unlock(&fs_info->tree_log_mutex);
 	if (ret) {
 		btrfs_set_log_full_commit(trans);
 		btrfs_abort_transaction(trans, ret);
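The rewritten comment argues that taking tree_log_mutex around the superblock update is what serializes the log sync path against a committing transaction. A minimal pthread sketch of that single-writer funnel (illustrative only; on the kernel side both paths call write_all_supers() under the same mutex):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t tree_log_mutex = PTHREAD_MUTEX_INITIALIZER;
static unsigned long long super_log_root;	/* stands in for the superblock field */

/*
 * Both the log sync path and the transaction commit path funnel their
 * superblock update through the same mutex, so whichever arrives second
 * waits until the first has finished writing its superblock.
 */
static void write_super(unsigned long long log_root)
{
	pthread_mutex_lock(&tree_log_mutex);
	super_log_root = log_root;	/* write_all_supers() in the kernel */
	pthread_mutex_unlock(&tree_log_mutex);
}

int main(void)
{
	write_super(4096);
	printf("super log root: %llu\n", super_log_root);
	return 0;
}
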
@@ -3300,6 +3319,7 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
 	if (fs_info->log_root_tree) {
 		free_log_tree(trans, fs_info->log_root_tree);
 		fs_info->log_root_tree = NULL;
+		clear_bit(BTRFS_ROOT_HAS_LOG_TREE, &fs_info->tree_root->state);
 	}
 	return 0;
 }
@@ -4196,6 +4216,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
 			   struct btrfs_path *path,
 			   struct btrfs_log_ctx *ctx)
 {
+	struct btrfs_drop_extents_args drop_args = { 0 };
 	struct btrfs_root *log = root->log_root;
 	struct btrfs_file_extent_item *fi;
 	struct extent_buffer *leaf;
@@ -4204,19 +4225,21 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
 	u64 extent_offset = em->start - em->orig_start;
 	u64 block_len;
 	int ret;
-	int extent_inserted = 0;
 
 	ret = log_extent_csums(trans, inode, log, em, ctx);
 	if (ret)
 		return ret;
 
-	ret = __btrfs_drop_extents(trans, log, inode, path, em->start,
-				   em->start + em->len, NULL, 0, 1,
-				   sizeof(*fi), &extent_inserted);
+	drop_args.path = path;
+	drop_args.start = em->start;
+	drop_args.end = em->start + em->len;
+	drop_args.replace_extent = true;
+	drop_args.extent_item_size = sizeof(*fi);
+	ret = btrfs_drop_extents(trans, log, inode, &drop_args);
 	if (ret)
 		return ret;
 
-	if (!extent_inserted) {
+	if (!drop_args.extent_inserted) {
 		key.objectid = btrfs_ino(inode);
 		key.type = BTRFS_EXTENT_DATA_KEY;
 		key.offset = em->start;
@@ -4375,8 +4398,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
 			do {
 				ret = btrfs_truncate_inode_items(trans,
							 root->log_root,
-							 &inode->vfs_inode,
-							 truncate_offset,
+							 inode, truncate_offset,
							 BTRFS_EXTENT_DATA_KEY);
 			} while (ret == -EAGAIN);
 			if (ret)
@@ -4415,14 +4437,12 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 	struct extent_map *em, *n;
 	struct list_head extents;
 	struct extent_map_tree *tree = &inode->extent_tree;
-	u64 test_gen;
 	int ret = 0;
 	int num = 0;
 
 	INIT_LIST_HEAD(&extents);
 
 	write_lock(&tree->lock);
-	test_gen = root->fs_info->last_trans_committed;
 
 	list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
 		list_del_init(&em->list);
@@ -4438,7 +4458,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 			goto process;
 		}
 
-		if (em->generation <= test_gen)
+		if (em->generation < trans->transid)
 			continue;
 
 		/* We log prealloc extents beyond eof later. */
@@ -4571,6 +4591,10 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
 	const u64 ino = btrfs_ino(inode);
 	int ins_nr = 0;
 	int start_slot = 0;
+	bool found_xattrs = false;
+
+	if (test_bit(BTRFS_INODE_NO_XATTRS, &inode->runtime_flags))
+		return 0;
 
 	key.objectid = ino;
 	key.type = BTRFS_XATTR_ITEM_KEY;
@@ -4609,6 +4633,7 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
 			start_slot = slot;
 		ins_nr++;
 		path->slots[0]++;
+		found_xattrs = true;
 		cond_resched();
 	}
 	if (ins_nr > 0) {
@@ -4618,6 +4643,9 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
 			return ret;
 	}
 
+	if (!found_xattrs)
+		set_bit(BTRFS_INODE_NO_XATTRS, &inode->runtime_flags);
+
 	return 0;
 }
 
|
@ -5303,7 +5331,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
|
||||||
&inode->runtime_flags);
|
&inode->runtime_flags);
|
||||||
while(1) {
|
while(1) {
|
||||||
ret = btrfs_truncate_inode_items(trans,
|
ret = btrfs_truncate_inode_items(trans,
|
||||||
log, &inode->vfs_inode, 0, 0);
|
log, inode, 0, 0);
|
||||||
if (ret != -EAGAIN)
|
if (ret != -EAGAIN)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -5442,11 +5470,10 @@ out_unlock:
|
||||||
static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans,
|
static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_inode *inode)
|
struct btrfs_inode *inode)
|
||||||
{
|
{
|
||||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
|
||||||
bool ret = false;
|
bool ret = false;
|
||||||
|
|
||||||
mutex_lock(&inode->log_mutex);
|
mutex_lock(&inode->log_mutex);
|
||||||
if (inode->last_unlink_trans > fs_info->last_trans_committed) {
|
if (inode->last_unlink_trans >= trans->transid) {
|
||||||
/*
|
/*
|
||||||
* Make sure any commits to the log are forced to be full
|
* Make sure any commits to the log are forced to be full
|
||||||
* commits.
|
* commits.
|
||||||
|
@ -5468,8 +5495,7 @@ static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans,
|
||||||
static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
|
static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_inode *inode,
|
struct btrfs_inode *inode,
|
||||||
struct dentry *parent,
|
struct dentry *parent,
|
||||||
struct super_block *sb,
|
struct super_block *sb)
|
||||||
u64 last_committed)
|
|
||||||
{
|
{
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
struct dentry *old_parent = NULL;
|
struct dentry *old_parent = NULL;
|
||||||
|
@ -5481,8 +5507,8 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
|
||||||
* and other fun in this file.
|
* and other fun in this file.
|
||||||
*/
|
*/
|
||||||
if (S_ISREG(inode->vfs_inode.i_mode) &&
|
if (S_ISREG(inode->vfs_inode.i_mode) &&
|
||||||
inode->generation <= last_committed &&
|
inode->generation < trans->transid &&
|
||||||
inode->last_unlink_trans <= last_committed)
|
inode->last_unlink_trans < trans->transid)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
if (!S_ISDIR(inode->vfs_inode.i_mode)) {
|
if (!S_ISDIR(inode->vfs_inode.i_mode)) {
|
||||||
|
@ -5828,7 +5854,6 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans,
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||||
const u64 last_committed = fs_info->last_trans_committed;
|
|
||||||
struct extent_buffer *leaf = path->nodes[0];
|
struct extent_buffer *leaf = path->nodes[0];
|
||||||
int slot = path->slots[0];
|
int slot = path->slots[0];
|
||||||
struct btrfs_key search_key;
|
struct btrfs_key search_key;
|
||||||
|
@ -5847,7 +5872,7 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans,
|
||||||
if (IS_ERR(inode))
|
if (IS_ERR(inode))
|
||||||
return PTR_ERR(inode);
|
return PTR_ERR(inode);
|
||||||
|
|
||||||
if (BTRFS_I(inode)->generation > last_committed)
|
if (BTRFS_I(inode)->generation >= trans->transid)
|
||||||
ret = btrfs_log_inode(trans, root, BTRFS_I(inode),
|
ret = btrfs_log_inode(trans, root, BTRFS_I(inode),
|
||||||
LOG_INODE_EXISTS, ctx);
|
LOG_INODE_EXISTS, ctx);
|
||||||
btrfs_add_delayed_iput(inode);
|
btrfs_add_delayed_iput(inode);
|
||||||
|
@ -5888,7 +5913,6 @@ static int log_new_ancestors_fast(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_log_ctx *ctx)
|
struct btrfs_log_ctx *ctx)
|
||||||
{
|
{
|
||||||
struct btrfs_root *root = inode->root;
|
struct btrfs_root *root = inode->root;
|
||||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
|
||||||
struct dentry *old_parent = NULL;
|
struct dentry *old_parent = NULL;
|
||||||
struct super_block *sb = inode->vfs_inode.i_sb;
|
struct super_block *sb = inode->vfs_inode.i_sb;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
@ -5902,7 +5926,7 @@ static int log_new_ancestors_fast(struct btrfs_trans_handle *trans,
|
||||||
if (root != inode->root)
|
if (root != inode->root)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if (inode->generation > fs_info->last_trans_committed) {
|
if (inode->generation >= trans->transid) {
|
||||||
ret = btrfs_log_inode(trans, root, inode,
|
ret = btrfs_log_inode(trans, root, inode,
|
||||||
LOG_INODE_EXISTS, ctx);
|
LOG_INODE_EXISTS, ctx);
|
||||||
if (ret)
|
if (ret)
|
||||||
|
@ -6019,7 +6043,6 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||||
struct super_block *sb;
|
struct super_block *sb;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
u64 last_committed = fs_info->last_trans_committed;
|
|
||||||
bool log_dentries = false;
|
bool log_dentries = false;
|
||||||
|
|
||||||
sb = inode->vfs_inode.i_sb;
|
sb = inode->vfs_inode.i_sb;
|
||||||
|
@ -6029,23 +6052,12 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
|
||||||
goto end_no_trans;
|
goto end_no_trans;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* The prev transaction commit doesn't complete, we need do
|
|
||||||
* full commit by ourselves.
|
|
||||||
*/
|
|
||||||
if (fs_info->last_trans_log_full_commit >
|
|
||||||
fs_info->last_trans_committed) {
|
|
||||||
ret = 1;
|
|
||||||
goto end_no_trans;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (btrfs_root_refs(&root->root_item) == 0) {
|
if (btrfs_root_refs(&root->root_item) == 0) {
|
||||||
ret = 1;
|
ret = 1;
|
||||||
goto end_no_trans;
|
goto end_no_trans;
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = check_parent_dirs_for_sync(trans, inode, parent, sb,
|
ret = check_parent_dirs_for_sync(trans, inode, parent, sb);
|
||||||
last_committed);
|
|
||||||
if (ret)
|
if (ret)
|
||||||
goto end_no_trans;
|
goto end_no_trans;
|
||||||
|
|
||||||
|
@ -6075,8 +6087,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
|
||||||
* and other fun in this file.
|
* and other fun in this file.
|
||||||
*/
|
*/
|
||||||
if (S_ISREG(inode->vfs_inode.i_mode) &&
|
if (S_ISREG(inode->vfs_inode.i_mode) &&
|
||||||
inode->generation <= last_committed &&
|
inode->generation < trans->transid &&
|
||||||
inode->last_unlink_trans <= last_committed) {
|
inode->last_unlink_trans < trans->transid) {
|
||||||
ret = 0;
|
ret = 0;
|
||||||
goto end_trans;
|
goto end_trans;
|
||||||
}
|
}
|
||||||
|
@ -6125,7 +6137,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
|
||||||
* but the file inode does not have a matching BTRFS_INODE_REF_KEY item
|
* but the file inode does not have a matching BTRFS_INODE_REF_KEY item
|
||||||
* and has a link count of 2.
|
* and has a link count of 2.
|
||||||
*/
|
*/
|
||||||
if (inode->last_unlink_trans > last_committed) {
|
if (inode->last_unlink_trans >= trans->transid) {
|
||||||
ret = btrfs_log_all_parents(trans, inode, ctx);
|
ret = btrfs_log_all_parents(trans, inode, ctx);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto end_trans;
|
goto end_trans;
|
||||||
|
@ -6434,7 +6446,6 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_inode *inode, struct btrfs_inode *old_dir,
|
struct btrfs_inode *inode, struct btrfs_inode *old_dir,
|
||||||
struct dentry *parent)
|
struct dentry *parent)
|
||||||
{
|
{
|
||||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
|
||||||
struct btrfs_log_ctx ctx;
|
struct btrfs_log_ctx ctx;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -6448,8 +6459,8 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
|
||||||
* if this inode hasn't been logged and directory we're renaming it
|
* if this inode hasn't been logged and directory we're renaming it
|
||||||
* from hasn't been logged, we don't need to log it
|
* from hasn't been logged, we don't need to log it
|
||||||
*/
|
*/
|
||||||
if (inode->logged_trans <= fs_info->last_trans_committed &&
|
if (inode->logged_trans < trans->transid &&
|
||||||
(!old_dir || old_dir->logged_trans <= fs_info->last_trans_committed))
|
(!old_dir || old_dir->logged_trans < trans->transid))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
btrfs_init_log_ctx(&ctx, &inode->vfs_inode);
|
btrfs_init_log_ctx(&ctx, &inode->vfs_inode);
|
||||||
|
|
|
--- a/fs/btrfs/uuid-tree.c
+++ b/fs/btrfs/uuid-tree.c
@@ -129,8 +129,7 @@ int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
 	} else {
 		btrfs_warn(fs_info,
 			   "insert uuid item failed %d (0x%016llx, 0x%016llx) type %u!",
-			   ret, (unsigned long long)key.objectid,
-			   (unsigned long long)key.offset, type);
+			   ret, key.objectid, key.offset, type);
 		goto out;
 	}
 
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -31,6 +31,7 @@
 #include "space-info.h"
 #include "block-group.h"
 #include "discard.h"
+#include "zoned.h"
 
 const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
 	[BTRFS_RAID_RAID10] = {
@@ -374,6 +375,7 @@ void btrfs_free_device(struct btrfs_device *device)
 	rcu_string_free(device->name);
 	extent_io_tree_release(&device->alloc_state);
 	bio_put(device->flush_bio);
+	btrfs_destroy_dev_zone_info(device);
 	kfree(device);
 }
 
@@ -667,6 +669,10 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
 	clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
 	device->mode = flags;
 
+	ret = btrfs_get_dev_zone_info(device);
+	if (ret != 0)
+		goto error_free_page;
+
 	fs_devices->open_devices++;
 	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
 	    device->devid != BTRFS_DEV_REPLACE_DEVID) {
@@ -822,7 +828,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
 	} else {
 		mutex_lock(&fs_devices->device_list_mutex);
 		device = btrfs_find_device(fs_devices, devid,
-					   disk_super->dev_item.uuid, NULL, false);
+					   disk_super->dev_item.uuid, NULL);
 
 		/*
 		 * If this disk has been pulled into an fs devices created by
@@ -1044,7 +1050,7 @@ error:
 }
 
 static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices,
-				      int step, struct btrfs_device **latest_dev)
+				      struct btrfs_device **latest_dev)
 {
 	struct btrfs_device *device, *next;
 
@@ -1089,16 +1095,16 @@ static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices,
  * After we have read the system tree and know devids belonging to this
  * filesystem, remove the device which does not belong there.
  */
-void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step)
+void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices)
 {
 	struct btrfs_device *latest_dev = NULL;
 	struct btrfs_fs_devices *seed_dev;
 
 	mutex_lock(&uuid_mutex);
-	__btrfs_free_extra_devids(fs_devices, step, &latest_dev);
+	__btrfs_free_extra_devids(fs_devices, &latest_dev);
 
 	list_for_each_entry(seed_dev, &fs_devices->seed_list, seed_list)
-		__btrfs_free_extra_devids(seed_dev, step, &latest_dev);
+		__btrfs_free_extra_devids(seed_dev, &latest_dev);
 
 	fs_devices->latest_bdev = latest_dev->bdev;
 
@@ -1137,6 +1143,7 @@ static void btrfs_close_one_device(struct btrfs_device *device)
 		device->bdev = NULL;
 	}
 	clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
+	btrfs_destroy_dev_zone_info(device);
 
 	device->fs_info = NULL;
 	atomic_set(&device->dev_stats_ccnt, 0);
@@ -1217,6 +1224,7 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
 	fs_devices->latest_bdev = latest_dev->bdev;
 	fs_devices->total_rw_bytes = 0;
 	fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_REGULAR;
+	fs_devices->read_policy = BTRFS_READ_POLICY_PID;
 
 	return 0;
 }
@@ -1268,7 +1276,7 @@ void btrfs_release_disk_super(struct btrfs_super_block *super)
 }
 
 static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev,
-						       u64 bytenr)
+						       u64 bytenr, u64 bytenr_orig)
 {
 	struct btrfs_super_block *disk_super;
 	struct page *page;
@@ -1299,7 +1307,7 @@ static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev
 	/* align our pointer to the offset of the super block */
 	disk_super = p + offset_in_page(bytenr);
 
-	if (btrfs_super_bytenr(disk_super) != bytenr ||
+	if (btrfs_super_bytenr(disk_super) != bytenr_orig ||
 	    btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
 		btrfs_release_disk_super(p);
 		return ERR_PTR(-EINVAL);
@@ -1334,7 +1342,8 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
 	bool new_device_added = false;
 	struct btrfs_device *device = NULL;
 	struct block_device *bdev;
-	u64 bytenr;
+	u64 bytenr, bytenr_orig;
+	int ret;
 
 	lockdep_assert_held(&uuid_mutex);
 
@@ -1344,14 +1353,18 @@
 	 * So, we need to add a special mount option to scan for
 	 * later supers, using BTRFS_SUPER_MIRROR_MAX instead
 	 */
-	bytenr = btrfs_sb_offset(0);
 	flags |= FMODE_EXCL;
 
 	bdev = blkdev_get_by_path(path, flags, holder);
 	if (IS_ERR(bdev))
 		return ERR_CAST(bdev);
 
-	disk_super = btrfs_read_disk_super(bdev, bytenr);
+	bytenr_orig = btrfs_sb_offset(0);
+	ret = btrfs_sb_log_location_bdev(bdev, 0, READ, &bytenr);
+	if (ret)
+		return ERR_PTR(ret);
+
+	disk_super = btrfs_read_disk_super(bdev, bytenr, bytenr_orig);
 	if (IS_ERR(disk_super)) {
 		device = ERR_CAST(disk_super);
 		goto error_bdev_put;
@@ -2015,6 +2028,11 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
 		if (IS_ERR(disk_super))
 			continue;
 
+		if (bdev_is_zoned(bdev)) {
+			btrfs_reset_sb_log_zones(bdev, copy_num);
+			continue;
+		}
+
 		memset(&disk_super->magic, 0, sizeof(disk_super->magic));
 
 		page = virt_to_page(disk_super);
@@ -2293,10 +2311,10 @@ static struct btrfs_device *btrfs_find_device_by_path(
 	dev_uuid = disk_super->dev_item.uuid;
 	if (btrfs_fs_incompat(fs_info, METADATA_UUID))
 		device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
-					   disk_super->metadata_uuid, true);
+					   disk_super->metadata_uuid);
 	else
 		device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
-					   disk_super->fsid, true);
+					   disk_super->fsid);
 
 	btrfs_release_disk_super(disk_super);
 	if (!device)
@@ -2316,7 +2334,7 @@ struct btrfs_device *btrfs_find_device_by_devspec(
 
 	if (devid) {
 		device = btrfs_find_device(fs_info->fs_devices, devid, NULL,
-					   NULL, true);
+					   NULL);
 		if (!device)
 			return ERR_PTR(-ENOENT);
 		return device;
@@ -2465,7 +2483,7 @@ next_slot:
 		read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
 				   BTRFS_FSID_SIZE);
 		device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
-					   fs_uuid, true);
+					   fs_uuid);
 		BUG_ON(!device); /* Logic error */
 
 		if (device->fs_devices->seeding) {
@@ -2507,6 +2525,11 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	if (IS_ERR(bdev))
 		return PTR_ERR(bdev);
 
+	if (!btrfs_check_device_zone_type(fs_info, bdev)) {
+		ret = -EINVAL;
+		goto error;
+	}
+
 	if (fs_devices->seeding) {
 		seeding_dev = 1;
 		down_write(&sb->s_umount);
@@ -2540,10 +2563,17 @@
 	}
 	rcu_assign_pointer(device->name, name);
 
+	device->fs_info = fs_info;
+	device->bdev = bdev;
+
+	ret = btrfs_get_dev_zone_info(device);
+	if (ret)
+		goto error_free_device;
+
 	trans = btrfs_start_transaction(root, 0);
 	if (IS_ERR(trans)) {
 		ret = PTR_ERR(trans);
-		goto error_free_device;
+		goto error_free_zone;
 	}
 
 	q = bdev_get_queue(bdev);
@@ -2556,8 +2586,6 @@
 					 fs_info->sectorsize);
 	device->disk_total_bytes = device->total_bytes;
 	device->commit_total_bytes = device->total_bytes;
-	device->fs_info = fs_info;
-	device->bdev = bdev;
 	set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
 	clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
 	device->mode = FMODE_EXCL;
@@ -2704,6 +2732,8 @@ error_trans:
 		sb->s_flags |= SB_RDONLY;
 	if (trans)
 		btrfs_end_transaction(trans);
+error_free_zone:
+	btrfs_destroy_dev_zone_info(device);
 error_free_device:
 	btrfs_free_device(device);
 error:
@@ -5479,7 +5509,18 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
 	else
 		num_stripes = map->num_stripes;
 
-	preferred_mirror = first + current->pid % num_stripes;
+	switch (fs_info->fs_devices->read_policy) {
+	default:
+		/* Shouldn't happen, just warn and use pid instead of failing */
+		btrfs_warn_rl(fs_info,
+			      "unknown read_policy type %u, reset to pid",
+			      fs_info->fs_devices->read_policy);
+		fs_info->fs_devices->read_policy = BTRFS_READ_POLICY_PID;
+		fallthrough;
+	case BTRFS_READ_POLICY_PID:
+		preferred_mirror = first + (current->pid % num_stripes);
+		break;
+	}
 
 	if (dev_replace_is_ongoing &&
 	    fs_info->dev_replace.cont_reading_from_srcdev_mode ==
@@ -6335,7 +6376,7 @@ static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio,
 	bio->bi_iter.bi_sector = physical >> 9;
 	btrfs_debug_in_rcu(fs_info,
 	"btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
-		bio_op(bio), bio->bi_opf, (u64)bio->bi_iter.bi_sector,
+		bio_op(bio), bio->bi_opf, bio->bi_iter.bi_sector,
 		(unsigned long)dev->bdev->bd_dev, rcu_str_deref(dev->name),
 		dev->devid, bio->bi_iter.bi_size);
 	bio_set_dev(bio, dev->bdev);
@@ -6367,7 +6408,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 {
 	struct btrfs_device *dev;
 	struct bio *first_bio = bio;
-	u64 logical = (u64)bio->bi_iter.bi_sector << 9;
+	u64 logical = bio->bi_iter.bi_sector << 9;
 	u64 length = 0;
 	u64 map_length;
 	int ret;
@@ -6447,8 +6488,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
  * If @seed is true, traverse through the seed devices.
  */
 struct btrfs_device *btrfs_find_device(struct btrfs_fs_devices *fs_devices,
-				       u64 devid, u8 *uuid, u8 *fsid,
-				       bool seed)
+				       u64 devid, u8 *uuid, u8 *fsid)
 {
 	struct btrfs_device *device;
 	struct btrfs_fs_devices *seed_devs;
@@ -6655,7 +6695,7 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
 				   btrfs_stripe_dev_uuid_nr(chunk, i),
 				   BTRFS_UUID_SIZE);
 		map->stripes[i].dev = btrfs_find_device(fs_info->fs_devices,
-							devid, uuid, NULL, true);
+							devid, uuid, NULL);
 		if (!map->stripes[i].dev &&
 		    !btrfs_test_opt(fs_info, DEGRADED)) {
 			free_extent_map(em);
@@ -6794,7 +6834,7 @@ static int read_one_dev(struct extent_buffer *leaf,
 	}
 
 	device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
-				   fs_uuid, true);
+				   fs_uuid);
 	if (!device) {
 		if (!btrfs_test_opt(fs_info, DEGRADED)) {
 			btrfs_report_missing_device(fs_info, devid,
@@ -6857,6 +6897,16 @@ static int read_one_dev(struct extent_buffer *leaf,
 	}
 
 	fill_device_from_item(leaf, dev_item, device);
+	if (device->bdev) {
+		u64 max_total_bytes = i_size_read(device->bdev->bd_inode);
+
+		if (device->total_bytes > max_total_bytes) {
+			btrfs_err(fs_info,
+			"device total_bytes should be at most %llu but found %llu",
+				  max_total_bytes, device->total_bytes);
+			return -EINVAL;
+		}
+	}
 	set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
 	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
 	    !test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
@@ -6891,11 +6941,11 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
 	 * fixed to BTRFS_SUPER_INFO_SIZE. If nodesize > sb size, this will
	 * overallocate but we can keep it as-is, only the first page is used.
 	 */
-	sb = btrfs_find_create_tree_block(fs_info, BTRFS_SUPER_INFO_OFFSET);
+	sb = btrfs_find_create_tree_block(fs_info, BTRFS_SUPER_INFO_OFFSET,
+					  root->root_key.objectid, 0);
 	if (IS_ERR(sb))
 		return PTR_ERR(sb);
 	set_extent_buffer_uptodate(sb);
-	btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
 	/*
 	 * The sb extent buffer is artificial and just used to read the system array.
 	 * set_extent_buffer_uptodate() call does not properly mark all it's
@@ -7059,12 +7109,8 @@ static void readahead_tree_node_children(struct extent_buffer *node)
 	int i;
 	const int nr_items = btrfs_header_nritems(node);
 
-	for (i = 0; i < nr_items; i++) {
-		u64 start;
-
-		start = btrfs_node_blockptr(node, i);
-		readahead_tree_block(node->fs_info, start);
-	}
+	for (i = 0; i < nr_items; i++)
+		btrfs_readahead_node_child(node, i);
 }
 
 int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
@@ -7451,8 +7497,7 @@ int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
 	int i;
 
 	mutex_lock(&fs_devices->device_list_mutex);
-	dev = btrfs_find_device(fs_info->fs_devices, stats->devid, NULL, NULL,
-				true);
+	dev = btrfs_find_device(fs_info->fs_devices, stats->devid, NULL, NULL);
 	mutex_unlock(&fs_devices->device_list_mutex);
 
 	if (!dev) {
@@ -7583,28 +7628,13 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
 	}
 
 	/* Make sure no dev extent is beyond device boundary */
-	dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
+	dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL);
 	if (!dev) {
 		btrfs_err(fs_info, "failed to find devid %llu", devid);
 		ret = -EUCLEAN;
 		goto out;
 	}
 
-	/* It's possible this device is a dummy for seed device */
-	if (dev->disk_total_bytes == 0) {
-		struct btrfs_fs_devices *devs;
-
-		devs = list_first_entry(&fs_info->fs_devices->seed_list,
-					struct btrfs_fs_devices, seed_list);
-		dev = btrfs_find_device(devs, devid, NULL, NULL, false);
-		if (!dev) {
-			btrfs_err(fs_info, "failed to find seed devid %llu",
-				  devid);
-			ret = -EUCLEAN;
-			goto out;
-		}
-	}
-
 	if (physical_offset + physical_len > dev->disk_total_bytes) {
 		btrfs_err(fs_info,
 	"dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu",
@@ -7659,6 +7689,19 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
 	u64 prev_dev_ext_end = 0;
 	int ret = 0;
 
+	/*
+	 * We don't have a dev_root because we mounted with ignorebadroots and
+	 * failed to load the root, so we want to skip the verification in this
+	 * case for sure.
+	 *
+	 * However if the dev root is fine, but the tree itself is corrupted
+	 * we'd still fail to mount. This verification is only to make sure
+	 * writes can happen safely, so instead just bypass this check
+	 * completely in the case of IGNOREBADROOTS.
+	 */
+	if (btrfs_test_opt(fs_info, IGNOREBADROOTS))
+		return 0;
+
 	key.objectid = 1;
 	key.type = BTRFS_DEV_EXTENT_KEY;
 	key.offset = 0;
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -52,6 +52,8 @@ struct btrfs_io_geometry {
 #define BTRFS_DEV_STATE_FLUSH_SENT	(4)
 #define BTRFS_DEV_STATE_NO_READA	(5)
 
+struct btrfs_zoned_device_info;
+
 struct btrfs_device {
 	struct list_head dev_list; /* device_list_mutex */
 	struct list_head dev_alloc_list; /* chunk mutex */
@@ -65,6 +67,8 @@ struct btrfs_device {
 
 	struct block_device *bdev;
 
+	struct btrfs_zoned_device_info *zone_info;
+
 	/* the mode sent to blkdev_get */
 	fmode_t mode;
 
@@ -211,6 +215,16 @@ enum btrfs_chunk_allocation_policy {
 	BTRFS_CHUNK_ALLOC_REGULAR,
 };
 
+/*
+ * Read policies for mirrored block group profiles, read picks the stripe based
+ * on these policies.
+ */
+enum btrfs_read_policy {
+	/* Use process PID to choose the stripe */
+	BTRFS_READ_POLICY_PID,
+	BTRFS_NR_READ_POLICY,
+};
+
 struct btrfs_fs_devices {
 	u8 fsid[BTRFS_FSID_SIZE];		/* FS specific uuid */
 	u8 metadata_uuid[BTRFS_FSID_SIZE];
@@ -264,6 +278,9 @@ struct btrfs_fs_devices {
 	struct completion kobj_unregister;
 
 	enum btrfs_chunk_allocation_policy chunk_alloc_policy;
+
+	/* Policy used to read the mirrored stripes */
+	enum btrfs_read_policy read_policy;
 };
 
 #define BTRFS_BIO_INLINE_CSUM_SIZE	64
@@ -436,7 +453,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path,
 					   fmode_t flags, void *holder);
 int btrfs_forget_devices(const char *path);
 void btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
-void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step);
+void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices);
 void btrfs_assign_next_active_device(struct btrfs_device *device,
 				     struct btrfs_device *this_dev);
 struct btrfs_device *btrfs_find_device_by_devspec(struct btrfs_fs_info *fs_info,
@@ -453,7 +470,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
 int btrfs_grow_device(struct btrfs_trans_handle *trans,
 		      struct btrfs_device *device, u64 new_size);
 struct btrfs_device *btrfs_find_device(struct btrfs_fs_devices *fs_devices,
-				       u64 devid, u8 *uuid, u8 *fsid, bool seed);
+				       u64 devid, u8 *uuid, u8 *fsid);
 int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
 int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *path);
 int btrfs_balance(struct btrfs_fs_info *fs_info,
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -213,9 +213,11 @@ int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode,
 	}
 out:
 	btrfs_free_path(path);
-	if (!ret)
+	if (!ret) {
 		set_bit(BTRFS_INODE_COPY_EVERYTHING,
 			&BTRFS_I(inode)->runtime_flags);
+		clear_bit(BTRFS_INODE_NO_XATTRS, &BTRFS_I(inode)->runtime_flags);
+	}
 	return ret;
 }
 
@@ -239,7 +241,7 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name,
 
 	inode_inc_iversion(inode);
 	inode->i_ctime = current_time(inode);
-	ret = btrfs_update_inode(trans, root, inode);
+	ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
 	BUG_ON(ret);
 out:
 	btrfs_end_transaction(trans);
@@ -390,7 +392,7 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
 	if (!ret) {
 		inode_inc_iversion(inode);
 		inode->i_ctime = current_time(inode);
-		ret = btrfs_update_inode(trans, root, inode);
+		ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
 		BUG_ON(ret);
 	}
 
@ -0,0 +1,616 @@
|
||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
#include <linux/slab.h>
|
||||||
|
#include <linux/blkdev.h>
|
||||||
|
#include "ctree.h"
|
||||||
|
#include "volumes.h"
|
||||||
|
#include "zoned.h"
|
||||||
|
#include "rcu-string.h"
|
||||||
|
|
||||||
|
/* Maximum number of zones to report per blkdev_report_zones() call */
|
||||||
|
#define BTRFS_REPORT_NR_ZONES 4096
|
||||||
|
|
||||||
|
/* Number of superblock log zones */
|
||||||
|
#define BTRFS_NR_SB_LOG_ZONES 2
|
||||||
|
|
||||||
|
static int copy_zone_info_cb(struct blk_zone *zone, unsigned int idx, void *data)
|
||||||
|
{
|
||||||
|
struct blk_zone *zones = data;
|
||||||
|
|
||||||
|
memcpy(&zones[idx], zone, sizeof(*zone));
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
|
||||||
|
u64 *wp_ret)
|
||||||
|
{
|
||||||
|
bool empty[BTRFS_NR_SB_LOG_ZONES];
|
||||||
|
bool full[BTRFS_NR_SB_LOG_ZONES];
|
||||||
|
sector_t sector;
|
||||||
|
|
||||||
|
ASSERT(zones[0].type != BLK_ZONE_TYPE_CONVENTIONAL &&
|
||||||
|
zones[1].type != BLK_ZONE_TYPE_CONVENTIONAL);
|
||||||
|
|
||||||
|
empty[0] = (zones[0].cond == BLK_ZONE_COND_EMPTY);
|
||||||
|
empty[1] = (zones[1].cond == BLK_ZONE_COND_EMPTY);
|
||||||
|
full[0] = (zones[0].cond == BLK_ZONE_COND_FULL);
|
||||||
|
full[1] = (zones[1].cond == BLK_ZONE_COND_FULL);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Possible states of log buffer zones
|
||||||
|
*
|
||||||
|
* Empty[0] In use[0] Full[0]
|
||||||
|
* Empty[1] * x 0
|
||||||
|
* In use[1] 0 x 0
|
||||||
|
* Full[1] 1 1 C
|
||||||
|
*
|
||||||
|
* Log position:
|
||||||
|
* *: Special case, no superblock is written
|
||||||
|
* 0: Use write pointer of zones[0]
|
||||||
|
* 1: Use write pointer of zones[1]
|
||||||
|
* C: Compare super blcoks from zones[0] and zones[1], use the latest
|
||||||
|
* one determined by generation
|
||||||
|
* x: Invalid state
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (empty[0] && empty[1]) {
|
||||||
|
/* Special case to distinguish no superblock to read */
|
||||||
|
*wp_ret = zones[0].start << SECTOR_SHIFT;
|
||||||
|
return -ENOENT;
|
||||||
|
} else if (full[0] && full[1]) {
|
||||||
|
/* Compare two super blocks */
|
||||||
|
struct address_space *mapping = bdev->bd_inode->i_mapping;
|
||||||
|
struct page *page[BTRFS_NR_SB_LOG_ZONES];
|
||||||
|
struct btrfs_super_block *super[BTRFS_NR_SB_LOG_ZONES];
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) {
|
||||||
|
u64 bytenr;
|
||||||
|
|
||||||
|
bytenr = ((zones[i].start + zones[i].len)
|
||||||
|
<< SECTOR_SHIFT) - BTRFS_SUPER_INFO_SIZE;
|
||||||
|
|
||||||
|
page[i] = read_cache_page_gfp(mapping,
|
||||||
|
bytenr >> PAGE_SHIFT, GFP_NOFS);
|
||||||
|
if (IS_ERR(page[i])) {
|
||||||
|
if (i == 1)
|
||||||
|
btrfs_release_disk_super(super[0]);
|
||||||
|
return PTR_ERR(page[i]);
|
||||||
|
}
|
||||||
|
super[i] = page_address(page[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (super[0]->generation > super[1]->generation)
|
||||||
|
sector = zones[1].start;
|
||||||
|
else
|
||||||
|
sector = zones[0].start;
|
||||||
|
|
||||||
|
for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++)
|
||||||
|
btrfs_release_disk_super(super[i]);
|
||||||
|
} else if (!full[0] && (empty[1] || full[1])) {
|
||||||
|
sector = zones[0].wp;
|
||||||
|
} else if (full[0]) {
|
||||||
|
sector = zones[1].wp;
|
||||||
|
} else {
|
||||||
|
return -EUCLEAN;
|
||||||
|
}
|
||||||
|
*wp_ret = sector << SECTOR_SHIFT;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The following zones are reserved as the circular buffer on ZONED btrfs.
|
||||||
|
* - The primary superblock: zones 0 and 1
|
||||||
|
* - The first copy: zones 16 and 17
|
||||||
|
* - The second copy: zones 1024 or zone at 256GB which is minimum, and
|
||||||
|
* the following one
|
||||||
|
*/
|
||||||
|
static inline u32 sb_zone_number(int shift, int mirror)
|
||||||
|
{
|
||||||
|
ASSERT(mirror < BTRFS_SUPER_MIRROR_MAX);
|
||||||
|
|
||||||
|
switch (mirror) {
|
||||||
|
case 0: return 0;
|
||||||
|
case 1: return 16;
|
||||||
|
case 2: return min_t(u64, btrfs_sb_offset(mirror) >> shift, 1024);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos,
|
||||||
|
struct blk_zone *zones, unsigned int *nr_zones)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (!*nr_zones)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
ret = blkdev_report_zones(device->bdev, pos >> SECTOR_SHIFT, *nr_zones,
|
||||||
|
copy_zone_info_cb, zones);
|
||||||
|
if (ret < 0) {
|
||||||
|
btrfs_err_in_rcu(device->fs_info,
|
||||||
|
"zoned: failed to read zone %llu on %s (devid %llu)",
|
||||||
|
pos, rcu_str_deref(device->name),
|
||||||
|
device->devid);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
*nr_zones = ret;
|
||||||
|
if (!ret)
|
||||||
|
return -EIO;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int btrfs_get_dev_zone_info(struct btrfs_device *device)
|
||||||
|
{
|
||||||
|
struct btrfs_zoned_device_info *zone_info = NULL;
|
||||||
|
struct block_device *bdev = device->bdev;
|
||||||
|
struct request_queue *queue = bdev_get_queue(bdev);
|
||||||
|
sector_t nr_sectors;
|
||||||
|
sector_t sector = 0;
|
||||||
|
struct blk_zone *zones = NULL;
|
||||||
|
unsigned int i, nreported = 0, nr_zones;
|
||||||
|
unsigned int zone_sectors;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (!bdev_is_zoned(bdev))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (device->zone_info)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
zone_info = kzalloc(sizeof(*zone_info), GFP_KERNEL);
|
||||||
|
if (!zone_info)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
nr_sectors = bdev->bd_part->nr_sects;
|
||||||
|
zone_sectors = bdev_zone_sectors(bdev);
|
||||||
|
/* Check if it's power of 2 (see is_power_of_2) */
|
||||||
|
ASSERT(zone_sectors != 0 && (zone_sectors & (zone_sectors - 1)) == 0);
|
||||||
|
zone_info->zone_size = zone_sectors << SECTOR_SHIFT;
|
||||||
|
zone_info->zone_size_shift = ilog2(zone_info->zone_size);
|
||||||
|
zone_info->max_zone_append_size =
|
||||||
|
(u64)queue_max_zone_append_sectors(queue) << SECTOR_SHIFT;
|
||||||
|
zone_info->nr_zones = nr_sectors >> ilog2(zone_sectors);
|
||||||
|
if (!IS_ALIGNED(nr_sectors, zone_sectors))
|
||||||
|
zone_info->nr_zones++;
|
||||||
|
|
||||||
|
zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
|
||||||
|
if (!zone_info->seq_zones) {
|
||||||
|
ret = -ENOMEM;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
zone_info->empty_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
|
||||||
|
if (!zone_info->empty_zones) {
|
||||||
|
ret = -ENOMEM;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
zones = kcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL);
|
||||||
|
if (!zones) {
|
||||||
|
ret = -ENOMEM;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Get zones type */
|
||||||
|
while (sector < nr_sectors) {
|
||||||
|
nr_zones = BTRFS_REPORT_NR_ZONES;
|
||||||
|
ret = btrfs_get_dev_zones(device, sector << SECTOR_SHIFT, zones,
|
||||||
|
&nr_zones);
|
||||||
|
if (ret)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
for (i = 0; i < nr_zones; i++) {
|
||||||
|
if (zones[i].type == BLK_ZONE_TYPE_SEQWRITE_REQ)
|
||||||
|
__set_bit(nreported, zone_info->seq_zones);
|
||||||
|
if (zones[i].cond == BLK_ZONE_COND_EMPTY)
|
||||||
|
__set_bit(nreported, zone_info->empty_zones);
|
||||||
|
nreported++;
|
||||||
|
}
|
||||||
|
sector = zones[nr_zones - 1].start + zones[nr_zones - 1].len;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nreported != zone_info->nr_zones) {
|
||||||
|
btrfs_err_in_rcu(device->fs_info,
|
||||||
|
"inconsistent number of zones on %s (%u/%u)",
|
||||||
|
rcu_str_deref(device->name), nreported,
|
||||||
|
zone_info->nr_zones);
|
||||||
|
ret = -EIO;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Validate superblock log */
|
||||||
|
nr_zones = BTRFS_NR_SB_LOG_ZONES;
|
||||||
|
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
|
||||||
|
u32 sb_zone;
|
||||||
|
u64 sb_wp;
|
||||||
|
int sb_pos = BTRFS_NR_SB_LOG_ZONES * i;
|
||||||
|
|
||||||
|
sb_zone = sb_zone_number(zone_info->zone_size_shift, i);
|
||||||
|
if (sb_zone + 1 >= zone_info->nr_zones)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
sector = sb_zone << (zone_info->zone_size_shift - SECTOR_SHIFT);
|
||||||
|
ret = btrfs_get_dev_zones(device, sector << SECTOR_SHIFT,
|
||||||
|
&zone_info->sb_zones[sb_pos],
|
||||||
|
&nr_zones);
|
||||||
|
if (ret)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
if (nr_zones != BTRFS_NR_SB_LOG_ZONES) {
|
||||||
|
btrfs_err_in_rcu(device->fs_info,
|
||||||
|
"zoned: failed to read super block log zone info at devid %llu zone %u",
|
||||||
|
device->devid, sb_zone);
|
||||||
|
ret = -EUCLEAN;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If zones[0] is conventional, always use the beggining of the
|
||||||
|
* zone to record superblock. No need to validate in that case.
|
||||||
|
*/
|
||||||
|
if (zone_info->sb_zones[BTRFS_NR_SB_LOG_ZONES * i].type ==
|
||||||
|
BLK_ZONE_TYPE_CONVENTIONAL)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
ret = sb_write_pointer(device->bdev,
|
||||||
|
&zone_info->sb_zones[sb_pos], &sb_wp);
|
||||||
|
if (ret != -ENOENT && ret) {
|
||||||
|
btrfs_err_in_rcu(device->fs_info,
|
||||||
|
"zoned: super block log zone corrupted devid %llu zone %u",
|
||||||
|
device->devid, sb_zone);
|
||||||
|
ret = -EUCLEAN;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
kfree(zones);
|
||||||
|
|
||||||
|
device->zone_info = zone_info;
|
||||||
|
|
||||||
|
/* device->fs_info is not safe to use for printing messages */
|
||||||
|
btrfs_info_in_rcu(NULL,
|
||||||
|
"host-%s zoned block device %s, %u zones of %llu bytes",
|
||||||
|
bdev_zoned_model(bdev) == BLK_ZONED_HM ? "managed" : "aware",
|
||||||
|
rcu_str_deref(device->name), zone_info->nr_zones,
|
||||||
|
zone_info->zone_size);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
out:
|
||||||
|
kfree(zones);
|
||||||
|
bitmap_free(zone_info->empty_zones);
|
||||||
|
bitmap_free(zone_info->seq_zones);
|
||||||
|
kfree(zone_info);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
void btrfs_destroy_dev_zone_info(struct btrfs_device *device)
|
||||||
|
{
|
||||||
|
struct btrfs_zoned_device_info *zone_info = device->zone_info;
|
||||||
|
|
||||||
|
if (!zone_info)
|
||||||
|
return;
|
||||||
|
|
||||||
|
bitmap_free(zone_info->seq_zones);
|
||||||
|
bitmap_free(zone_info->empty_zones);
|
||||||
|
kfree(zone_info);
|
||||||
|
device->zone_info = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
|
||||||
|
struct blk_zone *zone)
|
||||||
|
{
|
||||||
|
unsigned int nr_zones = 1;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
ret = btrfs_get_dev_zones(device, pos, zone, &nr_zones);
|
||||||
|
if (ret != 0 || !nr_zones)
|
||||||
|
return ret ? ret : -EIO;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
|
||||||
|
{
|
||||||
|
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
|
||||||
|
struct btrfs_device *device;
|
||||||
|
u64 zoned_devices = 0;
|
||||||
|
u64 nr_devices = 0;
|
||||||
|
u64 zone_size = 0;
|
||||||
|
u64 max_zone_append_size = 0;
|
||||||
|
const bool incompat_zoned = btrfs_is_zoned(fs_info);
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
/* Count zoned devices */
|
||||||
|
list_for_each_entry(device, &fs_devices->devices, dev_list) {
|
||||||
|
enum blk_zoned_model model;
|
||||||
|
|
||||||
|
if (!device->bdev)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
model = bdev_zoned_model(device->bdev);
|
||||||
|
if (model == BLK_ZONED_HM ||
|
||||||
|
(model == BLK_ZONED_HA && incompat_zoned)) {
|
||||||
|
struct btrfs_zoned_device_info *zone_info;
|
||||||
|
|
||||||
|
zone_info = device->zone_info;
|
||||||
|
zoned_devices++;
|
||||||
|
if (!zone_size) {
|
||||||
|
zone_size = zone_info->zone_size;
|
||||||
|
} else if (zone_info->zone_size != zone_size) {
|
||||||
|
btrfs_err(fs_info,
|
||||||
|
"zoned: unequal block device zone sizes: have %llu found %llu",
|
||||||
|
device->zone_info->zone_size,
|
||||||
|
zone_size);
|
||||||
|
ret = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
if (!max_zone_append_size ||
|
||||||
|
(zone_info->max_zone_append_size &&
|
||||||
|
zone_info->max_zone_append_size < max_zone_append_size))
|
||||||
|
max_zone_append_size =
|
||||||
|
zone_info->max_zone_append_size;
|
||||||
|
}
|
||||||
|
nr_devices++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!zoned_devices && !incompat_zoned)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
if (!zoned_devices && incompat_zoned) {
|
||||||
|
/* No zoned block device found on ZONED filesystem */
|
||||||
|
btrfs_err(fs_info,
|
||||||
|
"zoned: no zoned devices found on a zoned filesystem");
|
||||||
|
ret = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (zoned_devices && !incompat_zoned) {
|
||||||
|
btrfs_err(fs_info,
|
||||||
|
"zoned: mode not enabled but zoned device found");
|
||||||
|
ret = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (zoned_devices != nr_devices) {
|
||||||
|
btrfs_err(fs_info,
|
||||||
|
"zoned: cannot mix zoned and regular devices");
|
||||||
|
ret = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* stripe_size is always aligned to BTRFS_STRIPE_LEN in
|
||||||
|
* __btrfs_alloc_chunk(). Since we want stripe_len == zone_size,
|
||||||
|
* check the alignment here.
|
||||||
|
*/
|
||||||
|
if (!IS_ALIGNED(zone_size, BTRFS_STRIPE_LEN)) {
|
||||||
|
btrfs_err(fs_info,
|
||||||
|
"zoned: zone size %llu not aligned to stripe %u",
|
||||||
|
zone_size, BTRFS_STRIPE_LEN);
|
||||||
|
ret = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
|
||||||
|
btrfs_err(fs_info, "zoned: mixed block groups not supported");
|
||||||
|
ret = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
fs_info->zone_size = zone_size;
|
||||||
|
fs_info->max_zone_append_size = max_zone_append_size;
|
||||||
|
|
||||||
|
btrfs_info(fs_info, "zoned mode enabled with zone size %llu", zone_size);
|
||||||
|
out:
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info)
|
||||||
|
{
|
||||||
|
if (!btrfs_is_zoned(info))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Space cache writing is not COWed. Disable that to avoid write errors
|
||||||
|
* in sequential zones.
|
||||||
|
*/
|
||||||
|
if (btrfs_test_opt(info, SPACE_CACHE)) {
|
||||||
|
btrfs_err(info, "zoned: space cache v1 is not supported");
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (btrfs_test_opt(info, NODATACOW)) {
|
||||||
|
btrfs_err(info, "zoned: NODATACOW not supported");
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int sb_log_location(struct block_device *bdev, struct blk_zone *zones,
|
||||||
|
int rw, u64 *bytenr_ret)
|
||||||
|
{
|
||||||
|
u64 wp;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (zones[0].type == BLK_ZONE_TYPE_CONVENTIONAL) {
|
||||||
|
*bytenr_ret = zones[0].start << SECTOR_SHIFT;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = sb_write_pointer(bdev, zones, &wp);
|
||||||
|
if (ret != -ENOENT && ret < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
if (rw == WRITE) {
|
||||||
|
struct blk_zone *reset = NULL;
|
||||||
|
|
||||||
|
if (wp == zones[0].start << SECTOR_SHIFT)
|
||||||
|
reset = &zones[0];
|
||||||
|
else if (wp == zones[1].start << SECTOR_SHIFT)
|
||||||
|
reset = &zones[1];
|
||||||
|
|
||||||
|
if (reset && reset->cond != BLK_ZONE_COND_EMPTY) {
|
||||||
|
ASSERT(reset->cond == BLK_ZONE_COND_FULL);
|
||||||
|
|
||||||
|
ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
|
||||||
|
reset->start, reset->len,
|
||||||
|
GFP_NOFS);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
reset->cond = BLK_ZONE_COND_EMPTY;
|
||||||
|
reset->wp = reset->start;
|
||||||
|
}
|
||||||
|
} else if (ret != -ENOENT) {
|
||||||
|
/* For READ, we want the precious one */
|
||||||
|
if (wp == zones[0].start << SECTOR_SHIFT)
|
||||||
|
wp = (zones[1].start + zones[1].len) << SECTOR_SHIFT;
|
||||||
|
wp -= BTRFS_SUPER_INFO_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
*bytenr_ret = wp;
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw,
			       u64 *bytenr_ret)
{
	struct blk_zone zones[BTRFS_NR_SB_LOG_ZONES];
	unsigned int zone_sectors;
	u32 sb_zone;
	int ret;
	u64 zone_size;
	u8 zone_sectors_shift;
	sector_t nr_sectors;
	u32 nr_zones;

	if (!bdev_is_zoned(bdev)) {
		*bytenr_ret = btrfs_sb_offset(mirror);
		return 0;
	}

	ASSERT(rw == READ || rw == WRITE);

	zone_sectors = bdev_zone_sectors(bdev);
	if (!is_power_of_2(zone_sectors))
		return -EINVAL;
	zone_size = zone_sectors << SECTOR_SHIFT;
	zone_sectors_shift = ilog2(zone_sectors);
	nr_sectors = bdev->bd_part->nr_sects;
	nr_zones = nr_sectors >> zone_sectors_shift;

	sb_zone = sb_zone_number(zone_sectors_shift + SECTOR_SHIFT, mirror);
	if (sb_zone + 1 >= nr_zones)
		return -ENOENT;

	ret = blkdev_report_zones(bdev, sb_zone << zone_sectors_shift,
				  BTRFS_NR_SB_LOG_ZONES, copy_zone_info_cb,
				  zones);
	if (ret < 0)
		return ret;
	if (ret != BTRFS_NR_SB_LOG_ZONES)
		return -EIO;

	return sb_log_location(bdev, zones, rw, bytenr_ret);
}

int btrfs_sb_log_location(struct btrfs_device *device, int mirror, int rw,
			  u64 *bytenr_ret)
{
	struct btrfs_zoned_device_info *zinfo = device->zone_info;
	u32 zone_num;

	if (!zinfo) {
		*bytenr_ret = btrfs_sb_offset(mirror);
		return 0;
	}

	zone_num = sb_zone_number(zinfo->zone_size_shift, mirror);
	if (zone_num + 1 >= zinfo->nr_zones)
		return -ENOENT;

	return sb_log_location(device->bdev,
			       &zinfo->sb_zones[BTRFS_NR_SB_LOG_ZONES * mirror],
			       rw, bytenr_ret);
}

static inline bool is_sb_log_zone(struct btrfs_zoned_device_info *zinfo,
				  int mirror)
{
	u32 zone_num;

	if (!zinfo)
		return false;

	zone_num = sb_zone_number(zinfo->zone_size_shift, mirror);
	if (zone_num + 1 >= zinfo->nr_zones)
		return false;

	if (!test_bit(zone_num, zinfo->seq_zones))
		return false;

	return true;
}

void btrfs_advance_sb_log(struct btrfs_device *device, int mirror)
{
	struct btrfs_zoned_device_info *zinfo = device->zone_info;
	struct blk_zone *zone;

	if (!is_sb_log_zone(zinfo, mirror))
		return;

	zone = &zinfo->sb_zones[BTRFS_NR_SB_LOG_ZONES * mirror];
	if (zone->cond != BLK_ZONE_COND_FULL) {
		if (zone->cond == BLK_ZONE_COND_EMPTY)
			zone->cond = BLK_ZONE_COND_IMP_OPEN;

		zone->wp += (BTRFS_SUPER_INFO_SIZE >> SECTOR_SHIFT);

		if (zone->wp == zone->start + zone->len)
			zone->cond = BLK_ZONE_COND_FULL;

		return;
	}

	zone++;
	ASSERT(zone->cond != BLK_ZONE_COND_FULL);
	if (zone->cond == BLK_ZONE_COND_EMPTY)
		zone->cond = BLK_ZONE_COND_IMP_OPEN;

	zone->wp += (BTRFS_SUPER_INFO_SIZE >> SECTOR_SHIFT);

	if (zone->wp == zone->start + zone->len)
		zone->cond = BLK_ZONE_COND_FULL;
}

int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror)
{
	sector_t zone_sectors;
	sector_t nr_sectors;
	u8 zone_sectors_shift;
	u32 sb_zone;
	u32 nr_zones;

	zone_sectors = bdev_zone_sectors(bdev);
	zone_sectors_shift = ilog2(zone_sectors);
	nr_sectors = bdev->bd_part->nr_sects;
	nr_zones = nr_sectors >> zone_sectors_shift;

	sb_zone = sb_zone_number(zone_sectors_shift + SECTOR_SHIFT, mirror);
	if (sb_zone + 1 >= nr_zones)
		return -ENOENT;

	return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
				sb_zone << zone_sectors_shift,
				zone_sectors * BTRFS_NR_SB_LOG_ZONES, GFP_NOFS);
}
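[Editor's note] A minimal sketch of how the write side of this API composes, not part of the patch series: example_write_sb_copy is hypothetical, and the real consumer is the superblock write path. On a zoned device the two log zones are written alternately; the caller asks for the current write location, submits the superblock there, then advances the cached zone state.

/* Illustrative sketch only; example_write_sb_copy is not kernel code. */
static int example_write_sb_copy(struct btrfs_device *device, int mirror)
{
	u64 bytenr;
	int ret;

	/* Ask the mirror's zone pair for the next superblock write location */
	ret = btrfs_sb_log_location(device, mirror, WRITE, &bytenr);
	if (ret)	/* e.g. -ENOENT: device too small for this mirror */
		return ret;

	/* ... submit BTRFS_SUPER_INFO_SIZE bytes at bytenr here ... */

	/* Account the write in the cached zone state (wp and condition) */
	btrfs_advance_sb_log(device, mirror);
	return 0;
}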
@@ -0,0 +1,160 @@
/* SPDX-License-Identifier: GPL-2.0 */

#ifndef BTRFS_ZONED_H
#define BTRFS_ZONED_H

#include <linux/types.h>
#include <linux/blkdev.h>
#include "volumes.h"
#include "disk-io.h"

struct btrfs_zoned_device_info {
	/*
	 * Number of zones, zone size and types of zones if bdev is a
	 * zoned block device.
	 */
	u64 zone_size;
	u8 zone_size_shift;
	u64 max_zone_append_size;
	u32 nr_zones;
	unsigned long *seq_zones;
	unsigned long *empty_zones;
	struct blk_zone sb_zones[2 * BTRFS_SUPER_MIRROR_MAX];
};

#ifdef CONFIG_BLK_DEV_ZONED
int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
		       struct blk_zone *zone);
int btrfs_get_dev_zone_info(struct btrfs_device *device);
void btrfs_destroy_dev_zone_info(struct btrfs_device *device);
int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info);
int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info);
int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw,
			       u64 *bytenr_ret);
int btrfs_sb_log_location(struct btrfs_device *device, int mirror, int rw,
			  u64 *bytenr_ret);
void btrfs_advance_sb_log(struct btrfs_device *device, int mirror);
int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror);
#else /* CONFIG_BLK_DEV_ZONED */
static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
				     struct blk_zone *zone)
{
	return 0;
}

static inline int btrfs_get_dev_zone_info(struct btrfs_device *device)
{
	return 0;
}

static inline void btrfs_destroy_dev_zone_info(struct btrfs_device *device) { }

static inline int btrfs_check_zoned_mode(const struct btrfs_fs_info *fs_info)
{
	if (!btrfs_is_zoned(fs_info))
		return 0;

	btrfs_err(fs_info, "zoned block devices support is not enabled");
	return -EOPNOTSUPP;
}

static inline int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info)
{
	return 0;
}

static inline int btrfs_sb_log_location_bdev(struct block_device *bdev,
					     int mirror, int rw, u64 *bytenr_ret)
{
	*bytenr_ret = btrfs_sb_offset(mirror);
	return 0;
}

static inline int btrfs_sb_log_location(struct btrfs_device *device, int mirror,
					int rw, u64 *bytenr_ret)
{
	*bytenr_ret = btrfs_sb_offset(mirror);
	return 0;
}

static inline void btrfs_advance_sb_log(struct btrfs_device *device, int mirror)
{ }

static inline int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror)
{
	return 0;
}

#endif

static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
{
	struct btrfs_zoned_device_info *zone_info = device->zone_info;

	if (!zone_info)
		return false;

	return test_bit(pos >> zone_info->zone_size_shift, zone_info->seq_zones);
}

static inline bool btrfs_dev_is_empty_zone(struct btrfs_device *device, u64 pos)
{
	struct btrfs_zoned_device_info *zone_info = device->zone_info;

	if (!zone_info)
		return true;

	return test_bit(pos >> zone_info->zone_size_shift, zone_info->empty_zones);
}

static inline void btrfs_dev_set_empty_zone_bit(struct btrfs_device *device,
						u64 pos, bool set)
{
	struct btrfs_zoned_device_info *zone_info = device->zone_info;
	unsigned int zno;

	if (!zone_info)
		return;

	zno = pos >> zone_info->zone_size_shift;
	if (set)
		set_bit(zno, zone_info->empty_zones);
	else
		clear_bit(zno, zone_info->empty_zones);
}

static inline void btrfs_dev_set_zone_empty(struct btrfs_device *device, u64 pos)
{
	btrfs_dev_set_empty_zone_bit(device, pos, true);
}

static inline void btrfs_dev_clear_zone_empty(struct btrfs_device *device, u64 pos)
{
	btrfs_dev_set_empty_zone_bit(device, pos, false);
}

static inline bool btrfs_check_device_zone_type(const struct btrfs_fs_info *fs_info,
						struct block_device *bdev)
{
	u64 zone_size;

	if (btrfs_is_zoned(fs_info)) {
		zone_size = bdev_zone_sectors(bdev) << SECTOR_SHIFT;
		/* Do not allow non-zoned device */
		return bdev_is_zoned(bdev) && fs_info->zone_size == zone_size;
	}

	/* Do not allow Host Managed zoned device */
	return bdev_zoned_model(bdev) != BLK_ZONED_HM;
}

static inline bool btrfs_check_super_location(struct btrfs_device *device, u64 pos)
{
	/*
	 * On a non-zoned device, any address is OK. On a zoned device,
	 * only non-SEQUENTIAL WRITE REQUIRED zones can hold a superblock.
	 */
	return device->zone_info == NULL || !btrfs_dev_is_sequential(device, pos);
}

#endif
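[Editor's note] A short composition sketch for the inline helpers above, not from the patch: both bitmaps carry one bit per zone and are indexed by byte position shifted down by zone_size_shift, which is why zoned.c rejects devices whose zone size is not a power of two. A hypothetical caller claiming an empty sequential zone might look like:

/* Illustrative sketch only; example_claim_empty_seq_zone is hypothetical. */
static bool example_claim_empty_seq_zone(struct btrfs_device *device, u64 pos)
{
	/* Conventional (or non-zoned) ranges have no write pointer to manage */
	if (!btrfs_dev_is_sequential(device, pos))
		return false;

	/* Zone already holds data; it would need a reset first */
	if (!btrfs_dev_is_empty_zone(device, pos))
		return false;

	/* First write: mark the zone non-empty in the cached state */
	btrfs_dev_clear_zone_empty(device, pos);
	return true;
}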
@@ -307,6 +307,7 @@ struct btrfs_ioctl_fs_info_args {
#define BTRFS_FEATURE_INCOMPAT_NO_HOLES		(1ULL << 9)
#define BTRFS_FEATURE_INCOMPAT_METADATA_UUID	(1ULL << 10)
#define BTRFS_FEATURE_INCOMPAT_RAID1C34		(1ULL << 11)
#define BTRFS_FEATURE_INCOMPAT_ZONED		(1ULL << 12)

struct btrfs_ioctl_feature_flags {
	__u64 compat_flags;
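[Editor's note] The new incompat bit is visible to userspace through the existing feature ioctl; a minimal detection sketch (assuming fd is an open descriptor on a btrfs mount and the installed headers carry the new define):

/* Illustrative userspace sketch, not part of the patch. */
#include <sys/ioctl.h>
#include <linux/btrfs.h>

static int fs_is_zoned(int fd)
{
	struct btrfs_ioctl_feature_flags flags;

	/* BTRFS_IOC_GET_FEATURES fills in the three feature masks */
	if (ioctl(fd, BTRFS_IOC_GET_FEATURES, &flags) < 0)
		return -1;

	return !!(flags.incompat_flags & BTRFS_FEATURE_INCOMPAT_ZONED);
}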
@@ -299,7 +299,8 @@
 */
#define BTRFS_STRING_ITEM_KEY	253

/* Maximum metadata block size (nodesize) */
#define BTRFS_MAX_METADATA_BLOCKSIZE	65536

/* 32 bytes in various csum fields */
#define BTRFS_CSUM_SIZE		32