for-5.2-rc2-tag
-----BEGIN PGP SIGNATURE-----

iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAlzvsOAACgkQxWXV+ddt
WDuLQg/+OHwlNW/8KT+1/gQvAxVnI2bglRJ3lYOQRenR8jA4y3rIKgXWXyd7A/uK
acrjeZYMaho5HY5VaKqAqDST7KikR+gPQh1IArYlBcL7tI5c/YsEgqf2G8PXo1U1
9B13og3kWpdIRNIF9OyKUPcGGfnG5UdBDGNFAEuQZpRXbFKJ+8+ijYU0dXIIFdJb
scl9vWQWFDoLlZ2szRDbl5gAG0lYwk5q0rTRDt+xyla83gD5UNP5oG8XNp1o/T5+
yDwM81IhQ636n51/NkX5RgFbs0ljjRqVzXJg5pa3XH1w9vwZuWoKRNcUhuDH6j9W
wL4Gw33Q8607uk01D5wDdtNI8JTOaXDDYnKsgzNb+7A7ICWlQ/8OR6VZintMioun
ccpNY7HMuVdGdRZxE7ZW63LxLyXulZW51r5G2IvBwRfT6aGl+oKwU4AwB6slEId3
S1ftxcCKYHqtCkRAutirjUknuYdzr0LB1sePoiFwQmIN6782fzuLF8O4hxl5Hcd9
UoEgz/240HiTDqsluUmVkurLVUwBk7CoIdec3tPELrCagI7rqG4H2nkj7XXMJiVD
XyCJZB0dF3E6G8TzlL5lKQWDniqDrLizYwnxYr6OSYZvp9kzfHgxpTPGdxwbIAjr
JT+v6332N09ODooODtzci0Pt0YdfcK1tIhcWXP+oLpE4v/PZj8g=
=lyvo
-----END PGP SIGNATURE-----

Merge tag 'for-5.2-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:

 "A few more fixes for bugs reported by users, fuzzing tools and
  regressions:

   - fix crashes in relocation:
      + resuming interrupted balance operation does not properly clean
        up orphan trees
      + with enabled qgroups, resuming needs to be more careful about
        block groups due to limited context when updating qgroups

   - fsync and logging fixes found by fuzzing

   - incremental send fixes for no-holes and clone

   - fix spin lock type used in timer function for zstd"

* tag 'for-5.2-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  Btrfs: fix race updating log root item during fsync
  Btrfs: fix wrong ctime and mtime of a directory after log replay
  Btrfs: fix fsync not persisting changed attributes of a directory
  btrfs: qgroup: Check bg while resuming relocation to avoid NULL pointer dereference
  btrfs: reloc: Also queue orphan reloc tree for cleanup to avoid BUG_ON()
  Btrfs: incremental send, fix emission of invalid clone operations
  Btrfs: incremental send, fix file corruption when no-holes feature is enabled
  btrfs: correct zstd workspace manager lock to use spin_lock_bh()
  btrfs: Ensure replaced device doesn't have pending chunk allocation
commit 318adf8e4b
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -603,17 +603,33 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 	}
 	btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
 
-	trans = btrfs_start_transaction(root, 0);
-	if (IS_ERR(trans)) {
-		mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
-		return PTR_ERR(trans);
+	/*
+	 * We have to use this loop approach because at this point src_device
+	 * has to be available for transaction commit to complete, yet new
+	 * chunks shouldn't be allocated on the device.
+	 */
+	while (1) {
+		trans = btrfs_start_transaction(root, 0);
+		if (IS_ERR(trans)) {
+			mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
+			return PTR_ERR(trans);
+		}
+		ret = btrfs_commit_transaction(trans);
+		WARN_ON(ret);
+
+		/* Prevent write_all_supers() during the finishing procedure */
+		mutex_lock(&fs_info->fs_devices->device_list_mutex);
+		/* Prevent new chunks being allocated on the source device */
+		mutex_lock(&fs_info->chunk_mutex);
+
+		if (!list_empty(&src_device->post_commit_list)) {
+			mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+			mutex_unlock(&fs_info->chunk_mutex);
+		} else {
+			break;
+		}
 	}
-	ret = btrfs_commit_transaction(trans);
-	WARN_ON(ret);
 
-	/* keep away write_all_supers() during the finishing procedure */
-	mutex_lock(&fs_info->fs_devices->device_list_mutex);
-	mutex_lock(&fs_info->chunk_mutex);
 	down_write(&dev_replace->rwsem);
 	dev_replace->replace_state =
 		scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED
@@ -662,7 +678,6 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 	btrfs_device_set_disk_total_bytes(tgt_device,
 					  src_device->disk_total_bytes);
 	btrfs_device_set_bytes_used(tgt_device, src_device->bytes_used);
-	ASSERT(list_empty(&src_device->post_commit_list));
 	tgt_device->commit_total_bytes = src_device->commit_total_bytes;
 	tgt_device->commit_bytes_used = src_device->bytes_used;
 
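The loop added above is a commit-then-recheck pattern: each transaction commit can queue more chunk allocations on the source device, so the code re-takes the locks that gate allocation and only leaves the loop while holding them with nothing pending. A minimal userspace sketch of the same pattern, using pthreads (all names here are illustrative stand-ins, not btrfs APIs):

#include <pthread.h>
#include <stdio.h>

/* Illustrative stand-ins -- not btrfs structures. */
static pthread_mutex_t alloc_lock = PTHREAD_MUTEX_INITIALIZER;
static int pending_allocations = 2;	/* pretend two chunks are queued */

/* Stand-in for btrfs_commit_transaction(): drains one queued item. */
static void commit_transaction(void)
{
	pthread_mutex_lock(&alloc_lock);
	if (pending_allocations > 0)
		pending_allocations--;
	pthread_mutex_unlock(&alloc_lock);
}

int main(void)
{
	/*
	 * Commit-then-recheck loop: a commit may leave more work queued,
	 * so re-take the lock that gates new allocations and only exit
	 * the loop while still holding it with nothing pending.
	 */
	for (;;) {
		commit_transaction();

		pthread_mutex_lock(&alloc_lock);	/* blocks new allocations */
		if (pending_allocations == 0)
			break;				/* proceed, lock held */
		pthread_mutex_unlock(&alloc_lock);	/* retry */
	}

	printf("device quiescent, safe to finish replace\n");
	pthread_mutex_unlock(&alloc_lock);
	return 0;
}

The important detail is that the emptiness check and the exit from the loop happen under the same lock that blocks new work, so the state cannot change between the check and the code that relies on it.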
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6433,8 +6433,18 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
 	btrfs_i_size_write(parent_inode, parent_inode->vfs_inode.i_size +
 			   name_len * 2);
 	inode_inc_iversion(&parent_inode->vfs_inode);
-	parent_inode->vfs_inode.i_mtime = parent_inode->vfs_inode.i_ctime =
-		current_time(&parent_inode->vfs_inode);
+	/*
+	 * If we are replaying a log tree, we do not want to update the mtime
+	 * and ctime of the parent directory with the current time, since the
+	 * log replay procedure is responsible for setting them to their correct
+	 * values (the ones it had when the fsync was done).
+	 */
+	if (!test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags)) {
+		struct timespec64 now = current_time(&parent_inode->vfs_inode);
+
+		parent_inode->vfs_inode.i_mtime = now;
+		parent_inode->vfs_inode.i_ctime = now;
+	}
 	ret = btrfs_update_inode(trans, root, &parent_inode->vfs_inode);
 	if (ret)
 		btrfs_abort_transaction(trans, ret);
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -3830,7 +3830,13 @@ int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans,
 							     subvol_slot);
 	block->last_snapshot = last_snapshot;
 	block->level = level;
-	if (bg->flags & BTRFS_BLOCK_GROUP_DATA)
+
+	/*
+	 * If we have bg == NULL, we're called from btrfs_recover_relocation(),
+	 * no one else can modify tree blocks thus we qgroup will not change
+	 * no matter the value of trace_leaf.
+	 */
+	if (bg && bg->flags & BTRFS_BLOCK_GROUP_DATA)
 		block->trace_leaf = true;
 	else
 		block->trace_leaf = false;
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2177,22 +2177,30 @@ static int clean_dirty_subvols(struct reloc_control *rc)
 	struct btrfs_root *root;
 	struct btrfs_root *next;
 	int ret = 0;
+	int ret2;
 
 	list_for_each_entry_safe(root, next, &rc->dirty_subvol_roots,
 				 reloc_dirty_list) {
-		struct btrfs_root *reloc_root = root->reloc_root;
+		if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
+			/* Merged subvolume, cleanup its reloc root */
+			struct btrfs_root *reloc_root = root->reloc_root;
 
-		clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
-		list_del_init(&root->reloc_dirty_list);
-		root->reloc_root = NULL;
-		if (reloc_root) {
-			int ret2;
+			clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
+			list_del_init(&root->reloc_dirty_list);
+			root->reloc_root = NULL;
+			if (reloc_root) {
 
-			ret2 = btrfs_drop_snapshot(reloc_root, NULL, 0, 1);
+				ret2 = btrfs_drop_snapshot(reloc_root, NULL, 0, 1);
+				if (ret2 < 0 && !ret)
+					ret = ret2;
+			}
+			btrfs_put_fs_root(root);
+		} else {
+			/* Orphan reloc tree, just clean it up */
+			ret2 = btrfs_drop_snapshot(root, NULL, 0, 1);
 			if (ret2 < 0 && !ret)
 				ret = ret2;
 		}
-		btrfs_put_fs_root(root);
 	}
 	return ret;
 }
@@ -2480,6 +2488,9 @@ again:
 			}
 		} else {
 			list_del_init(&reloc_root->root_list);
+			/* Don't forget to queue this reloc root for cleanup */
+			list_add_tail(&reloc_root->reloc_dirty_list,
+				      &rc->dirty_subvol_roots);
 		}
 	}
 
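The cleanup loop above deliberately keeps iterating after a failure and records only the first error (if (ret2 < 0 && !ret) ret = ret2;), so every queued tree still gets reclaimed. A small self-contained illustration of that error-aggregation pattern (the cleanup step is hypothetical, not btrfs code):

#include <stdio.h>

/* Hypothetical cleanup step; returns 0 or a negative errno-style code. */
static int drop_snapshot(int id)
{
	return (id == 2) ? -5 : 0;	/* pretend step 2 fails with -EIO */
}

int main(void)
{
	int ret = 0;

	/*
	 * Clean up every entry even if one fails: remember the first error,
	 * but never abort the loop, so nothing is left un-reclaimed.
	 */
	for (int id = 0; id < 4; id++) {
		int ret2 = drop_snapshot(id);

		if (ret2 < 0 && !ret)
			ret = ret2;
	}

	printf("cleanup finished, first error: %d\n", ret);
	return ret ? 1 : 0;
}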
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -4999,6 +4999,12 @@ static int send_hole(struct send_ctx *sctx, u64 end)
 	if (offset >= sctx->cur_inode_size)
 		return 0;
 
+	/*
+	 * Don't go beyond the inode's i_size due to prealloc extents that start
+	 * after the i_size.
+	 */
+	end = min_t(u64, end, sctx->cur_inode_size);
+
 	if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
 		return send_update_extent(sctx, offset, end - offset);
 
@@ -5218,10 +5224,50 @@ static int clone_range(struct send_ctx *sctx,
 			clone_len = min_t(u64, ext_len, len);
 
 			if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte &&
-			    clone_data_offset == data_offset)
-				ret = send_clone(sctx, offset, clone_len, clone_root);
-			else
+			    clone_data_offset == data_offset) {
+				const u64 src_end = clone_root->offset + clone_len;
+				const u64 sectorsize = SZ_64K;
+
+				/*
+				 * We can't clone the last block, when its size is not
+				 * sector size aligned, into the middle of a file. If we
+				 * do so, the receiver will get a failure (-EINVAL) when
+				 * trying to clone or will silently corrupt the data in
+				 * the destination file if it's on a kernel without the
+				 * fix introduced by commit ac765f83f1397646
+				 * ("Btrfs: fix data corruption due to cloning of eof
+				 * block).
+				 *
+				 * So issue a clone of the aligned down range plus a
+				 * regular write for the eof block, if we hit that case.
+				 *
+				 * Also, we use the maximum possible sector size, 64K,
+				 * because we don't know what's the sector size of the
+				 * filesystem that receives the stream, so we have to
+				 * assume the largest possible sector size.
+				 */
+				if (src_end == clone_src_i_size &&
+				    !IS_ALIGNED(src_end, sectorsize) &&
+				    offset + clone_len < sctx->cur_inode_size) {
+					u64 slen;
+
+					slen = ALIGN_DOWN(src_end - clone_root->offset,
+							  sectorsize);
+					if (slen > 0) {
+						ret = send_clone(sctx, offset, slen,
+								 clone_root);
+						if (ret < 0)
+							goto out;
+					}
+					ret = send_extent_data(sctx, offset + slen,
							       clone_len - slen);
+				} else {
+					ret = send_clone(sctx, offset, clone_len,
+							 clone_root);
+				}
+			} else {
 				ret = send_extent_data(sctx, offset, clone_len);
+			}
 
 			if (ret < 0)
 				goto out;
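The clone_range() change above splits a clone that would end at an unaligned source EOF into a clone of the aligned-down prefix plus a plain write of the tail, assuming the worst-case 64K sector size named in the comment. A standalone sketch of just that arithmetic (the macro and helper names are illustrative, not btrfs code):

#include <stdint.h>
#include <stdio.h>

#define ALIGN_DOWN(x, a)	((x) & ~((uint64_t)(a) - 1))
#define MAX_SECTORSIZE		(64 * 1024ULL)	/* worst case assumed by send */

/*
 * Split a clone of [src_offset, src_offset + clone_len) that ends exactly
 * at the source file's EOF: clone the sector-aligned prefix, then write the
 * unaligned tail as ordinary data, so the receiver never clones a partial
 * eof block into the middle of a file.
 */
static void split_eof_clone(uint64_t src_offset, uint64_t clone_len)
{
	uint64_t src_end = src_offset + clone_len;
	uint64_t slen = ALIGN_DOWN(src_end - src_offset, MAX_SECTORSIZE);

	if (slen > 0)
		printf("clone  %llu bytes\n", (unsigned long long)slen);
	printf("write  %llu bytes (eof tail)\n",
	       (unsigned long long)(clone_len - slen));
}

int main(void)
{
	split_eof_clone(0, 100 * 1024);	/* 64 KiB cloned + 36 KiB written */
	return 0;
}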
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3109,6 +3109,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 	root->log_transid++;
 	log->log_transid = root->log_transid;
 	root->log_start_pid = 0;
+	/*
+	 * Update or create log root item under the root's log_mutex to prevent
+	 * races with concurrent log syncs that can lead to failure to update
+	 * log root item because it was not created yet.
+	 */
+	ret = update_log_root(trans, log);
 	/*
 	 * IO has been started, blocks of the log tree have WRITTEN flag set
 	 * in their headers. new modifications of the log will be written to
@@ -3128,8 +3134,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 
 	mutex_unlock(&log_root_tree->log_mutex);
 
-	ret = update_log_root(trans, log);
-
 	mutex_lock(&log_root_tree->log_mutex);
 	if (atomic_dec_and_test(&log_root_tree->log_writers)) {
 		/* atomic_dec_and_test implies a barrier */
@@ -5478,7 +5482,6 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
 {
 	int ret = 0;
 	struct dentry *old_parent = NULL;
-	struct btrfs_inode *orig_inode = inode;
 
 	/*
 	 * for regular files, if its inode is already on disk, we don't
@@ -5498,16 +5501,6 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
 	}
 
 	while (1) {
-		/*
-		 * If we are logging a directory then we start with our inode,
-		 * not our parent's inode, so we need to skip setting the
-		 * logged_trans so that further down in the log code we don't
-		 * think this inode has already been logged.
-		 */
-		if (inode != orig_inode)
-			inode->logged_trans = trans->transid;
-		smp_mb();
-
 		if (btrfs_must_commit_transaction(trans, inode)) {
 			ret = 1;
 			break;
@@ -6384,7 +6377,6 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
 	 * if this directory was already logged any new
 	 * names for this file/dir will get recorded
 	 */
-	smp_mb();
 	if (dir->logged_trans == trans->transid)
 		return;
 
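The btrfs_sync_log() change moves update_log_root() under the root's log_mutex, so creating and updating the log root item becomes a single step serialized with concurrent syncs. A minimal pthread sketch of that create-or-update-under-one-lock rule (all names are illustrative, not the btrfs implementation):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t log_mutex = PTHREAD_MUTEX_INITIALIZER;
static int log_root_item_exists;	/* 0 = not created yet */

/*
 * Create-or-update must be one atomic step under log_mutex: if a second
 * syncer could observe "not created" while the first one was still
 * between creating the item and publishing it, its update would fail.
 */
static void update_log_root(int transid)
{
	pthread_mutex_lock(&log_mutex);
	if (!log_root_item_exists) {
		log_root_item_exists = 1;
		printf("created log root item (transid %d)\n", transid);
	} else {
		printf("updated log root item (transid %d)\n", transid);
	}
	pthread_mutex_unlock(&log_mutex);
}

static void *fsync_worker(void *arg)
{
	update_log_root((int)(long)arg);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, fsync_worker, (void *)1L);
	pthread_create(&b, NULL, fsync_worker, (void *)2L);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}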
--- a/fs/btrfs/zstd.c
+++ b/fs/btrfs/zstd.c
@@ -105,10 +105,10 @@ static void zstd_reclaim_timer_fn(struct timer_list *timer)
 	unsigned long reclaim_threshold = jiffies - ZSTD_BTRFS_RECLAIM_JIFFIES;
 	struct list_head *pos, *next;
 
-	spin_lock(&wsm.lock);
+	spin_lock_bh(&wsm.lock);
 
 	if (list_empty(&wsm.lru_list)) {
-		spin_unlock(&wsm.lock);
+		spin_unlock_bh(&wsm.lock);
 		return;
 	}
 
@@ -137,7 +137,7 @@ static void zstd_reclaim_timer_fn(struct timer_list *timer)
 	if (!list_empty(&wsm.lru_list))
 		mod_timer(&wsm.timer, jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
 
-	spin_unlock(&wsm.lock);
+	spin_unlock_bh(&wsm.lock);
 }
 
 /*
@@ -198,7 +198,7 @@ static void zstd_cleanup_workspace_manager(void)
 	struct workspace *workspace;
 	int i;
 
-	spin_lock(&wsm.lock);
+	spin_lock_bh(&wsm.lock);
 	for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) {
 		while (!list_empty(&wsm.idle_ws[i])) {
 			workspace = container_of(wsm.idle_ws[i].next,
@@ -208,7 +208,7 @@ static void zstd_cleanup_workspace_manager(void)
 			zstd_free_workspace(&workspace->list);
 		}
 	}
-	spin_unlock(&wsm.lock);
+	spin_unlock_bh(&wsm.lock);
 
 	del_timer_sync(&wsm.timer);
 }
@@ -230,7 +230,7 @@ static struct list_head *zstd_find_workspace(unsigned int level)
 	struct workspace *workspace;
 	int i = level - 1;
 
-	spin_lock(&wsm.lock);
+	spin_lock_bh(&wsm.lock);
 	for_each_set_bit_from(i, &wsm.active_map, ZSTD_BTRFS_MAX_LEVEL) {
 		if (!list_empty(&wsm.idle_ws[i])) {
 			ws = wsm.idle_ws[i].next;
@@ -242,11 +242,11 @@ static struct list_head *zstd_find_workspace(unsigned int level)
 			list_del(&workspace->lru_list);
 			if (list_empty(&wsm.idle_ws[i]))
 				clear_bit(i, &wsm.active_map);
-			spin_unlock(&wsm.lock);
+			spin_unlock_bh(&wsm.lock);
 			return ws;
 		}
 	}
-	spin_unlock(&wsm.lock);
+	spin_unlock_bh(&wsm.lock);
 
 	return NULL;
 }
@@ -305,7 +305,7 @@ static void zstd_put_workspace(struct list_head *ws)
 {
 	struct workspace *workspace = list_to_workspace(ws);
 
-	spin_lock(&wsm.lock);
+	spin_lock_bh(&wsm.lock);
 
 	/* A node is only taken off the lru if we are the corresponding level */
 	if (workspace->req_level == workspace->level) {
@@ -325,7 +325,7 @@ static void zstd_put_workspace(struct list_head *ws)
 	list_add(&workspace->list, &wsm.idle_ws[workspace->level - 1]);
 	workspace->req_level = 0;
 
-	spin_unlock(&wsm.lock);
+	spin_unlock_bh(&wsm.lock);
 
 	if (workspace->level == ZSTD_BTRFS_MAX_LEVEL)
 		cond_wake_up(&wsm.wait);
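The zstd changes swap spin_lock() for spin_lock_bh() because wsm.lock is also taken by zstd_reclaim_timer_fn(), and timer callbacks run in softirq (bottom-half) context: if the timer fired on a CPU whose process-context code held the lock without disabling bottom halves, it would spin on the lock forever. A minimal, hedged sketch of the pattern as a standalone kernel module (the structure is illustrative, not the btrfs code):

#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/jiffies.h>

static DEFINE_SPINLOCK(demo_lock);
static struct timer_list demo_timer;
static int demo_counter;

/* Runs in softirq context, so it contends with process context for the lock. */
static void demo_timer_fn(struct timer_list *t)
{
	spin_lock(&demo_lock);	/* already in BH context, plain lock is fine */
	demo_counter++;
	spin_unlock(&demo_lock);
}

static int __init demo_init(void)
{
	timer_setup(&demo_timer, demo_timer_fn, 0);
	mod_timer(&demo_timer, jiffies + HZ);

	/*
	 * Process context must disable bottom halves while holding the lock;
	 * otherwise the timer could fire on this CPU and spin on demo_lock
	 * forever -- the deadlock the btrfs patch fixes.
	 */
	spin_lock_bh(&demo_lock);
	demo_counter = 0;
	spin_unlock_bh(&demo_lock);
	return 0;
}

static void __exit demo_exit(void)
{
	del_timer_sync(&demo_timer);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

The rule of thumb the patch applies: any lock shared between process context and softirq context must be taken with the _bh variants in process context.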