Btrfs: check if we can nocow if we don't have data space

We always just try and reserve data space when we write, but if we are out of
space but have prealloc'ed extents we should still successfully write.  This
patch will try and see if we can write to prealloc'ed space and if we can go
ahead and allow the write to continue.  With this patch we now pass xfstests
generic/274.  Thanks,

Signed-off-by: Josef Bacik <jbacik@fusionio.com>
This commit is contained in:
Josef Bacik 2013-06-21 16:37:03 -04:00
Родитель 925a6efb8f
Коммит 7ee9e4405f
6 изменённых файлов: 148 добавлений и 26 удалений

Просмотреть файл

@ -3552,6 +3552,10 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work);
struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
size_t pg_offset, u64 start, u64 len,
int create);
noinline int can_nocow_extent(struct btrfs_trans_handle *trans,
struct inode *inode, u64 offset, u64 *len,
u64 *orig_start, u64 *orig_block_len,
u64 *ram_bytes);
/* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */
#if defined(ClearPageFsMisc) && !defined(ClearPageChecked)

Просмотреть файл

@ -3666,6 +3666,7 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
data_sinfo = root->fs_info->data_sinfo;
spin_lock(&data_sinfo->lock);
WARN_ON(data_sinfo->bytes_may_use < bytes);
data_sinfo->bytes_may_use -= bytes;
trace_btrfs_space_reservation(root->fs_info, "space_info",
data_sinfo->flags, bytes, 0);

Просмотреть файл

@ -543,6 +543,9 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
if (bits & EXTENT_DELALLOC)
bits |= EXTENT_NORESERVE;
if (delete)
bits |= ~EXTENT_CTLBITS;
bits |= EXTENT_FIRST_DELALLOC;

Просмотреть файл

@ -19,6 +19,7 @@
#define EXTENT_FIRST_DELALLOC (1 << 12)
#define EXTENT_NEED_WAIT (1 << 13)
#define EXTENT_DAMAGED (1 << 14)
#define EXTENT_NORESERVE (1 << 15)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)

Просмотреть файл

@ -1312,6 +1312,56 @@ fail:
}
static noinline int check_can_nocow(struct inode *inode, loff_t pos,
size_t *write_bytes)
{
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_ordered_extent *ordered;
u64 lockstart, lockend;
u64 num_bytes;
int ret;
lockstart = round_down(pos, root->sectorsize);
lockend = lockstart + round_up(*write_bytes, root->sectorsize) - 1;
while (1) {
lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
ordered = btrfs_lookup_ordered_range(inode, lockstart,
lockend - lockstart + 1);
if (!ordered) {
break;
}
unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
btrfs_start_ordered_extent(inode, ordered, 1);
btrfs_put_ordered_extent(ordered);
}
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
return PTR_ERR(trans);
}
num_bytes = lockend - lockstart + 1;
ret = can_nocow_extent(trans, inode, lockstart, &num_bytes, NULL, NULL,
NULL);
btrfs_end_transaction(trans, root);
if (ret <= 0) {
ret = 0;
} else {
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0,
NULL, GFP_NOFS);
*write_bytes = min_t(size_t, *write_bytes, num_bytes);
}
unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
return ret;
}
static noinline ssize_t __btrfs_buffered_write(struct file *file,
struct iov_iter *i,
loff_t pos)
@ -1319,10 +1369,12 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
struct inode *inode = file_inode(file);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct page **pages = NULL;
u64 release_bytes = 0;
unsigned long first_index;
size_t num_written = 0;
int nrptrs;
int ret = 0;
bool only_release_metadata = false;
bool force_page_uptodate = false;
nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
@ -1343,6 +1395,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
offset);
size_t num_pages = (write_bytes + offset +
PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
size_t reserve_bytes;
size_t dirty_pages;
size_t copied;
@ -1357,11 +1410,41 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
break;
}
ret = btrfs_delalloc_reserve_space(inode,
num_pages << PAGE_CACHE_SHIFT);
reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
ret = btrfs_check_data_free_space(inode, reserve_bytes);
if (ret == -ENOSPC &&
(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
BTRFS_INODE_PREALLOC))) {
ret = check_can_nocow(inode, pos, &write_bytes);
if (ret > 0) {
only_release_metadata = true;
/*
* our prealloc extent may be smaller than
* write_bytes, so scale down.
*/
num_pages = (write_bytes + offset +
PAGE_CACHE_SIZE - 1) >>
PAGE_CACHE_SHIFT;
reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
ret = 0;
} else {
ret = -ENOSPC;
}
}
if (ret)
break;
ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes);
if (ret) {
if (!only_release_metadata)
btrfs_free_reserved_data_space(inode,
reserve_bytes);
break;
}
release_bytes = reserve_bytes;
/*
* This is going to setup the pages array with the number of
* pages we want, so we don't really need to worry about the
@ -1370,11 +1453,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
ret = prepare_pages(root, file, pages, num_pages,
pos, first_index, write_bytes,
force_page_uptodate);
if (ret) {
btrfs_delalloc_release_space(inode,
num_pages << PAGE_CACHE_SHIFT);
if (ret)
break;
}
copied = btrfs_copy_from_user(pos, num_pages,
write_bytes, pages, i);
@ -1404,30 +1484,46 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
* managed to copy.
*/
if (num_pages > dirty_pages) {
release_bytes = (num_pages - dirty_pages) <<
PAGE_CACHE_SHIFT;
if (copied > 0) {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
}
btrfs_delalloc_release_space(inode,
(num_pages - dirty_pages) <<
PAGE_CACHE_SHIFT);
if (only_release_metadata)
btrfs_delalloc_release_metadata(inode,
release_bytes);
else
btrfs_delalloc_release_space(inode,
release_bytes);
}
release_bytes = dirty_pages << PAGE_CACHE_SHIFT;
if (copied > 0) {
ret = btrfs_dirty_pages(root, inode, pages,
dirty_pages, pos, copied,
NULL);
if (ret) {
btrfs_delalloc_release_space(inode,
dirty_pages << PAGE_CACHE_SHIFT);
btrfs_drop_pages(pages, num_pages);
break;
}
}
release_bytes = 0;
btrfs_drop_pages(pages, num_pages);
if (only_release_metadata && copied > 0) {
u64 lockstart = round_down(pos, root->sectorsize);
u64 lockend = lockstart +
(dirty_pages << PAGE_CACHE_SHIFT) - 1;
set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
lockend, EXTENT_NORESERVE, NULL,
NULL, GFP_NOFS);
only_release_metadata = false;
}
cond_resched();
balance_dirty_pages_ratelimited(inode->i_mapping);
@ -1440,6 +1536,13 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
kfree(pages);
if (release_bytes) {
if (only_release_metadata)
btrfs_delalloc_release_metadata(inode, release_bytes);
else
btrfs_delalloc_release_space(inode, release_bytes);
}
return num_written ? num_written : ret;
}

Просмотреть файл

@ -1641,7 +1641,7 @@ static void btrfs_clear_bit_hook(struct inode *inode,
btrfs_delalloc_release_metadata(inode, len);
if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
&& do_list)
&& do_list && !(state->state & EXTENT_NORESERVE))
btrfs_free_reserved_data_space(inode, len);
__percpu_counter_add(&root->fs_info->delalloc_bytes, -len,
@ -6396,10 +6396,10 @@ out:
* returns 1 when the nocow is safe, < 1 on error, 0 if the
* block must be cow'd
*/
static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
struct inode *inode, u64 offset, u64 *len,
u64 *orig_start, u64 *orig_block_len,
u64 *ram_bytes)
noinline int can_nocow_extent(struct btrfs_trans_handle *trans,
struct inode *inode, u64 offset, u64 *len,
u64 *orig_start, u64 *orig_block_len,
u64 *ram_bytes)
{
struct btrfs_path *path;
int ret;
@ -6413,7 +6413,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
u64 num_bytes;
int slot;
int found_type;
bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW);
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@ -6453,18 +6453,28 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
/* not a regular extent, must cow */
goto out;
}
if (!nocow && found_type == BTRFS_FILE_EXTENT_REG)
goto out;
disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
if (disk_bytenr == 0)
goto out;
if (btrfs_file_extent_compression(leaf, fi) ||
btrfs_file_extent_encryption(leaf, fi) ||
btrfs_file_extent_other_encoding(leaf, fi))
goto out;
backref_offset = btrfs_file_extent_offset(leaf, fi);
*orig_start = key.offset - backref_offset;
*orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
*ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
if (orig_start) {
*orig_start = key.offset - backref_offset;
*orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
*ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
}
extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
if (extent_end < offset + *len) {
/* extent doesn't include our full range, must cow */
goto out;
}
if (btrfs_extent_readonly(root, disk_bytenr))
goto out;
@ -6708,8 +6718,8 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
if (IS_ERR(trans))
goto must_cow;
if (can_nocow_odirect(trans, inode, start, &len, &orig_start,
&orig_block_len, &ram_bytes) == 1) {
if (can_nocow_extent(trans, inode, start, &len, &orig_start,
&orig_block_len, &ram_bytes) == 1) {
if (type == BTRFS_ORDERED_PREALLOC) {
free_extent_map(em);
em = create_pinned_em(inode, start, len,