Btrfs: O_DIRECT writes via buffered writes + invalidate
This reworks the btrfs O_DIRECT write code a bit. It had always fallen back to buffered IO and done an invalidate, but needed to be updated for the data=ordered code. The invalidate wasn't actually removing pages because they were still inside an ordered extent. This also combines the O_DIRECT/O_SYNC paths where possible, and kicks off IO in the main btrfs_file_write loop to keep the pipe down to the disk full as we process long writes. Signed-off-by: Chris Mason <chris.mason@oracle.com>
This commit is contained in:
Родитель
323ac95bce
Коммит
cb843a6f51
|
@ -905,6 +905,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
|
||||||
struct page *pinned[2];
|
struct page *pinned[2];
|
||||||
unsigned long first_index;
|
unsigned long first_index;
|
||||||
unsigned long last_index;
|
unsigned long last_index;
|
||||||
|
int will_write;
|
||||||
|
|
||||||
|
will_write = ((file->f_flags & O_SYNC) || IS_SYNC(inode) ||
|
||||||
|
(file->f_flags & O_DIRECT));
|
||||||
|
|
||||||
nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
|
nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
|
||||||
PAGE_CACHE_SIZE / (sizeof(struct page *)));
|
PAGE_CACHE_SIZE / (sizeof(struct page *)));
|
||||||
|
@ -1001,15 +1005,24 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
|
||||||
if (ret)
|
if (ret)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
|
if (will_write) {
|
||||||
|
btrfs_fdatawrite_range(inode->i_mapping, pos,
|
||||||
|
pos + write_bytes - 1,
|
||||||
|
WB_SYNC_NONE);
|
||||||
|
} else {
|
||||||
|
balance_dirty_pages_ratelimited_nr(inode->i_mapping,
|
||||||
|
num_pages);
|
||||||
|
if (num_pages <
|
||||||
|
(root->leafsize >> PAGE_CACHE_SHIFT) + 1)
|
||||||
|
btrfs_btree_balance_dirty(root, 1);
|
||||||
|
btrfs_throttle(root);
|
||||||
|
}
|
||||||
|
|
||||||
buf += write_bytes;
|
buf += write_bytes;
|
||||||
count -= write_bytes;
|
count -= write_bytes;
|
||||||
pos += write_bytes;
|
pos += write_bytes;
|
||||||
num_written += write_bytes;
|
num_written += write_bytes;
|
||||||
|
|
||||||
balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages);
|
|
||||||
if (num_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
|
|
||||||
btrfs_btree_balance_dirty(root, 1);
|
|
||||||
btrfs_throttle(root);
|
|
||||||
cond_resched();
|
cond_resched();
|
||||||
}
|
}
|
||||||
out:
|
out:
|
||||||
|
@ -1023,36 +1036,29 @@ out_nolock:
|
||||||
page_cache_release(pinned[1]);
|
page_cache_release(pinned[1]);
|
||||||
*ppos = pos;
|
*ppos = pos;
|
||||||
|
|
||||||
if (num_written > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
|
if (num_written > 0 && will_write) {
|
||||||
struct btrfs_trans_handle *trans;
|
struct btrfs_trans_handle *trans;
|
||||||
|
|
||||||
err = btrfs_fdatawrite_range(inode->i_mapping, start_pos,
|
err = btrfs_wait_ordered_range(inode, start_pos, num_written);
|
||||||
start_pos + num_written -1,
|
if (err)
|
||||||
WB_SYNC_NONE);
|
|
||||||
if (err < 0)
|
|
||||||
num_written = err;
|
num_written = err;
|
||||||
|
|
||||||
err = btrfs_wait_on_page_writeback_range(inode->i_mapping,
|
if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
|
||||||
start_pos, start_pos + num_written - 1);
|
trans = btrfs_start_transaction(root, 1);
|
||||||
if (err < 0)
|
ret = btrfs_log_dentry_safe(trans, root,
|
||||||
num_written = err;
|
file->f_dentry);
|
||||||
|
if (ret == 0) {
|
||||||
trans = btrfs_start_transaction(root, 1);
|
btrfs_sync_log(trans, root);
|
||||||
ret = btrfs_log_dentry_safe(trans, root, file->f_dentry);
|
btrfs_end_transaction(trans, root);
|
||||||
if (ret == 0) {
|
} else {
|
||||||
btrfs_sync_log(trans, root);
|
btrfs_commit_transaction(trans, root);
|
||||||
btrfs_end_transaction(trans, root);
|
}
|
||||||
} else {
|
}
|
||||||
btrfs_commit_transaction(trans, root);
|
if (file->f_flags & O_DIRECT) {
|
||||||
|
invalidate_mapping_pages(inode->i_mapping,
|
||||||
|
start_pos >> PAGE_CACHE_SHIFT,
|
||||||
|
(start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
|
||||||
}
|
}
|
||||||
} else if (num_written > 0 && (file->f_flags & O_DIRECT)) {
|
|
||||||
do_sync_mapping_range(inode->i_mapping, start_pos,
|
|
||||||
start_pos + num_written - 1,
|
|
||||||
SYNC_FILE_RANGE_WRITE |
|
|
||||||
SYNC_FILE_RANGE_WAIT_AFTER);
|
|
||||||
invalidate_mapping_pages(inode->i_mapping,
|
|
||||||
start_pos >> PAGE_CACHE_SHIFT,
|
|
||||||
(start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
|
|
||||||
}
|
}
|
||||||
current->backing_dev_info = NULL;
|
current->backing_dev_info = NULL;
|
||||||
return num_written ? num_written : err;
|
return num_written ? num_written : err;
|
||||||
|
|
|
@ -397,7 +397,7 @@ void btrfs_start_ordered_extent(struct inode *inode,
|
||||||
/*
|
/*
|
||||||
* Used to wait on ordered extents across a large range of bytes.
|
* Used to wait on ordered extents across a large range of bytes.
|
||||||
*/
|
*/
|
||||||
void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
|
int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
|
||||||
{
|
{
|
||||||
u64 end;
|
u64 end;
|
||||||
u64 orig_end;
|
u64 orig_end;
|
||||||
|
@ -451,6 +451,7 @@ again:
|
||||||
(unsigned long long)orig_end);
|
(unsigned long long)orig_end);
|
||||||
goto again;
|
goto again;
|
||||||
}
|
}
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -135,7 +135,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
|
||||||
u64 file_offset);
|
u64 file_offset);
|
||||||
void btrfs_start_ordered_extent(struct inode *inode,
|
void btrfs_start_ordered_extent(struct inode *inode,
|
||||||
struct btrfs_ordered_extent *entry, int wait);
|
struct btrfs_ordered_extent *entry, int wait);
|
||||||
void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
|
int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
|
||||||
struct btrfs_ordered_extent *
|
struct btrfs_ordered_extent *
|
||||||
btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
|
btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
|
||||||
int btrfs_ordered_update_i_size(struct inode *inode,
|
int btrfs_ordered_update_i_size(struct inode *inode,
|
||||||
|
|
Загрузка…
Ссылка в новой задаче