f2fs: support in batch multi blocks preallocation
This patch introduces reserve_new_blocks() to preallocate multiple blocks as a single
batch operation, avoiding a lot of redundant per-block work and giving better performance.

In a virtual machine, with a rotational device:

  time fallocate -l 32G /mnt/f2fs/file

  Before: real 0m4.584s  user 0m0.000s  sys 0m4.580s
  After:  real 0m0.292s  user 0m0.000s  sys 0m0.272s

On x86, with an SSD:

  time fallocate -l 500G $MNT/testfile

  Before: 24.758 s
  After:   1.604 s

Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: fix bugs and add performance numbers measured in x86.]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Parent: 0fac558b96
Commit: 46008c6d42
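The gist of the change, before the full diff: the old path reserved blocks one at a time, charging the block quota (inc_valid_block_count) and dirtying the dnode page once per block, while the new reserve_new_blocks() charges the quota once for the whole batch and fills the dnode page's address array in a single pass. The stand-alone C sketch below is illustrative only, not the kernel code: NR_ADDRS, struct dnode, charge_quota() and mark_page_dirty() are stand-ins invented for the example; it merely counts how often the two costly steps run under each scheme.

#include <stdio.h>

#define NR_ADDRS 1018	/* block pointers in one dnode page (illustrative value) */

struct dnode {
	unsigned int ofs_in_node;
	unsigned long addrs[NR_ADDRS];
};

static unsigned long quota_charges, page_dirties;

/* stand-in for inc_valid_block_count(): one locked quota update */
static void charge_quota(unsigned long count)
{
	(void)count;
	quota_charges++;
}

/* stand-in for set_page_dirty() on the dnode page */
static void mark_page_dirty(void)
{
	page_dirties++;
}

/* old scheme: reserve exactly one block pointer */
static void reserve_new_block(struct dnode *dn)
{
	charge_quota(1);
	dn->addrs[dn->ofs_in_node] = 1;	/* NEW_ADDR */
	mark_page_dirty();
}

/* new scheme: reserve 'count' block pointers in one pass over the page */
static void reserve_new_blocks(struct dnode *dn, unsigned long count)
{
	charge_quota(count);
	for (; count > 0; dn->ofs_in_node++) {
		if (dn->addrs[dn->ofs_in_node] == 0) {	/* NULL_ADDR */
			dn->addrs[dn->ofs_in_node] = 1;	/* NEW_ADDR */
			count--;
		}
	}
	mark_page_dirty();
}

int main(void)
{
	static struct dnode a, b;
	unsigned int i;

	for (i = 0; i < NR_ADDRS; i++, a.ofs_in_node++)
		reserve_new_block(&a);
	printf("per-block: %lu quota charges, %lu page dirties\n",
			quota_charges, page_dirties);

	quota_charges = page_dirties = 0;
	reserve_new_blocks(&b, NR_ADDRS);
	printf("batched:   %lu quota charges, %lu page dirties\n",
			quota_charges, page_dirties);
	return 0;
}

Built and run, the per-block loop should report one quota charge and one page dirtying per block, while the batched version reports one of each per dnode page; that per-block redundancy is what the patch removes.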
fs/f2fs/data.c
@@ -278,6 +278,16 @@ alloc_new:
 	trace_f2fs_submit_page_mbio(fio->page, fio);
 }
 
+static void __set_data_blkaddr(struct dnode_of_data *dn)
+{
+	struct f2fs_node *rn = F2FS_NODE(dn->node_page);
+	__le32 *addr_array;
+
+	/* Get physical address of data block */
+	addr_array = blkaddr_in_node(rn);
+	addr_array[dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
+}
+
 /*
  * Lock ordering for the change of data block address:
  * ->data_page
@@ -286,19 +296,9 @@ alloc_new:
  */
 void set_data_blkaddr(struct dnode_of_data *dn)
 {
-	struct f2fs_node *rn;
-	__le32 *addr_array;
-	struct page *node_page = dn->node_page;
-	unsigned int ofs_in_node = dn->ofs_in_node;
-
-	f2fs_wait_on_page_writeback(node_page, NODE, true);
-
-	rn = F2FS_NODE(node_page);
-
-	/* Get physical address of data block */
-	addr_array = blkaddr_in_node(rn);
-	addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
-	if (set_page_dirty(node_page))
+	f2fs_wait_on_page_writeback(dn->node_page, NODE, true);
+	__set_data_blkaddr(dn);
+	if (set_page_dirty(dn->node_page))
 		dn->node_changed = true;
 }
 
@@ -309,24 +309,53 @@ void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
 	f2fs_update_extent_cache(dn);
 }
 
-int reserve_new_block(struct dnode_of_data *dn)
+/* dn->ofs_in_node will be returned with up-to-date last block pointer */
+int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
 
+	if (!count)
+		return 0;
+
 	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
 		return -EPERM;
-	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
+	if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count)))
 		return -ENOSPC;
 
-	trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);
+	trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
+						dn->ofs_in_node, count);
+
+	f2fs_wait_on_page_writeback(dn->node_page, NODE, true);
+
+	for (; count > 0; dn->ofs_in_node++) {
+		block_t blkaddr =
+			datablock_addr(dn->node_page, dn->ofs_in_node);
+		if (blkaddr == NULL_ADDR) {
+			dn->data_blkaddr = NEW_ADDR;
+			__set_data_blkaddr(dn);
+			count--;
+		}
+	}
+
+	if (set_page_dirty(dn->node_page))
+		dn->node_changed = true;
 
-	dn->data_blkaddr = NEW_ADDR;
-	set_data_blkaddr(dn);
 	mark_inode_dirty(dn->inode);
 	sync_inode_page(dn);
 	return 0;
 }
 
+/* Should keep dn->ofs_in_node unchanged */
+int reserve_new_block(struct dnode_of_data *dn)
+{
+	unsigned int ofs_in_node = dn->ofs_in_node;
+	int ret;
+
+	ret = reserve_new_blocks(dn, 1);
+	dn->ofs_in_node = ofs_in_node;
+	return ret;
+}
+
 int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
 {
 	bool need_put = dn->inode_page ? false : true;
@@ -545,6 +574,7 @@ static int __allocate_data_block(struct dnode_of_data *dn)
 	struct node_info ni;
 	int seg = CURSEG_WARM_DATA;
 	pgoff_t fofs;
+	blkcnt_t count = 1;
 
 	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
 		return -EPERM;
@@ -553,7 +583,7 @@ static int __allocate_data_block(struct dnode_of_data *dn)
 	if (dn->data_blkaddr == NEW_ADDR)
 		goto alloc;
 
-	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
+	if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count)))
 		return -ENOSPC;
 
 alloc:
@@ -621,8 +651,10 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
 	struct dnode_of_data dn;
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
-	pgoff_t pgofs, end_offset;
+	pgoff_t pgofs, end_offset, end;
 	int err = 0, ofs = 1;
+	unsigned int ofs_in_node, last_ofs_in_node;
+	blkcnt_t prealloc;
 	struct extent_info ei;
 	bool allocated = false;
 	block_t blkaddr;
@@ -632,6 +664,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
 
 	/* it only supports block size == page size */
 	pgofs = (pgoff_t)map->m_lblk;
+	end = pgofs + maxblocks;
 
 	if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
 		map->m_pblk = ei.blk + pgofs - ei.fofs;
@@ -659,6 +692,8 @@ next_dnode:
 		goto unlock_out;
 	}
 
+	prealloc = 0;
+	ofs_in_node = dn.ofs_in_node;
 	end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
 
 next_block:
@@ -671,17 +706,20 @@ next_block:
 			goto sync_out;
 		}
 		if (flag == F2FS_GET_BLOCK_PRE_AIO) {
-			if (blkaddr == NULL_ADDR)
-				err = reserve_new_block(&dn);
+			if (blkaddr == NULL_ADDR) {
+				prealloc++;
+				last_ofs_in_node = dn.ofs_in_node;
+			}
 		} else {
 			err = __allocate_data_block(&dn);
-			if (!err)
+			if (!err) {
 				set_inode_flag(F2FS_I(inode),
 						FI_APPEND_WRITE);
+				allocated = true;
+			}
 		}
 		if (err)
 			goto sync_out;
-		allocated = true;
 		map->m_flags = F2FS_MAP_NEW;
 		blkaddr = dn.data_blkaddr;
 	} else {
@@ -700,6 +738,9 @@ next_block:
 		}
 	}
 
+	if (flag == F2FS_GET_BLOCK_PRE_AIO)
+		goto skip;
+
 	if (map->m_len == 0) {
 		/* preallocated unwritten block should be mapped for fiemap. */
 		if (blkaddr == NEW_ADDR)
@@ -711,33 +752,50 @@ next_block:
 	} else if ((map->m_pblk != NEW_ADDR &&
 			blkaddr == (map->m_pblk + ofs)) ||
 			(map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
-			flag == F2FS_GET_BLOCK_PRE_DIO ||
-			flag == F2FS_GET_BLOCK_PRE_AIO) {
+			flag == F2FS_GET_BLOCK_PRE_DIO) {
 		ofs++;
 		map->m_len++;
 	} else {
 		goto sync_out;
 	}
 
+skip:
 	dn.ofs_in_node++;
 	pgofs++;
 
-	if (map->m_len < maxblocks) {
-		if (dn.ofs_in_node < end_offset)
-			goto next_block;
+	/* preallocate blocks in batch for one dnode page */
+	if (flag == F2FS_GET_BLOCK_PRE_AIO &&
+			(pgofs == end || dn.ofs_in_node == end_offset)) {
 
-		if (allocated)
-			sync_inode_page(&dn);
-		f2fs_put_dnode(&dn);
+		dn.ofs_in_node = ofs_in_node;
+		err = reserve_new_blocks(&dn, prealloc);
+		if (err)
+			goto sync_out;
 
-		if (create) {
-			f2fs_unlock_op(sbi);
-			f2fs_balance_fs(sbi, allocated);
+		map->m_len += dn.ofs_in_node - ofs_in_node;
+		if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
+			err = -ENOSPC;
+			goto sync_out;
 		}
-		allocated = false;
-		goto next_dnode;
+		dn.ofs_in_node = end_offset;
 	}
 
+	if (pgofs >= end)
+		goto sync_out;
+	else if (dn.ofs_in_node < end_offset)
+		goto next_block;
+
+	if (allocated)
+		sync_inode_page(&dn);
+	f2fs_put_dnode(&dn);
+
+	if (create) {
+		f2fs_unlock_op(sbi);
+		f2fs_balance_fs(sbi, allocated);
+	}
+	allocated = false;
+	goto next_dnode;
+
 sync_out:
 	if (allocated)
 		sync_inode_page(&dn);
fs/f2fs/f2fs.h

@@ -1094,7 +1094,7 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs)
 }
 
 static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
-				 struct inode *inode, blkcnt_t count)
+				 struct inode *inode, blkcnt_t *count)
 {
 	block_t	valid_block_count;
 
@@ -1106,14 +1106,19 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
 	}
 #endif
 	valid_block_count =
-		sbi->total_valid_block_count + (block_t)count;
+		sbi->total_valid_block_count + (block_t)(*count);
 	if (unlikely(valid_block_count > sbi->user_block_count)) {
-		spin_unlock(&sbi->stat_lock);
-		return false;
+		*count = sbi->user_block_count - sbi->total_valid_block_count;
+		if (!*count) {
+			spin_unlock(&sbi->stat_lock);
+			return false;
+		}
 	}
-	inode->i_blocks += count;
-	sbi->total_valid_block_count = valid_block_count;
-	sbi->alloc_valid_block_count += (block_t)count;
+	/* *count can be recalculated */
+	inode->i_blocks += *count;
+	sbi->total_valid_block_count =
+		sbi->total_valid_block_count + (block_t)(*count);
+	sbi->alloc_valid_block_count += (block_t)(*count);
 	spin_unlock(&sbi->stat_lock);
 	return true;
 }
@@ -1945,6 +1950,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *);
 void f2fs_submit_page_mbio(struct f2fs_io_info *);
 void set_data_blkaddr(struct dnode_of_data *);
 void f2fs_update_data_blkaddr(struct dnode_of_data *, block_t);
+int reserve_new_blocks(struct dnode_of_data *, blkcnt_t);
 int reserve_new_block(struct dnode_of_data *);
 int f2fs_get_block(struct dnode_of_data *, pgoff_t);
 ssize_t f2fs_preallocate_blocks(struct kiocb *, struct iov_iter *);
include/trace/events/f2fs.h

@@ -694,28 +694,32 @@ TRACE_EVENT(f2fs_direct_IO_exit,
 		__entry->ret)
 );
 
-TRACE_EVENT(f2fs_reserve_new_block,
+TRACE_EVENT(f2fs_reserve_new_blocks,
 
-	TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs_in_node),
+	TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs_in_node,
+							blkcnt_t count),
 
-	TP_ARGS(inode, nid, ofs_in_node),
+	TP_ARGS(inode, nid, ofs_in_node, count),
 
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
 		__field(nid_t, nid)
 		__field(unsigned int, ofs_in_node)
+		__field(blkcnt_t, count)
 	),
 
 	TP_fast_assign(
 		__entry->dev = inode->i_sb->s_dev;
 		__entry->nid = nid;
 		__entry->ofs_in_node = ofs_in_node;
+		__entry->count = count;
 	),
 
-	TP_printk("dev = (%d,%d), nid = %u, ofs_in_node = %u",
+	TP_printk("dev = (%d,%d), nid = %u, ofs_in_node = %u, count = %llu",
 		show_dev(__entry),
 		(unsigned int)__entry->nid,
-		__entry->ofs_in_node)
+		__entry->ofs_in_node,
+		(unsigned long long)__entry->count)
 );
 
 DECLARE_EVENT_CLASS(f2fs__submit_page_bio,