ext4: lookup block mapping in extent status tree

After tracking all extent status, we already have an extent cache in
memory.  Every time we want to look up a block mapping, we can first
try to look it up in the extent status tree to avoid a potential disk I/O.

A new function called ext4_es_lookup_extent is defined to do this
work.  When we try to look up a block mapping, we always call
ext4_map_blocks and/or ext4_da_map_blocks.  So in these functions we
first try to look up the block mapping in the extent status tree.

A new flag EXT4_GET_BLOCKS_NO_PUT_HOLE is used in ext4_da_map_blocks
in order not to put a hole into the extent status tree, because this hole
will be converted to a delayed extent in the tree immediately.

Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Jan Kara <jack@suse.cz>
This commit is contained in:
Zheng Liu 2013-02-18 00:29:59 -05:00 коммит произвёл Theodore Ts'o
Родитель f7fec032aa
Коммит d100eef244
6 изменённых файлов: 192 добавлений и 3 удалений

Просмотреть файл

@ -579,6 +579,8 @@ enum {
#define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080
/* Do not take i_data_sem locking in ext4_map_blocks */
#define EXT4_GET_BLOCKS_NO_LOCK 0x0100
/* Do not put hole in extent cache */
#define EXT4_GET_BLOCKS_NO_PUT_HOLE 0x0200
/*
* Flags used by ext4_free_blocks

Просмотреть файл

@ -2167,6 +2167,9 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
block,
le32_to_cpu(ex->ee_block),
ext4_ext_get_actual_len(ex));
if (!ext4_find_delalloc_range(inode, lblock, lblock + len - 1))
ext4_es_insert_extent(inode, lblock, len, ~0,
EXTENT_STATUS_HOLE);
} else if (block >= le32_to_cpu(ex->ee_block)
+ ext4_ext_get_actual_len(ex)) {
ext4_lblk_t next;
@ -2180,6 +2183,9 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
block);
BUG_ON(next == lblock);
len = next - lblock;
if (!ext4_find_delalloc_range(inode, lblock, lblock + len - 1))
ext4_es_insert_extent(inode, lblock, len, ~0,
EXTENT_STATUS_HOLE);
} else {
lblock = len = 0;
BUG();
@ -4018,7 +4024,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* put just found gap into cache to speed up
* subsequent requests
*/
ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
if ((flags & EXT4_GET_BLOCKS_NO_PUT_HOLE) == 0)
ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
goto out2;
}

Просмотреть файл

@ -461,6 +461,66 @@ error:
return err;
}
/*
 * ext4_es_lookup_extent() - find the cached extent that covers a block.
 *
 * Searches the inode's extent status tree for an extent containing @lblk.
 * The tree's one-entry cache (cache_es) is consulted first; only if it
 * does not cover @lblk is the rb-tree itself walked.  The whole lookup
 * runs under the inode's i_es_lock taken for reading.
 *
 * @es is always reset to zero first, so on a miss the caller sees an
 * all-zero lblk/len/pblk; on a hit it receives a copy of the matching
 * extent's lblk, len and pblk.
 *
 * Called by ext4_map_blocks/ext4_da_map_blocks.
 *
 * Return: 1 if an extent was found, 0 otherwise.
 */
int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
			  struct extent_status *es)
{
	struct ext4_es_tree *tree;
	struct extent_status *hit;
	struct rb_node *cur;
	int found = 0;

	trace_ext4_es_lookup_extent_enter(inode, lblk);
	es_debug("lookup extent in block %u\n", lblk);

	tree = &EXT4_I(inode)->i_es_tree;
	read_lock(&EXT4_I(inode)->i_es_lock);

	/* Start from an empty result so a miss is reported as all zeroes. */
	es->es_pblk = 0;
	es->es_len = 0;
	es->es_lblk = 0;

	/* Fast path: the most recently cached extent, if there is one. */
	hit = tree->cache_es;
	if (hit && in_range(lblk, hit->es_lblk, hit->es_len)) {
		es_debug("%u cached by [%u/%u)\n",
			 lblk, hit->es_lblk, hit->es_len);
		found = 1;
	} else {
		/* Slow path: binary-search descent through the rb-tree. */
		for (cur = tree->root.rb_node; cur && !found; ) {
			hit = rb_entry(cur, struct extent_status, rb_node);
			if (lblk < hit->es_lblk)
				cur = cur->rb_left;
			else if (lblk > ext4_es_end(hit))
				cur = cur->rb_right;
			else
				found = 1;
		}
	}

	if (found) {
		BUG_ON(!hit);
		/* Hand back a copy so the caller never touches tree nodes. */
		es->es_lblk = hit->es_lblk;
		es->es_len = hit->es_len;
		es->es_pblk = hit->es_pblk;
	}

	read_unlock(&EXT4_I(inode)->i_es_lock);

	trace_ext4_es_lookup_extent_exit(inode, es, found);
	return found;
}
static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk,
ext4_lblk_t end)
{

Просмотреть файл

@ -53,6 +53,8 @@ extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len);
extern void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
struct extent_status *es);
/*
 * Look up the extent containing @lblk in @inode's extent status tree.
 * Returns 1 and copies the extent's lblk/len/pblk into @es on a hit;
 * returns 0 and leaves @es zeroed on a miss.
 */
extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
struct extent_status *es);
static inline int ext4_es_is_written(struct extent_status *es)
{

Просмотреть файл

@ -507,12 +507,33 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
int ext4_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map, int flags)
{
struct extent_status es;
int retval;
map->m_flags = 0;
ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"
"logical block %lu\n", inode->i_ino, flags, map->m_len,
(unsigned long) map->m_lblk);
/* Lookup extent status tree firstly */
if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
map->m_pblk = ext4_es_pblock(&es) +
map->m_lblk - es.es_lblk;
map->m_flags |= ext4_es_is_written(&es) ?
EXT4_MAP_MAPPED : EXT4_MAP_UNWRITTEN;
retval = es.es_len - (map->m_lblk - es.es_lblk);
if (retval > map->m_len)
retval = map->m_len;
map->m_len = retval;
} else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) {
retval = 0;
} else {
BUG_ON(1);
}
goto found;
}
/*
* Try to see if we can get the block without requesting a new
* file system block.
@ -544,6 +565,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
up_read((&EXT4_I(inode)->i_data_sem));
found:
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
int ret = check_block_validity(inode, map);
if (ret != 0)
@ -1743,6 +1765,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
struct ext4_map_blocks *map,
struct buffer_head *bh)
{
struct extent_status es;
int retval;
sector_t invalid_block = ~((sector_t) 0xffff);
@ -1753,6 +1776,42 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u,"
"logical block %lu\n", inode->i_ino, map->m_len,
(unsigned long) map->m_lblk);
/* Lookup extent status tree firstly */
if (ext4_es_lookup_extent(inode, iblock, &es)) {
if (ext4_es_is_hole(&es)) {
retval = 0;
down_read((&EXT4_I(inode)->i_data_sem));
goto add_delayed;
}
/*
* Delayed extent could be allocated by fallocate.
* So we need to check it.
*/
if (ext4_es_is_delayed(&es) && !ext4_es_is_unwritten(&es)) {
map_bh(bh, inode->i_sb, invalid_block);
set_buffer_new(bh);
set_buffer_delay(bh);
return 0;
}
map->m_pblk = ext4_es_pblock(&es) + iblock - es.es_lblk;
retval = es.es_len - (iblock - es.es_lblk);
if (retval > map->m_len)
retval = map->m_len;
map->m_len = retval;
if (ext4_es_is_written(&es))
map->m_flags |= EXT4_MAP_MAPPED;
else if (ext4_es_is_unwritten(&es))
map->m_flags |= EXT4_MAP_UNWRITTEN;
else
BUG_ON(1);
return retval;
}
/*
* Try to see if we can get the block without requesting a new
* file system block.
@ -1771,10 +1830,13 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
map->m_flags |= EXT4_MAP_FROM_CLUSTER;
retval = 0;
} else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
retval = ext4_ext_map_blocks(NULL, inode, map, 0);
retval = ext4_ext_map_blocks(NULL, inode, map,
EXT4_GET_BLOCKS_NO_PUT_HOLE);
else
retval = ext4_ind_map_blocks(NULL, inode, map, 0);
retval = ext4_ind_map_blocks(NULL, inode, map,
EXT4_GET_BLOCKS_NO_PUT_HOLE);
add_delayed:
if (retval == 0) {
int ret;
/*

Просмотреть файл

@ -2199,6 +2199,62 @@ TRACE_EVENT(ext4_es_find_delayed_extent_exit,
__entry->pblk, __entry->status)
);
/*
 * Tracepoint emitted at the start of ext4_es_lookup_extent().
 * Records the device and inode number of @inode together with the
 * logical block (@lblk) that is about to be looked up.
 */
TRACE_EVENT(ext4_es_lookup_extent_enter,
TP_PROTO(struct inode *inode, ext4_lblk_t lblk),
TP_ARGS(inode, lblk),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( ext4_lblk_t, lblk )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->lblk = lblk;
),
/* Output: "dev <major>,<minor> ino <ino> lblk <lblk>" */
TP_printk("dev %d,%d ino %lu lblk %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino, __entry->lblk)
);
/*
 * Tracepoint emitted at the end of ext4_es_lookup_extent().
 * Records the device and inode number of @inode, the extent handed back
 * in @es (logical start, length, physical block and status) and whether
 * the lookup succeeded (@found).
 */
TRACE_EVENT(ext4_es_lookup_extent_exit,
TP_PROTO(struct inode *inode, struct extent_status *es,
int found),
TP_ARGS(inode, es, found),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( ext4_lblk_t, lblk )
__field( ext4_lblk_t, len )
__field( ext4_fsblk_t, pblk )
__field( unsigned long long, status )
__field( int, found )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->lblk = es->es_lblk;
__entry->len = es->es_len;
/* pblk and status are extracted through the ext4_es_* accessors */
__entry->pblk = ext4_es_pblock(es);
__entry->status = ext4_es_status(es);
__entry->found = found;
),
/* Output: "dev <major>,<minor> ino <ino> found <0|1> [<lblk>/<len>) <pblk> <status hex>" */
TP_printk("dev %d,%d ino %lu found %d [%u/%u) %llu %llx",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino, __entry->found,
__entry->lblk, __entry->len,
/* On a miss, print 0 for pblk and status instead of the recorded values */
__entry->found ? __entry->pblk : 0,
__entry->found ? __entry->status : 0)
);
#endif /* _TRACE_EXT4_H */
/* This part must be outside protection */