Btrfs: Search data ordered extents first for checksums on read
Checksum items are not inserted into the tree until all of the I/O for a given extent is complete. This means one dirty page from an extent may be written, freed, and then read again before the entire extent is on disk and the checksum item is inserted. The checksums themselves are stored in the ordered extent so they can be inserted in bulk when the I/O is complete. Previously, on read, the ordered extents were searched for a checksum record only if a checksum item wasn't found in the btree. This worked most of the time, but the checksum insertion code tries to reduce the number of tree operations by pre-inserting checksum items based on i_size and a few other factors. This means the read code might find a checksum item that hasn't really been filled in yet. This commit changes things to check the ordered extents first and only dive into the btree if nothing was found. This removes the need for extra locking and is more reliable. Signed-off-by: Chris Mason <chris.mason@oracle.com>
This commit is contained in:
Родитель
9ba4611a3a
Коммит
89642229a5
|
@ -1011,9 +1011,16 @@ void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
|
||||||
spin_lock(&em_tree->lock);
|
spin_lock(&em_tree->lock);
|
||||||
em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
|
em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
|
||||||
spin_unlock(&em_tree->lock);
|
spin_unlock(&em_tree->lock);
|
||||||
if (!em)
|
if (!em) {
|
||||||
|
__unplug_io_fn(bdi, page);
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
|
||||||
|
free_extent_map(em);
|
||||||
|
__unplug_io_fn(bdi, page);
|
||||||
|
return;
|
||||||
|
}
|
||||||
offset = offset - em->start;
|
offset = offset - em->start;
|
||||||
btrfs_unplug_page(&BTRFS_I(inode)->root->fs_info->mapping_tree,
|
btrfs_unplug_page(&BTRFS_I(inode)->root->fs_info->mapping_tree,
|
||||||
em->block_start + offset, page);
|
em->block_start + offset, page);
|
||||||
|
|
|
@ -1949,18 +1949,18 @@ printk("2bad mapping end %Lu cur %Lu\n", end, cur);
|
||||||
cur + iosize - 1);
|
cur + iosize - 1);
|
||||||
}
|
}
|
||||||
if (!ret) {
|
if (!ret) {
|
||||||
unsigned long nr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
|
unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
|
||||||
nr -= page->index;
|
pnr -= page->index;
|
||||||
ret = submit_extent_page(READ, tree, page,
|
ret = submit_extent_page(READ, tree, page,
|
||||||
sector, iosize, page_offset,
|
sector, iosize, page_offset,
|
||||||
bdev, bio, nr,
|
bdev, bio, pnr,
|
||||||
end_bio_extent_readpage, mirror_num);
|
end_bio_extent_readpage, mirror_num);
|
||||||
|
nr++;
|
||||||
}
|
}
|
||||||
if (ret)
|
if (ret)
|
||||||
SetPageError(page);
|
SetPageError(page);
|
||||||
cur = cur + iosize;
|
cur = cur + iosize;
|
||||||
page_offset += iosize;
|
page_offset += iosize;
|
||||||
nr++;
|
|
||||||
}
|
}
|
||||||
if (!nr) {
|
if (!nr) {
|
||||||
if (!PageError(page))
|
if (!PageError(page))
|
||||||
|
|
|
@ -611,22 +611,25 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
|
||||||
btrfs_test_flag(inode, NODATASUM))
|
btrfs_test_flag(inode, NODATASUM))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* It is possible there is an ordered extent that has
|
||||||
|
* not yet finished for this range in the file. If so,
|
||||||
|
* that extent will have a csum cached, and it will insert
|
||||||
|
* the sum after all the blocks in the extent are fully
|
||||||
|
* on disk. So, look for an ordered extent and use the
|
||||||
|
* sum if found. We have to do this before looking in the
|
||||||
|
* btree because csum items are pre-inserted based on
|
||||||
|
* the file size. btrfs_lookup_csum might find an item
|
||||||
|
* that still hasn't been fully filled.
|
||||||
|
*/
|
||||||
|
ret = btrfs_find_ordered_sum(inode, start, &csum);
|
||||||
|
if (ret == 0)
|
||||||
|
goto found;
|
||||||
|
|
||||||
|
ret = 0;
|
||||||
path = btrfs_alloc_path();
|
path = btrfs_alloc_path();
|
||||||
mutex_lock(&BTRFS_I(inode)->csum_mutex);
|
|
||||||
item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0);
|
item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0);
|
||||||
if (IS_ERR(item)) {
|
if (IS_ERR(item)) {
|
||||||
/*
|
|
||||||
* It is possible there is an ordered extent that has
|
|
||||||
* not yet finished for this range in the file. If so,
|
|
||||||
* that extent will have a csum cached, and it will insert
|
|
||||||
* the sum after all the blocks in the extent are fully
|
|
||||||
* on disk. So, look for an ordered extent and use the
|
|
||||||
* sum if found.
|
|
||||||
*/
|
|
||||||
ret = btrfs_find_ordered_sum(inode, start, &csum);
|
|
||||||
if (ret == 0)
|
|
||||||
goto found;
|
|
||||||
|
|
||||||
ret = PTR_ERR(item);
|
ret = PTR_ERR(item);
|
||||||
/* a csum that isn't present is a preallocated region. */
|
/* a csum that isn't present is a preallocated region. */
|
||||||
if (ret == -ENOENT || ret == -EFBIG)
|
if (ret == -ENOENT || ret == -EFBIG)
|
||||||
|
@ -641,7 +644,6 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
|
||||||
found:
|
found:
|
||||||
set_state_private(io_tree, start, csum);
|
set_state_private(io_tree, start, csum);
|
||||||
out:
|
out:
|
||||||
mutex_unlock(&BTRFS_I(inode)->csum_mutex);
|
|
||||||
if (path)
|
if (path)
|
||||||
btrfs_free_path(path);
|
btrfs_free_path(path);
|
||||||
return ret;
|
return ret;
|
||||||
|
@ -1375,7 +1377,7 @@ again:
|
||||||
}
|
}
|
||||||
if (!PageUptodate(page)) {
|
if (!PageUptodate(page)) {
|
||||||
ret = -EIO;
|
ret = -EIO;
|
||||||
goto out;
|
goto out_unlock;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
wait_on_page_writeback(page);
|
wait_on_page_writeback(page);
|
||||||
|
@ -1406,6 +1408,7 @@ again:
|
||||||
set_page_dirty(page);
|
set_page_dirty(page);
|
||||||
unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
|
unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
|
||||||
|
|
||||||
|
out_unlock:
|
||||||
unlock_page(page);
|
unlock_page(page);
|
||||||
page_cache_release(page);
|
page_cache_release(page);
|
||||||
out:
|
out:
|
||||||
|
|
|
@ -557,6 +557,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum)
|
||||||
}
|
}
|
||||||
out:
|
out:
|
||||||
mutex_unlock(&tree->mutex);
|
mutex_unlock(&tree->mutex);
|
||||||
|
btrfs_put_ordered_extent(ordered);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Загрузка…
Ссылка в новой задаче