Btrfs: scrub: batch rebuild for raid56
In the case of raid56, writes and rebuilds always use BTRFS_STRIPE_LEN (64K) as the unit; however, scrub_extent() uses blocksize as the unit, so the rebuild process may be triggered for every block on the same stripe. A typical example is replacing a disappeared disk: all reads on that disk get -EIO, and every block (4K in size if blocksize is 4K) goes through the following call chain: scrub_handle_errored_block -> scrub_recheck_block (re-read pages one by one), then scrub_recheck_block again (rebuild by calling raid56_parity_recover() page by page). Although the raid56 stripe cache avoids most of the reads during a rebuild, the parity recovery calculation (xor or raid6 algorithms) still has to be done $(BTRFS_STRIPE_LEN / blocksize) times. This patch makes it smarter by doing raid56 scrub/replace in units of the stripe length. Signed-off-by: Liu Bo <bo.li.liu@oracle.com> Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Родитель
416a72022e
Коммит
6ca1765b36
|
@ -1727,6 +1727,45 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
|
|||
return blk_status_to_errno(bio->bi_status);
|
||||
}
|
||||
|
||||
static void scrub_recheck_block_on_raid56(struct btrfs_fs_info *fs_info,
|
||||
struct scrub_block *sblock)
|
||||
{
|
||||
struct scrub_page *first_page = sblock->pagev[0];
|
||||
struct bio *bio;
|
||||
int page_num;
|
||||
|
||||
/* All pages in sblock belong to the same stripe on the same device. */
|
||||
ASSERT(first_page->dev);
|
||||
if (!first_page->dev->bdev)
|
||||
goto out;
|
||||
|
||||
bio = btrfs_io_bio_alloc(BIO_MAX_PAGES);
|
||||
bio_set_dev(bio, first_page->dev->bdev);
|
||||
|
||||
for (page_num = 0; page_num < sblock->page_count; page_num++) {
|
||||
struct scrub_page *page = sblock->pagev[page_num];
|
||||
|
||||
WARN_ON(!page->page);
|
||||
bio_add_page(bio, page->page, PAGE_SIZE, 0);
|
||||
}
|
||||
|
||||
if (scrub_submit_raid56_bio_wait(fs_info, bio, first_page)) {
|
||||
bio_put(bio);
|
||||
goto out;
|
||||
}
|
||||
|
||||
bio_put(bio);
|
||||
|
||||
scrub_recheck_block_checksum(sblock);
|
||||
|
||||
return;
|
||||
out:
|
||||
for (page_num = 0; page_num < sblock->page_count; page_num++)
|
||||
sblock->pagev[page_num]->io_error = 1;
|
||||
|
||||
sblock->no_io_error_seen = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* this function will check the on disk data for checksum errors, header
|
||||
* errors and read I/O errors. If any I/O errors happen, the exact pages
|
||||
|
@ -1742,6 +1781,10 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
|
|||
|
||||
sblock->no_io_error_seen = 1;
|
||||
|
||||
/* short cut for raid56 */
|
||||
if (!retry_failed_mirror && scrub_is_page_on_raid56(sblock->pagev[0]))
|
||||
return scrub_recheck_block_on_raid56(fs_info, sblock);
|
||||
|
||||
for (page_num = 0; page_num < sblock->page_count; page_num++) {
|
||||
struct bio *bio;
|
||||
struct scrub_page *page = sblock->pagev[page_num];
|
||||
|
@ -1757,19 +1800,12 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
|
|||
bio_set_dev(bio, page->dev->bdev);
|
||||
|
||||
bio_add_page(bio, page->page, PAGE_SIZE, 0);
|
||||
if (!retry_failed_mirror && scrub_is_page_on_raid56(page)) {
|
||||
if (scrub_submit_raid56_bio_wait(fs_info, bio, page)) {
|
||||
page->io_error = 1;
|
||||
sblock->no_io_error_seen = 0;
|
||||
}
|
||||
} else {
|
||||
bio->bi_iter.bi_sector = page->physical >> 9;
|
||||
bio_set_op_attrs(bio, REQ_OP_READ, 0);
|
||||
bio->bi_iter.bi_sector = page->physical >> 9;
|
||||
bio->bi_opf = REQ_OP_READ;
|
||||
|
||||
if (btrfsic_submit_bio_wait(bio)) {
|
||||
page->io_error = 1;
|
||||
sblock->no_io_error_seen = 0;
|
||||
}
|
||||
if (btrfsic_submit_bio_wait(bio)) {
|
||||
page->io_error = 1;
|
||||
sblock->no_io_error_seen = 0;
|
||||
}
|
||||
|
||||
bio_put(bio);
|
||||
|
@ -2737,7 +2773,8 @@ static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum)
|
|||
}
|
||||
|
||||
/* scrub extent tries to collect up to 64 kB for each bio */
|
||||
static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len,
|
||||
static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
|
||||
u64 logical, u64 len,
|
||||
u64 physical, struct btrfs_device *dev, u64 flags,
|
||||
u64 gen, int mirror_num, u64 physical_for_dev_replace)
|
||||
{
|
||||
|
@ -2746,13 +2783,19 @@ static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len,
|
|||
u32 blocksize;
|
||||
|
||||
if (flags & BTRFS_EXTENT_FLAG_DATA) {
|
||||
blocksize = sctx->fs_info->sectorsize;
|
||||
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
|
||||
blocksize = map->stripe_len;
|
||||
else
|
||||
blocksize = sctx->fs_info->sectorsize;
|
||||
spin_lock(&sctx->stat_lock);
|
||||
sctx->stat.data_extents_scrubbed++;
|
||||
sctx->stat.data_bytes_scrubbed += len;
|
||||
spin_unlock(&sctx->stat_lock);
|
||||
} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
|
||||
blocksize = sctx->fs_info->nodesize;
|
||||
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
|
||||
blocksize = map->stripe_len;
|
||||
else
|
||||
blocksize = sctx->fs_info->nodesize;
|
||||
spin_lock(&sctx->stat_lock);
|
||||
sctx->stat.tree_extents_scrubbed++;
|
||||
sctx->stat.tree_bytes_scrubbed += len;
|
||||
|
@ -2892,9 +2935,9 @@ static int scrub_extent_for_parity(struct scrub_parity *sparity,
|
|||
}
|
||||
|
||||
if (flags & BTRFS_EXTENT_FLAG_DATA) {
|
||||
blocksize = sctx->fs_info->sectorsize;
|
||||
blocksize = sparity->stripe_len;
|
||||
} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
|
||||
blocksize = sctx->fs_info->nodesize;
|
||||
blocksize = sparity->stripe_len;
|
||||
} else {
|
||||
blocksize = sctx->fs_info->sectorsize;
|
||||
WARN_ON(1);
|
||||
|
@ -3604,7 +3647,7 @@ again:
|
|||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = scrub_extent(sctx, extent_logical, extent_len,
|
||||
ret = scrub_extent(sctx, map, extent_logical, extent_len,
|
||||
extent_physical, extent_dev, flags,
|
||||
generation, extent_mirror_num,
|
||||
extent_logical - logical + physical);
|
||||
|
|
Загрузка…
Ссылка в новой задаче