btrfs: use bios instead of buffer_heads for super block writeout

Similar to the superblock read path, change the write path to use bios
and pages instead of buffer_heads. This allows us to skip the
buffer_head code when writing the superblock to disk.

This is based on a patch originally authored by Nikolay Borisov.

Co-developed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Johannes Thumshirn 2020-02-14 00:24:33 +09:00 коммит произвёл David Sterba
Родитель 8f32380d3f
Коммит 314b6dd0ee
1 изменённых файлов: 73 добавлений и 54 удалений

Просмотреть файл

@ -7,7 +7,6 @@
#include <linux/blkdev.h> #include <linux/blkdev.h>
#include <linux/radix-tree.h> #include <linux/radix-tree.h>
#include <linux/writeback.h> #include <linux/writeback.h>
#include <linux/buffer_head.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/kthread.h> #include <linux/kthread.h>
#include <linux/slab.h> #include <linux/slab.h>
@ -3395,25 +3394,34 @@ fail:
} }
ALLOW_ERROR_INJECTION(open_ctree, ERRNO); ALLOW_ERROR_INJECTION(open_ctree, ERRNO);
/*
 * Side-by-side diff of the superblock write completion callback; each line
 * below shows the removed text first and the added text second.
 *
 * Removed: btrfs_end_buffer_write_sync(bh, uptodate), the buffer_head
 * completion handler. When @uptodate is set it calls set_buffer_uptodate();
 * otherwise it takes the device from bh->b_private, warns through
 * btrfs_warn_rl_in_rcu(), calls clear_buffer_uptodate() and calls
 * btrfs_dev_stat_inc_and_print() with BTRFS_DEV_STAT_WRITE_ERRS. In both
 * cases it then calls unlock_buffer() and put_bh().
 *
 * Added: btrfs_end_super_write(bio), the bio completion handler. The device
 * is read from bio->bi_private. For every segment of the bio it looks at the
 * segment's page: if bio->bi_status is non-zero it warns (now also printing
 * blk_status_to_errno(bio->bi_status)), calls ClearPageUptodate() and
 * SetPageError(), and calls btrfs_dev_stat_inc_and_print() with
 * BTRFS_DEV_STAT_WRITE_ERRS; otherwise it calls SetPageUptodate(). Each page
 * then gets put_page() and unlock_page(), and finally bio_put() is called on
 * the bio.
 */
static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) static void btrfs_end_super_write(struct bio *bio)
{ {
if (uptodate) { struct btrfs_device *device = bio->bi_private;
set_buffer_uptodate(bh); struct bio_vec *bvec;
} else { struct bvec_iter_all iter_all;
struct btrfs_device *device = (struct btrfs_device *) struct page *page;
bh->b_private;
btrfs_warn_rl_in_rcu(device->fs_info, bio_for_each_segment_all(bvec, bio, iter_all) {
"lost page write due to IO error on %s", page = bvec->bv_page;
rcu_str_deref(device->name));
/* note, we don't set_buffer_write_io_error because we have if (bio->bi_status) {
* our own ways of dealing with the IO errors btrfs_warn_rl_in_rcu(device->fs_info,
*/ "lost page write due to IO error on %s (%d)",
clear_buffer_uptodate(bh); rcu_str_deref(device->name),
btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_WRITE_ERRS); blk_status_to_errno(bio->bi_status));
ClearPageUptodate(page);
SetPageError(page);
btrfs_dev_stat_inc_and_print(device,
BTRFS_DEV_STAT_WRITE_ERRS);
} else {
SetPageUptodate(page);
}
/*
 * NOTE(review): the page reference is dropped before the page is unlocked.
 * Presumably this is safe because another reference is still held at this
 * point (write_dev_supers() does a get_page() "for wait_dev_supers()") --
 * confirm before reordering or removing either call.
 */
put_page(page);
unlock_page(page);
} }
unlock_buffer(bh);
put_bh(bh); bio_put(bio);
} }
struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev, struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
@ -3473,25 +3481,23 @@ struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev)
/* /*
* Write superblock @sb to the @device. Do not wait for completion, all the * Write superblock @sb to the @device. Do not wait for completion, all the
* buffer heads we write are pinned. * pages we use for writing are locked.
* *
* Write @max_mirrors copies of the superblock, where 0 means default that fit * Write @max_mirrors copies of the superblock, where 0 means default that fit
* the expected device size at commit time. Note that max_mirrors must be * the expected device size at commit time. Note that max_mirrors must be
* same for write and wait phases. * same for write and wait phases.
* *
* Return number of errors when buffer head is not found or submission fails. * Return number of errors when page is not found or submission fails.
*/ */
static int write_dev_supers(struct btrfs_device *device, static int write_dev_supers(struct btrfs_device *device,
struct btrfs_super_block *sb, int max_mirrors) struct btrfs_super_block *sb, int max_mirrors)
{ {
struct btrfs_fs_info *fs_info = device->fs_info; struct btrfs_fs_info *fs_info = device->fs_info;
struct address_space *mapping = device->bdev->bd_inode->i_mapping;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
struct buffer_head *bh;
int i; int i;
int ret;
int errors = 0; int errors = 0;
u64 bytenr; u64 bytenr;
int op_flags;
if (max_mirrors == 0) if (max_mirrors == 0)
max_mirrors = BTRFS_SUPER_MIRROR_MAX; max_mirrors = BTRFS_SUPER_MIRROR_MAX;
@ -3499,6 +3505,10 @@ static int write_dev_supers(struct btrfs_device *device,
shash->tfm = fs_info->csum_shash; shash->tfm = fs_info->csum_shash;
for (i = 0; i < max_mirrors; i++) { for (i = 0; i < max_mirrors; i++) {
struct page *page;
struct bio *bio;
struct btrfs_super_block *disk_super;
bytenr = btrfs_sb_offset(i); bytenr = btrfs_sb_offset(i);
if (bytenr + BTRFS_SUPER_INFO_SIZE >= if (bytenr + BTRFS_SUPER_INFO_SIZE >=
device->commit_total_bytes) device->commit_total_bytes)
@ -3511,37 +3521,45 @@ static int write_dev_supers(struct btrfs_device *device,
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
crypto_shash_final(shash, sb->csum); crypto_shash_final(shash, sb->csum);
/* One reference for us, and we leave it for the caller */ page = find_or_create_page(mapping, bytenr >> PAGE_SHIFT,
bh = __getblk(device->bdev, bytenr / BTRFS_BDEV_BLOCKSIZE, GFP_NOFS);
BTRFS_SUPER_INFO_SIZE); if (!page) {
if (!bh) {
btrfs_err(device->fs_info, btrfs_err(device->fs_info,
"couldn't get super buffer head for bytenr %llu", "couldn't get super block page for bytenr %llu",
bytenr); bytenr);
errors++; errors++;
continue; continue;
} }
memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE); /* Bump the refcount for wait_dev_supers() */
get_page(page);
/* one reference for submit_bh */ disk_super = page_address(page);
get_bh(bh); memcpy(disk_super, sb, BTRFS_SUPER_INFO_SIZE);
set_buffer_uptodate(bh);
lock_buffer(bh);
bh->b_end_io = btrfs_end_buffer_write_sync;
bh->b_private = device;
/* /*
* we fua the first super. The others we allow * Directly use bios here instead of relying on the page cache
* to go down lazy. * to do I/O, so we don't lose the ability to do integrity
* checking.
*/ */
op_flags = REQ_SYNC | REQ_META | REQ_PRIO; bio = bio_alloc(GFP_NOFS, 1);
bio_set_dev(bio, device->bdev);
bio->bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
bio->bi_private = device;
bio->bi_end_io = btrfs_end_super_write;
__bio_add_page(bio, page, BTRFS_SUPER_INFO_SIZE,
offset_in_page(bytenr));
/*
* We FUA only the first super block. The others we allow to
* go down lazy and there's a short window where the on-disk
* copies might still contain the older version.
*/
bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO;
if (i == 0 && !btrfs_test_opt(device->fs_info, NOBARRIER)) if (i == 0 && !btrfs_test_opt(device->fs_info, NOBARRIER))
op_flags |= REQ_FUA; bio->bi_opf |= REQ_FUA;
ret = btrfsic_submit_bh(REQ_OP_WRITE, op_flags, bh);
if (ret) btrfsic_submit_bio(bio);
errors++;
} }
return errors < i ? 0 : -1; return errors < i ? 0 : -1;
} }
@ -3550,12 +3568,11 @@ static int write_dev_supers(struct btrfs_device *device,
* Wait for write completion of superblocks done by write_dev_supers, * Wait for write completion of superblocks done by write_dev_supers,
* @max_mirrors same for write and wait phases. * @max_mirrors same for write and wait phases.
* *
* Return number of errors when buffer head is not found or not marked up to * Return number of errors when page is not found or not marked up to
* date. * date.
*/ */
static int wait_dev_supers(struct btrfs_device *device, int max_mirrors) static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
{ {
struct buffer_head *bh;
int i; int i;
int errors = 0; int errors = 0;
bool primary_failed = false; bool primary_failed = false;
@ -3565,32 +3582,34 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
max_mirrors = BTRFS_SUPER_MIRROR_MAX; max_mirrors = BTRFS_SUPER_MIRROR_MAX;
for (i = 0; i < max_mirrors; i++) { for (i = 0; i < max_mirrors; i++) {
struct page *page;
bytenr = btrfs_sb_offset(i); bytenr = btrfs_sb_offset(i);
if (bytenr + BTRFS_SUPER_INFO_SIZE >= if (bytenr + BTRFS_SUPER_INFO_SIZE >=
device->commit_total_bytes) device->commit_total_bytes)
break; break;
bh = __find_get_block(device->bdev, page = find_get_page(device->bdev->bd_inode->i_mapping,
bytenr / BTRFS_BDEV_BLOCKSIZE, bytenr >> PAGE_SHIFT);
BTRFS_SUPER_INFO_SIZE); if (!page) {
if (!bh) {
errors++; errors++;
if (i == 0) if (i == 0)
primary_failed = true; primary_failed = true;
continue; continue;
} }
wait_on_buffer(bh); /* Page is submitted locked and unlocked once the IO completes */
if (!buffer_uptodate(bh)) { wait_on_page_locked(page);
if (PageError(page)) {
errors++; errors++;
if (i == 0) if (i == 0)
primary_failed = true; primary_failed = true;
} }
/* drop our reference */ /* Drop our reference */
brelse(bh); put_page(page);
/* drop the reference from the writing run */ /* Drop the reference from the writing run */
brelse(bh); put_page(page);
} }
/* log error, force error return */ /* log error, force error return */