btrfs: split the bio submission path into a separate file

The code used by btrfs_submit_bio only interacts with the rest of
volumes.c through __btrfs_map_block (which itself is a more generic
version of two exported helpers) and does not really have anything
to do with volumes.c.  Create a new bio.c file and a bio.h header
going along with it for the btrfs_bio-based storage layer, which
will grow even more going forward.

Also update the file with my copyright notice given that a large
part of the moved code was written or rewritten by me.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Christoph Hellwig 2022-11-15 10:44:05 +01:00 коммит произвёл David Sterba
Родитель 27137fac4c
Коммит 103c19723c
14 изменённых файлов: 438 добавлений и 403 удалений

Просмотреть файл

@ -31,7 +31,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \
subpage.o tree-mod-log.o extent-io-tree.o fs.o messages.o
subpage.o tree-mod-log.o extent-io-tree.o fs.o messages.o bio.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o

291
fs/btrfs/bio.c Normal file
Просмотреть файл

@ -0,0 +1,291 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2007 Oracle. All rights reserved.
* Copyright (C) 2022 Christoph Hellwig.
*/
#include <linux/bio.h>
#include "bio.h"
#include "ctree.h"
#include "volumes.h"
#include "raid56.h"
#include "async-thread.h"
#include "check-integrity.h"
#include "dev-replace.h"
#include "rcu-string.h"
#include "zoned.h"
static struct bio_set btrfs_bioset;
/*
 * Reset the btrfs-private part of @bbio and record the completion callback
 * (@end_io) together with its opaque argument (@private).
 *
 * Only the bytes in front of the embedded struct bio are cleared; the bio
 * itself has already been set up by the block layer and is left untouched.
 */
static inline void btrfs_bio_init(struct btrfs_bio *bbio,
				  btrfs_bio_end_io_t end_io, void *private)
{
	/* Everything that precedes the trailing 'bio' member. */
	const size_t private_len = offsetof(struct btrfs_bio, bio);

	memset(bbio, 0, private_len);
	bbio->private = private;
	bbio->end_io = end_io;
}
/*
 * Allocate a bio embedded in a btrfs_bio from the btrfs bio_set.
 *
 * The btrfs_bio is the main I/O container for btrfs and is used for all I/O
 * submitted through btrfs_submit_bio.
 *
 * @nr_vecs: number of bio_vecs to reserve
 * @opf:     operation and flags for the new bio
 * @end_io:  completion callback stored in the btrfs_bio
 * @private: opaque pointer stored in the btrfs_bio for @end_io
 *
 * Just like the underlying bio_alloc_bioset this will not fail, as it is
 * backed by a mempool.
 */
struct bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
			    btrfs_bio_end_io_t end_io, void *private)
{
	struct bio *new_bio = bio_alloc_bioset(NULL, nr_vecs, opf, GFP_NOFS,
					       &btrfs_bioset);

	btrfs_bio_init(btrfs_bio(new_bio), end_io, private);
	return new_bio;
}
/*
 * Clone @orig into a new btrfs_bio and trim the clone to a sub-range.
 *
 * @offset and @size are shifted right by 9 before being handed to bio_trim(),
 * i.e. they are expressed in bytes and converted to 512-byte sectors here.
 * Both must fit in an unsigned int.
 *
 * The trimmed iterator is saved in btrfs_bio::iter so the range can be walked
 * again later.
 */
struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size,
				    btrfs_bio_end_io_t end_io, void *private)
{
	struct btrfs_bio *clone_bbio;
	struct bio *clone;

	ASSERT(offset <= UINT_MAX && size <= UINT_MAX);

	clone = bio_alloc_clone(orig->bi_bdev, orig, GFP_NOFS, &btrfs_bioset);
	clone_bbio = btrfs_bio(clone);
	btrfs_bio_init(clone_bbio, end_io, private);

	bio_trim(clone, offset >> 9, size >> 9);
	clone_bbio->iter = clone->bi_iter;

	return clone;
}
/*
 * Account a failed device bio in the per-device error statistics.
 *
 * Nothing is recorded when there is no device / block device to charge, or
 * when the status is anything other than BLK_STS_IOERR or BLK_STS_TARGET.
 *
 * A bio is charged as either a write error or a read error, never both:
 * writes bump WRITE_ERRS, everything else bumps READ_ERRS unless it was a
 * readahead request.  Without the 'else', every failed write (which never
 * carries REQ_RAHEAD) would also be counted as a read error.
 *
 * A failed bio carrying REQ_PREFLUSH is additionally counted in FLUSH_ERRS.
 */
static void btrfs_log_dev_io_error(struct bio *bio, struct btrfs_device *dev)
{
	if (!dev || !dev->bdev)
		return;
	if (bio->bi_status != BLK_STS_IOERR && bio->bi_status != BLK_STS_TARGET)
		return;

	if (btrfs_op(bio) == BTRFS_MAP_WRITE)
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
	else if (!(bio->bi_opf & REQ_RAHEAD))
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
	if (bio->bi_opf & REQ_PREFLUSH)
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_FLUSH_ERRS);
}
/*
 * Pick the workqueue used to finish @bio: bios flagged REQ_META go to the
 * metadata end-io workers, all others to the regular end-io workers.
 */
static struct workqueue_struct *btrfs_end_io_wq(struct btrfs_fs_info *fs_info,
						struct bio *bio)
{
	return (bio->bi_opf & REQ_META) ? fs_info->endio_meta_workers :
					  fs_info->endio_workers;
}
/*
 * Work item handler: recover the btrfs_bio that embeds @work and invoke its
 * end_io callback.
 */
static void btrfs_end_bio_work(struct work_struct *work)
{
	struct btrfs_bio *bbio;

	bbio = container_of(work, struct btrfs_bio, end_io_work);
	bbio->end_io(bbio);
}
/*
 * bi_end_io handler for the single-mirror fast path, where bi_private holds
 * the fs_info.
 *
 * Drops the bio counter, logs a device error if the bio failed, and then
 * completes the btrfs_bio: reads are handed to an end-io workqueue, every
 * other operation calls end_io directly from here.
 */
static void btrfs_simple_end_io(struct bio *bio)
{
	struct btrfs_bio *bbio = btrfs_bio(bio);
	struct btrfs_fs_info *fs_info = bio->bi_private;

	btrfs_bio_counter_dec(fs_info);

	if (bio->bi_status)
		btrfs_log_dev_io_error(bio, bbio->device);

	if (bio_op(bio) != REQ_OP_READ) {
		bbio->end_io(bbio);
		return;
	}

	INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
	queue_work(btrfs_end_io_wq(fs_info, bio), &bbio->end_io_work);
}
/*
 * bi_end_io handler for the parity RAID path, where bi_private holds the
 * btrfs_io_context.
 *
 * Copies the mirror number from the io context into the btrfs_bio before
 * calling end_io, then drops the io context reference.
 */
static void btrfs_raid56_end_io(struct bio *bio)
{
	struct btrfs_bio *bbio = btrfs_bio(bio);
	struct btrfs_io_context *io_ctx = bio->bi_private;

	btrfs_bio_counter_dec(io_ctx->fs_info);

	bbio->mirror_num = io_ctx->mirror_num;
	bbio->end_io(bbio);

	btrfs_put_bioc(io_ctx);
}
static void btrfs_orig_write_end_io(struct bio *bio)
{
struct btrfs_io_stripe *stripe = bio->bi_private;
struct btrfs_io_context *bioc = stripe->bioc;
struct btrfs_bio *bbio = btrfs_bio(bio);
btrfs_bio_counter_dec(bioc->fs_info);
if (bio->bi_status) {
atomic_inc(&bioc->error);
btrfs_log_dev_io_error(bio, stripe->dev);
}
/*
* Only send an error to the higher layers if it is beyond the tolerance
* threshold.
*/
if (atomic_read(&bioc->error) > bioc->max_errors)
bio->bi_status = BLK_STS_IOERR;
else
bio->bi_status = BLK_STS_OK;
bbio->end_io(bbio);
btrfs_put_bioc(bioc);
}
static void btrfs_clone_write_end_io(struct bio *bio)
{
struct btrfs_io_stripe *stripe = bio->bi_private;
if (bio->bi_status) {
atomic_inc(&stripe->bioc->error);
btrfs_log_dev_io_error(bio, stripe->dev);
}
/* Pass on control to the original bio this one was cloned from */
bio_endio(stripe->bioc->orig_bio);
bio_put(bio);
}
/*
 * Point @bio at @dev and submit it to the block layer.
 *
 * The bio is failed immediately with an I/O error when the device is absent,
 * has no block device, is flagged missing, or is not writeable and the bio
 * is a write.
 */
static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
{
	if (!dev || !dev->bdev ||
	    test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
		bio_io_error(bio);
		return;
	}
	if (btrfs_op(bio) == BTRFS_MAP_WRITE &&
	    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
		bio_io_error(bio);
		return;
	}

	bio_set_dev(bio, dev->bdev);

	if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
		u64 physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;

		if (!btrfs_dev_is_sequential(dev, physical)) {
			/* Not a sequential zone: turn it into a plain write. */
			bio->bi_opf &= ~REQ_OP_ZONE_APPEND;
			bio->bi_opf |= REQ_OP_WRITE;
		} else {
			/*
			 * For zone append writes, bi_sector must point to the
			 * beginning of the zone.
			 */
			bio->bi_iter.bi_sector =
				round_down(physical, dev->fs_info->zone_size) >>
				SECTOR_SHIFT;
		}
	}

	btrfs_debug_in_rcu(dev->fs_info,
	"%s: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
		__func__, bio_op(bio), bio->bi_opf, bio->bi_iter.bi_sector,
		(unsigned long)dev->bdev->bd_dev, btrfs_dev_name(dev),
		dev->devid, bio->bi_iter.bi_size);

	btrfsic_check_bio(bio);
	submit_bio(bio);
}
static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
{
struct bio *orig_bio = bioc->orig_bio, *bio;
ASSERT(bio_op(orig_bio) != REQ_OP_READ);
/* Reuse the bio embedded into the btrfs_bio for the last mirror */
if (dev_nr == bioc->num_stripes - 1) {
bio = orig_bio;
bio->bi_end_io = btrfs_orig_write_end_io;
} else {
bio = bio_alloc_clone(NULL, orig_bio, GFP_NOFS, &fs_bio_set);
bio_inc_remaining(orig_bio);
bio->bi_end_io = btrfs_clone_write_end_io;
}
bio->bi_private = &bioc->stripes[dev_nr];
bio->bi_iter.bi_sector = bioc->stripes[dev_nr].physical >> SECTOR_SHIFT;
bioc->stripes[dev_nr].bioc = bioc;
btrfs_submit_dev_bio(bioc->stripes[dev_nr].dev, bio);
}
void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror_num)
{
u64 logical = bio->bi_iter.bi_sector << 9;
u64 length = bio->bi_iter.bi_size;
u64 map_length = length;
struct btrfs_io_context *bioc = NULL;
struct btrfs_io_stripe smap;
int ret;
btrfs_bio_counter_inc_blocked(fs_info);
ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
&bioc, &smap, &mirror_num, 1);
if (ret) {
btrfs_bio_counter_dec(fs_info);
btrfs_bio_end_io(btrfs_bio(bio), errno_to_blk_status(ret));
return;
}
if (map_length < length) {
btrfs_crit(fs_info,
"mapping failed logical %llu bio len %llu len %llu",
logical, length, map_length);
BUG();
}
if (!bioc) {
/* Single mirror read/write fast path */
btrfs_bio(bio)->mirror_num = mirror_num;
btrfs_bio(bio)->device = smap.dev;
bio->bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT;
bio->bi_private = fs_info;
bio->bi_end_io = btrfs_simple_end_io;
btrfs_submit_dev_bio(smap.dev, bio);
} else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
/* Parity RAID write or read recovery */
bio->bi_private = bioc;
bio->bi_end_io = btrfs_raid56_end_io;
if (bio_op(bio) == REQ_OP_READ)
raid56_parity_recover(bio, bioc, mirror_num);
else
raid56_parity_write(bio, bioc);
} else {
/* Write to multiple mirrors */
int total_devs = bioc->num_stripes;
int dev_nr;
bioc->orig_bio = bio;
for (dev_nr = 0; dev_nr < total_devs; dev_nr++)
btrfs_submit_mirrored_bio(bioc, dev_nr);
}
}
/*
 * Set up the bio_set that all btrfs_bio allocations come from.
 *
 * The front padding equals the offset of the embedded bio, so each bio from
 * this set is preceded by room for the btrfs-private fields.
 *
 * Returns 0 on success, or -ENOMEM if bioset_init() reports any failure.
 */
int __init btrfs_bioset_init(void)
{
	int ret = bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
			      offsetof(struct btrfs_bio, bio),
			      BIOSET_NEED_BVECS);

	return ret ? -ENOMEM : 0;
}
/* Release the bio_set that btrfs_bioset_init() set up. */
void __cold btrfs_bioset_exit(void)
{
bioset_exit(&btrfs_bioset);
}

127
fs/btrfs/bio.h Normal file
Просмотреть файл

@ -0,0 +1,127 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2007 Oracle. All rights reserved.
* Copyright (C) 2022 Christoph Hellwig.
*/
#ifndef BTRFS_BIO_H
#define BTRFS_BIO_H
#include <linux/bio.h>
#include <linux/workqueue.h>
#include "tree-checker.h"
struct btrfs_bio;
struct btrfs_fs_info;
#define BTRFS_BIO_INLINE_CSUM_SIZE 64
/*
* Maximum number of sectors for a single bio to limit the size of the
* checksum array. This matches the number of bio_vecs per bio and thus the
* I/O size for buffered I/O.
*/
#define BTRFS_MAX_BIO_SECTORS (256)
typedef void (*btrfs_bio_end_io_t)(struct btrfs_bio *bbio);
/*
* Additional info to pass along bio.
*
* Mostly for btrfs specific features like csum and mirror_num.
*
* All fields in front of the embedded @bio are zeroed by btrfs_bio_init()
* whenever a btrfs_bio is allocated or cloned; only @end_io and @private are
* then filled in from the caller's arguments.
*/
struct btrfs_bio {
/*
* Mirror the I/O was mapped to. Set by btrfs_submit_bio() on the
* single-mirror path and copied from the io context on RAID56 completion.
* Limited to 7 bits.
*/
unsigned int mirror_num:7;
/*
* Extra indicator for metadata bios.
* For some btrfs bios they use pages without a mapping, thus
* we can not rely on page->mapping->host to determine if
* it's a metadata bio.
*/
unsigned int is_metadata:1;
/*
* Saved iterator; btrfs_bio_for_each_sector() starts from it, and
* btrfs_bio_clone_partial() stores the trimmed clone's bi_iter here.
*/
struct bvec_iter iter;
/* for direct I/O */
u64 file_offset;
/*
* @device is for stripe IO submission. It is set on the single-mirror
* path and used there to attribute I/O errors at completion time.
*/
struct btrfs_device *device;
/* Which arm is valid depends on @is_metadata (see btrfs_bio_free_csum). */
union {
/* For data checksum verification. */
struct {
/*
* Points either at @csum_inline or at separately allocated
* memory; only the latter is kfree()d by btrfs_bio_free_csum().
*/
u8 *csum;
u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
};
/* For metadata parentness verification. */
struct btrfs_tree_parent_check parent_check;
};
/* End I/O information supplied to btrfs_bio_alloc */
btrfs_bio_end_io_t end_io;
void *private;
/* For read end I/O handling: used to defer @end_io to a workqueue. */
struct work_struct end_io_work;
/*
* This member must come last, bio_alloc_bioset will allocate enough
* bytes for entire btrfs_bio but relies on bio being last.
*/
struct bio bio;
};
/*
 * Map a struct bio back to the btrfs_bio it is embedded in.  Only valid for
 * bios that really are the 'bio' member of a struct btrfs_bio.
 */
static inline struct btrfs_bio *btrfs_bio(struct bio *bio)
{
	struct btrfs_bio *bbio = container_of(bio, struct btrfs_bio, bio);

	return bbio;
}
int __init btrfs_bioset_init(void);
void __cold btrfs_bioset_exit(void);
struct bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
btrfs_bio_end_io_t end_io, void *private);
struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size,
btrfs_bio_end_io_t end_io, void *private);
/*
 * Complete @bbio with @status: store the status in the embedded bio and call
 * the btrfs_bio's end_io callback.
 */
static inline void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
{
	struct bio *bio = &bbio->bio;

	bio->bi_status = status;
	bbio->end_io(bbio);
}
/*
 * Free the checksum buffer of a data btrfs_bio.
 *
 * Metadata bios are skipped, as are bios whose csum pointer refers to the
 * embedded csum_inline storage.  After freeing, csum is reset to NULL.
 */
static inline void btrfs_bio_free_csum(struct btrfs_bio *bbio)
{
	if (!bbio->is_metadata && bbio->csum != bbio->csum_inline) {
		kfree(bbio->csum);
		bbio->csum = NULL;
	}
}
/*
* Iterate through a btrfs_bio (@bbio) on a per-sector basis.
*
* bvl - struct bio_vec
* bbio - struct btrfs_bio
* iters - struct bvec_iter
* bio_offset - unsigned int
*/
#define btrfs_bio_for_each_sector(fs_info, bvl, bbio, iter, bio_offset) \
for ((iter) = (bbio)->iter, (bio_offset) = 0; \
(iter).bi_size && \
(((bvl) = bio_iter_iovec((&(bbio)->bio), (iter))), 1); \
(bio_offset) += fs_info->sectorsize, \
bio_advance_iter_single(&(bbio)->bio, &(iter), \
(fs_info)->sectorsize))
void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
int mirror_num);
int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
u64 length, u64 logical, struct page *page,
unsigned int pg_offset, int mirror_num);
#endif

Просмотреть файл

@ -27,7 +27,7 @@
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "volumes.h"
#include "bio.h"
#include "ordered-data.h"
#include "compression.h"
#include "extent_io.h"

Просмотреть файл

@ -23,7 +23,7 @@
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "volumes.h"
#include "bio.h"
#include "print-tree.h"
#include "locking.h"
#include "tree-log.h"

Просмотреть файл

@ -42,6 +42,7 @@
#include "root-tree.h"
#include "file-item.h"
#include "orphan.h"
#include "tree-checker.h"
#undef SCRAMBLE_DELAYED_REFS

Просмотреть файл

@ -20,7 +20,7 @@
#include "extent_map.h"
#include "ctree.h"
#include "btrfs_inode.h"
#include "volumes.h"
#include "bio.h"
#include "check-integrity.h"
#include "locking.h"
#include "rcu-string.h"

Просмотреть файл

@ -14,7 +14,7 @@
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "volumes.h"
#include "bio.h"
#include "print-tree.h"
#include "compression.h"
#include "fs.h"

Просмотреть файл

@ -43,7 +43,7 @@
#include "ordered-data.h"
#include "xattr.h"
#include "tree-log.h"
#include "volumes.h"
#include "bio.h"
#include "compression.h"
#include "locking.h"
#include "free-space-cache.h"

Просмотреть файл

@ -35,6 +35,7 @@
#include "file-item.h"
#include "relocation.h"
#include "super.h"
#include "tree-checker.h"
/*
* Relocation overview

Просмотреть файл

@ -35,7 +35,7 @@
#include "print-tree.h"
#include "props.h"
#include "xattr.h"
#include "volumes.h"
#include "bio.h"
#include "export.h"
#include "compression.h"
#include "rcu-string.h"

Просмотреть файл

@ -29,6 +29,7 @@
#include "file-item.h"
#include "file.h"
#include "orphan.h"
#include "tree-checker.h"
#define MAX_CONFLICT_INODES 10

Просмотреть файл

@ -5,12 +5,9 @@
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/ratelimit.h>
#include <linux/kthread.h>
#include <linux/raid/pq.h>
#include <linux/semaphore.h>
#include <linux/uuid.h>
#include <linux/list_sort.h>
@ -23,8 +20,6 @@
#include "print-tree.h"
#include "volumes.h"
#include "raid56.h"
#include "async-thread.h"
#include "check-integrity.h"
#include "rcu-string.h"
#include "dev-replace.h"
#include "sysfs.h"
@ -41,8 +36,6 @@
#include "scrub.h"
#include "super.h"
static struct bio_set btrfs_bioset;
#define BTRFS_BLOCK_GROUP_STRIPE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \
BTRFS_BLOCK_GROUP_RAID10 | \
BTRFS_BLOCK_GROUP_RAID56_MASK)
@ -255,11 +248,6 @@ out_overflow:;
static int init_first_rw_device(struct btrfs_trans_handle *trans);
static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info);
static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
enum btrfs_map_op op, u64 logical, u64 *length,
struct btrfs_io_context **bioc_ret,
struct btrfs_io_stripe *smap,
int *mirror_num_ret, int need_raid_map);
/*
* Device locking
@ -6364,11 +6352,11 @@ static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup *
stripe_offset + stripe_nr * map->stripe_len;
}
static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
enum btrfs_map_op op, u64 logical, u64 *length,
struct btrfs_io_context **bioc_ret,
struct btrfs_io_stripe *smap,
int *mirror_num_ret, int need_raid_map)
int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 logical, u64 *length,
struct btrfs_io_context **bioc_ret,
struct btrfs_io_stripe *smap, int *mirror_num_ret,
int need_raid_map)
{
struct extent_map *em;
struct map_lookup *map;
@ -6651,266 +6639,6 @@ int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
NULL, NULL, 1);
}
/*
* Initialize a btrfs_bio structure. This skips the embedded bio itself as it
* is already initialized by the block layer.
*/
static inline void btrfs_bio_init(struct btrfs_bio *bbio,
btrfs_bio_end_io_t end_io, void *private)
{
memset(bbio, 0, offsetof(struct btrfs_bio, bio));
bbio->end_io = end_io;
bbio->private = private;
}
/*
* Allocate a btrfs_bio structure. The btrfs_bio is the main I/O container for
* btrfs, and is used for all I/O submitted through btrfs_submit_bio.
*
* Just like the underlying bio_alloc_bioset it will not fail as it is backed by
* a mempool.
*/
struct bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
btrfs_bio_end_io_t end_io, void *private)
{
struct bio *bio;
bio = bio_alloc_bioset(NULL, nr_vecs, opf, GFP_NOFS, &btrfs_bioset);
btrfs_bio_init(btrfs_bio(bio), end_io, private);
return bio;
}
struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size,
btrfs_bio_end_io_t end_io, void *private)
{
struct bio *bio;
struct btrfs_bio *bbio;
ASSERT(offset <= UINT_MAX && size <= UINT_MAX);
bio = bio_alloc_clone(orig->bi_bdev, orig, GFP_NOFS, &btrfs_bioset);
bbio = btrfs_bio(bio);
btrfs_bio_init(bbio, end_io, private);
bio_trim(bio, offset >> 9, size >> 9);
bbio->iter = bio->bi_iter;
return bio;
}
static void btrfs_log_dev_io_error(struct bio *bio, struct btrfs_device *dev)
{
if (!dev || !dev->bdev)
return;
if (bio->bi_status != BLK_STS_IOERR && bio->bi_status != BLK_STS_TARGET)
return;
if (btrfs_op(bio) == BTRFS_MAP_WRITE)
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
if (!(bio->bi_opf & REQ_RAHEAD))
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
if (bio->bi_opf & REQ_PREFLUSH)
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_FLUSH_ERRS);
}
static struct workqueue_struct *btrfs_end_io_wq(struct btrfs_fs_info *fs_info,
struct bio *bio)
{
if (bio->bi_opf & REQ_META)
return fs_info->endio_meta_workers;
return fs_info->endio_workers;
}
static void btrfs_end_bio_work(struct work_struct *work)
{
struct btrfs_bio *bbio =
container_of(work, struct btrfs_bio, end_io_work);
bbio->end_io(bbio);
}
static void btrfs_simple_end_io(struct bio *bio)
{
struct btrfs_fs_info *fs_info = bio->bi_private;
struct btrfs_bio *bbio = btrfs_bio(bio);
btrfs_bio_counter_dec(fs_info);
if (bio->bi_status)
btrfs_log_dev_io_error(bio, bbio->device);
if (bio_op(bio) == REQ_OP_READ) {
INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
queue_work(btrfs_end_io_wq(fs_info, bio), &bbio->end_io_work);
} else {
bbio->end_io(bbio);
}
}
static void btrfs_raid56_end_io(struct bio *bio)
{
struct btrfs_io_context *bioc = bio->bi_private;
struct btrfs_bio *bbio = btrfs_bio(bio);
btrfs_bio_counter_dec(bioc->fs_info);
bbio->mirror_num = bioc->mirror_num;
bbio->end_io(bbio);
btrfs_put_bioc(bioc);
}
static void btrfs_orig_write_end_io(struct bio *bio)
{
struct btrfs_io_stripe *stripe = bio->bi_private;
struct btrfs_io_context *bioc = stripe->bioc;
struct btrfs_bio *bbio = btrfs_bio(bio);
btrfs_bio_counter_dec(bioc->fs_info);
if (bio->bi_status) {
atomic_inc(&bioc->error);
btrfs_log_dev_io_error(bio, stripe->dev);
}
/*
* Only send an error to the higher layers if it is beyond the tolerance
* threshold.
*/
if (atomic_read(&bioc->error) > bioc->max_errors)
bio->bi_status = BLK_STS_IOERR;
else
bio->bi_status = BLK_STS_OK;
bbio->end_io(bbio);
btrfs_put_bioc(bioc);
}
static void btrfs_clone_write_end_io(struct bio *bio)
{
struct btrfs_io_stripe *stripe = bio->bi_private;
if (bio->bi_status) {
atomic_inc(&stripe->bioc->error);
btrfs_log_dev_io_error(bio, stripe->dev);
}
/* Pass on control to the original bio this one was cloned from */
bio_endio(stripe->bioc->orig_bio);
bio_put(bio);
}
static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
{
if (!dev || !dev->bdev ||
test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
(btrfs_op(bio) == BTRFS_MAP_WRITE &&
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
bio_io_error(bio);
return;
}
bio_set_dev(bio, dev->bdev);
/*
* For zone append writing, bi_sector must point the beginning of the
* zone
*/
if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
u64 physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
if (btrfs_dev_is_sequential(dev, physical)) {
u64 zone_start = round_down(physical,
dev->fs_info->zone_size);
bio->bi_iter.bi_sector = zone_start >> SECTOR_SHIFT;
} else {
bio->bi_opf &= ~REQ_OP_ZONE_APPEND;
bio->bi_opf |= REQ_OP_WRITE;
}
}
btrfs_debug_in_rcu(dev->fs_info,
"%s: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
__func__, bio_op(bio), bio->bi_opf, bio->bi_iter.bi_sector,
(unsigned long)dev->bdev->bd_dev, btrfs_dev_name(dev),
dev->devid, bio->bi_iter.bi_size);
btrfsic_check_bio(bio);
submit_bio(bio);
}
static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
{
struct bio *orig_bio = bioc->orig_bio, *bio;
ASSERT(bio_op(orig_bio) != REQ_OP_READ);
/* Reuse the bio embedded into the btrfs_bio for the last mirror */
if (dev_nr == bioc->num_stripes - 1) {
bio = orig_bio;
bio->bi_end_io = btrfs_orig_write_end_io;
} else {
bio = bio_alloc_clone(NULL, orig_bio, GFP_NOFS, &fs_bio_set);
bio_inc_remaining(orig_bio);
bio->bi_end_io = btrfs_clone_write_end_io;
}
bio->bi_private = &bioc->stripes[dev_nr];
bio->bi_iter.bi_sector = bioc->stripes[dev_nr].physical >> SECTOR_SHIFT;
bioc->stripes[dev_nr].bioc = bioc;
btrfs_submit_dev_bio(bioc->stripes[dev_nr].dev, bio);
}
void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror_num)
{
u64 logical = bio->bi_iter.bi_sector << 9;
u64 length = bio->bi_iter.bi_size;
u64 map_length = length;
struct btrfs_io_context *bioc = NULL;
struct btrfs_io_stripe smap;
int ret;
btrfs_bio_counter_inc_blocked(fs_info);
ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
&bioc, &smap, &mirror_num, 1);
if (ret) {
btrfs_bio_counter_dec(fs_info);
btrfs_bio_end_io(btrfs_bio(bio), errno_to_blk_status(ret));
return;
}
if (map_length < length) {
btrfs_crit(fs_info,
"mapping failed logical %llu bio len %llu len %llu",
logical, length, map_length);
BUG();
}
if (!bioc) {
/* Single mirror read/write fast path */
btrfs_bio(bio)->mirror_num = mirror_num;
btrfs_bio(bio)->device = smap.dev;
bio->bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT;
bio->bi_private = fs_info;
bio->bi_end_io = btrfs_simple_end_io;
btrfs_submit_dev_bio(smap.dev, bio);
} else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
/* Parity RAID write or read recovery */
bio->bi_private = bioc;
bio->bi_end_io = btrfs_raid56_end_io;
if (bio_op(bio) == REQ_OP_READ)
raid56_parity_recover(bio, bioc, mirror_num);
else
raid56_parity_write(bio, bioc);
} else {
/* Write to multiple mirrors */
int total_devs = bioc->num_stripes;
int dev_nr;
bioc->orig_bio = bio;
for (dev_nr = 0; dev_nr < total_devs; dev_nr++)
btrfs_submit_mirrored_bio(bioc, dev_nr);
}
}
static bool dev_args_match_fs_devices(const struct btrfs_dev_lookup_args *args,
const struct btrfs_fs_devices *fs_devices)
{
@ -8440,17 +8168,3 @@ bool btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical)
return true;
}
int __init btrfs_bioset_init(void)
{
if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
offsetof(struct btrfs_bio, bio),
BIOSET_NEED_BVECS))
return -ENOMEM;
return 0;
}
void __cold btrfs_bioset_exit(void)
{
bioset_exit(&btrfs_bioset);
}

Просмотреть файл

@ -6,7 +6,6 @@
#ifndef BTRFS_VOLUMES_H
#define BTRFS_VOLUMES_H
#include <linux/bio.h>
#include <linux/sort.h>
#include <linux/btrfs.h>
#include "async-thread.h"
@ -373,8 +372,6 @@ struct btrfs_fs_devices {
enum btrfs_read_policy read_policy;
};
#define BTRFS_BIO_INLINE_CSUM_SIZE 64
#define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info) \
- sizeof(struct btrfs_chunk)) \
/ sizeof(struct btrfs_stripe) + 1)
@ -384,107 +381,6 @@ struct btrfs_fs_devices {
- 2 * sizeof(struct btrfs_chunk)) \
/ sizeof(struct btrfs_stripe) + 1)
/*
* Maximum number of sectors for a single bio to limit the size of the
* checksum array. This matches the number of bio_vecs per bio and thus the
* I/O size for buffered I/O.
*/
#define BTRFS_MAX_BIO_SECTORS (256)
typedef void (*btrfs_bio_end_io_t)(struct btrfs_bio *bbio);
/*
* Additional info to pass along bio.
*
* Mostly for btrfs specific features like csum and mirror_num.
*/
struct btrfs_bio {
unsigned int mirror_num:7;
/*
* Extra indicator for metadata bios.
* For some btrfs bios they use pages without a mapping, thus
* we can not rely on page->mapping->host to determine if
* it's a metadata bio.
*/
unsigned int is_metadata:1;
struct bvec_iter iter;
/* for direct I/O */
u64 file_offset;
/* @device is for stripe IO submission. */
struct btrfs_device *device;
union {
/* For data checksum verification. */
struct {
u8 *csum;
u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
};
/* For metadata parentness verification. */
struct btrfs_tree_parent_check parent_check;
};
/* End I/O information supplied to btrfs_bio_alloc */
btrfs_bio_end_io_t end_io;
void *private;
/* For read end I/O handling */
struct work_struct end_io_work;
/*
* This member must come last, bio_alloc_bioset will allocate enough
* bytes for entire btrfs_bio but relies on bio being last.
*/
struct bio bio;
};
static inline struct btrfs_bio *btrfs_bio(struct bio *bio)
{
return container_of(bio, struct btrfs_bio, bio);
}
int __init btrfs_bioset_init(void);
void __cold btrfs_bioset_exit(void);
struct bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
btrfs_bio_end_io_t end_io, void *private);
struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size,
btrfs_bio_end_io_t end_io, void *private);
static inline void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
{
bbio->bio.bi_status = status;
bbio->end_io(bbio);
}
static inline void btrfs_bio_free_csum(struct btrfs_bio *bbio)
{
if (bbio->is_metadata)
return;
if (bbio->csum != bbio->csum_inline) {
kfree(bbio->csum);
bbio->csum = NULL;
}
}
/*
* Iterate through a btrfs_bio (@bbio) on a per-sector basis.
*
* bvl - struct bio_vec
* bbio - struct btrfs_bio
* iters - struct bvec_iter
* bio_offset - unsigned int
*/
#define btrfs_bio_for_each_sector(fs_info, bvl, bbio, iter, bio_offset) \
for ((iter) = (bbio)->iter, (bio_offset) = 0; \
(iter).bi_size && \
(((bvl) = bio_iter_iovec((&(bbio)->bio), (iter))), 1); \
(bio_offset) += fs_info->sectorsize, \
bio_advance_iter_single(&(bbio)->bio, &(iter), \
(fs_info)->sectorsize))
struct btrfs_io_stripe {
struct btrfs_device *dev;
union {
@ -641,6 +537,11 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 logical, u64 *length,
struct btrfs_io_context **bioc_ret);
int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 logical, u64 *length,
struct btrfs_io_context **bioc_ret,
struct btrfs_io_stripe *smap, int *mirror_num_ret,
int need_raid_map);
struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info,
u64 logical, u64 *length_ret,
u32 *num_stripes);
@ -652,7 +553,6 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans,
u64 type);
void btrfs_mapping_tree_free(struct extent_map_tree *tree);
void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror_num);
int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
fmode_t flags, void *holder);
struct btrfs_device *btrfs_scan_one_device(const char *path,