@@ -91,7 +91,7 @@ static inline struct r10bio *get_resync_r10bio(struct bio *bio)
 static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
 {
 	struct r10conf *conf = data;
-	int size = offsetof(struct r10bio, devs[conf->geo.raid_disks]);
+	int size = offsetof(struct r10bio, devs[conf->copies]);
 
 	/* allocate a r10bio with room for raid_disks entries in the
 	 * bios array */
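
This hunk shrinks the r10bio allocation back from one devs[] slot per raid disk to one per copy. The offsetof(struct r10bio, devs[n]) idiom sizes a structure that ends in a flexible array to hold exactly n trailing entries. A minimal user-space sketch of the same sizing trick; the struct and names below are illustrative stand-ins, not kernel code:

```c
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct item {
	void *bio;
	void *repl_bio;
};

struct head {
	int copies;
	struct item devs[];	/* flexible array member */
};

static struct head *head_alloc(int ncopies)
{
	/* header plus exactly ncopies trailing entries; GCC/Clang accept a
	 * runtime index inside offsetof(), which is what the kernel idiom
	 * relies on */
	struct head *h = calloc(1, offsetof(struct head, devs[ncopies]));

	if (h)
		h->copies = ncopies;
	return h;
}

int main(void)
{
	struct head *h = head_alloc(4);

	if (h)
		printf("%zu bytes for %d copies\n",
		       offsetof(struct head, devs[4]), h->copies);
	free(h);
	return 0;
}
```
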
@@ -238,7 +238,7 @@ static void put_all_bios(struct r10conf *conf, struct r10bio *r10_bio)
 {
 	int i;
 
-	for (i = 0; i < conf->geo.raid_disks; i++) {
+	for (i = 0; i < conf->copies; i++) {
 		struct bio **bio = & r10_bio->devs[i].bio;
 		if (!BIO_SPECIAL(*bio))
 			bio_put(*bio);
@@ -327,7 +327,7 @@ static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio,
 	int slot;
 	int repl = 0;
 
-	for (slot = 0; slot < conf->geo.raid_disks; slot++) {
+	for (slot = 0; slot < conf->copies; slot++) {
 		if (r10_bio->devs[slot].bio == bio)
 			break;
 		if (r10_bio->devs[slot].repl_bio == bio) {
@@ -336,6 +336,7 @@ static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio,
 		}
 	}
 
+	BUG_ON(slot == conf->copies);
 	update_head_pos(slot, r10_bio);
 
 	if (slotp)
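
The two hunks above restore the slot-scan bound to conf->copies and bring back the BUG_ON() for the case where no slot matched. The lookup itself is a linear scan that also reports whether the completing bio was the replacement leg. A user-space sketch of that pattern, with stand-in types and hypothetical names:

```c
#include <assert.h>
#include <stdio.h>

#define COPIES 2

struct slot {
	int devnum;
	void *bio;
	void *repl_bio;
};

static int find_slot(struct slot *devs, void *bio, int *slotp, int *replp)
{
	int slot, repl = 0;

	for (slot = 0; slot < COPIES; slot++) {
		if (devs[slot].bio == bio)
			break;
		if (devs[slot].repl_bio == bio) {
			repl = 1;
			break;
		}
	}
	assert(slot != COPIES);	/* mirrors the BUG_ON in the hunk above */
	*slotp = slot;
	*replp = repl;
	return devs[slot].devnum;
}

int main(void)
{
	int a, b;
	struct slot devs[COPIES] = { { 3, &a, 0 }, { 5, 0, &b } };
	int slot, repl;

	int dev = find_slot(devs, &b, &slot, &repl);
	printf("devnum %d, slot %d, repl %d\n", dev, slot, repl);	/* 5 1 1 */
	return 0;
}
```
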
@@ -1275,75 +1276,12 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
 	}
 }
 
-static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
-{
-	int i;
-	struct r10conf *conf = mddev->private;
-	struct md_rdev *blocked_rdev;
-
-retry_wait:
-	blocked_rdev = NULL;
-	rcu_read_lock();
-	for (i = 0; i < conf->copies; i++) {
-		struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
-		struct md_rdev *rrdev = rcu_dereference(
-			conf->mirrors[i].replacement);
-		if (rdev == rrdev)
-			rrdev = NULL;
-		if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
-			atomic_inc(&rdev->nr_pending);
-			blocked_rdev = rdev;
-			break;
-		}
-		if (rrdev && unlikely(test_bit(Blocked, &rrdev->flags))) {
-			atomic_inc(&rrdev->nr_pending);
-			blocked_rdev = rrdev;
-			break;
-		}
-
-		if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
-			sector_t first_bad;
-			sector_t dev_sector = r10_bio->devs[i].addr;
-			int bad_sectors;
-			int is_bad;
-
-			/* Discard request doesn't care the write result
-			 * so it doesn't need to wait blocked disk here.
-			 */
-			if (!r10_bio->sectors)
-				continue;
-
-			is_bad = is_badblock(rdev, dev_sector, r10_bio->sectors,
-					     &first_bad, &bad_sectors);
-			if (is_bad < 0) {
-				/* Mustn't write here until the bad block
-				 * is acknowledged
-				 */
-				atomic_inc(&rdev->nr_pending);
-				set_bit(BlockedBadBlocks, &rdev->flags);
-				blocked_rdev = rdev;
-				break;
-			}
-		}
-	}
-	rcu_read_unlock();
-
-	if (unlikely(blocked_rdev)) {
-		/* Have to wait for this device to get unblocked, then retry */
-		allow_barrier(conf);
-		raid10_log(conf->mddev, "%s wait rdev %d blocked",
-				__func__, blocked_rdev->raid_disk);
-		md_wait_for_blocked_rdev(blocked_rdev, mddev);
-		wait_barrier(conf);
-		goto retry_wait;
-	}
-}
-
 static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 				 struct r10bio *r10_bio)
 {
 	struct r10conf *conf = mddev->private;
 	int i;
+	struct md_rdev *blocked_rdev;
 	sector_t sectors;
 	int max_sectors;
 
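
The removed wait_blocked_dev() consolidated a scan-mark-wait-retry pattern: take an nr_pending reference on the first blocked device found, drop out of the RCU scan, sleep until the device is unblocked, then rescan from the top because another device may have become blocked meanwhile. A stripped-down sketch of that control flow, with stub types and a stub wait in place of md_wait_for_blocked_rdev(); none of the names below are kernel API:

```c
#include <stdbool.h>
#include <stdio.h>

struct fake_rdev {
	bool blocked;
	int  nr_pending;
};

/* stand-in for md_wait_for_blocked_rdev(): pretend the unblock event
 * fired, and drop the reference the scan took */
static void fake_wait_unblocked(struct fake_rdev *rdev)
{
	rdev->blocked = false;
	rdev->nr_pending--;
}

static void wait_blocked(struct fake_rdev *devs, int ndevs)
{
	struct fake_rdev *blocked;

retry_wait:
	blocked = NULL;
	for (int i = 0; i < ndevs; i++) {
		if (devs[i].blocked) {
			/* take a reference before leaving the scan, like
			 * atomic_inc(&rdev->nr_pending) in the kernel */
			devs[i].nr_pending++;
			blocked = &devs[i];
			break;
		}
	}
	if (blocked) {
		fake_wait_unblocked(blocked);
		/* another device may have become blocked meanwhile */
		goto retry_wait;
	}
}

int main(void)
{
	struct fake_rdev devs[3] = { { false, 0 }, { true, 0 }, { false, 0 } };

	wait_blocked(devs, 3);
	printf("all devices unblocked\n");
	return 0;
}
```
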
@@ -1401,9 +1339,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 
 	r10_bio->read_slot = -1; /* make sure repl_bio gets freed */
 	raid10_find_phys(conf, r10_bio);
-
-	wait_blocked_dev(mddev, r10_bio);
-
+retry_write:
+	blocked_rdev = NULL;
 	rcu_read_lock();
 	max_sectors = r10_bio->sectors;
 
@@ -1414,6 +1351,16 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 			conf->mirrors[d].replacement);
 		if (rdev == rrdev)
 			rrdev = NULL;
+		if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+			atomic_inc(&rdev->nr_pending);
+			blocked_rdev = rdev;
+			break;
+		}
+		if (rrdev && unlikely(test_bit(Blocked, &rrdev->flags))) {
+			atomic_inc(&rrdev->nr_pending);
+			blocked_rdev = rrdev;
+			break;
+		}
 		if (rdev && (test_bit(Faulty, &rdev->flags)))
 			rdev = NULL;
 		if (rrdev && (test_bit(Faulty, &rrdev->flags)))
@@ -1434,6 +1381,15 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 
 			is_bad = is_badblock(rdev, dev_sector, max_sectors,
 					     &first_bad, &bad_sectors);
+			if (is_bad < 0) {
+				/* Mustn't write here until the bad block
+				 * is acknowledged
+				 */
+				atomic_inc(&rdev->nr_pending);
+				set_bit(BlockedBadBlocks, &rdev->flags);
+				blocked_rdev = rdev;
+				break;
+			}
 			if (is_bad && first_bad <= dev_sector) {
 				/* Cannot write here at all */
 				bad_sectors -= (dev_sector - first_bad);
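
The restored block handles is_badblock() returning a negative value, which signals an unacknowledged bad block: the write must not proceed until the bad block is acknowledged, so the device is marked and the request retried. When the return is positive and the bad range begins inside the write, the surrounding context instead clips the write so it stops at the start of that range. A worked sketch of just that clipping arithmetic; a simplified stand-in, not the kernel's full bad-block policy:

```c
#include <stdio.h>

typedef unsigned long long sector_t;

/* shorten a write of max_sectors at dev_sector so it ends where a known
 * bad range beginning at first_bad starts */
static sector_t clip_good_sectors(sector_t dev_sector, sector_t max_sectors,
				  sector_t first_bad)
{
	if (first_bad > dev_sector && first_bad < dev_sector + max_sectors)
		return first_bad - dev_sector;	/* stop before the bad range */
	return max_sectors;
}

int main(void)
{
	/* 64-sector write at 100, bad range begins at 120: keep 20 sectors */
	printf("%llu\n", clip_good_sectors(100, 64, 120));
	return 0;
}
```
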
@@ -1469,6 +1425,35 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 	}
 	rcu_read_unlock();
 
+	if (unlikely(blocked_rdev)) {
+		/* Have to wait for this device to get unblocked, then retry */
+		int j;
+		int d;
+
+		for (j = 0; j < i; j++) {
+			if (r10_bio->devs[j].bio) {
+				d = r10_bio->devs[j].devnum;
+				rdev_dec_pending(conf->mirrors[d].rdev, mddev);
+			}
+			if (r10_bio->devs[j].repl_bio) {
+				struct md_rdev *rdev;
+				d = r10_bio->devs[j].devnum;
+				rdev = conf->mirrors[d].replacement;
+				if (!rdev) {
+					/* Race with remove_disk */
+					smp_mb();
+					rdev = conf->mirrors[d].rdev;
+				}
+				rdev_dec_pending(rdev, mddev);
+			}
+		}
+		allow_barrier(conf);
+		raid10_log(conf->mddev, "wait rdev %d blocked", blocked_rdev->raid_disk);
+		md_wait_for_blocked_rdev(blocked_rdev, mddev);
+		wait_barrier(conf);
+		goto retry_write;
+	}
+
 	if (max_sectors < r10_bio->sectors)
 		r10_bio->sectors = max_sectors;
 
@@ -1508,7 +1493,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
 	r10_bio->mddev = mddev;
 	r10_bio->sector = bio->bi_iter.bi_sector;
 	r10_bio->state = 0;
-	memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * conf->geo.raid_disks);
+	memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * conf->copies);
 
 	if (bio_data_dir(bio) == READ)
 		raid10_read_request(mddev, bio, r10_bio);
@@ -1516,296 +1501,6 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
 		raid10_write_request(mddev, bio, r10_bio);
 }
 
-static struct bio *raid10_split_bio(struct r10conf *conf,
-			struct bio *bio, sector_t sectors, bool want_first)
-{
-	struct bio *split;
-
-	split = bio_split(bio, sectors, GFP_NOIO, &conf->bio_split);
-	bio_chain(split, bio);
-	allow_barrier(conf);
-	if (want_first) {
-		submit_bio_noacct(bio);
-		bio = split;
-	} else
-		submit_bio_noacct(split);
-	wait_barrier(conf);
-
-	return bio;
-}
-
-static void raid_end_discard_bio(struct r10bio *r10bio)
-{
-	struct r10conf *conf = r10bio->mddev->private;
-	struct r10bio *first_r10bio;
-
-	while (atomic_dec_and_test(&r10bio->remaining)) {
-
-		allow_barrier(conf);
-
-		if (!test_bit(R10BIO_Discard, &r10bio->state)) {
-			first_r10bio = (struct r10bio *)r10bio->master_bio;
-			free_r10bio(r10bio);
-			r10bio = first_r10bio;
-		} else {
-			md_write_end(r10bio->mddev);
-			bio_endio(r10bio->master_bio);
-			free_r10bio(r10bio);
-			break;
-		}
-	}
-}
-
-static void raid10_end_discard_request(struct bio *bio)
-{
-	struct r10bio *r10_bio = bio->bi_private;
-	struct r10conf *conf = r10_bio->mddev->private;
-	struct md_rdev *rdev = NULL;
-	int dev;
-	int slot, repl;
-
-	/*
-	 * We don't care the return value of discard bio
-	 */
-	if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
-		set_bit(R10BIO_Uptodate, &r10_bio->state);
-
-	dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
-	if (repl)
-		rdev = conf->mirrors[dev].replacement;
-	if (!rdev) {
-		/* raid10_remove_disk uses smp_mb to make sure rdev is set to
-		 * replacement before setting replacement to NULL. It can read
-		 * rdev first without barrier protect even replacment is NULL
-		 */
-		smp_rmb();
-		rdev = conf->mirrors[dev].rdev;
-	}
-
-	raid_end_discard_bio(r10_bio);
-	rdev_dec_pending(rdev, conf->mddev);
-}
-
-/* There are some limitations to handle discard bio
- * 1st, the discard size is bigger than stripe_size*2.
- * 2st, if the discard bio spans reshape progress, we use the old way to
- * handle discard bio
- */
-static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
-{
-	struct r10conf *conf = mddev->private;
-	struct geom *geo = &conf->geo;
-	struct r10bio *r10_bio, *first_r10bio;
-	int far_copies = geo->far_copies;
-	bool first_copy = true;
-
-	int disk;
-	sector_t chunk;
-	unsigned int stripe_size;
-	sector_t split_size;
-
-	sector_t bio_start, bio_end;
-	sector_t first_stripe_index, last_stripe_index;
-	sector_t start_disk_offset;
-	unsigned int start_disk_index;
-	sector_t end_disk_offset;
-	unsigned int end_disk_index;
-	unsigned int remainder;
-
-	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
-		return -EAGAIN;
-
-	wait_barrier(conf);
-
-	/* Check reshape again to avoid reshape happens after checking
-	 * MD_RECOVERY_RESHAPE and before wait_barrier
-	 */
-	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
-		goto out;
-
-	stripe_size = geo->raid_disks << geo->chunk_shift;
-	bio_start = bio->bi_iter.bi_sector;
-	bio_end = bio_end_sector(bio);
-
-	/* Maybe one discard bio is smaller than strip size or across one stripe
-	 * and discard region is larger than one stripe size. For far offset layout,
-	 * if the discard region is not aligned with stripe size, there is hole
-	 * when we submit discard bio to member disk. For simplicity, we only
-	 * handle discard bio which discard region is bigger than stripe_size*2
-	 */
-	if (bio_sectors(bio) < stripe_size*2)
-		goto out;
-
-	/* For far and far offset layout, if bio is not aligned with stripe size,
-	 * it splits the part that is not aligned with strip size.
-	 */
-	div_u64_rem(bio_start, stripe_size, &remainder);
-	if ((far_copies > 1) && remainder) {
-		split_size = stripe_size - remainder;
-		bio = raid10_split_bio(conf, bio, split_size, false);
-	}
-	div_u64_rem(bio_end, stripe_size, &remainder);
-	if ((far_copies > 1) && remainder) {
-		split_size = bio_sectors(bio) - remainder;
-		bio = raid10_split_bio(conf, bio, split_size, true);
-	}
-
-	bio_start = bio->bi_iter.bi_sector;
-	bio_end = bio_end_sector(bio);
-
-	/* raid10 uses chunk as the unit to store data. It's similar like raid0.
-	 * One stripe contains the chunks from all member disk (one chunk from
-	 * one disk at the same HBA address). For layout detail, see 'man md 4'
-	 */
-	chunk = bio_start >> geo->chunk_shift;
-	chunk *= geo->near_copies;
-	first_stripe_index = chunk;
-	start_disk_index = sector_div(first_stripe_index, geo->raid_disks);
-	if (geo->far_offset)
-		first_stripe_index *= geo->far_copies;
-	start_disk_offset = (bio_start & geo->chunk_mask) +
-				(first_stripe_index << geo->chunk_shift);
-
-	chunk = bio_end >> geo->chunk_shift;
-	chunk *= geo->near_copies;
-	last_stripe_index = chunk;
-	end_disk_index = sector_div(last_stripe_index, geo->raid_disks);
-	if (geo->far_offset)
-		last_stripe_index *= geo->far_copies;
-	end_disk_offset = (bio_end & geo->chunk_mask) +
-				(last_stripe_index << geo->chunk_shift);
-
-retry_discard:
-	r10_bio = mempool_alloc(&conf->r10bio_pool, GFP_NOIO);
-	r10_bio->mddev = mddev;
-	r10_bio->state = 0;
-	r10_bio->sectors = 0;
-	memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * geo->raid_disks);
-	wait_blocked_dev(mddev, r10_bio);
-
-	/* For far layout it needs more than one r10bio to cover all regions.
-	 * Inspired by raid10_sync_request, we can use the first r10bio->master_bio
-	 * to record the discard bio. Other r10bio->master_bio record the first
-	 * r10bio. The first r10bio only release after all other r10bios finish.
-	 * The discard bio returns only first r10bio finishes
-	 */
-	if (first_copy) {
-		r10_bio->master_bio = bio;
-		set_bit(R10BIO_Discard, &r10_bio->state);
-		first_copy = false;
-		first_r10bio = r10_bio;
-	} else
-		r10_bio->master_bio = (struct bio *)first_r10bio;
-
-	rcu_read_lock();
-	for (disk = 0; disk < geo->raid_disks; disk++) {
-		struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
-		struct md_rdev *rrdev = rcu_dereference(
-			conf->mirrors[disk].replacement);
-
-		r10_bio->devs[disk].bio = NULL;
-		r10_bio->devs[disk].repl_bio = NULL;
-
-		if (rdev && (test_bit(Faulty, &rdev->flags)))
-			rdev = NULL;
-		if (rrdev && (test_bit(Faulty, &rrdev->flags)))
-			rrdev = NULL;
-		if (!rdev && !rrdev)
-			continue;
-
-		if (rdev) {
-			r10_bio->devs[disk].bio = bio;
-			atomic_inc(&rdev->nr_pending);
-		}
-		if (rrdev) {
-			r10_bio->devs[disk].repl_bio = bio;
-			atomic_inc(&rrdev->nr_pending);
-		}
-	}
-	rcu_read_unlock();
-
-	atomic_set(&r10_bio->remaining, 1);
-	for (disk = 0; disk < geo->raid_disks; disk++) {
-		sector_t dev_start, dev_end;
-		struct bio *mbio, *rbio = NULL;
-		struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
-		struct md_rdev *rrdev = rcu_dereference(
-			conf->mirrors[disk].replacement);
-
-		/*
-		 * Now start to calculate the start and end address for each disk.
-		 * The space between dev_start and dev_end is the discard region.
-		 *
-		 * For dev_start, it needs to consider three conditions:
-		 * 1st, the disk is before start_disk, you can imagine the disk in
-		 * the next stripe. So the dev_start is the start address of next
-		 * stripe.
-		 * 2st, the disk is after start_disk, it means the disk is at the
-		 * same stripe of first disk
-		 * 3st, the first disk itself, we can use start_disk_offset directly
-		 */
-		if (disk < start_disk_index)
-			dev_start = (first_stripe_index + 1) * mddev->chunk_sectors;
-		else if (disk > start_disk_index)
-			dev_start = first_stripe_index * mddev->chunk_sectors;
-		else
-			dev_start = start_disk_offset;
-
-		if (disk < end_disk_index)
-			dev_end = (last_stripe_index + 1) * mddev->chunk_sectors;
-		else if (disk > end_disk_index)
-			dev_end = last_stripe_index * mddev->chunk_sectors;
-		else
-			dev_end = end_disk_offset;
-
-		/* It only handles discard bio which size is >= stripe size, so
-		 * dev_end > dev_start all the time
-		 */
-		if (r10_bio->devs[disk].bio) {
-			mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
-			mbio->bi_end_io = raid10_end_discard_request;
-			mbio->bi_private = r10_bio;
-			r10_bio->devs[disk].bio = mbio;
-			r10_bio->devs[disk].devnum = disk;
-			atomic_inc(&r10_bio->remaining);
-			md_submit_discard_bio(mddev, rdev, mbio,
-					dev_start + choose_data_offset(r10_bio, rdev),
-					dev_end - dev_start);
-			bio_endio(mbio);
-		}
-		if (r10_bio->devs[disk].repl_bio) {
-			rbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
-			rbio->bi_end_io = raid10_end_discard_request;
-			rbio->bi_private = r10_bio;
-			r10_bio->devs[disk].repl_bio = rbio;
-			r10_bio->devs[disk].devnum = disk;
-			atomic_inc(&r10_bio->remaining);
-			md_submit_discard_bio(mddev, rrdev, rbio,
-					dev_start + choose_data_offset(r10_bio, rrdev),
-					dev_end - dev_start);
-			bio_endio(rbio);
-		}
-	}
-
-	if (!geo->far_offset && --far_copies) {
-		first_stripe_index += geo->stride >> geo->chunk_shift;
-		start_disk_offset += geo->stride;
-		last_stripe_index += geo->stride >> geo->chunk_shift;
-		end_disk_offset += geo->stride;
-		atomic_inc(&first_r10bio->remaining);
-		raid_end_discard_bio(r10_bio);
-		wait_barrier(conf);
-		goto retry_discard;
-	}
-
-	raid_end_discard_bio(r10_bio);
-
-	return 0;
-out:
-	allow_barrier(conf);
-	return -EAGAIN;
-}
-
 static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
 {
 	struct r10conf *conf = mddev->private;
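
The removed raid10_handle_discard() mapped the discard range onto each member disk with raid10's chunk arithmetic: an array sector is converted to a chunk number, that chunk is split into a stripe index (chunk / raid_disks) and a disk index (chunk % raid_disks, what sector_div() returns), and the on-disk offset is the whole stripes below plus the position inside the chunk. A standalone sketch of that mapping for the near layout, assuming near_copies == 1; real raid10 also scales the chunk by near_copies and handles the far and offset layouts:

```c
#include <stdio.h>

typedef unsigned long long sector_t;

struct geom {
	int raid_disks;
	int chunk_shift;	/* chunk size = 1 << chunk_shift sectors */
	sector_t chunk_mask;	/* (1 << chunk_shift) - 1 */
};

static void map_sector(const struct geom *geo, sector_t sector,
		       sector_t *stripe, int *disk, sector_t *dev_offset)
{
	sector_t chunk = sector >> geo->chunk_shift;

	*disk = chunk % geo->raid_disks;	/* what sector_div() returns */
	*stripe = chunk / geo->raid_disks;
	/* offset on the member disk: whole stripes below us, plus the
	 * position inside the current chunk */
	*dev_offset = (*stripe << geo->chunk_shift) +
		      (sector & geo->chunk_mask);
}

int main(void)
{
	struct geom geo = { .raid_disks = 4, .chunk_shift = 7,
			    .chunk_mask = (1 << 7) - 1 };	/* 64KiB chunks */
	sector_t stripe, off;
	int disk;

	map_sector(&geo, 1000, &stripe, &disk, &off);
	printf("sector 1000 -> stripe %llu, disk %d, dev offset %llu\n",
	       stripe, disk, off);	/* stripe 1, disk 3, offset 232 */
	return 0;
}
```
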
@@ -1820,10 +1515,6 @@ static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
 	if (!md_write_start(mddev, bio))
 		return false;
 
-	if (unlikely(bio_op(bio) == REQ_OP_DISCARD))
-		if (!raid10_handle_discard(mddev, bio))
-			return true;
-
 	/*
 	 * If this request crosses a chunk boundary, we need to split
 	 * it.
@@ -4063,7 +3754,7 @@ static int raid10_run(struct mddev *mddev)
 
 	if (mddev->queue) {
 		blk_queue_max_discard_sectors(mddev->queue,
-					      UINT_MAX);
+					      mddev->chunk_sectors);
 		blk_queue_max_write_same_sectors(mddev->queue, 0);
 		blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
 		blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);