md: raid1: improve write behind
This patch improves the handling of write behind in the following ways:

- introduce a behind master bio to hold all write-behind pages
- fast clone bios from the behind master bio
- avoid changing the bvec table directly
- use bio_copy_data() and make the code cleaner

Suggested-by: Shaohua Li <shli@fb.com>
Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Parent: d8c84c4f8b
Commit: 841c1316c7
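In short: instead of duplicating the incoming bio's bvec table and copying every segment by hand with kmap()/memcpy(), the write path now copies the payload once into pages owned by a single behind master bio, and every further write-behind leg shares those pages through a cheap fast clone. A condensed sketch of the new flow, using only calls that appear in the diff below (error handling and bitmap accounting omitted):

	/* first write-behind leg: allocate the master bio and copy the payload */
	mbio = alloc_behind_master_bio(r1_bio, bio, offset << 9, max_sectors << 9);

	/* remaining legs: share the already-copied pages, no bvec-table editing */
	mbio = bio_clone_fast(r1_bio->behind_master_bio, GFP_NOIO, mddev->bio_set);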
drivers/md/raid1.c

@@ -388,12 +388,9 @@ static void close_write(struct r1bio *r1_bio)
 {
 	/* it really is the end of this request */
 	if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
-		/* free extra copy of the data pages */
-		int i = r1_bio->behind_page_count;
-		while (i--)
-			safe_put_page(r1_bio->behind_bvecs[i].bv_page);
-		kfree(r1_bio->behind_bvecs);
-		r1_bio->behind_bvecs = NULL;
+		bio_free_pages(r1_bio->behind_master_bio);
+		bio_put(r1_bio->behind_master_bio);
+		r1_bio->behind_master_bio = NULL;
 	}
 	/* clear the bitmap if all writes complete successfully */
 	bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,

@@ -495,6 +492,10 @@ static void raid1_end_write_request(struct bio *bio)
 	}
 
 	if (behind) {
+		/* we release behind master bio when all write are done */
+		if (r1_bio->behind_master_bio == bio)
+			to_put = NULL;
+
 		if (test_bit(WriteMostly, &rdev->flags))
 			atomic_dec(&r1_bio->behind_remaining);
 

@@ -1089,39 +1090,46 @@ static void unfreeze_array(struct r1conf *conf)
 	wake_up(&conf->wait_barrier);
 }
 
-/* duplicate the data pages for behind I/O
- */
-static void alloc_behind_pages(struct bio *bio, struct r1bio *r1_bio)
+static struct bio *alloc_behind_master_bio(struct r1bio *r1_bio,
+					   struct bio *bio,
+					   int offset, int size)
 {
-	int i;
-	struct bio_vec *bvec;
-	struct bio_vec *bvecs = kzalloc(bio->bi_vcnt * sizeof(struct bio_vec),
-					GFP_NOIO);
-	if (unlikely(!bvecs))
-		return;
+	unsigned vcnt = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	int i = 0;
+	struct bio *behind_bio = NULL;
 
-	bio_for_each_segment_all(bvec, bio, i) {
-		bvecs[i] = *bvec;
-		bvecs[i].bv_page = alloc_page(GFP_NOIO);
-		if (unlikely(!bvecs[i].bv_page))
-			goto do_sync_io;
-		memcpy(kmap(bvecs[i].bv_page) + bvec->bv_offset,
-		       kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len);
-		kunmap(bvecs[i].bv_page);
-		kunmap(bvec->bv_page);
+	behind_bio = bio_alloc_mddev(GFP_NOIO, vcnt, r1_bio->mddev);
+	if (!behind_bio)
+		goto fail;
+
+	while (i < vcnt && size) {
+		struct page *page;
+		int len = min_t(int, PAGE_SIZE, size);
+
+		page = alloc_page(GFP_NOIO);
+		if (unlikely(!page))
+			goto free_pages;
+
+		bio_add_page(behind_bio, page, len, 0);
+
+		size -= len;
+		i++;
 	}
-	r1_bio->behind_bvecs = bvecs;
-	r1_bio->behind_page_count = bio->bi_vcnt;
-	set_bit(R1BIO_BehindIO, &r1_bio->state);
-	return;
 
-do_sync_io:
-	for (i = 0; i < bio->bi_vcnt; i++)
-		if (bvecs[i].bv_page)
-			put_page(bvecs[i].bv_page);
-	kfree(bvecs);
+	bio_copy_data_partial(behind_bio, bio, offset,
+			      behind_bio->bi_iter.bi_size);
+
+	r1_bio->behind_master_bio = behind_bio;
+	set_bit(R1BIO_BehindIO, &r1_bio->state);
+
+	return behind_bio;
+
+free_pages:
 	pr_debug("%dB behind alloc failed, doing sync I/O\n",
 		 bio->bi_iter.bi_size);
+	bio_free_pages(behind_bio);
+fail:
+	return behind_bio;
 }
 
 struct raid1_plug_cb {
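A worked example of the sizing in alloc_behind_master_bio() above, assuming a 4 KiB PAGE_SIZE (the numbers are illustrative, not from the patch): a 10 KiB write-behind copy needs three pages, the last only partly used, and bio_copy_data_partial() then copies exactly behind_bio->bi_iter.bi_size bytes starting at byte 'offset' of the source bio:

	int size = 10 * 1024;					/* 10 KiB payload */
	unsigned vcnt = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;	/* = 3 pages      */
	/* the loop adds pages of len = min_t(int, PAGE_SIZE, size): 4096, 4096,
	 * then 2048, so behind_bio->bi_iter.bi_size reaches 10 KiB before the
	 * single bulk copy replaces the old per-segment kmap()+memcpy() loop */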

@@ -1457,11 +1465,9 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio)
 		    (atomic_read(&bitmap->behind_writes)
 		     < mddev->bitmap_info.max_write_behind) &&
 		    !waitqueue_active(&bitmap->behind_wait)) {
-			mbio = bio_clone_bioset_partial(bio, GFP_NOIO,
-							mddev->bio_set,
-							offset << 9,
-							max_sectors << 9);
-			alloc_behind_pages(mbio, r1_bio);
+			mbio = alloc_behind_master_bio(r1_bio, bio,
+						       offset << 9,
+						       max_sectors << 9);
 		}
 
 		bitmap_startwrite(bitmap, r1_bio->sector,

@@ -1472,26 +1478,17 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio)
 		}
 
 		if (!mbio) {
-			if (r1_bio->behind_bvecs)
-				mbio = bio_clone_bioset_partial(bio, GFP_NOIO,
-								mddev->bio_set,
-								offset << 9,
-								max_sectors << 9);
+			if (r1_bio->behind_master_bio)
+				mbio = bio_clone_fast(r1_bio->behind_master_bio,
+						      GFP_NOIO,
+						      mddev->bio_set);
 			else {
 				mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set);
 				bio_trim(mbio, offset, max_sectors);
 			}
 		}
 
-		if (r1_bio->behind_bvecs) {
-			struct bio_vec *bvec;
-			int j;
-
-			/*
-			 * We trimmed the bio, so _all is legit
-			 */
-			bio_for_each_segment_all(bvec, mbio, j)
-				bvec->bv_page = r1_bio->behind_bvecs[j].bv_page;
+		if (r1_bio->behind_master_bio) {
 			if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
 				atomic_inc(&r1_bio->behind_remaining);
 		}
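Note that the first write-behind leg submits the behind master bio itself (the mbio returned by alloc_behind_master_bio()); only the remaining legs are fast clones. That is why raid1_end_write_request() above must not put the master bio on per-leg completion, and close_write() releases it exactly once. A condensed lifetime sketch pieced together from the hunks above:

	/* raid1_end_write_request(): keep the master until every leg is done */
	if (r1_bio->behind_master_bio == bio)
		to_put = NULL;

	/* close_write(): all legs have finished, drop the single page copy */
	bio_free_pages(r1_bio->behind_master_bio);
	bio_put(r1_bio->behind_master_bio);
	r1_bio->behind_master_bio = NULL;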

@@ -2386,18 +2383,11 @@ static int narrow_write_error(struct r1bio *r1_bio, int i)
 		/* Write at 'sector' for 'sectors'*/
 
 		if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
-			unsigned vcnt = r1_bio->behind_page_count;
-			struct bio_vec *vec = r1_bio->behind_bvecs;
-
-			while (!vec->bv_page) {
-				vec++;
-				vcnt--;
-			}
-
-			wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev);
-			memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec));
-
-			wbio->bi_vcnt = vcnt;
+			wbio = bio_clone_fast(r1_bio->behind_master_bio,
+					      GFP_NOIO,
+					      mddev->bio_set);
+			/* We really need a _all clone */
+			wbio->bi_iter = (struct bvec_iter){ 0 };
 		} else {
 			wbio = bio_clone_fast(r1_bio->master_bio, GFP_NOIO,
 					      mddev->bio_set);

drivers/md/raid1.h

@@ -153,9 +153,13 @@ struct r1bio {
 	int			read_disk;
 
 	struct list_head	retry_list;
-	/* Next two are only valid when R1BIO_BehindIO is set */
-	struct bio_vec		*behind_bvecs;
-	int			behind_page_count;
+
+	/*
+	 * When R1BIO_BehindIO is set, we store pages for write behind
+	 * in behind_master_bio.
+	 */
+	struct bio		*behind_master_bio;
+
 	/*
 	 * if the IO is in WRITE direction, then multiple bios are used.
 	 * We choose the number when they are allocated.