drbd: fix race between drbdadm invalidate/verify and finishing resync
When a resync or online verify is finished or aborted, drbd does a bulk write-out of changed bitmap pages. If *in that very moment* a new verify or resync is triggered, this can race, producing log messages such as "ASSERT( !test_bit(BITMAP_IO, &mdev->flags) ) in drbd_main.c", "FIXME going to queue 'set_n_write from StartingSync' but 'write from resync_finished' still pending?", and similar. This can be observed with e.g. tight invalidate loops in test scripts, and probably has no real-life implication. Still, that race can be solved by first quiescing the device before starting a new resync or verify. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
This commit is contained in:
Родитель
ba280c092e
Коммит
a574daf5d7
|
@ -1963,6 +1963,7 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
|
||||||
|
|
||||||
/* If there is still bitmap IO pending, probably because of a previous
|
/* If there is still bitmap IO pending, probably because of a previous
|
||||||
* resync just being finished, wait for it before requesting a new resync. */
|
* resync just being finished, wait for it before requesting a new resync. */
|
||||||
|
drbd_suspend_io(mdev);
|
||||||
wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
|
wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
|
||||||
|
|
||||||
retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED);
|
retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED);
|
||||||
|
@ -1981,6 +1982,7 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
|
||||||
|
|
||||||
retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T));
|
retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T));
|
||||||
}
|
}
|
||||||
|
drbd_resume_io(mdev);
|
||||||
|
|
||||||
reply->ret_code = retcode;
|
reply->ret_code = retcode;
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -2002,6 +2004,7 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re
|
||||||
|
|
||||||
/* If there is still bitmap IO pending, probably because of a previous
|
/* If there is still bitmap IO pending, probably because of a previous
|
||||||
* resync just being finished, wait for it before requesting a new resync. */
|
* resync just being finished, wait for it before requesting a new resync. */
|
||||||
|
drbd_suspend_io(mdev);
|
||||||
wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
|
wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
|
||||||
|
|
||||||
retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);
|
retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);
|
||||||
|
@ -2020,6 +2023,7 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re
|
||||||
} else
|
} else
|
||||||
retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S));
|
retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S));
|
||||||
}
|
}
|
||||||
|
drbd_resume_io(mdev);
|
||||||
|
|
||||||
reply->ret_code = retcode;
|
reply->ret_code = retcode;
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -2192,11 +2196,13 @@ static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
|
||||||
|
|
||||||
/* If there is still bitmap IO pending, e.g. previous resync or verify
|
/* If there is still bitmap IO pending, e.g. previous resync or verify
|
||||||
* just being finished, wait for it before requesting a new resync. */
|
* just being finished, wait for it before requesting a new resync. */
|
||||||
|
drbd_suspend_io(mdev);
|
||||||
wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
|
wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
|
||||||
|
|
||||||
/* w_make_ov_request expects position to be aligned */
|
/* w_make_ov_request expects position to be aligned */
|
||||||
mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT;
|
mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT;
|
||||||
reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S));
|
reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S));
|
||||||
|
drbd_resume_io(mdev);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Загрузка…
Ссылка в новой задаче