drbd: flush drbd work queue before invalidate/invalidate remote
If you do back-to-back wait-sync/invalidate on a Primary in a tight loop, during application IO load, you can trigger a race, which shows up in the kernel log as: kernel: block drbd6: FIXME going to queue 'set_n_write from StartingSync' but 'write from resync_finished' still pending? Fix this by changing the order of the drbd_queue_work() and the wake_up() in dec_ap_bio(), so that the bitmap IO work is queued before the waiters on misc_wait are woken, and by adding an additional drbd_flush_workqueue() before requesting the full sync in both invalidate and invalidate remote. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
This commit is contained in:
Родитель
c12e9c8964
Коммит
7ee1fb93f3
|
@ -2421,15 +2421,17 @@ static inline void dec_ap_bio(struct drbd_conf *mdev)
|
||||||
int ap_bio = atomic_dec_return(&mdev->ap_bio_cnt);
|
int ap_bio = atomic_dec_return(&mdev->ap_bio_cnt);
|
||||||
|
|
||||||
D_ASSERT(ap_bio >= 0);
|
D_ASSERT(ap_bio >= 0);
|
||||||
|
|
||||||
|
if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) {
|
||||||
|
if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
|
||||||
|
drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w);
|
||||||
|
}
|
||||||
|
|
||||||
/* this currently does wake_up for every dec_ap_bio!
|
/* this currently does wake_up for every dec_ap_bio!
|
||||||
* maybe rather introduce some type of hysteresis?
|
* maybe rather introduce some type of hysteresis?
|
||||||
* e.g. (ap_bio == mxb/2 || ap_bio == 0) ? */
|
* e.g. (ap_bio == mxb/2 || ap_bio == 0) ? */
|
||||||
if (ap_bio < mxb)
|
if (ap_bio < mxb)
|
||||||
wake_up(&mdev->misc_wait);
|
wake_up(&mdev->misc_wait);
|
||||||
if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) {
|
|
||||||
if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
|
|
||||||
drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val)
|
static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val)
|
||||||
|
|
|
@ -1977,9 +1977,11 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
|
||||||
int retcode;
|
int retcode;
|
||||||
|
|
||||||
/* If there is still bitmap IO pending, probably because of a previous
|
/* If there is still bitmap IO pending, probably because of a previous
|
||||||
* resync just being finished, wait for it before requesting a new resync. */
|
* resync just being finished, wait for it before requesting a new resync.
|
||||||
|
* Also wait for its after_state_ch(). */
|
||||||
drbd_suspend_io(mdev);
|
drbd_suspend_io(mdev);
|
||||||
wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
|
wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
|
||||||
|
drbd_flush_workqueue(mdev);
|
||||||
|
|
||||||
retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED);
|
retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED);
|
||||||
|
|
||||||
|
@ -2018,9 +2020,11 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re
|
||||||
int retcode;
|
int retcode;
|
||||||
|
|
||||||
/* If there is still bitmap IO pending, probably because of a previous
|
/* If there is still bitmap IO pending, probably because of a previous
|
||||||
* resync just being finished, wait for it before requesting a new resync. */
|
* resync just being finished, wait for it before requesting a new resync.
|
||||||
|
* Also wait for its after_state_ch(). */
|
||||||
drbd_suspend_io(mdev);
|
drbd_suspend_io(mdev);
|
||||||
wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
|
wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
|
||||||
|
drbd_flush_workqueue(mdev);
|
||||||
|
|
||||||
retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);
|
retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);
|
||||||
|
|
||||||
|
|
Загрузка…
Ссылка в новой задаче