ocfs2/dlm: disable BUG_ON when DLM_LOCK_RES_DROPPING_REF is cleared before dlm_deref_lockres_done_handler
We found a BUG situation in which DLM_LOCK_RES_DROPPING_REF is cleared
unexpected that described below. To solve the bug, we disable the
BUG_ON and purge lockres in dlm_do_local_recovery_cleanup.
Node 1 Node 2(master)
dlm_purge_lockres
dlm_deref_lockres_handler
DLM_LOCK_RES_SETREF_INPROG is set
response DLM_DEREF_RESPONSE_INPROG
receive DLM_DEREF_RESPONSE_INPROG
stop puring in dlm_purge_lockres
and wait for DLM_DEREF_RESPONSE_DONE
dispatch dlm_deref_lockres_worker
response DLM_DEREF_RESPONSE_DONE
receive DLM_DEREF_RESPONSE_DONE and
prepare to purge lockres
Node 2 goes down
find Node2 down and do local
clean up for Node2:
dlm_do_local_recovery_cleanup
-> clear DLM_LOCK_RES_DROPPING_REF
when purging lockres, BUG_ON happens
because DLM_LOCK_RES_DROPPING_REF is clear:
dlm_deref_lockres_done_handler
->BUG_ON(!(res->state & DLM_LOCK_RES_DROPPING_REF));
[akpm@linux-foundation.org: fix duplicated write to `ret']
Fixes: 60d663cb52
("ocfs2/dlm: add DEREF_DONE message")
Link: http://lkml.kernel.org/r/57845055.9080702@huawei.com
Signed-off-by: Jun Piao <piaojun@huawei.com>
Reviewed-by: Joseph Qi <joseph.qi@huawei.com>
Reviewed-by: Jiufei Xue <xuejiufei@huawei.com>
Reviewed-by: Mark Fasheh <mfasheh@suse.de>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Родитель
2070ad1aeb
Коммит
86b652b93a
|
@ -2416,7 +2416,17 @@ int dlm_deref_lockres_done_handler(struct o2net_msg *msg, u32 len, void *data,
|
||||||
}
|
}
|
||||||
|
|
||||||
spin_lock(&res->spinlock);
|
spin_lock(&res->spinlock);
|
||||||
BUG_ON(!(res->state & DLM_LOCK_RES_DROPPING_REF));
|
if (!(res->state & DLM_LOCK_RES_DROPPING_REF)) {
|
||||||
|
spin_unlock(&res->spinlock);
|
||||||
|
spin_unlock(&dlm->spinlock);
|
||||||
|
mlog(ML_NOTICE, "%s:%.*s: node %u sends deref done "
|
||||||
|
"but it is already derefed!\n", dlm->name,
|
||||||
|
res->lockname.len, res->lockname.name, node);
|
||||||
|
dlm_lockres_put(res);
|
||||||
|
ret = 0;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
if (!list_empty(&res->purge)) {
|
if (!list_empty(&res->purge)) {
|
||||||
mlog(0, "%s: Removing res %.*s from purgelist\n",
|
mlog(0, "%s: Removing res %.*s from purgelist\n",
|
||||||
dlm->name, res->lockname.len, res->lockname.name);
|
dlm->name, res->lockname.len, res->lockname.name);
|
||||||
|
@ -2456,7 +2466,6 @@ int dlm_deref_lockres_done_handler(struct o2net_msg *msg, u32 len, void *data,
|
||||||
spin_unlock(&dlm->spinlock);
|
spin_unlock(&dlm->spinlock);
|
||||||
|
|
||||||
ret = 0;
|
ret = 0;
|
||||||
|
|
||||||
done:
|
done:
|
||||||
dlm_put(dlm);
|
dlm_put(dlm);
|
||||||
return ret;
|
return ret;
|
||||||
|
|
Загрузка…
Ссылка в новой задаче