scsi: zfcp: fix infinite iteration on ERP ready list
zfcp_erp_adapter_reopen() schedules blocking of all of the adapter's rports via zfcp_scsi_schedule_rports_block() and enqueues a reopen adapter ERP action via zfcp_erp_action_enqueue(). Both are separately processed asynchronously and concurrently. Blocking of rports is done in a kworker by zfcp_scsi_rport_work(). It calls zfcp_scsi_rport_block(), which then traces a DBF REC "scpdely" via zfcp_dbf_rec_trig(). zfcp_dbf_rec_trig() acquires the DBF REC spin lock and then iterates with list_for_each() over the adapter's ERP ready list without holding the ERP lock. This opens a race window in which the current list entry can be moved to another list, causing list_for_each() to iterate forever on the wrong list, as the erp_ready_head is never encountered as terminal condition. Meanwhile the ERP action can be processed in the ERP thread by zfcp_erp_thread(). It calls zfcp_erp_strategy(), which acquires the ERP lock and then calls zfcp_erp_action_to_running() to move the ERP action from the ready to the running list. zfcp_erp_action_to_running() can move the ERP action using list_move() just during the aforementioned race window. It then traces a REC RUN "erator1" via zfcp_dbf_rec_run(). zfcp_dbf_rec_run() tries to acquire the DBF REC spin lock. If this is held by the infinitely looping kworker, it effectively spins forever. Example Sequence Diagram: Process ERP Thread rport_work ------------------- ------------------- ------------------- zfcp_erp_adapter_reopen() zfcp_erp_adapter_block() zfcp_scsi_schedule_rports_block() lock ERP zfcp_scsi_rport_work() zfcp_erp_action_enqueue(ZFCP_ERP_ACTION_REOPEN_ADAPTER) list_add_tail() on ready !(rport_task==RPORT_ADD) wake_up() ERP thread zfcp_scsi_rport_block() zfcp_dbf_rec_trig() zfcp_erp_strategy() zfcp_dbf_rec_trig() unlock ERP lock DBF REC zfcp_erp_wait() lock ERP | zfcp_erp_action_to_running() | list_for_each() ready | list_move() current entry | ready to running | zfcp_dbf_rec_run() endless loop over running | zfcp_dbf_rec_run_lvl() | lock DBF REC spins forever Any adapter recovery can trigger this, such as setting the device offline or reboot. V4.9 commit4eeaa4f3f1
("zfcp: close window with unblocked rport during rport gone") introduced additional tracing of (un)blocking of rports. It missed that the adapter->erp_lock must be held when calling zfcp_dbf_rec_trig(). This fix uses the approach formerly introduced by commitaa0fec6239
("[SCSI] zfcp: Fix sparse warning by providing new entry in dbf") that got later removed by commitae0904f60f
("[SCSI] zfcp: Redesign of the debug tracing for recovery actions."). Introduce zfcp_dbf_rec_trig_lock(), a wrapper for zfcp_dbf_rec_trig() that acquires and releases the adapter->erp_lock for read. Reported-by: Sebastian Ott <sebott@linux.ibm.com> Signed-off-by: Jens Remus <jremus@linux.ibm.com> Fixes:4eeaa4f3f1
("zfcp: close window with unblocked rport during rport gone") Cc: <stable@vger.kernel.org> # 2.6.32+ Reviewed-by: Benjamin Block <bblock@linux.vnet.ibm.com> Signed-off-by: Steffen Maier <maier@linux.ibm.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
This commit is contained in:
Родитель
7d3af7d96a
Коммит
fa89adba19
|
@ -4,7 +4,7 @@
|
|||
*
|
||||
* Debug traces for zfcp.
|
||||
*
|
||||
* Copyright IBM Corp. 2002, 2017
|
||||
* Copyright IBM Corp. 2002, 2018
|
||||
*/
|
||||
|
||||
#define KMSG_COMPONENT "zfcp"
|
||||
|
@ -308,6 +308,27 @@ void zfcp_dbf_rec_trig(char *tag, struct zfcp_adapter *adapter,
|
|||
spin_unlock_irqrestore(&dbf->rec_lock, flags);
|
||||
}
|
||||
|
||||
/**
|
||||
* zfcp_dbf_rec_trig_lock - trace event related to triggered recovery with lock
|
||||
* @tag: identifier for event
|
||||
* @adapter: adapter on which the erp_action should run
|
||||
* @port: remote port involved in the erp_action
|
||||
* @sdev: scsi device involved in the erp_action
|
||||
* @want: wanted erp_action
|
||||
* @need: required erp_action
|
||||
*
|
||||
* The adapter->erp_lock must not be held.
|
||||
*/
|
||||
void zfcp_dbf_rec_trig_lock(char *tag, struct zfcp_adapter *adapter,
|
||||
struct zfcp_port *port, struct scsi_device *sdev,
|
||||
u8 want, u8 need)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
read_lock_irqsave(&adapter->erp_lock, flags);
|
||||
zfcp_dbf_rec_trig(tag, adapter, port, sdev, want, need);
|
||||
read_unlock_irqrestore(&adapter->erp_lock, flags);
|
||||
}
|
||||
|
||||
/**
|
||||
* zfcp_dbf_rec_run_lvl - trace event related to running recovery
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
*
|
||||
* External function declarations.
|
||||
*
|
||||
* Copyright IBM Corp. 2002, 2016
|
||||
* Copyright IBM Corp. 2002, 2018
|
||||
*/
|
||||
|
||||
#ifndef ZFCP_EXT_H
|
||||
|
@ -35,6 +35,9 @@ extern int zfcp_dbf_adapter_register(struct zfcp_adapter *);
|
|||
extern void zfcp_dbf_adapter_unregister(struct zfcp_adapter *);
|
||||
extern void zfcp_dbf_rec_trig(char *, struct zfcp_adapter *,
|
||||
struct zfcp_port *, struct scsi_device *, u8, u8);
|
||||
extern void zfcp_dbf_rec_trig_lock(char *tag, struct zfcp_adapter *adapter,
|
||||
struct zfcp_port *port,
|
||||
struct scsi_device *sdev, u8 want, u8 need);
|
||||
extern void zfcp_dbf_rec_run(char *, struct zfcp_erp_action *);
|
||||
extern void zfcp_dbf_rec_run_lvl(int level, char *tag,
|
||||
struct zfcp_erp_action *erp);
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
*
|
||||
* Interface to Linux SCSI midlayer.
|
||||
*
|
||||
* Copyright IBM Corp. 2002, 2017
|
||||
* Copyright IBM Corp. 2002, 2018
|
||||
*/
|
||||
|
||||
#define KMSG_COMPONENT "zfcp"
|
||||
|
@ -618,9 +618,9 @@ static void zfcp_scsi_rport_register(struct zfcp_port *port)
|
|||
ids.port_id = port->d_id;
|
||||
ids.roles = FC_RPORT_ROLE_FCP_TARGET;
|
||||
|
||||
zfcp_dbf_rec_trig("scpaddy", port->adapter, port, NULL,
|
||||
ZFCP_PSEUDO_ERP_ACTION_RPORT_ADD,
|
||||
ZFCP_PSEUDO_ERP_ACTION_RPORT_ADD);
|
||||
zfcp_dbf_rec_trig_lock("scpaddy", port->adapter, port, NULL,
|
||||
ZFCP_PSEUDO_ERP_ACTION_RPORT_ADD,
|
||||
ZFCP_PSEUDO_ERP_ACTION_RPORT_ADD);
|
||||
rport = fc_remote_port_add(port->adapter->scsi_host, 0, &ids);
|
||||
if (!rport) {
|
||||
dev_err(&port->adapter->ccw_device->dev,
|
||||
|
@ -642,9 +642,9 @@ static void zfcp_scsi_rport_block(struct zfcp_port *port)
|
|||
struct fc_rport *rport = port->rport;
|
||||
|
||||
if (rport) {
|
||||
zfcp_dbf_rec_trig("scpdely", port->adapter, port, NULL,
|
||||
ZFCP_PSEUDO_ERP_ACTION_RPORT_DEL,
|
||||
ZFCP_PSEUDO_ERP_ACTION_RPORT_DEL);
|
||||
zfcp_dbf_rec_trig_lock("scpdely", port->adapter, port, NULL,
|
||||
ZFCP_PSEUDO_ERP_ACTION_RPORT_DEL,
|
||||
ZFCP_PSEUDO_ERP_ACTION_RPORT_DEL);
|
||||
fc_remote_port_delete(rport);
|
||||
port->rport = NULL;
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче