From 318aaf34f1179b39fa9c30fa0f3288b645beee39 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Thu, 8 Mar 2018 10:34:53 +0800 Subject: [PATCH 1/2] scsi: libsas: defer ata device eh commands to libata When ata device doing EH, some commands still attached with tasks are not passed to libata when abort failed or recover failed, so libata did not handle these commands. After these commands done, sas task is freed, but ata qc is not freed. This will cause ata qc leak and trigger a warning like below: WARNING: CPU: 0 PID: 28512 at drivers/ata/libata-eh.c:4037 ata_eh_finish+0xb4/0xcc CPU: 0 PID: 28512 Comm: kworker/u32:2 Tainted: G W OE 4.14.0#1 ...... Call trace: [] ata_eh_finish+0xb4/0xcc [] ata_do_eh+0xc4/0xd8 [] ata_std_error_handler+0x44/0x8c [] ata_scsi_port_error_handler+0x480/0x694 [] async_sas_ata_eh+0x4c/0x80 [] async_run_entry_fn+0x4c/0x170 [] process_one_work+0x144/0x390 [] worker_thread+0x144/0x418 [] kthread+0x10c/0x138 [] ret_from_fork+0x10/0x18 If ata qc leaked too many, ata tag allocation will fail and io blocked for ever. As suggested by Dan Williams, defer ata device commands to libata and merge sas_eh_finish_cmd() with sas_eh_defer_cmd(). libata will handle ata qcs correctly after this. Signed-off-by: Jason Yan CC: Xiaofei Tan CC: John Garry CC: Dan Williams Reviewed-by: Dan Williams Signed-off-by: Martin K. Petersen --- drivers/scsi/libsas/sas_scsi_host.c | 33 ++++++++++++----------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 626727207889..a372af68d9a9 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -223,6 +223,7 @@ out_done: static void sas_eh_finish_cmd(struct scsi_cmnd *cmd) { struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(cmd->device->host); + struct domain_device *dev = cmd_to_domain_dev(cmd); struct sas_task *task = TO_SAS_TASK(cmd); /* At this point, we only get called following an actual abort @@ -231,6 +232,14 @@ static void sas_eh_finish_cmd(struct scsi_cmnd *cmd) */ sas_end_task(cmd, task); + if (dev_is_sata(dev)) { + /* defer commands to libata so that libata EH can + * handle ata qcs correctly + */ + list_move_tail(&cmd->eh_entry, &sas_ha->eh_ata_q); + return; + } + /* now finish the command and move it on to the error * handler done list, this also takes it off the * error handler pending list. @@ -238,22 +247,6 @@ static void sas_eh_finish_cmd(struct scsi_cmnd *cmd) scsi_eh_finish_cmd(cmd, &sas_ha->eh_done_q); } -static void sas_eh_defer_cmd(struct scsi_cmnd *cmd) -{ - struct domain_device *dev = cmd_to_domain_dev(cmd); - struct sas_ha_struct *ha = dev->port->ha; - struct sas_task *task = TO_SAS_TASK(cmd); - - if (!dev_is_sata(dev)) { - sas_eh_finish_cmd(cmd); - return; - } - - /* report the timeout to libata */ - sas_end_task(cmd, task); - list_move_tail(&cmd->eh_entry, &ha->eh_ata_q); -} - static void sas_scsi_clear_queue_lu(struct list_head *error_q, struct scsi_cmnd *my_cmd) { struct scsi_cmnd *cmd, *n; @@ -261,7 +254,7 @@ static void sas_scsi_clear_queue_lu(struct list_head *error_q, struct scsi_cmnd list_for_each_entry_safe(cmd, n, error_q, eh_entry) { if (cmd->device->sdev_target == my_cmd->device->sdev_target && cmd->device->lun == my_cmd->device->lun) - sas_eh_defer_cmd(cmd); + sas_eh_finish_cmd(cmd); } } @@ -618,12 +611,12 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head * case TASK_IS_DONE: SAS_DPRINTK("%s: task 0x%p is done\n", __func__, task); - sas_eh_defer_cmd(cmd); + sas_eh_finish_cmd(cmd); continue; case TASK_IS_ABORTED: SAS_DPRINTK("%s: task 0x%p is aborted\n", __func__, task); - sas_eh_defer_cmd(cmd); + sas_eh_finish_cmd(cmd); continue; case TASK_IS_AT_LU: SAS_DPRINTK("task 0x%p is at LU: lu recover\n", task); @@ -634,7 +627,7 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head * "recovered\n", SAS_ADDR(task->dev), cmd->device->lun); - sas_eh_defer_cmd(cmd); + sas_eh_finish_cmd(cmd); sas_scsi_clear_queue_lu(work_q, cmd); goto Again; } From 14bc1dff74277408f08661d03e785710a46e0699 Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Wed, 7 Mar 2018 10:49:26 -0800 Subject: [PATCH 2/2] scsi: qla2xxx: Remove FC_NO_LOOP_ID for FCP and FC-NVMe Discovery Commit 7d64c39e64310 fixed regression of FCP discovery when Nport Handle is in-use and relogin is triggered. However, during FCP and FC-NVMe discovery this resulted into only discovering NVMe LUNs. This patch fixes issue where FCP and FC-NVMe protocol is used on same port where assigning FC_NO_LOOP_ID will result into discovery failure for FCP LUNs. Fixes: a084fd68e1d26 ("scsi: qla2xxx: Fix re-login for Nport Handle in use") Signed-off-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_init.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 00329dda6179..8d7fab3cd01d 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1719,7 +1719,6 @@ qla24xx_handle_plogi_done_event(struct scsi_qla_host *vha, struct event_arg *ea) set_bit(ea->fcport->loop_id, vha->hw->loop_id_map); spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); - ea->fcport->loop_id = FC_NO_LOOP_ID; ea->fcport->chip_reset = vha->hw->base_qpair->chip_reset; ea->fcport->logout_on_delete = 1; ea->fcport->send_els_logo = 0;