scsi: core: alua: I/O errors for ALUA state transitions
[ Upstream commit 10157b1fc1a762293381e9145041253420dfc6ad ]

When a host is configured with a few LUNs and I/O is running, injecting FC faults repeatedly leads to path recovery problems. The LUNs have 4 paths each; after, say, an FC fault that makes 2 of the paths go down, only 3 of the paths come back active instead of all 4. This happens after several iterations of continuous FC faults.

The reason is that we return an I/O error whenever we encounter sense code 06/04/0a (LOGICAL UNIT NOT ACCESSIBLE, ASYMMETRIC ACCESS STATE TRANSITION) instead of retrying.

[mwilck: The original patch was developed by Rajashekhar M A and Hannes Reinecke. I moved the code to alua_check_sense() as suggested by Mike Christie [1]. Evan Milne had raised the question whether pg->state should be set to transitioning in the UA case [2]. I believe that doing this is correct. SCSI_ACCESS_STATE_TRANSITIONING by itself doesn't cause I/O errors. Our handler schedules an RTPG, which will only result in an I/O error condition if the transitioning timeout expires.]

[1] https://lore.kernel.org/all/0bc96e82-fdda-4187-148d-5b34f81d4942@oracle.com/
[2] https://lore.kernel.org/all/CAGtn9r=kicnTDE2o7Gt5Y=yoidHYD7tG8XdMHEBJTBraVEoOCw@mail.gmail.com/

Co-developed-by: Rajashekhar M A <rajs@netapp.com>
Co-developed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Martin Wilck <martin.wilck@suse.com>
Link: https://lore.kernel.org/r/20240514140344.19538-1-mwilck@suse.com
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
Родитель
f818708eee
Коммит
9ffd72041f
|
@ -406,28 +406,40 @@ static char print_alua_state(unsigned char state)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static enum scsi_disposition alua_check_sense(struct scsi_device *sdev,
|
static void alua_handle_state_transition(struct scsi_device *sdev)
|
||||||
struct scsi_sense_hdr *sense_hdr)
|
|
||||||
{
|
{
|
||||||
struct alua_dh_data *h = sdev->handler_data;
|
struct alua_dh_data *h = sdev->handler_data;
|
||||||
struct alua_port_group *pg;
|
struct alua_port_group *pg;
|
||||||
|
|
||||||
switch (sense_hdr->sense_key) {
|
|
||||||
case NOT_READY:
|
|
||||||
if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) {
|
|
||||||
/*
|
|
||||||
* LUN Not Accessible - ALUA state transition
|
|
||||||
*/
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
pg = rcu_dereference(h->pg);
|
pg = rcu_dereference(h->pg);
|
||||||
if (pg)
|
if (pg)
|
||||||
pg->state = SCSI_ACCESS_STATE_TRANSITIONING;
|
pg->state = SCSI_ACCESS_STATE_TRANSITIONING;
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
alua_check(sdev, false);
|
alua_check(sdev, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
static enum scsi_disposition alua_check_sense(struct scsi_device *sdev,
|
||||||
|
struct scsi_sense_hdr *sense_hdr)
|
||||||
|
{
|
||||||
|
switch (sense_hdr->sense_key) {
|
||||||
|
case NOT_READY:
|
||||||
|
if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) {
|
||||||
|
/*
|
||||||
|
* LUN Not Accessible - ALUA state transition
|
||||||
|
*/
|
||||||
|
alua_handle_state_transition(sdev);
|
||||||
return NEEDS_RETRY;
|
return NEEDS_RETRY;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case UNIT_ATTENTION:
|
case UNIT_ATTENTION:
|
||||||
|
if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) {
|
||||||
|
/*
|
||||||
|
* LUN Not Accessible - ALUA state transition
|
||||||
|
*/
|
||||||
|
alua_handle_state_transition(sdev);
|
||||||
|
return NEEDS_RETRY;
|
||||||
|
}
|
||||||
if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) {
|
if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) {
|
||||||
/*
|
/*
|
||||||
* Power On, Reset, or Bus Device Reset.
|
* Power On, Reset, or Bus Device Reset.
|
||||||
|
@ -494,7 +506,8 @@ static int alua_tur(struct scsi_device *sdev)
|
||||||
|
|
||||||
retval = scsi_test_unit_ready(sdev, ALUA_FAILOVER_TIMEOUT * HZ,
|
retval = scsi_test_unit_ready(sdev, ALUA_FAILOVER_TIMEOUT * HZ,
|
||||||
ALUA_FAILOVER_RETRIES, &sense_hdr);
|
ALUA_FAILOVER_RETRIES, &sense_hdr);
|
||||||
if (sense_hdr.sense_key == NOT_READY &&
|
if ((sense_hdr.sense_key == NOT_READY ||
|
||||||
|
sense_hdr.sense_key == UNIT_ATTENTION) &&
|
||||||
sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a)
|
sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a)
|
||||||
return SCSI_DH_RETRY;
|
return SCSI_DH_RETRY;
|
||||||
else if (retval)
|
else if (retval)
|
||||||
|
|
Загрузка…
Ссылка в новой задаче