target: Fix ALUA transition state race between multiple initiators

Multiple threads could be writing to alua_access_state at
the same time, or there could be multiple STPGs in flight
(different initiators sending them or one initiator sending
them to different ports), or a combo of both and the
core_alua_do_transition_tg_pt calls will race with each other.

Because from the last patches we no longer delay running
core_alua_do_transition_tg_pt_work, there does not seem to be
any point in running that in a workqueue. And, we always
wait for it to complete one way or another, so we can sleep
in this code path. So, this patch made over target-pending just adds a
mutex and does the work core_alua_do_transition_tg_pt_work was doing in
core_alua_do_transition_tg_pt.

There is also no need to use an atomic for the
tg_pt_gp_alua_access_state. In core_alua_do_transition_tg_pt we will
test and set it under the transition mutex. And, it is a int/32 bits
so in the other places where it is read, we will never see it partially
updated.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
This commit is contained in:
Mike Christie 2017-03-29 00:19:24 -05:00 коммит произвёл Nicholas Bellinger
Родитель 7a56dc8888
Коммит d19c4643a5
3 изменённых файлов: 60 добавлений и 104 удалений

Просмотреть файл

@ -197,8 +197,7 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd)
/*
* Set the ASYMMETRIC ACCESS State
*/
buf[off++] |= (atomic_read(
&tg_pt_gp->tg_pt_gp_alua_access_state) & 0xff);
buf[off++] |= tg_pt_gp->tg_pt_gp_alua_access_state & 0xff;
/*
* Set supported ASYMMETRIC ACCESS State bits
*/
@ -710,7 +709,7 @@ target_alua_state_check(struct se_cmd *cmd)
spin_lock(&lun->lun_tg_pt_gp_lock);
tg_pt_gp = lun->lun_tg_pt_gp;
out_alua_state = atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state);
out_alua_state = tg_pt_gp->tg_pt_gp_alua_access_state;
nonop_delay_msecs = tg_pt_gp->tg_pt_gp_nonop_delay_msecs;
// XXX: keeps using tg_pt_gp witout reference after unlock
@ -911,7 +910,7 @@ static int core_alua_write_tpg_metadata(
}
/*
* Called with tg_pt_gp->tg_pt_gp_md_mutex held
* Called with tg_pt_gp->tg_pt_gp_transition_mutex held
*/
static int core_alua_update_tpg_primary_metadata(
struct t10_alua_tg_pt_gp *tg_pt_gp)
@ -934,7 +933,7 @@ static int core_alua_update_tpg_primary_metadata(
"alua_access_state=0x%02x\n"
"alua_access_status=0x%02x\n",
tg_pt_gp->tg_pt_gp_id,
tg_pt_gp->tg_pt_gp_alua_pending_state,
tg_pt_gp->tg_pt_gp_alua_access_state,
tg_pt_gp->tg_pt_gp_alua_access_status);
snprintf(path, ALUA_METADATA_PATH_LEN,
@ -1013,13 +1012,52 @@ static void core_alua_queue_state_change_ua(struct t10_alua_tg_pt_gp *tg_pt_gp)
spin_unlock(&tg_pt_gp->tg_pt_gp_lock);
}
static void core_alua_do_transition_tg_pt_work(struct work_struct *work)
static int core_alua_do_transition_tg_pt(
struct t10_alua_tg_pt_gp *tg_pt_gp,
int new_state,
int explicit)
{
struct t10_alua_tg_pt_gp *tg_pt_gp = container_of(work,
struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work);
struct se_device *dev = tg_pt_gp->tg_pt_gp_dev;
bool explicit = (tg_pt_gp->tg_pt_gp_alua_access_status ==
ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG);
int prev_state;
mutex_lock(&tg_pt_gp->tg_pt_gp_transition_mutex);
/* Nothing to be done here */
if (tg_pt_gp->tg_pt_gp_alua_access_state == new_state) {
mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
return 0;
}
if (explicit && new_state == ALUA_ACCESS_STATE_TRANSITION) {
mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
return -EAGAIN;
}
/*
* Save the old primary ALUA access state, and set the current state
* to ALUA_ACCESS_STATE_TRANSITION.
*/
prev_state = tg_pt_gp->tg_pt_gp_alua_access_state;
tg_pt_gp->tg_pt_gp_alua_access_state = ALUA_ACCESS_STATE_TRANSITION;
tg_pt_gp->tg_pt_gp_alua_access_status = (explicit) ?
ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG :
ALUA_STATUS_ALTERED_BY_IMPLICIT_ALUA;
core_alua_queue_state_change_ua(tg_pt_gp);
if (new_state == ALUA_ACCESS_STATE_TRANSITION) {
mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
return 0;
}
/*
* Check for the optional ALUA primary state transition delay
*/
if (tg_pt_gp->tg_pt_gp_trans_delay_msecs != 0)
msleep_interruptible(tg_pt_gp->tg_pt_gp_trans_delay_msecs);
/*
* Set the current primary ALUA access state to the requested new state
*/
tg_pt_gp->tg_pt_gp_alua_access_state = new_state;
/*
* Update the ALUA metadata buf that has been allocated in
@ -1034,93 +1072,19 @@ static void core_alua_do_transition_tg_pt_work(struct work_struct *work)
* struct file does NOT affect the actual ALUA transition.
*/
if (tg_pt_gp->tg_pt_gp_write_metadata) {
mutex_lock(&tg_pt_gp->tg_pt_gp_md_mutex);
core_alua_update_tpg_primary_metadata(tg_pt_gp);
mutex_unlock(&tg_pt_gp->tg_pt_gp_md_mutex);
}
/*
* Set the current primary ALUA access state to the requested new state
*/
atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state,
tg_pt_gp->tg_pt_gp_alua_pending_state);
pr_debug("Successful %s ALUA transition TG PT Group: %s ID: %hu"
" from primary access state %s to %s\n", (explicit) ? "explicit" :
"implicit", config_item_name(&tg_pt_gp->tg_pt_gp_group.cg_item),
tg_pt_gp->tg_pt_gp_id,
core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_previous_state),
core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_pending_state));
core_alua_dump_state(prev_state),
core_alua_dump_state(new_state));
core_alua_queue_state_change_ua(tg_pt_gp);
spin_lock(&dev->t10_alua.tg_pt_gps_lock);
atomic_dec(&tg_pt_gp->tg_pt_gp_ref_cnt);
spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
if (tg_pt_gp->tg_pt_gp_transition_complete)
complete(tg_pt_gp->tg_pt_gp_transition_complete);
}
static int core_alua_do_transition_tg_pt(
struct t10_alua_tg_pt_gp *tg_pt_gp,
int new_state,
int explicit)
{
struct se_device *dev = tg_pt_gp->tg_pt_gp_dev;
DECLARE_COMPLETION_ONSTACK(wait);
/* Nothing to be done here */
if (atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == new_state)
return 0;
if (explicit && new_state == ALUA_ACCESS_STATE_TRANSITION)
return -EAGAIN;
/*
* Flush any pending transitions
*/
if (!explicit)
flush_work(&tg_pt_gp->tg_pt_gp_transition_work);
/*
* Save the old primary ALUA access state, and set the current state
* to ALUA_ACCESS_STATE_TRANSITION.
*/
atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state,
ALUA_ACCESS_STATE_TRANSITION);
tg_pt_gp->tg_pt_gp_alua_access_status = (explicit) ?
ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG :
ALUA_STATUS_ALTERED_BY_IMPLICIT_ALUA;
core_alua_queue_state_change_ua(tg_pt_gp);
if (new_state == ALUA_ACCESS_STATE_TRANSITION)
return 0;
tg_pt_gp->tg_pt_gp_alua_previous_state =
atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state);
tg_pt_gp->tg_pt_gp_alua_pending_state = new_state;
/*
* Check for the optional ALUA primary state transition delay
*/
if (tg_pt_gp->tg_pt_gp_trans_delay_msecs != 0)
msleep_interruptible(tg_pt_gp->tg_pt_gp_trans_delay_msecs);
/*
* Take a reference for workqueue item
*/
spin_lock(&dev->t10_alua.tg_pt_gps_lock);
atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt);
spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
schedule_work(&tg_pt_gp->tg_pt_gp_transition_work);
if (explicit) {
tg_pt_gp->tg_pt_gp_transition_complete = &wait;
wait_for_completion(&wait);
tg_pt_gp->tg_pt_gp_transition_complete = NULL;
}
mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
return 0;
}
@ -1685,14 +1649,12 @@ struct t10_alua_tg_pt_gp *core_alua_allocate_tg_pt_gp(struct se_device *dev,
}
INIT_LIST_HEAD(&tg_pt_gp->tg_pt_gp_list);
INIT_LIST_HEAD(&tg_pt_gp->tg_pt_gp_lun_list);
mutex_init(&tg_pt_gp->tg_pt_gp_md_mutex);
mutex_init(&tg_pt_gp->tg_pt_gp_transition_mutex);
spin_lock_init(&tg_pt_gp->tg_pt_gp_lock);
atomic_set(&tg_pt_gp->tg_pt_gp_ref_cnt, 0);
INIT_WORK(&tg_pt_gp->tg_pt_gp_transition_work,
core_alua_do_transition_tg_pt_work);
tg_pt_gp->tg_pt_gp_dev = dev;
atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state,
ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED);
tg_pt_gp->tg_pt_gp_alua_access_state =
ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED;
/*
* Enable both explicit and implicit ALUA support by default
*/
@ -1797,8 +1759,6 @@ void core_alua_free_tg_pt_gp(
dev->t10_alua.alua_tg_pt_gps_counter--;
spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
flush_work(&tg_pt_gp->tg_pt_gp_transition_work);
/*
* Allow a struct t10_alua_tg_pt_gp_member * referenced by
* core_alua_get_tg_pt_gp_by_name() in
@ -1938,8 +1898,8 @@ ssize_t core_alua_show_tg_pt_gp_info(struct se_lun *lun, char *page)
"Primary Access Status: %s\nTG Port Secondary Access"
" State: %s\nTG Port Secondary Access Status: %s\n",
config_item_name(tg_pt_ci), tg_pt_gp->tg_pt_gp_id,
core_alua_dump_state(atomic_read(
&tg_pt_gp->tg_pt_gp_alua_access_state)),
core_alua_dump_state(
tg_pt_gp->tg_pt_gp_alua_access_state),
core_alua_dump_status(
tg_pt_gp->tg_pt_gp_alua_access_status),
atomic_read(&lun->lun_tg_pt_secondary_offline) ?

Просмотреть файл

@ -2392,7 +2392,7 @@ static ssize_t target_tg_pt_gp_alua_access_state_show(struct config_item *item,
char *page)
{
return sprintf(page, "%d\n",
atomic_read(&to_tg_pt_gp(item)->tg_pt_gp_alua_access_state));
to_tg_pt_gp(item)->tg_pt_gp_alua_access_state);
}
static ssize_t target_tg_pt_gp_alua_access_state_store(struct config_item *item,

Просмотреть файл

@ -280,8 +280,6 @@ struct t10_alua_tg_pt_gp {
u16 tg_pt_gp_id;
int tg_pt_gp_valid_id;
int tg_pt_gp_alua_supported_states;
int tg_pt_gp_alua_pending_state;
int tg_pt_gp_alua_previous_state;
int tg_pt_gp_alua_access_status;
int tg_pt_gp_alua_access_type;
int tg_pt_gp_nonop_delay_msecs;
@ -290,18 +288,16 @@ struct t10_alua_tg_pt_gp {
int tg_pt_gp_pref;
int tg_pt_gp_write_metadata;
u32 tg_pt_gp_members;
atomic_t tg_pt_gp_alua_access_state;
int tg_pt_gp_alua_access_state;
atomic_t tg_pt_gp_ref_cnt;
spinlock_t tg_pt_gp_lock;
struct mutex tg_pt_gp_md_mutex;
struct mutex tg_pt_gp_transition_mutex;
struct se_device *tg_pt_gp_dev;
struct config_group tg_pt_gp_group;
struct list_head tg_pt_gp_list;
struct list_head tg_pt_gp_lun_list;
struct se_lun *tg_pt_gp_alua_lun;
struct se_node_acl *tg_pt_gp_alua_nacl;
struct work_struct tg_pt_gp_transition_work;
struct completion *tg_pt_gp_transition_complete;
};
struct t10_vpd {