net/rds: Wait for the FRMR_IS_FREE (or FRMR_IS_STALE) transition after posting IB_WR_LOCAL_INV
In order to: 1) avoid a silly bouncing between "clean_list" and "drop_list" triggered by function "rds_ib_reg_frmr" as it is releases frmr regions whose state is not "FRMR_IS_FREE" right away. 2) prevent an invalid access error in a race from a pending "IB_WR_LOCAL_INV" operation with a teardown ("dma_unmap_sg", "put_page") and de-registration ("ib_dereg_mr") of the corresponding memory region. Signed-off-by: Gerd Rausch <gerd.rausch@oracle.com> Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Родитель
c9467447fc
Коммит
5f33141d2f
|
@ -76,6 +76,7 @@ static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev,
|
||||||
|
|
||||||
frmr->fr_state = FRMR_IS_FREE;
|
frmr->fr_state = FRMR_IS_FREE;
|
||||||
init_waitqueue_head(&frmr->fr_inv_done);
|
init_waitqueue_head(&frmr->fr_inv_done);
|
||||||
|
init_waitqueue_head(&frmr->fr_reg_done);
|
||||||
return ibmr;
|
return ibmr;
|
||||||
|
|
||||||
out_no_cigar:
|
out_no_cigar:
|
||||||
|
@ -124,6 +125,7 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
|
||||||
*/
|
*/
|
||||||
ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++);
|
ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++);
|
||||||
frmr->fr_state = FRMR_IS_INUSE;
|
frmr->fr_state = FRMR_IS_INUSE;
|
||||||
|
frmr->fr_reg = true;
|
||||||
|
|
||||||
memset(®_wr, 0, sizeof(reg_wr));
|
memset(®_wr, 0, sizeof(reg_wr));
|
||||||
reg_wr.wr.wr_id = (unsigned long)(void *)ibmr;
|
reg_wr.wr.wr_id = (unsigned long)(void *)ibmr;
|
||||||
|
@ -144,7 +146,17 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
|
||||||
if (printk_ratelimit())
|
if (printk_ratelimit())
|
||||||
pr_warn("RDS/IB: %s returned error(%d)\n",
|
pr_warn("RDS/IB: %s returned error(%d)\n",
|
||||||
__func__, ret);
|
__func__, ret);
|
||||||
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Wait for the registration to complete in order to prevent an invalid
|
||||||
|
* access error resulting from a race between the memory region already
|
||||||
|
* being accessed while registration is still pending.
|
||||||
|
*/
|
||||||
|
wait_event(frmr->fr_reg_done, !frmr->fr_reg);
|
||||||
|
|
||||||
|
out:
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -262,6 +274,19 @@ static int rds_ib_post_inv(struct rds_ib_mr *ibmr)
|
||||||
pr_err("RDS/IB: %s returned error(%d)\n", __func__, ret);
|
pr_err("RDS/IB: %s returned error(%d)\n", __func__, ret);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Wait for the FRMR_IS_FREE (or FRMR_IS_STALE) transition in order to
|
||||||
|
* 1) avoid a silly bouncing between "clean_list" and "drop_list"
|
||||||
|
* triggered by function "rds_ib_reg_frmr" as it is releases frmr
|
||||||
|
* regions whose state is not "FRMR_IS_FREE" right away.
|
||||||
|
* 2) prevents an invalid access error in a race
|
||||||
|
* from a pending "IB_WR_LOCAL_INV" operation
|
||||||
|
* with a teardown ("dma_unmap_sg", "put_page")
|
||||||
|
* and de-registration ("ib_dereg_mr") of the corresponding
|
||||||
|
* memory region.
|
||||||
|
*/
|
||||||
|
wait_event(frmr->fr_inv_done, frmr->fr_state != FRMR_IS_INUSE);
|
||||||
|
|
||||||
out:
|
out:
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -289,6 +314,11 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
|
||||||
wake_up(&frmr->fr_inv_done);
|
wake_up(&frmr->fr_inv_done);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (frmr->fr_reg) {
|
||||||
|
frmr->fr_reg = false;
|
||||||
|
wake_up(&frmr->fr_reg_done);
|
||||||
|
}
|
||||||
|
|
||||||
atomic_inc(&ic->i_fastreg_wrs);
|
atomic_inc(&ic->i_fastreg_wrs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -297,14 +327,18 @@ void rds_ib_unreg_frmr(struct list_head *list, unsigned int *nfreed,
|
||||||
{
|
{
|
||||||
struct rds_ib_mr *ibmr, *next;
|
struct rds_ib_mr *ibmr, *next;
|
||||||
struct rds_ib_frmr *frmr;
|
struct rds_ib_frmr *frmr;
|
||||||
int ret = 0;
|
int ret = 0, ret2;
|
||||||
unsigned int freed = *nfreed;
|
unsigned int freed = *nfreed;
|
||||||
|
|
||||||
/* String all ib_mr's onto one list and hand them to ib_unmap_fmr */
|
/* String all ib_mr's onto one list and hand them to ib_unmap_fmr */
|
||||||
list_for_each_entry(ibmr, list, unmap_list) {
|
list_for_each_entry(ibmr, list, unmap_list) {
|
||||||
if (ibmr->sg_dma_len)
|
if (ibmr->sg_dma_len) {
|
||||||
ret |= rds_ib_post_inv(ibmr);
|
ret2 = rds_ib_post_inv(ibmr);
|
||||||
|
if (ret2 && !ret)
|
||||||
|
ret = ret2;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ret)
|
if (ret)
|
||||||
pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, ret);
|
pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, ret);
|
||||||
|
|
||||||
|
@ -347,31 +381,8 @@ struct rds_ib_mr *rds_ib_reg_frmr(struct rds_ib_device *rds_ibdev,
|
||||||
}
|
}
|
||||||
|
|
||||||
do {
|
do {
|
||||||
if (ibmr) {
|
if (ibmr)
|
||||||
/* Memory regions make it onto the "clean_list" via
|
|
||||||
* "rds_ib_flush_mr_pool", after the memory region has
|
|
||||||
* been posted for invalidation via "rds_ib_post_inv".
|
|
||||||
*
|
|
||||||
* At that point in time, "fr_state" may still be
|
|
||||||
* in state "FRMR_IS_INUSE", since the only place where
|
|
||||||
* "fr_state" transitions to "FRMR_IS_FREE" is in
|
|
||||||
* is in "rds_ib_mr_cqe_handler", which is
|
|
||||||
* triggered by a tasklet.
|
|
||||||
*
|
|
||||||
* So we wait for "fr_inv_done" to trigger
|
|
||||||
* and only put memory regions onto the drop_list
|
|
||||||
* that failed (i.e. not marked "FRMR_IS_FREE").
|
|
||||||
*
|
|
||||||
* This avoids the problem of memory-regions bouncing
|
|
||||||
* between "clean_list" and "drop_list" before they
|
|
||||||
* even have a chance to be properly invalidated.
|
|
||||||
*/
|
|
||||||
frmr = &ibmr->u.frmr;
|
|
||||||
wait_event(frmr->fr_inv_done, frmr->fr_state != FRMR_IS_INUSE);
|
|
||||||
if (frmr->fr_state == FRMR_IS_FREE)
|
|
||||||
break;
|
|
||||||
rds_ib_free_frmr(ibmr, true);
|
rds_ib_free_frmr(ibmr, true);
|
||||||
}
|
|
||||||
ibmr = rds_ib_alloc_frmr(rds_ibdev, nents);
|
ibmr = rds_ib_alloc_frmr(rds_ibdev, nents);
|
||||||
if (IS_ERR(ibmr))
|
if (IS_ERR(ibmr))
|
||||||
return ibmr;
|
return ibmr;
|
||||||
|
|
|
@ -58,6 +58,8 @@ struct rds_ib_frmr {
|
||||||
enum rds_ib_fr_state fr_state;
|
enum rds_ib_fr_state fr_state;
|
||||||
bool fr_inv;
|
bool fr_inv;
|
||||||
wait_queue_head_t fr_inv_done;
|
wait_queue_head_t fr_inv_done;
|
||||||
|
bool fr_reg;
|
||||||
|
wait_queue_head_t fr_reg_done;
|
||||||
struct ib_send_wr fr_wr;
|
struct ib_send_wr fr_wr;
|
||||||
unsigned int dma_npages;
|
unsigned int dma_npages;
|
||||||
unsigned int sg_byte_len;
|
unsigned int sg_byte_len;
|
||||||
|
|
Загрузка…
Ссылка в новой задаче