IB/iser: Add more RX CQs to scale out processing of SCSI responses

RX/TX CQs will now be selected from a per HCA pool.  For the RX flow
this has the effect of using different interrupt vectors when using
low level drivers (such as mlx4) that map the "vector" param provided
by the ULP on CQ creation to a dedicated IRQ/MSI-X vector.  This
allows the RX flow processing of IO responses to be distributed across
multiple CPUs.

QPs (--> iSER sessions) are assigned to CQs in round robin order using
the CQ with the minimum number of sessions attached to it.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Alex Tabachnik <alext@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
This commit is contained in:
Alex Tabachnik 2012-09-23 15:17:44 +00:00 коммит произвёл Roland Dreier
Родитель 7a9a2970b5
Коммит 5a33a66949
2 изменённых файлов: 103 добавлений и 44 удалений

Просмотреть файл

@ -177,6 +177,7 @@ struct iser_data_buf {
/* fwd declarations */
struct iser_device;
struct iser_cq_desc;
struct iscsi_iser_conn;
struct iscsi_iser_task;
struct iscsi_endpoint;
@ -226,16 +227,21 @@ struct iser_rx_desc {
char pad[ISER_RX_PAD_SIZE];
} __attribute__((packed));
#define ISER_MAX_CQ 4
struct iser_device {
struct ib_device *ib_device;
struct ib_pd *pd;
struct ib_cq *rx_cq;
struct ib_cq *tx_cq;
struct ib_cq *rx_cq[ISER_MAX_CQ];
struct ib_cq *tx_cq[ISER_MAX_CQ];
struct ib_mr *mr;
struct tasklet_struct cq_tasklet;
struct tasklet_struct cq_tasklet[ISER_MAX_CQ];
struct ib_event_handler event_handler;
struct list_head ig_list; /* entry in ig devices list */
int refcount;
int cq_active_qps[ISER_MAX_CQ];
int cqs_used;
struct iser_cq_desc *cq_desc;
};
struct iser_conn {
@ -287,6 +293,11 @@ struct iser_page_vec {
int data_size;
};
struct iser_cq_desc {
struct iser_device *device;
int cq_index;
};
struct iser_global {
struct mutex device_list_mutex;/* */
struct list_head device_list; /* all iSER devices */

Просмотреть файл

@ -70,32 +70,50 @@ static void iser_event_handler(struct ib_event_handler *handler,
*/
static int iser_create_device_ib_res(struct iser_device *device)
{
int i, j;
struct iser_cq_desc *cq_desc;
device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors);
iser_err("using %d CQs, device %s supports %d vectors\n", device->cqs_used,
device->ib_device->name, device->ib_device->num_comp_vectors);
device->cq_desc = kmalloc(sizeof(struct iser_cq_desc) * device->cqs_used,
GFP_KERNEL);
if (device->cq_desc == NULL)
goto cq_desc_err;
cq_desc = device->cq_desc;
device->pd = ib_alloc_pd(device->ib_device);
if (IS_ERR(device->pd))
goto pd_err;
device->rx_cq = ib_create_cq(device->ib_device,
iser_cq_callback,
iser_cq_event_callback,
(void *)device,
ISER_MAX_RX_CQ_LEN, 0);
if (IS_ERR(device->rx_cq))
goto rx_cq_err;
for (i = 0; i < device->cqs_used; i++) {
cq_desc[i].device = device;
cq_desc[i].cq_index = i;
device->tx_cq = ib_create_cq(device->ib_device,
NULL, iser_cq_event_callback,
(void *)device,
ISER_MAX_TX_CQ_LEN, 0);
device->rx_cq[i] = ib_create_cq(device->ib_device,
iser_cq_callback,
iser_cq_event_callback,
(void *)&cq_desc[i],
ISER_MAX_RX_CQ_LEN, i);
if (IS_ERR(device->rx_cq[i]))
goto cq_err;
if (IS_ERR(device->tx_cq))
goto tx_cq_err;
device->tx_cq[i] = ib_create_cq(device->ib_device,
NULL, iser_cq_event_callback,
(void *)&cq_desc[i],
ISER_MAX_TX_CQ_LEN, i);
if (ib_req_notify_cq(device->rx_cq, IB_CQ_NEXT_COMP))
goto cq_arm_err;
if (IS_ERR(device->tx_cq[i]))
goto cq_err;
tasklet_init(&device->cq_tasklet,
iser_cq_tasklet_fn,
(unsigned long)device);
if (ib_req_notify_cq(device->rx_cq[i], IB_CQ_NEXT_COMP))
goto cq_err;
tasklet_init(&device->cq_tasklet[i],
iser_cq_tasklet_fn,
(unsigned long)&cq_desc[i]);
}
device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_WRITE |
@ -113,14 +131,19 @@ static int iser_create_device_ib_res(struct iser_device *device)
handler_err:
ib_dereg_mr(device->mr);
dma_mr_err:
tasklet_kill(&device->cq_tasklet);
cq_arm_err:
ib_destroy_cq(device->tx_cq);
tx_cq_err:
ib_destroy_cq(device->rx_cq);
rx_cq_err:
for (j = 0; j < device->cqs_used; j++)
tasklet_kill(&device->cq_tasklet[j]);
cq_err:
for (j = 0; j < i; j++) {
if (device->tx_cq[j])
ib_destroy_cq(device->tx_cq[j]);
if (device->rx_cq[j])
ib_destroy_cq(device->rx_cq[j]);
}
ib_dealloc_pd(device->pd);
pd_err:
kfree(device->cq_desc);
cq_desc_err:
iser_err("failed to allocate an IB resource\n");
return -1;
}
@ -131,18 +154,24 @@ pd_err:
*/
static void iser_free_device_ib_res(struct iser_device *device)
{
int i;
BUG_ON(device->mr == NULL);
tasklet_kill(&device->cq_tasklet);
for (i = 0; i < device->cqs_used; i++) {
tasklet_kill(&device->cq_tasklet[i]);
(void)ib_destroy_cq(device->tx_cq[i]);
(void)ib_destroy_cq(device->rx_cq[i]);
device->tx_cq[i] = NULL;
device->rx_cq[i] = NULL;
}
(void)ib_unregister_event_handler(&device->event_handler);
(void)ib_dereg_mr(device->mr);
(void)ib_destroy_cq(device->tx_cq);
(void)ib_destroy_cq(device->rx_cq);
(void)ib_dealloc_pd(device->pd);
kfree(device->cq_desc);
device->mr = NULL;
device->tx_cq = NULL;
device->rx_cq = NULL;
device->pd = NULL;
}
@ -157,6 +186,7 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
struct ib_qp_init_attr init_attr;
int req_err, resp_err, ret = -ENOMEM;
struct ib_fmr_pool_param params;
int index, min_index = 0;
BUG_ON(ib_conn->device == NULL);
@ -220,10 +250,20 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
memset(&init_attr, 0, sizeof init_attr);
mutex_lock(&ig.connlist_mutex);
/* select the CQ with the minimal number of usages */
for (index = 0; index < device->cqs_used; index++)
if (device->cq_active_qps[index] <
device->cq_active_qps[min_index])
min_index = index;
device->cq_active_qps[min_index]++;
mutex_unlock(&ig.connlist_mutex);
iser_err("cq index %d used for ib_conn %p\n", min_index, ib_conn);
init_attr.event_handler = iser_qp_event_callback;
init_attr.qp_context = (void *)ib_conn;
init_attr.send_cq = device->tx_cq;
init_attr.recv_cq = device->rx_cq;
init_attr.send_cq = device->tx_cq[min_index];
init_attr.recv_cq = device->rx_cq[min_index];
init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS;
init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
init_attr.cap.max_send_sge = 2;
@ -252,6 +292,7 @@ out_err:
*/
static int iser_free_ib_conn_res(struct iser_conn *ib_conn, int can_destroy_id)
{
int cq_index;
BUG_ON(ib_conn == NULL);
iser_err("freeing conn %p cma_id %p fmr pool %p qp %p\n",
@ -262,9 +303,12 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn, int can_destroy_id)
if (ib_conn->fmr_pool != NULL)
ib_destroy_fmr_pool(ib_conn->fmr_pool);
if (ib_conn->qp != NULL)
rdma_destroy_qp(ib_conn->cma_id);
if (ib_conn->qp != NULL) {
cq_index = ((struct iser_cq_desc *)ib_conn->qp->recv_cq->cq_context)->cq_index;
ib_conn->device->cq_active_qps[cq_index]--;
rdma_destroy_qp(ib_conn->cma_id);
}
/* if cma handler context, the caller acts s.t the cma destroy the id */
if (ib_conn->cma_id != NULL && can_destroy_id)
rdma_destroy_id(ib_conn->cma_id);
@ -791,9 +835,9 @@ static void iser_handle_comp_error(struct iser_tx_desc *desc,
}
}
static int iser_drain_tx_cq(struct iser_device *device)
static int iser_drain_tx_cq(struct iser_device *device, int cq_index)
{
struct ib_cq *cq = device->tx_cq;
struct ib_cq *cq = device->tx_cq[cq_index];
struct ib_wc wc;
struct iser_tx_desc *tx_desc;
struct iser_conn *ib_conn;
@ -822,8 +866,10 @@ static int iser_drain_tx_cq(struct iser_device *device)
static void iser_cq_tasklet_fn(unsigned long data)
{
struct iser_device *device = (struct iser_device *)data;
struct ib_cq *cq = device->rx_cq;
struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)data;
struct iser_device *device = cq_desc->device;
int cq_index = cq_desc->cq_index;
struct ib_cq *cq = device->rx_cq[cq_index];
struct ib_wc wc;
struct iser_rx_desc *desc;
unsigned long xfer_len;
@ -851,19 +897,21 @@ static void iser_cq_tasklet_fn(unsigned long data)
}
completed_rx++;
if (!(completed_rx & 63))
completed_tx += iser_drain_tx_cq(device);
completed_tx += iser_drain_tx_cq(device, cq_index);
}
/* #warning "it is assumed here that arming CQ only once its empty" *
* " would not cause interrupts to be missed" */
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
completed_tx += iser_drain_tx_cq(device);
completed_tx += iser_drain_tx_cq(device, cq_index);
iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx);
}
static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
{
struct iser_device *device = (struct iser_device *)cq_context;
struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)cq_context;
struct iser_device *device = cq_desc->device;
int cq_index = cq_desc->cq_index;
tasklet_schedule(&device->cq_tasklet);
tasklet_schedule(&device->cq_tasklet[cq_index]);
}