
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma fixes from Jason Gunthorpe:
 "Several driver bug fixes posted in the last several weeks

   - Several bug fixes for the hfi1 driver 'TID RDMA' functionality
     merged into 5.1. Since TID RDMA is on by default, these all seem
     to be regressions.

   - Wrong software permission checks on memory in mlx5

   - Memory leak in vmw_pvrdma during driver remove

   - Several bug fixes for hns driver features merged into 5.1"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  IB/hfi1: Do not flush send queue in the TID RDMA second leg
  RDMA/hns: Bugfix for SCC hem free
  RDMA/hns: Fix bug that caused srq creation to fail
  RDMA/vmw_pvrdma: Fix memory leak on pvrdma_pci_remove
  IB/mlx5: Reset access mask when looping inside page fault handler
  IB/hfi1: Fix the allocation of RSM table
  IB/hfi1: Eliminate opcode tests on mr deref
  IB/hfi1: Clear the IOWAIT pending bits when QP is put into error state
  IB/hfi1: Failed to drain send queue when QP is put into error state
This commit is contained in:
Linus Torvalds 2019-04-10 09:39:04 -10:00
Parent ed79cc8730 d737b25b1a
Commit 582549e3fb
9 changed files with 42 additions and 41 deletions

View file

@@ -13232,7 +13232,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
 	int total_contexts;
 	int ret;
 	unsigned ngroups;
-	int qos_rmt_count;
+	int rmt_count;
 	int user_rmt_reduced;
 	u32 n_usr_ctxts;
 	u32 send_contexts = chip_send_contexts(dd);
@@ -13294,10 +13294,20 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
 		n_usr_ctxts = rcv_contexts - total_contexts;
 	}
 
 	/* each user context requires an entry in the RMT */
-	qos_rmt_count = qos_rmt_entries(dd, NULL, NULL);
-	if (qos_rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) {
-		user_rmt_reduced = NUM_MAP_ENTRIES - qos_rmt_count;
+	/*
+	 * The RMT entries are currently allocated as shown below:
+	 * 1. QOS (0 to 128 entries);
+	 * 2. FECN for PSM (num_user_contexts + num_vnic_contexts);
+	 * 3. VNIC (num_vnic_contexts).
+	 * It should be noted that PSM FECN oversubscribe num_vnic_contexts
+	 * entries of RMT because both VNIC and PSM could allocate any receive
+	 * context between dd->first_dyn_alloc_text and dd->num_rcv_contexts,
+	 * and PSM FECN must reserve an RMT entry for each possible PSM receive
+	 * context.
+	 */
+	rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_vnic_contexts * 2);
+	if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) {
+		user_rmt_reduced = NUM_MAP_ENTRIES - rmt_count;
 		dd_dev_err(dd,
 			   "RMT size is reducing the number of user receive contexts from %u to %d\n",
 			   n_usr_ctxts,
@@ -14285,9 +14295,11 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd,
 	u64 reg;
 	int i, idx, regoff, regidx;
 	u8 offset;
+	u32 total_cnt;
 
 	/* there needs to be enough room in the map table */
-	if (rmt->used + dd->num_user_contexts >= NUM_MAP_ENTRIES) {
+	total_cnt = dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt;
+	if (rmt->used + total_cnt >= NUM_MAP_ENTRIES) {
 		dd_dev_err(dd, "User FECN handling disabled - too many user contexts allocated\n");
 		return;
 	}
@@ -14341,7 +14353,7 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd,
 	/* add rule 1 */
 	add_rsm_rule(dd, RSM_INS_FECN, &rrd);
 
-	rmt->used += dd->num_user_contexts;
+	rmt->used += total_cnt;
 }
 
 /* Initialize RSM for VNIC */
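
The arithmetic behind this fix is easy to check in isolation: the old code budgeted only the QOS entries, so the VNIC and PSM-FECN entries could silently overflow the map table. A standalone sketch of the corrected capacity check follows; the hfi1 RSM map table has 256 entries (NUM_MAP_ENTRIES), which the sketch hard-codes, and every other value is a made-up stand-in rather than the driver's real API:

/* rmt_budget.c - sketch of the corrected RMT capacity check.
 * Only the arithmetic mirrors the patch; all counts are hypothetical. */
#include <stdio.h>

#define NUM_MAP_ENTRIES 256	/* size of the hfi1 RSM map table */

int main(void)
{
	unsigned qos_entries = 128;	/* worst-case qos_rmt_entries() result */
	unsigned num_vnic_contexts = 8;	/* hypothetical */
	unsigned n_usr_ctxts = 130;	/* hypothetical request */

	/* VNIC entries plus the PSM FECN oversubscription (x2, per the
	 * comment added by the patch); the old code used QOS alone. */
	unsigned rmt_count = qos_entries + num_vnic_contexts * 2;

	if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) {
		unsigned user_rmt_reduced = NUM_MAP_ENTRIES - rmt_count;
		printf("reducing user receive contexts from %u to %u\n",
		       n_usr_ctxts, user_rmt_reduced);	/* 130 -> 112 */
	}
	return 0;
}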

View file

@@ -898,7 +898,9 @@ void notify_error_qp(struct rvt_qp *qp)
 		if (!list_empty(&priv->s_iowait.list) &&
 		    !(qp->s_flags & RVT_S_BUSY) &&
 		    !(priv->s_flags & RVT_S_BUSY)) {
-			qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
+			qp->s_flags &= ~HFI1_S_ANY_WAIT_IO;
+			iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
+			iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
 			list_del_init(&priv->s_iowait.list);
 			priv->s_iowait.lock = NULL;
 			rvt_put_qp(qp);
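
The point of the two added iowait_clear_flag() calls: clearing the QP-level wait flags alone is not enough, because the iowait structure keeps its own pending bits, and a QP left with IOWAIT_PENDING_IB or IOWAIT_PENDING_TID set would still look busy. A minimal sketch of the same "clear every layer" pattern, with hypothetical flag values rather than the driver's real definitions:

#include <stdio.h>

/* Hypothetical two-layer wait state, loosely modelled on qp->s_flags
 * plus the separate iowait pending bits. */
#define S_ANY_WAIT_IO	0x00f0u	/* made-up mask of QP wait flags */
#define PENDING_IB	(1u << 0)
#define PENDING_TID	(1u << 1)

struct fake_qp {
	unsigned s_flags;	/* QP-level flags */
	unsigned iow_pending;	/* iowait-level pending bits */
};

static void drain_error_qp(struct fake_qp *qp)
{
	qp->s_flags &= ~S_ANY_WAIT_IO;		/* the pre-fix code stopped here */
	qp->iow_pending &= ~PENDING_IB;		/* the fix also clears both */
	qp->iow_pending &= ~PENDING_TID;	/* iowait pending bits */
}

int main(void)
{
	struct fake_qp qp = { .s_flags = 0x00f0, .iow_pending = PENDING_TID };

	drain_error_qp(&qp);
	printf("s_flags=%#x pending=%#x\n", qp.s_flags, qp.iow_pending);
	return 0;
}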

View file

@@ -3088,7 +3088,7 @@ send_last:
 			update_ack_queue(qp, next);
 		}
 		e = &qp->s_ack_queue[qp->r_head_ack_queue];
-		if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
+		if (e->rdma_sge.mr) {
 			rvt_put_mr(e->rdma_sge.mr);
 			e->rdma_sge.mr = NULL;
 		}
@@ -3166,7 +3166,7 @@ send_last:
 			update_ack_queue(qp, next);
 		}
 		e = &qp->s_ack_queue[qp->r_head_ack_queue];
-		if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
+		if (e->rdma_sge.mr) {
 			rvt_put_mr(e->rdma_sge.mr);
 			e->rdma_sge.mr = NULL;
 		}
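
Dropping the opcode test makes the release conditional on the only thing that matters: whether the entry still holds an MR reference. The "release if held, then clear" pattern in isolation, where ref_put() and the structs are hypothetical stand-ins for rvt_put_mr() and the ack-queue entry:

#include <stdio.h>

struct ref { int refcount; };		/* any reference-counted object */

static void ref_put(struct ref *r)	/* stand-in for rvt_put_mr() */
{
	r->refcount--;
}

struct ack_entry { struct ref *mr; };

/* Release keyed only on whether a reference is held - not on which
 * opcode stashed it, which is the test the patch removes. */
static void ack_entry_release(struct ack_entry *e)
{
	if (e->mr) {
		ref_put(e->mr);
		e->mr = NULL;	/* prevent a double put */
	}
}

int main(void)
{
	struct ref mr = { .refcount = 1 };
	struct ack_entry e = { .mr = &mr };

	ack_entry_release(&e);
	ack_entry_release(&e);			/* second call is a no-op */
	printf("refcount=%d\n", mr.refcount);	/* 0, not -1 */
	return 0;
}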

View file

@@ -5017,24 +5017,14 @@ int hfi1_make_tid_rdma_pkt(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 	    make_tid_rdma_ack(qp, ohdr, ps))
 		return 1;
 
-	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
-		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
-			goto bail;
-		/* We are in the error state, flush the work request. */
-		if (qp->s_last == READ_ONCE(qp->s_head))
-			goto bail;
-		/* If DMAs are in progress, we can't flush immediately. */
-		if (iowait_sdma_pending(&priv->s_iowait)) {
-			qp->s_flags |= RVT_S_WAIT_DMA;
-			goto bail;
-		}
-		clear_ahg(qp);
-		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
-		hfi1_trdma_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
-					 IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
-		/* will get called again */
-		goto done_free_tx;
-	}
+	/*
+	 * Bail out if we can't send data.
+	 * Be reminded that this check must been done after the call to
+	 * make_tid_rdma_ack() because the responding QP could be in
+	 * RTR state where it can send TID RDMA ACK, not TID RDMA WRITE DATA.
+	 */
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK))
+		goto bail;
 
 	if (priv->s_flags & RVT_S_WAIT_ACK)
 		goto bail;
@@ -5144,11 +5134,6 @@ int hfi1_make_tid_rdma_pkt(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 	hfi1_make_ruc_header(qp, ohdr, (opcode << 24), bth1, bth2,
 			     middle, ps);
 	return 1;
-done_free_tx:
-	hfi1_put_txreq(ps->s_txreq);
-	ps->s_txreq = NULL;
-	return 1;
-
 bail:
 	hfi1_put_txreq(ps->s_txreq);
 bail_no_tx:
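
The subtlety the new comment calls out is ordering: the error-state check must come after make_tid_rdma_ack(), because a responder QP that can no longer send WRITE DATA may still owe a TID RDMA ACK. A schematic of that control flow, with hypothetical helpers rather than the driver's signatures:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the driver's state tests and actions. */
static bool ack_is_pending = true;
static bool can_send_data = false;	/* e.g. QP in error or RTR state */

static bool make_ack(void)
{
	if (ack_is_pending) {
		puts("sent TID RDMA ACK");
		ack_is_pending = false;
		return true;
	}
	return false;
}

static int make_tid_rdma_pkt(void)
{
	/* 1. ACKs first: they are legal even when data sends are not. */
	if (make_ack())
		return 1;

	/* 2. Only now bail if the QP cannot send WRITE DATA. Checking
	 * this first would starve the responder of its pending ACK. */
	if (!can_send_data)
		return 0;	/* bail */

	puts("sent TID RDMA WRITE DATA");
	return 1;
}

int main(void)
{
	make_tid_rdma_pkt();	/* ACK goes out despite !can_send_data */
	make_tid_rdma_pkt();	/* now bails */
	return 0;
}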

View file

@@ -792,6 +792,8 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
 		idx_offset = (obj & (table->num_obj - 1)) % obj_per_chunk;
 		dma_offset = offset = idx_offset * table->obj_size;
 	} else {
+		u32 seg_size = 64; /* 8 bytes per BA and 8 BA per segment */
+
 		hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop);
 		/* mtt mhop */
 		i = mhop.l0_idx;
@@ -803,8 +805,8 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
 			hem_idx = i;
 
 	hem = table->hem[hem_idx];
-	dma_offset = offset = (obj & (table->num_obj - 1)) *
-			      table->obj_size % mhop.bt_chunk_size;
+	dma_offset = offset = (obj & (table->num_obj - 1)) * seg_size %
+			      mhop.bt_chunk_size;
 	if (mhop.hop_num == 2)
 		dma_offset = offset = 0;
 }
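
Worked through with the fix's numbers: each multi-hop BA segment holds 8 base addresses of 8 bytes each, so an object's byte offset inside a chunk scales with seg_size = 64, not with table->obj_size. A standalone check of the new expression, where everything except seg_size is a hypothetical table geometry:

#include <stdio.h>

int main(void)
{
	unsigned seg_size = 64;		/* from the patch: 8 bytes/BA, 8 BA/segment */
	unsigned long num_obj = 1024;	/* hypothetical */
	unsigned bt_chunk_size = 4096;	/* hypothetical */
	unsigned long obj = 100;	/* hypothetical */

	unsigned long offset = (obj & (num_obj - 1)) * seg_size % bt_chunk_size;

	/* (100 & 1023) * 64 % 4096 = 6400 % 4096 = 2304 */
	printf("offset within chunk: %lu\n", offset);
	return 0;
}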

View file

@@ -746,7 +746,6 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev,
 	struct hns_roce_hem_table *table;
 	dma_addr_t dma_handle;
 	__le64 *mtts;
-	u32 s = start_index * sizeof(u64);
 	u32 bt_page_size;
 	u32 i;
@@ -780,7 +779,8 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev,
 		return -EINVAL;
 
 	mtts = hns_roce_table_find(hr_dev, table,
-				   mtt->first_seg + s / hr_dev->caps.mtt_entry_sz,
+				   mtt->first_seg +
+				   start_index / HNS_ROCE_MTT_ENTRY_PER_SEG,
 				   &dma_handle);
 	if (!mtts)
 		return -ENOMEM;

View file

@@ -274,9 +274,6 @@ void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
 	wait_for_completion(&hr_qp->free);
 
 	if ((hr_qp->ibqp.qp_type) != IB_QPT_GSI) {
-		if (hr_dev->caps.sccc_entry_sz)
-			hns_roce_table_put(hr_dev, &qp_table->sccc_table,
-					   hr_qp->qpn);
 		if (hr_dev->caps.trrl_entry_sz)
 			hns_roce_table_put(hr_dev, &qp_table->trrl_table,
 					   hr_qp->qpn);
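
What the removal avoids, in miniature: putting the same table entry's reference twice. If the SCC context entry is released on another path, a second hns_roce_table_put() here would drop the count below its true value. A toy illustration of the hazard, using a bare counter rather than the real HEM code:

#include <stdio.h>

static int sccc_refcount = 1;	/* one outstanding user, hypothetically */

static void table_put(const char *who)
{
	sccc_refcount--;
	printf("%s: refcount now %d\n", who, sccc_refcount);
}

int main(void)
{
	table_put("release path A");	/* the one legitimate put */
	/* The lines the patch deletes amounted to a second put:
	 * table_put("qp_free"); would leave refcount at -1. */
	printf("final refcount: %d\n", sccc_refcount);
	return 0;
}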

View file

@@ -585,7 +585,7 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
 	struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
 	bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE;
 	bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH;
-	u64 access_mask = ODP_READ_ALLOWED_BIT;
+	u64 access_mask;
 	u64 start_idx, page_mask;
 	struct ib_umem_odp *odp;
 	size_t size;
@@ -607,6 +607,7 @@ next_mr:
 	page_shift = mr->umem->page_shift;
 	page_mask = ~(BIT(page_shift) - 1);
 	start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
+	access_mask = ODP_READ_ALLOWED_BIT;
 
 	if (prefetch && !downgrade && !mr->umem->writable) {
 		/* prefetch with write-access must
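
The bug pattern here is general: a variable initialized once at its declaration but widened inside a loop body keeps the widened value on the next iteration reached via the next_mr label. Moving the assignment to the top of each pass restores the invariant. A condensed sketch with generic flag values, not the real ODP bit definitions:

#include <stdio.h>

#define READ_ALLOWED	(1u << 0)	/* hypothetical values */
#define WRITE_ALLOWED	(1u << 1)

int main(void)
{
	unsigned access_mask;	/* note: no initializer, as in the fix */

	for (int mr = 0; mr < 2; mr++) {
		/* Reset at the top of every iteration. Initializing at
		 * the declaration instead would leak WRITE_ALLOWED from
		 * the first MR into the second - the bug being fixed. */
		access_mask = READ_ALLOWED;

		if (mr == 0)	/* pretend the first MR is writable */
			access_mask |= WRITE_ALLOWED;

		printf("mr %d: access_mask=%#x\n", mr, access_mask);
	}
	return 0;
}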

View file

@@ -1131,6 +1131,8 @@ static void pvrdma_pci_remove(struct pci_dev *pdev)
 	pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
 	pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
 	pvrdma_free_slots(dev);
+	dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr,
+			  dev->dsrbase);
 
 	iounmap(dev->regs);
 	kfree(dev->sgid_tbl);
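
The leak fixed here is the classic unpaired allocation: the device shared region (dsr) is allocated with dma_alloc_coherent() at probe time, so remove must mirror it with dma_free_coherent() using the same size, CPU address, and DMA handle. A userspace analog of the probe/remove pairing, where malloc/free and the fake_dev struct stand in for the kernel DMA API and the real pvrdma device struct:

#include <stdio.h>
#include <stdlib.h>

/* Every allocation made in probe() must have a mirror-image release
 * in remove(); dropping the pointer on the floor is the leak. */
struct fake_dev {
	void *dsr;	/* device shared region, per the patch */
	size_t dsr_size;
};

static int probe(struct fake_dev *dev)
{
	dev->dsr_size = 4096;
	dev->dsr = malloc(dev->dsr_size);	/* dma_alloc_coherent() */
	return dev->dsr ? 0 : -1;
}

static void remove_dev(struct fake_dev *dev)
{
	/* The patch adds exactly this mirror call; before it, dsr was
	 * never released at driver remove. */
	free(dev->dsr);				/* dma_free_coherent() */
	dev->dsr = NULL;
}

int main(void)
{
	struct fake_dev dev;

	if (probe(&dev) == 0)
		remove_dev(&dev);
	return 0;
}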