net/mlx5: WQ, fixes for fragmented WQ buffers API

mlx5e netdevice used to calculate fragment edges by a call to
mlx5_wq_cyc_get_frag_size(). This calculation did not give the correct
indication for queues smaller than a PAGE_SIZE, (broken by default on
PowerPC, where PAGE_SIZE == 64KB).  Here it is replaced by the correct new
calls/API.

Since (TX/RX) Work Queues buffers are fragmented, here we introduce
changes to the API in core driver, so that it gets a stride index and
returns the index of last stride on same fragment, and an additional
wrapping function that returns the number of physically contiguous
strides that can be written contiguously to the work queue.

This obsoletes the following API functions, and their buggy
usage in EN driver:
* mlx5_wq_cyc_get_frag_size()
* mlx5_wq_cyc_ctr2fragix()

The new API improves modularity and hides the details of such
calculation for mlx5e netdevice and mlx5_ib rdma drivers.

New calculation is also more efficient, and improves performance
as follows:

Packet rate test: pktgen, UDP / IPv4, 64byte, single ring, 8K ring size.

Before: 16,477,619 pps
After:  17,085,793 pps

3.7% improvement

Fixes: 3a2f703312 ("net/mlx5: Use order-0 allocations for all WQ types")
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
This commit is contained in:
Tariq Toukan 2018-08-21 14:41:41 +03:00 коммит произвёл Saeed Mahameed
Родитель a48bc51315
Коммит 37fdffb217
6 изменённых файлов: 31 добавлений и 32 удалений

Просмотреть файл

@ -432,10 +432,9 @@ static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq)
static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq,
struct mlx5_wq_cyc *wq,
u16 pi, u16 frag_pi)
u16 pi, u16 nnops)
{
struct mlx5e_sq_wqe_info *edge_wi, *wi = &sq->db.ico_wqe[pi];
u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi;
edge_wi = wi + nnops;
@ -454,15 +453,14 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
struct mlx5_wq_cyc *wq = &sq->wq;
struct mlx5e_umr_wqe *umr_wqe;
u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1);
u16 pi, frag_pi;
u16 pi, contig_wqebbs_room;
int err;
int i;
pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
if (unlikely(frag_pi + MLX5E_UMR_WQEBBS > mlx5_wq_cyc_get_frag_size(wq))) {
mlx5e_fill_icosq_frag_edge(sq, wq, pi, frag_pi);
contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) {
mlx5e_fill_icosq_frag_edge(sq, wq, pi, contig_wqebbs_room);
pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
}

Просмотреть файл

@ -290,10 +290,9 @@ dma_unmap_wqe_err:
static inline void mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq,
struct mlx5_wq_cyc *wq,
u16 pi, u16 frag_pi)
u16 pi, u16 nnops)
{
struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi;
edge_wi = wi + nnops;
@ -348,8 +347,8 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5e_tx_wqe_info *wi;
struct mlx5e_sq_stats *stats = sq->stats;
u16 headlen, ihs, contig_wqebbs_room;
u16 ds_cnt, ds_cnt_inl = 0;
u16 headlen, ihs, frag_pi;
u8 num_wqebbs, opcode;
u32 num_bytes;
int num_dma;
@ -386,9 +385,9 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
}
num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) {
mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi);
contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
if (unlikely(contig_wqebbs_room < num_wqebbs)) {
mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
mlx5e_sq_fetch_wqe(sq, &wqe, &pi);
}
@ -636,7 +635,7 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5e_tx_wqe_info *wi;
struct mlx5e_sq_stats *stats = sq->stats;
u16 headlen, ihs, pi, frag_pi;
u16 headlen, ihs, pi, contig_wqebbs_room;
u16 ds_cnt, ds_cnt_inl = 0;
u8 num_wqebbs, opcode;
u32 num_bytes;
@ -672,13 +671,14 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
}
num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) {
pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi);
contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
if (unlikely(contig_wqebbs_room < num_wqebbs)) {
mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
}
mlx5i_sq_fetch_wqe(sq, &wqe, &pi);
mlx5i_sq_fetch_wqe(sq, &wqe, pi);
/* fill wqe */
wi = &sq->db.wqe_info[pi];

Просмотреть файл

@ -109,12 +109,11 @@ struct mlx5i_tx_wqe {
static inline void mlx5i_sq_fetch_wqe(struct mlx5e_txqsq *sq,
struct mlx5i_tx_wqe **wqe,
u16 *pi)
u16 pi)
{
struct mlx5_wq_cyc *wq = &sq->wq;
*pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
*wqe = mlx5_wq_cyc_get_wqe(wq, *pi);
*wqe = mlx5_wq_cyc_get_wqe(wq, pi);
memset(*wqe, 0, sizeof(**wqe));
}

Просмотреть файл

@ -39,11 +39,6 @@ u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq)
return (u32)wq->fbc.sz_m1 + 1;
}
u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq)
{
return wq->fbc.frag_sz_m1 + 1;
}
u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq)
{
return wq->fbc.sz_m1 + 1;

Просмотреть файл

@ -80,7 +80,6 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
void *wqc, struct mlx5_wq_cyc *wq,
struct mlx5_wq_ctrl *wq_ctrl);
u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq);
u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq);
int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
void *qpc, struct mlx5_wq_qp *wq,
@ -140,11 +139,6 @@ static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr)
return ctr & wq->fbc.sz_m1;
}
static inline u16 mlx5_wq_cyc_ctr2fragix(struct mlx5_wq_cyc *wq, u16 ctr)
{
return ctr & wq->fbc.frag_sz_m1;
}
static inline u16 mlx5_wq_cyc_get_head(struct mlx5_wq_cyc *wq)
{
return mlx5_wq_cyc_ctr2ix(wq, wq->wqe_ctr);
@ -160,6 +154,11 @@ static inline void *mlx5_wq_cyc_get_wqe(struct mlx5_wq_cyc *wq, u16 ix)
return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
}
static inline u16 mlx5_wq_cyc_get_contig_wqebbs(struct mlx5_wq_cyc *wq, u16 ix)
{
return mlx5_frag_buf_get_idx_last_contig_stride(&wq->fbc, ix) - ix + 1;
}
static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2)
{
int equal = (cc1 == cc2);

Просмотреть файл

@ -1032,6 +1032,14 @@ static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc,
((fbc->frag_sz_m1 & ix) << fbc->log_stride);
}
static inline u32
mlx5_frag_buf_get_idx_last_contig_stride(struct mlx5_frag_buf_ctrl *fbc, u32 ix)
{
u32 last_frag_stride_idx = (ix + fbc->strides_offset) | fbc->frag_sz_m1;
return min_t(u32, last_frag_stride_idx - fbc->strides_offset, fbc->sz_m1);
}
int mlx5_cmd_init(struct mlx5_core_dev *dev);
void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
void mlx5_cmd_use_events(struct mlx5_core_dev *dev);