mlx5-updates-2023-03-28
Merge tag 'mlx5-updates-2023-03-28' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux

Saeed Mahameed says:
====================
mlx5-updates-2023-03-28

Dragos Tatulea says:
====================
net/mlx5e: RX, Drop page_cache and fully use page_pool

For page allocation on the rx path, the mlx5e driver has been using an
internal page cache in tandem with the page pool. The internal page
cache uses a queue for page recycling which has the issue of head of
queue blocking.

This patch series drops the internal page_cache altogether and uses the
page_pool to implement everything that was done by the page_cache
before:
* Let the page_pool handle dma mapping and unmapping.
* Use fragmented pages with a fragment counter instead of tracking via
  page ref.
* Enable skb recycling.

The patch series has the following effects on the rx path:

* Improved performance for the cases when there was low page recycling
  due to head of queue blocking in the internal page_cache. The test
  for this was running a single iperf TCP stream to an rx queue which
  is bound on the same cpu as the application.

  |-------------+--------+--------+------+--------|
  | rq type     | before | after  | unit | diff   |
  |-------------+--------+--------+------+--------|
  | striding rq | 30.1   | 31.4   | Gbps | 4.14 % |
  | legacy rq   | 30.2   | 33.0   | Gbps | 8.48 % |
  |-------------+--------+--------+------+--------|

* Small XDP performance degradation. The test was an XDP drop program
  running on a single rx queue with small packets incoming; it looks
  like this:

  |-------------+----------+----------+------+---------|
  | rq type     | before   | after    | unit | diff    |
  |-------------+----------+----------+------+---------|
  | striding rq | 19725449 | 18544617 | pps  | -6.37 % |
  | legacy rq   | 19879931 | 18631841 | pps  | -6.70 % |
  |-------------+----------+----------+------+---------|

  This will be handled in a different patch series by adding support
  for multi-packet per page.

* For other cases the performance is roughly the same.

The above numbers were obtained on the following system:
  24 core Intel(R) Xeon(R) Platinum 8380 CPU @ 2.30GHz
  32 GB RAM
  ConnectX-7 single port

The breakdown of the patch series is the following:
* Preparations for introducing the mlx5e_frag_page struct.
* Delete the mlx5e_page_cache struct.
* Enable dma mapping from page_pool.
* Enable skb recycling and fragment counting.
* Do deferred release of pages (just before alloc) to ensure better
  page_pool cache utilization.
====================

* tag 'mlx5-updates-2023-03-28' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux:
  net/mlx5e: RX, Remove unnecessary recycle parameter and page_cache stats
  net/mlx5e: RX, Break the wqe bulk refill in smaller chunks
  net/mlx5e: RX, Increase WQE bulk size for legacy rq
  net/mlx5e: RX, Split off release path for xsk buffers for legacy rq
  net/mlx5e: RX, Defer page release in legacy rq for better recycling
  net/mlx5e: RX, Change wqe last_in_page field from bool to bit flags
  net/mlx5e: RX, Defer page release in striding rq for better recycling
  net/mlx5e: RX, Rename xdp_xmit_bitmap to a more generic name
  net/mlx5e: RX, Enable skb page recycling through the page_pool
  net/mlx5e: RX, Enable dma map and sync from page_pool allocator
  net/mlx5e: RX, Remove internal page_cache
  net/mlx5e: RX, Store SHAMPO header pages in array
  net/mlx5e: RX, Remove alloc unit layout constraint for striding rq
  net/mlx5e: RX, Remove alloc unit layout constraint for legacy rq
  net/mlx5e: RX, Remove mlx5e_alloc_unit argument in page allocation
====================

Link: https://lore.kernel.org/r/20230328205623.142075-1-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
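For context on the page_pool fragment API the series switches to, here is a minimal sketch of the allocate/release pattern, mirroring the mlx5e_page_alloc_fragmented()/mlx5e_page_release_fragmented() hunks further down. It assumes a page_pool created with PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | PP_FLAG_PAGE_FRAG as in the mlx5e_alloc_rq hunk; struct rx_frag_page and RX_PAGECNT_BIAS_MAX are illustrative stand-ins for the driver's mlx5e_frag_page and MLX5E_PAGECNT_BIAS_MAX, not driver code.

/* Hedged sketch (kernel context): fragment-counted page_pool usage as
 * adopted by this series. Names prefixed rx_ are illustrative only.
 */
#include <net/page_pool.h>

#define RX_PAGECNT_BIAS_MAX (PAGE_SIZE / 64) /* upper bound on frags per page */

struct rx_frag_page {
        struct page *page;
        u16 frags;              /* fragments handed out (e.g. to SKBs) */
};

static int rx_page_alloc_fragmented(struct page_pool *pool,
                                    struct rx_frag_page *fp)
{
        struct page *page = page_pool_dev_alloc_pages(pool);

        if (unlikely(!page))
                return -ENOMEM;

        /* Pre-charge the pp fragment counter instead of taking page refs. */
        page_pool_fragment_page(page, RX_PAGECNT_BIAS_MAX);
        fp->page = page;
        fp->frags = 0;
        return 0;
}

static void rx_page_release_fragmented(struct page_pool *pool,
                                       struct rx_frag_page *fp)
{
        u16 drain = RX_PAGECNT_BIAS_MAX - fp->frags;

        /* Drop the unused part of the bias; when the fragment count reaches
         * zero the page can be returned to (and recycled by) the pool.
         */
        if (page_pool_defrag_page(fp->page, drain) == 0)
                page_pool_put_defragged_page(pool, fp->page, -1, true);
}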
This commit is contained in:
Commit 7079d5e61a
@@ -346,32 +346,6 @@ the software port.
      - The number of receive packets with CQE compression on ring i [#accel]_.
      - Acceleration
 
-   * - `rx[i]_cache_reuse`
-     - The number of events of successful reuse of a page from a driver's
-       internal page cache.
-     - Acceleration
-
-   * - `rx[i]_cache_full`
-     - The number of events of full internal page cache where driver can't put a
-       page back to the cache for recycling (page will be freed).
-     - Acceleration
-
-   * - `rx[i]_cache_empty`
-     - The number of events where cache was empty - no page to give. Driver
-       shall allocate new page.
-     - Acceleration
-
-   * - `rx[i]_cache_busy`
-     - The number of events where cache head was busy and cannot be recycled.
-       Driver allocated new page.
-     - Acceleration
-
-   * - `rx[i]_cache_waive`
-     - The number of cache evacuation. This can occur due to page move to
-       another NUMA node or page was pfmemalloc-ed and should be freed as soon
-       as possible.
-     - Acceleration
-
    * - `rx[i]_arfs_err`
      - Number of flow rules that failed to be added to the flow table.
      - Error
@@ -475,11 +475,6 @@ struct mlx5e_txqsq {
        cqe_ts_to_ns ptp_cyc2time;
 } ____cacheline_aligned_in_smp;
 
-union mlx5e_alloc_unit {
-       struct page *page;
-       struct xdp_buff *xsk;
-};
-
 /* XDP packets can be transmitted in different ways. On completion, we need to
  * distinguish between them to clean up things in a proper way.
  */
@@ -605,16 +600,35 @@ struct mlx5e_icosq {
        struct work_struct recover_work;
 } ____cacheline_aligned_in_smp;
 
+struct mlx5e_frag_page {
+       struct page *page;
+       u16 frags;
+};
+
+enum mlx5e_wqe_frag_flag {
+       MLX5E_WQE_FRAG_LAST_IN_PAGE,
+       MLX5E_WQE_FRAG_SKIP_RELEASE,
+};
+
 struct mlx5e_wqe_frag_info {
-       union mlx5e_alloc_unit *au;
+       union {
+               struct mlx5e_frag_page *frag_page;
+               struct xdp_buff **xskp;
+       };
        u32 offset;
-       bool last_in_page;
+       u8 flags;
 };
 
+union mlx5e_alloc_units {
+       DECLARE_FLEX_ARRAY(struct mlx5e_frag_page, frag_pages);
+       DECLARE_FLEX_ARRAY(struct page *, pages);
+       DECLARE_FLEX_ARRAY(struct xdp_buff *, xsk_buffs);
+};
+
 struct mlx5e_mpw_info {
        u16 consumed_strides;
-       DECLARE_BITMAP(xdp_xmit_bitmap, MLX5_MPWRQ_MAX_PAGES_PER_WQE);
-       union mlx5e_alloc_unit alloc_units[];
+       DECLARE_BITMAP(skip_release_bitmap, MLX5_MPWRQ_MAX_PAGES_PER_WQE);
+       union mlx5e_alloc_units alloc_units;
 };
 
 #define MLX5E_MAX_RX_FRAGS 4
@@ -625,11 +639,6 @@ struct mlx5e_mpw_info {
 #define MLX5E_CACHE_UNIT       (MLX5_MPWRQ_MAX_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \
                                 MLX5_MPWRQ_MAX_PAGES_PER_WQE : NAPI_POLL_WEIGHT)
 #define MLX5E_CACHE_SIZE       (4 * roundup_pow_of_two(MLX5E_CACHE_UNIT))
-struct mlx5e_page_cache {
-       u32 head;
-       u32 tail;
-       struct page *page_cache[MLX5E_CACHE_SIZE];
-};
 
 struct mlx5e_rq;
 typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*);
@@ -661,19 +670,24 @@ struct mlx5e_rq_frags_info {
        struct mlx5e_rq_frag_info arr[MLX5E_MAX_RX_FRAGS];
        u8 num_frags;
        u8 log_num_frags;
-       u8 wqe_bulk;
+       u16 wqe_bulk;
+       u16 refill_unit;
        u8 wqe_index_mask;
 };
 
 struct mlx5e_dma_info {
        dma_addr_t addr;
-       struct page *page;
+       union {
+               struct mlx5e_frag_page *frag_page;
+               struct page *page;
+       };
 };
 
 struct mlx5e_shampo_hd {
        u32 mkey;
        struct mlx5e_dma_info *info;
-       struct page *last_page;
+       struct mlx5e_frag_page *pages;
+       u16 curr_page_index;
        u16 hd_per_wq;
        u16 hd_per_wqe;
        unsigned long *bitmap;
@@ -702,7 +716,7 @@ struct mlx5e_rq {
                struct {
                        struct mlx5_wq_cyc wq;
                        struct mlx5e_wqe_frag_info *frags;
-                       union mlx5e_alloc_unit *alloc_units;
+                       union mlx5e_alloc_units *alloc_units;
                        struct mlx5e_rq_frags_info info;
                        mlx5e_fp_skb_from_cqe skb_from_cqe;
                } wqe;
@@ -738,7 +752,6 @@ struct mlx5e_rq {
        struct mlx5e_rq_stats *stats;
        struct mlx5e_cq cq;
        struct mlx5e_cq_decomp cqd;
-       struct mlx5e_page_cache page_cache;
        struct hwtstamp_config *tstamp;
        struct mlx5_clock *clock;
        struct mlx5e_icosq *icosq;
@@ -667,6 +667,48 @@ static int mlx5e_max_nonlinear_mtu(int first_frag_size, int frag_size, bool xdp)
        return first_frag_size + (MLX5E_MAX_RX_FRAGS - 2) * frag_size + PAGE_SIZE;
 }
 
+static void mlx5e_rx_compute_wqe_bulk_params(struct mlx5e_params *params,
+                                            struct mlx5e_rq_frags_info *info)
+{
+       u16 bulk_bound_rq_size = (1 << params->log_rq_mtu_frames) / 4;
+       u32 bulk_bound_rq_size_in_bytes;
+       u32 sum_frag_strides = 0;
+       u32 wqe_bulk_in_bytes;
+       u16 split_factor;
+       u32 wqe_bulk;
+       int i;
+
+       for (i = 0; i < info->num_frags; i++)
+               sum_frag_strides += info->arr[i].frag_stride;
+
+       /* For MTUs larger than PAGE_SIZE, align to PAGE_SIZE to reflect
+        * amount of consumed pages per wqe in bytes.
+        */
+       if (sum_frag_strides > PAGE_SIZE)
+               sum_frag_strides = ALIGN(sum_frag_strides, PAGE_SIZE);
+
+       bulk_bound_rq_size_in_bytes = bulk_bound_rq_size * sum_frag_strides;
+
+#define MAX_WQE_BULK_BYTES(xdp) ((xdp ? 256 : 512) * 1024)
+
+       /* A WQE bulk should not exceed min(512KB, 1/4 of rq size). For XDP
+        * keep bulk size smaller to avoid filling the page_pool cache on
+        * every bulk refill.
+        */
+       wqe_bulk_in_bytes = min_t(u32, MAX_WQE_BULK_BYTES(params->xdp_prog),
+                                 bulk_bound_rq_size_in_bytes);
+       wqe_bulk = DIV_ROUND_UP(wqe_bulk_in_bytes, sum_frag_strides);
+
+       /* Make sure that allocations don't start when the page is still used
+        * by older WQEs.
+        */
+       info->wqe_bulk = max_t(u16, info->wqe_index_mask + 1, wqe_bulk);
+
+       split_factor = DIV_ROUND_UP(MAX_WQE_BULK_BYTES(params->xdp_prog),
+                                   PP_ALLOC_CACHE_REFILL * PAGE_SIZE);
+       info->refill_unit = DIV_ROUND_UP(info->wqe_bulk, split_factor);
+}
+
 #define DEFAULT_FRAG_SIZE (2048)
 
 static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
@@ -774,11 +816,14 @@ static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
        }
 
 out:
-       /* Bulking optimization to skip allocation until at least 8 WQEs can be
-        * allocated in a row. At the same time, never start allocation when
-        * the page is still used by older WQEs.
+       /* Bulking optimization to skip allocation until a large enough number
+        * of WQEs can be allocated in a row. Bulking also influences how well
+        * deferred page release works.
         */
-       info->wqe_bulk = max_t(u8, info->wqe_index_mask + 1, 8);
+       mlx5e_rx_compute_wqe_bulk_params(params, info);
+
+       mlx5_core_dbg(mdev, "%s: wqe_bulk = %u, wqe_bulk_refill_unit = %u\n",
+                     __func__, info->wqe_bulk, info->refill_unit);
 
        info->log_num_frags = order_base_2(info->num_frags);
 
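A rough worked example of the bulk sizing above, with hypothetical legacy-rq inputs (1024 WQEs, a single 2048-byte fragment per WQE, no XDP program) and PP_ALLOC_CACHE_REFILL assumed to be 64 pages as in kernels of this era. The standalone C below only mirrors the arithmetic of mlx5e_rx_compute_wqe_bulk_params() and is not driver code.

/* Hedged sketch: wqe_bulk / refill_unit arithmetic with hypothetical inputs. */
#include <stdio.h>

#define PAGE_SZ                 4096u
#define PP_ALLOC_CACHE_REFILL   64u     /* assumed page_pool cache refill size */
#define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))

int main(void)
{
        unsigned int rq_size = 1024, frag_stride = 2048, wqe_index_mask = 0;
        int xdp = 0;

        unsigned int max_bulk_bytes = (xdp ? 256u : 512u) * 1024u;
        unsigned int bulk_bound_bytes = (rq_size / 4) * frag_stride;   /* 512 KB */
        unsigned int wqe_bulk_bytes = bulk_bound_bytes < max_bulk_bytes ?
                                      bulk_bound_bytes : max_bulk_bytes;
        unsigned int wqe_bulk = DIV_ROUND_UP(wqe_bulk_bytes, frag_stride);

        if (wqe_bulk < wqe_index_mask + 1)
                wqe_bulk = wqe_index_mask + 1;

        unsigned int split_factor = DIV_ROUND_UP(max_bulk_bytes,
                                                 PP_ALLOC_CACHE_REFILL * PAGE_SZ);
        unsigned int refill_unit = DIV_ROUND_UP(wqe_bulk, split_factor);

        /* Prints wqe_bulk = 256, refill_unit = 128 for these inputs, i.e. the
         * bulk is refilled in two smaller chunks to avoid overflowing the
         * page_pool cache with releases.
         */
        printf("wqe_bulk = %u, refill_unit = %u\n", wqe_bulk, refill_unit);
        return 0;
}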
@@ -121,9 +121,9 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
 
        mlx5e_reset_icosq_cc_pc(icosq);
 
-       mlx5e_free_rx_in_progress_descs(rq);
+       mlx5e_free_rx_missing_descs(rq);
        if (xskrq)
-               mlx5e_free_rx_in_progress_descs(xskrq);
+               mlx5e_free_rx_missing_descs(xskrq);
 
        clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
        mlx5e_activate_icosq(icosq);
@@ -65,13 +65,11 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget);
 int mlx5e_poll_ico_cq(struct mlx5e_cq *cq);
 
 /* RX */
-void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct page *page);
-void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle);
 INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq));
 INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq));
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
 void mlx5e_free_rx_descs(struct mlx5e_rq *rq);
-void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq);
+void mlx5e_free_rx_missing_descs(struct mlx5e_rq *rq);
 
 static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
 {
@@ -489,7 +487,7 @@ static inline bool mlx5e_icosq_can_post_wqe(struct mlx5e_icosq *sq, u16 wqe_size
 
 static inline struct mlx5e_mpw_info *mlx5e_get_mpw_info(struct mlx5e_rq *rq, int i)
 {
-       size_t isz = struct_size(rq->mpwqe.info, alloc_units, rq->mpwqe.pages_per_wqe);
+       size_t isz = struct_size(rq->mpwqe.info, alloc_units.frag_pages, rq->mpwqe.pages_per_wqe);
 
        return (struct mlx5e_mpw_info *)((char *)rq->mpwqe.info + array_size(i, isz));
 }
@@ -209,8 +209,6 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq,
                        goto xdp_abort;
                __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
                __set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
-               if (xdp->rxq->mem.type != MEM_TYPE_XSK_BUFF_POOL)
-                       mlx5e_page_dma_unmap(rq, virt_to_page(xdp->data));
                rq->stats->xdp_redirect++;
                return true;
        default:
@@ -507,7 +505,6 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
 static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
                                  struct mlx5e_xdp_wqe_info *wi,
                                  u32 *xsk_frames,
-                                 bool recycle,
                                  struct xdp_frame_bulk *bq)
 {
        struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
@@ -525,7 +522,8 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
                        break;
                case MLX5E_XDP_XMIT_MODE_PAGE:
                        /* XDP_TX from the regular RQ */
-                       mlx5e_page_release_dynamic(xdpi.page.rq, xdpi.page.page, recycle);
+                       page_pool_put_defragged_page(xdpi.page.rq->page_pool,
+                                                    xdpi.page.page, -1, true);
                        break;
                case MLX5E_XDP_XMIT_MODE_XSK:
                        /* AF_XDP send */
@@ -579,7 +577,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
 
                        sqcc += wi->num_wqebbs;
 
-                       mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true, &bq);
+                       mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, &bq);
                } while (!last_wqe);
 
                if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
@@ -626,7 +624,7 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
 
                sq->cc += wi->num_wqebbs;
 
-               mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false, &bq);
+               mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, &bq);
        }
 
        xdp_flush_frame_bulk(&bq);
@ -22,6 +22,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
|
|||
struct mlx5e_icosq *icosq = rq->icosq;
|
||||
struct mlx5_wq_cyc *wq = &icosq->wq;
|
||||
struct mlx5e_umr_wqe *umr_wqe;
|
||||
struct xdp_buff **xsk_buffs;
|
||||
int batch, i;
|
||||
u32 offset; /* 17-bit value with MTT. */
|
||||
u16 pi;
|
||||
|
@ -29,9 +30,9 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
|
|||
if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, rq->mpwqe.pages_per_wqe)))
|
||||
goto err;
|
||||
|
||||
BUILD_BUG_ON(sizeof(wi->alloc_units[0]) != sizeof(wi->alloc_units[0].xsk));
|
||||
XSK_CHECK_PRIV_TYPE(struct mlx5e_xdp_buff);
|
||||
batch = xsk_buff_alloc_batch(rq->xsk_pool, (struct xdp_buff **)wi->alloc_units,
|
||||
xsk_buffs = (struct xdp_buff **)wi->alloc_units.xsk_buffs;
|
||||
batch = xsk_buff_alloc_batch(rq->xsk_pool, xsk_buffs,
|
||||
rq->mpwqe.pages_per_wqe);
|
||||
|
||||
/* If batch < pages_per_wqe, either:
|
||||
|
@ -41,8 +42,8 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
|
|||
* the first error, which will mean there are no more valid descriptors.
|
||||
*/
|
||||
for (; batch < rq->mpwqe.pages_per_wqe; batch++) {
|
||||
wi->alloc_units[batch].xsk = xsk_buff_alloc(rq->xsk_pool);
|
||||
if (unlikely(!wi->alloc_units[batch].xsk))
|
||||
xsk_buffs[batch] = xsk_buff_alloc(rq->xsk_pool);
|
||||
if (unlikely(!xsk_buffs[batch]))
|
||||
goto err_reuse_batch;
|
||||
}
|
||||
|
||||
|
@ -52,8 +53,8 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
|
|||
|
||||
if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) {
|
||||
for (i = 0; i < batch; i++) {
|
||||
struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk);
|
||||
dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
|
||||
struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
|
||||
dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);
|
||||
|
||||
umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
|
||||
.ptag = cpu_to_be64(addr | MLX5_EN_WR),
|
||||
|
@ -62,8 +63,8 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
|
|||
}
|
||||
} else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) {
|
||||
for (i = 0; i < batch; i++) {
|
||||
struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk);
|
||||
dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
|
||||
struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
|
||||
dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);
|
||||
|
||||
umr_wqe->inline_ksms[i] = (struct mlx5_ksm) {
|
||||
.key = rq->mkey_be,
|
||||
|
@ -75,8 +76,8 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
|
|||
u32 mapping_size = 1 << (rq->mpwqe.page_shift - 2);
|
||||
|
||||
for (i = 0; i < batch; i++) {
|
||||
struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk);
|
||||
dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
|
||||
struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
|
||||
dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);
|
||||
|
||||
umr_wqe->inline_ksms[i << 2] = (struct mlx5_ksm) {
|
||||
.key = rq->mkey_be,
|
||||
|
@ -102,8 +103,8 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
|
|||
__be32 frame_size = cpu_to_be32(rq->xsk_pool->chunk_size);
|
||||
|
||||
for (i = 0; i < batch; i++) {
|
||||
struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk);
|
||||
dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
|
||||
struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
|
||||
dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);
|
||||
|
||||
umr_wqe->inline_klms[i << 1] = (struct mlx5_klm) {
|
||||
.key = rq->mkey_be,
|
||||
|
@ -119,7 +120,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
|
|||
}
|
||||
}
|
||||
|
||||
bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe);
|
||||
bitmap_zero(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe);
|
||||
wi->consumed_strides = 0;
|
||||
|
||||
umr_wqe->ctrl.opmod_idx_opcode =
|
||||
|
@ -149,7 +150,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
|
|||
|
||||
err_reuse_batch:
|
||||
while (--batch >= 0)
|
||||
xsk_buff_free(wi->alloc_units[batch].xsk);
|
||||
xsk_buff_free(xsk_buffs[batch]);
|
||||
|
||||
err:
|
||||
rq->stats->buff_alloc_err++;
|
||||
|
@ -163,13 +164,10 @@ int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
|
|||
u32 contig, alloc;
|
||||
int i;
|
||||
|
||||
/* mlx5e_init_frags_partition creates a 1:1 mapping between
|
||||
* rq->wqe.frags and rq->wqe.alloc_units, which allows us to
|
||||
* allocate XDP buffers straight into alloc_units.
|
||||
/* Each rq->wqe.frags->xskp is 1:1 mapped to an element inside the
|
||||
* rq->wqe.alloc_units->xsk_buffs array allocated here.
|
||||
*/
|
||||
BUILD_BUG_ON(sizeof(rq->wqe.alloc_units[0]) !=
|
||||
sizeof(rq->wqe.alloc_units[0].xsk));
|
||||
buffs = (struct xdp_buff **)rq->wqe.alloc_units;
|
||||
buffs = rq->wqe.alloc_units->xsk_buffs;
|
||||
contig = mlx5_wq_cyc_get_size(wq) - ix;
|
||||
if (wqe_bulk <= contig) {
|
||||
alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, wqe_bulk);
|
||||
|
@ -189,8 +187,9 @@ int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
|
|||
/* Assumes log_num_frags == 0. */
|
||||
frag = &rq->wqe.frags[j];
|
||||
|
||||
addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk);
|
||||
addr = xsk_buff_xdp_get_frame_dma(*frag->xskp);
|
||||
wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
|
||||
frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
|
||||
}
|
||||
|
||||
return alloc;
|
||||
|
@ -211,12 +210,13 @@ int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
|
|||
/* Assumes log_num_frags == 0. */
|
||||
frag = &rq->wqe.frags[j];
|
||||
|
||||
frag->au->xsk = xsk_buff_alloc(rq->xsk_pool);
|
||||
if (unlikely(!frag->au->xsk))
|
||||
*frag->xskp = xsk_buff_alloc(rq->xsk_pool);
|
||||
if (unlikely(!*frag->xskp))
|
||||
return i;
|
||||
|
||||
addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk);
|
||||
addr = xsk_buff_xdp_get_frame_dma(*frag->xskp);
|
||||
wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
|
||||
frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
|
||||
}
|
||||
|
||||
return wqe_bulk;
|
||||
|
@ -251,7 +251,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
|
|||
u32 head_offset,
|
||||
u32 page_idx)
|
||||
{
|
||||
struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[page_idx].xsk);
|
||||
struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units.xsk_buffs[page_idx]);
|
||||
struct bpf_prog *prog;
|
||||
|
||||
/* Check packet size. Note LRO doesn't use linear SKB */
|
||||
|
@ -291,7 +291,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
|
|||
prog = rcu_dereference(rq->xdp_prog);
|
||||
if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) {
|
||||
if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
|
||||
__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
|
||||
__set_bit(page_idx, wi->skip_release_bitmap); /* non-atomic */
|
||||
return NULL; /* page/packet was consumed by XDP */
|
||||
}
|
||||
|
||||
|
@ -306,7 +306,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
|
|||
struct mlx5_cqe64 *cqe,
|
||||
u32 cqe_bcnt)
|
||||
{
|
||||
struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->au->xsk);
|
||||
struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(*wi->xskp);
|
||||
struct bpf_prog *prog;
|
||||
|
||||
/* wi->offset is not used in this function, because xdp->data and the
|
||||
|
|
|
@ -262,23 +262,30 @@ static int mlx5e_rq_shampo_hd_info_alloc(struct mlx5e_rq *rq, int node)
|
|||
|
||||
shampo->bitmap = bitmap_zalloc_node(shampo->hd_per_wq, GFP_KERNEL,
|
||||
node);
|
||||
if (!shampo->bitmap)
|
||||
return -ENOMEM;
|
||||
|
||||
shampo->info = kvzalloc_node(array_size(shampo->hd_per_wq,
|
||||
sizeof(*shampo->info)),
|
||||
GFP_KERNEL, node);
|
||||
if (!shampo->info) {
|
||||
kvfree(shampo->bitmap);
|
||||
return -ENOMEM;
|
||||
}
|
||||
shampo->pages = kvzalloc_node(array_size(shampo->hd_per_wq,
|
||||
sizeof(*shampo->pages)),
|
||||
GFP_KERNEL, node);
|
||||
if (!shampo->bitmap || !shampo->info || !shampo->pages)
|
||||
goto err_nomem;
|
||||
|
||||
return 0;
|
||||
|
||||
err_nomem:
|
||||
kvfree(shampo->info);
|
||||
kvfree(shampo->bitmap);
|
||||
kvfree(shampo->pages);
|
||||
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq)
|
||||
{
|
||||
kvfree(rq->mpwqe.shampo->bitmap);
|
||||
kvfree(rq->mpwqe.shampo->info);
|
||||
kvfree(rq->mpwqe.shampo->pages);
|
||||
}
|
||||
|
||||
static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node)
|
||||
|
@ -286,13 +293,23 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node)
|
|||
int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
|
||||
size_t alloc_size;
|
||||
|
||||
alloc_size = array_size(wq_sz, struct_size(rq->mpwqe.info, alloc_units,
|
||||
alloc_size = array_size(wq_sz, struct_size(rq->mpwqe.info,
|
||||
alloc_units.frag_pages,
|
||||
rq->mpwqe.pages_per_wqe));
|
||||
|
||||
rq->mpwqe.info = kvzalloc_node(alloc_size, GFP_KERNEL, node);
|
||||
if (!rq->mpwqe.info)
|
||||
return -ENOMEM;
|
||||
|
||||
/* For deferred page release (release right before alloc), make sure
|
||||
* that on first round release is not called.
|
||||
*/
|
||||
for (int i = 0; i < wq_sz; i++) {
|
||||
struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, i);
|
||||
|
||||
bitmap_fill(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe);
|
||||
}
|
||||
|
||||
mlx5e_build_umr_wqe(rq, rq->icosq, &rq->mpwqe.umr_wqe);
|
||||
|
||||
return 0;
|
||||
|
@ -499,14 +516,12 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq)
|
|||
struct mlx5e_wqe_frag_info *prev = NULL;
|
||||
int i;
|
||||
|
||||
if (rq->xsk_pool) {
|
||||
/* Assumptions used by XSK batched allocator. */
|
||||
WARN_ON(rq->wqe.info.num_frags != 1);
|
||||
WARN_ON(rq->wqe.info.log_num_frags != 0);
|
||||
WARN_ON(rq->wqe.info.arr[0].frag_stride != PAGE_SIZE);
|
||||
}
|
||||
WARN_ON(rq->xsk_pool);
|
||||
|
||||
next_frag.au = &rq->wqe.alloc_units[0];
|
||||
next_frag.frag_page = &rq->wqe.alloc_units->frag_pages[0];
|
||||
|
||||
/* Skip first release due to deferred release. */
|
||||
next_frag.flags = BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
|
||||
|
||||
for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) {
|
||||
struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
|
||||
|
@ -516,10 +531,11 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq)
|
|||
|
||||
for (f = 0; f < rq->wqe.info.num_frags; f++, frag++) {
|
||||
if (next_frag.offset + frag_info[f].frag_stride > PAGE_SIZE) {
|
||||
next_frag.au++;
|
||||
/* Pages are assigned at runtime. */
|
||||
next_frag.frag_page++;
|
||||
next_frag.offset = 0;
|
||||
if (prev)
|
||||
prev->last_in_page = true;
|
||||
prev->flags |= BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE);
|
||||
}
|
||||
*frag = next_frag;
|
||||
|
||||
|
@ -530,25 +546,68 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq)
|
|||
}
|
||||
|
||||
if (prev)
|
||||
prev->last_in_page = true;
|
||||
prev->flags |= BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE);
|
||||
}
|
||||
|
||||
static int mlx5e_init_au_list(struct mlx5e_rq *rq, int wq_sz, int node)
|
||||
static void mlx5e_init_xsk_buffs(struct mlx5e_rq *rq)
|
||||
{
|
||||
int len = wq_sz << rq->wqe.info.log_num_frags;
|
||||
int i;
|
||||
|
||||
rq->wqe.alloc_units = kvzalloc_node(array_size(len, sizeof(*rq->wqe.alloc_units)),
|
||||
GFP_KERNEL, node);
|
||||
if (!rq->wqe.alloc_units)
|
||||
/* Assumptions used by XSK batched allocator. */
|
||||
WARN_ON(rq->wqe.info.num_frags != 1);
|
||||
WARN_ON(rq->wqe.info.log_num_frags != 0);
|
||||
WARN_ON(rq->wqe.info.arr[0].frag_stride != PAGE_SIZE);
|
||||
|
||||
/* Considering the above assumptions a fragment maps to a single
|
||||
* xsk_buff.
|
||||
*/
|
||||
for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) {
|
||||
rq->wqe.frags[i].xskp = &rq->wqe.alloc_units->xsk_buffs[i];
|
||||
|
||||
/* Skip first release due to deferred release as WQES are
|
||||
* not allocated yet.
|
||||
*/
|
||||
rq->wqe.frags[i].flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
|
||||
}
|
||||
}
|
||||
|
||||
static int mlx5e_init_wqe_alloc_info(struct mlx5e_rq *rq, int node)
|
||||
{
|
||||
int wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq);
|
||||
int len = wq_sz << rq->wqe.info.log_num_frags;
|
||||
struct mlx5e_wqe_frag_info *frags;
|
||||
union mlx5e_alloc_units *aus;
|
||||
int aus_sz;
|
||||
|
||||
if (rq->xsk_pool)
|
||||
aus_sz = sizeof(*aus->xsk_buffs);
|
||||
else
|
||||
aus_sz = sizeof(*aus->frag_pages);
|
||||
|
||||
aus = kvzalloc_node(array_size(len, aus_sz), GFP_KERNEL, node);
|
||||
if (!aus)
|
||||
return -ENOMEM;
|
||||
|
||||
mlx5e_init_frags_partition(rq);
|
||||
frags = kvzalloc_node(array_size(len, sizeof(*frags)), GFP_KERNEL, node);
|
||||
if (!frags) {
|
||||
kvfree(aus);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
rq->wqe.alloc_units = aus;
|
||||
rq->wqe.frags = frags;
|
||||
|
||||
if (rq->xsk_pool)
|
||||
mlx5e_init_xsk_buffs(rq);
|
||||
else
|
||||
mlx5e_init_frags_partition(rq);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void mlx5e_free_au_list(struct mlx5e_rq *rq)
|
||||
static void mlx5e_free_wqe_alloc_info(struct mlx5e_rq *rq)
|
||||
{
|
||||
kvfree(rq->wqe.frags);
|
||||
kvfree(rq->wqe.alloc_units);
|
||||
}
|
||||
|
||||
|
@ -693,7 +752,6 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
|
|||
struct mlx5e_rq_param *rqp,
|
||||
int node, struct mlx5e_rq *rq)
|
||||
{
|
||||
struct page_pool_params pp_params = { 0 };
|
||||
struct mlx5_core_dev *mdev = rq->mdev;
|
||||
void *rqc = rqp->rqc;
|
||||
void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
|
||||
|
@ -778,18 +836,9 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
|
|||
rq->wqe.info = rqp->frags_info;
|
||||
rq->buff.frame0_sz = rq->wqe.info.arr[0].frag_stride;
|
||||
|
||||
rq->wqe.frags =
|
||||
kvzalloc_node(array_size(sizeof(*rq->wqe.frags),
|
||||
(wq_sz << rq->wqe.info.log_num_frags)),
|
||||
GFP_KERNEL, node);
|
||||
if (!rq->wqe.frags) {
|
||||
err = -ENOMEM;
|
||||
goto err_rq_wq_destroy;
|
||||
}
|
||||
|
||||
err = mlx5e_init_au_list(rq, wq_sz, node);
|
||||
err = mlx5e_init_wqe_alloc_info(rq, node);
|
||||
if (err)
|
||||
goto err_rq_frags;
|
||||
goto err_rq_wq_destroy;
|
||||
}
|
||||
|
||||
if (xsk) {
|
||||
|
@ -798,12 +847,15 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
|
|||
xsk_pool_set_rxq_info(rq->xsk_pool, &rq->xdp_rxq);
|
||||
} else {
|
||||
/* Create a page_pool and register it with rxq */
|
||||
struct page_pool_params pp_params = { 0 };
|
||||
|
||||
pp_params.order = 0;
|
||||
pp_params.flags = 0; /* No-internal DMA mapping in page_pool */
|
||||
pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | PP_FLAG_PAGE_FRAG;
|
||||
pp_params.pool_size = pool_size;
|
||||
pp_params.nid = node;
|
||||
pp_params.dev = rq->pdev;
|
||||
pp_params.dma_dir = rq->buff.map_dir;
|
||||
pp_params.max_len = PAGE_SIZE;
|
||||
|
||||
/* page_pool can be used even when there is no rq->xdp_prog,
|
||||
* given page_pool does not handle DMA mapping there is no
|
||||
|
@ -869,9 +921,6 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
|
|||
rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
|
||||
}
|
||||
|
||||
rq->page_cache.head = 0;
|
||||
rq->page_cache.tail = 0;
|
||||
|
||||
return 0;
|
||||
|
||||
err_destroy_page_pool:
|
||||
|
@ -888,9 +937,7 @@ err_rq_drop_page:
|
|||
mlx5e_free_mpwqe_rq_drop_page(rq);
|
||||
break;
|
||||
default: /* MLX5_WQ_TYPE_CYCLIC */
|
||||
mlx5e_free_au_list(rq);
|
||||
err_rq_frags:
|
||||
kvfree(rq->wqe.frags);
|
||||
mlx5e_free_wqe_alloc_info(rq);
|
||||
}
|
||||
err_rq_wq_destroy:
|
||||
mlx5_wq_destroy(&rq->wq_ctrl);
|
||||
|
@ -904,7 +951,6 @@ err_rq_xdp_prog:
|
|||
static void mlx5e_free_rq(struct mlx5e_rq *rq)
|
||||
{
|
||||
struct bpf_prog *old_prog;
|
||||
int i;
|
||||
|
||||
if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) {
|
||||
old_prog = rcu_dereference_protected(rq->xdp_prog,
|
||||
|
@ -921,17 +967,7 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
|
|||
mlx5e_rq_free_shampo(rq);
|
||||
break;
|
||||
default: /* MLX5_WQ_TYPE_CYCLIC */
|
||||
kvfree(rq->wqe.frags);
|
||||
mlx5e_free_au_list(rq);
|
||||
}
|
||||
|
||||
for (i = rq->page_cache.head; i != rq->page_cache.tail;
|
||||
i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) {
|
||||
/* With AF_XDP, page_cache is not used, so this loop is not
|
||||
* entered, and it's safe to call mlx5e_page_release_dynamic
|
||||
* directly.
|
||||
*/
|
||||
mlx5e_page_release_dynamic(rq, rq->page_cache.page_cache[i], false);
|
||||
mlx5e_free_wqe_alloc_info(rq);
|
||||
}
|
||||
|
||||
xdp_rxq_info_unreg(&rq->xdp_rxq);
|
||||
|
@ -1094,7 +1130,7 @@ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
|
|||
return -ETIMEDOUT;
|
||||
}
|
||||
|
||||
void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq)
|
||||
void mlx5e_free_rx_missing_descs(struct mlx5e_rq *rq)
|
||||
{
|
||||
struct mlx5_wq_ll *wq;
|
||||
u16 head;
|
||||
|
@ -1106,8 +1142,12 @@ void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq)
|
|||
wq = &rq->mpwqe.wq;
|
||||
head = wq->head;
|
||||
|
||||
/* Outstanding UMR WQEs (in progress) start at wq->head */
|
||||
for (i = 0; i < rq->mpwqe.umr_in_progress; i++) {
|
||||
/* Release WQEs that are in missing state: they have been
|
||||
* popped from the list after completion but were not freed
|
||||
* due to deferred release.
|
||||
* Also free the linked-list reserved entry, hence the "+ 1".
|
||||
*/
|
||||
for (i = 0; i < mlx5_wq_ll_missing(wq) + 1; i++) {
|
||||
rq->dealloc_wqe(rq, head);
|
||||
head = mlx5_wq_ll_get_wqe_next_ix(wq, head);
|
||||
}
|
||||
|
@ -1134,7 +1174,7 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
|
|||
if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
|
||||
struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
|
||||
|
||||
mlx5e_free_rx_in_progress_descs(rq);
|
||||
mlx5e_free_rx_missing_descs(rq);
|
||||
|
||||
while (!mlx5_wq_ll_is_empty(wq)) {
|
||||
struct mlx5e_rx_wqe_ll *wqe;
|
||||
|
@ -1152,12 +1192,21 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
|
|||
0, true);
|
||||
} else {
|
||||
struct mlx5_wq_cyc *wq = &rq->wqe.wq;
|
||||
u16 missing = mlx5_wq_cyc_missing(wq);
|
||||
u16 head = mlx5_wq_cyc_get_head(wq);
|
||||
|
||||
while (!mlx5_wq_cyc_is_empty(wq)) {
|
||||
wqe_ix = mlx5_wq_cyc_get_tail(wq);
|
||||
rq->dealloc_wqe(rq, wqe_ix);
|
||||
mlx5_wq_cyc_pop(wq);
|
||||
}
|
||||
/* Missing slots might also contain unreleased pages due to
|
||||
* deferred release.
|
||||
*/
|
||||
while (missing--) {
|
||||
wqe_ix = mlx5_wq_cyc_ctr2ix(wq, head++);
|
||||
rq->dealloc_wqe(rq, wqe_ix);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -271,98 +271,35 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq,
|
|||
return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem);
|
||||
}
|
||||
|
||||
static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, struct page *page)
|
||||
#define MLX5E_PAGECNT_BIAS_MAX (PAGE_SIZE / 64)
|
||||
|
||||
static int mlx5e_page_alloc_fragmented(struct mlx5e_rq *rq,
|
||||
struct mlx5e_frag_page *frag_page)
|
||||
{
|
||||
struct mlx5e_page_cache *cache = &rq->page_cache;
|
||||
u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1);
|
||||
struct mlx5e_rq_stats *stats = rq->stats;
|
||||
struct page *page;
|
||||
|
||||
if (tail_next == cache->head) {
|
||||
stats->cache_full++;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!dev_page_is_reusable(page)) {
|
||||
stats->cache_waive++;
|
||||
return false;
|
||||
}
|
||||
|
||||
cache->page_cache[cache->tail] = page;
|
||||
cache->tail = tail_next;
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, union mlx5e_alloc_unit *au)
|
||||
{
|
||||
struct mlx5e_page_cache *cache = &rq->page_cache;
|
||||
struct mlx5e_rq_stats *stats = rq->stats;
|
||||
dma_addr_t addr;
|
||||
|
||||
if (unlikely(cache->head == cache->tail)) {
|
||||
stats->cache_empty++;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (page_ref_count(cache->page_cache[cache->head]) != 1) {
|
||||
stats->cache_busy++;
|
||||
return false;
|
||||
}
|
||||
|
||||
au->page = cache->page_cache[cache->head];
|
||||
cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1);
|
||||
stats->cache_reuse++;
|
||||
|
||||
addr = page_pool_get_dma_addr(au->page);
|
||||
/* Non-XSK always uses PAGE_SIZE. */
|
||||
dma_sync_single_for_device(rq->pdev, addr, PAGE_SIZE, rq->buff.map_dir);
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, union mlx5e_alloc_unit *au)
|
||||
{
|
||||
dma_addr_t addr;
|
||||
|
||||
if (mlx5e_rx_cache_get(rq, au))
|
||||
return 0;
|
||||
|
||||
au->page = page_pool_dev_alloc_pages(rq->page_pool);
|
||||
if (unlikely(!au->page))
|
||||
page = page_pool_dev_alloc_pages(rq->page_pool);
|
||||
if (unlikely(!page))
|
||||
return -ENOMEM;
|
||||
|
||||
/* Non-XSK always uses PAGE_SIZE. */
|
||||
addr = dma_map_page(rq->pdev, au->page, 0, PAGE_SIZE, rq->buff.map_dir);
|
||||
if (unlikely(dma_mapping_error(rq->pdev, addr))) {
|
||||
page_pool_recycle_direct(rq->page_pool, au->page);
|
||||
au->page = NULL;
|
||||
return -ENOMEM;
|
||||
}
|
||||
page_pool_set_dma_addr(au->page, addr);
|
||||
page_pool_fragment_page(page, MLX5E_PAGECNT_BIAS_MAX);
|
||||
|
||||
*frag_page = (struct mlx5e_frag_page) {
|
||||
.page = page,
|
||||
.frags = 0,
|
||||
};
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct page *page)
|
||||
static void mlx5e_page_release_fragmented(struct mlx5e_rq *rq,
|
||||
struct mlx5e_frag_page *frag_page)
|
||||
{
|
||||
dma_addr_t dma_addr = page_pool_get_dma_addr(page);
|
||||
u16 drain_count = MLX5E_PAGECNT_BIAS_MAX - frag_page->frags;
|
||||
struct page *page = frag_page->page;
|
||||
|
||||
dma_unmap_page_attrs(rq->pdev, dma_addr, PAGE_SIZE, rq->buff.map_dir,
|
||||
DMA_ATTR_SKIP_CPU_SYNC);
|
||||
page_pool_set_dma_addr(page, 0);
|
||||
}
|
||||
|
||||
void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle)
|
||||
{
|
||||
if (likely(recycle)) {
|
||||
if (mlx5e_rx_cache_put(rq, page))
|
||||
return;
|
||||
|
||||
mlx5e_page_dma_unmap(rq, page);
|
||||
page_pool_recycle_direct(rq->page_pool, page);
|
||||
} else {
|
||||
mlx5e_page_dma_unmap(rq, page);
|
||||
page_pool_release_page(rq->page_pool, page);
|
||||
put_page(page);
|
||||
}
|
||||
if (page_pool_defrag_page(page, drain_count) == 0)
|
||||
page_pool_put_defragged_page(rq->page_pool, page, -1, true);
|
||||
}
|
||||
|
||||
static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq,
|
||||
|
@ -371,22 +308,31 @@ static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq,
|
|||
int err = 0;
|
||||
|
||||
if (!frag->offset)
|
||||
/* On first frag (offset == 0), replenish page (alloc_unit actually).
|
||||
* Other frags that point to the same alloc_unit (with a different
|
||||
/* On first frag (offset == 0), replenish page.
|
||||
* Other frags that point to the same page (with a different
|
||||
* offset) should just use the new one without replenishing again
|
||||
* by themselves.
|
||||
*/
|
||||
err = mlx5e_page_alloc_pool(rq, frag->au);
|
||||
err = mlx5e_page_alloc_fragmented(rq, frag->frag_page);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq,
|
||||
struct mlx5e_wqe_frag_info *frag,
|
||||
bool recycle)
|
||||
static bool mlx5e_frag_can_release(struct mlx5e_wqe_frag_info *frag)
|
||||
{
|
||||
if (frag->last_in_page)
|
||||
mlx5e_page_release_dynamic(rq, frag->au->page, recycle);
|
||||
#define CAN_RELEASE_MASK \
|
||||
(BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE) | BIT(MLX5E_WQE_FRAG_SKIP_RELEASE))
|
||||
|
||||
#define CAN_RELEASE_VALUE BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE)
|
||||
|
||||
return (frag->flags & CAN_RELEASE_MASK) == CAN_RELEASE_VALUE;
|
||||
}
|
||||
|
||||
static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq,
|
||||
struct mlx5e_wqe_frag_info *frag)
|
||||
{
|
||||
if (mlx5e_frag_can_release(frag))
|
||||
mlx5e_page_release_fragmented(rq, frag->frag_page);
|
||||
}
|
||||
|
||||
static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix)
|
||||
|
@ -409,8 +355,10 @@ static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe,
|
|||
if (unlikely(err))
|
||||
goto free_frags;
|
||||
|
||||
frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
|
||||
|
||||
headroom = i == 0 ? rq->buff.headroom : 0;
|
||||
addr = page_pool_get_dma_addr(frag->au->page);
|
||||
addr = page_pool_get_dma_addr(frag->frag_page->page);
|
||||
wqe->data[i].addr = cpu_to_be64(addr + frag->offset + headroom);
|
||||
}
|
||||
|
||||
|
@ -418,35 +366,66 @@ static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe,
|
|||
|
||||
free_frags:
|
||||
while (--i >= 0)
|
||||
mlx5e_put_rx_frag(rq, --frag, true);
|
||||
mlx5e_put_rx_frag(rq, --frag);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline void mlx5e_free_rx_wqe(struct mlx5e_rq *rq,
|
||||
struct mlx5e_wqe_frag_info *wi,
|
||||
bool recycle)
|
||||
struct mlx5e_wqe_frag_info *wi)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (rq->xsk_pool) {
|
||||
/* The `recycle` parameter is ignored, and the page is always
|
||||
* put into the Reuse Ring, because there is no way to return
|
||||
* the page to the userspace when the interface goes down.
|
||||
*/
|
||||
xsk_buff_free(wi->au->xsk);
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < rq->wqe.info.num_frags; i++, wi++)
|
||||
mlx5e_put_rx_frag(rq, wi, recycle);
|
||||
mlx5e_put_rx_frag(rq, wi);
|
||||
}
|
||||
|
||||
static void mlx5e_xsk_free_rx_wqe(struct mlx5e_wqe_frag_info *wi)
|
||||
{
|
||||
if (!(wi->flags & BIT(MLX5E_WQE_FRAG_SKIP_RELEASE)))
|
||||
xsk_buff_free(*wi->xskp);
|
||||
}
|
||||
|
||||
static void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
|
||||
{
|
||||
struct mlx5e_wqe_frag_info *wi = get_frag(rq, ix);
|
||||
|
||||
mlx5e_free_rx_wqe(rq, wi, false);
|
||||
if (rq->xsk_pool)
|
||||
mlx5e_xsk_free_rx_wqe(wi);
|
||||
else
|
||||
mlx5e_free_rx_wqe(rq, wi);
|
||||
}
|
||||
|
||||
static void mlx5e_xsk_free_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
|
||||
{
|
||||
struct mlx5_wq_cyc *wq = &rq->wqe.wq;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < wqe_bulk; i++) {
|
||||
int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
|
||||
struct mlx5e_wqe_frag_info *wi;
|
||||
|
||||
wi = get_frag(rq, j);
|
||||
/* The page is always put into the Reuse Ring, because there
|
||||
* is no way to return the page to the userspace when the
|
||||
* interface goes down.
|
||||
*/
|
||||
mlx5e_xsk_free_rx_wqe(wi);
|
||||
}
|
||||
}
|
||||
|
||||
static void mlx5e_free_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
|
||||
{
|
||||
struct mlx5_wq_cyc *wq = &rq->wqe.wq;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < wqe_bulk; i++) {
|
||||
int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
|
||||
struct mlx5e_wqe_frag_info *wi;
|
||||
|
||||
wi = get_frag(rq, j);
|
||||
mlx5e_free_rx_wqe(rq, wi);
|
||||
}
|
||||
}
|
||||
|
||||
static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
|
||||
|
@ -467,18 +446,42 @@ static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
|
|||
return i;
|
||||
}
|
||||
|
||||
static int mlx5e_refill_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
|
||||
{
|
||||
int remaining = wqe_bulk;
|
||||
int i = 0;
|
||||
|
||||
/* The WQE bulk is split into smaller bulks that are sized
|
||||
* according to the page pool cache refill size to avoid overflowing
|
||||
* the page pool cache due to too many page releases at once.
|
||||
*/
|
||||
do {
|
||||
int refill = min_t(u16, rq->wqe.info.refill_unit, remaining);
|
||||
int alloc_count;
|
||||
|
||||
mlx5e_free_rx_wqes(rq, ix + i, refill);
|
||||
alloc_count = mlx5e_alloc_rx_wqes(rq, ix + i, refill);
|
||||
i += alloc_count;
|
||||
if (unlikely(alloc_count != refill))
|
||||
break;
|
||||
|
||||
remaining -= refill;
|
||||
} while (remaining);
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
static inline void
|
||||
mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb,
|
||||
union mlx5e_alloc_unit *au, u32 frag_offset, u32 len,
|
||||
struct page *page, u32 frag_offset, u32 len,
|
||||
unsigned int truesize)
|
||||
{
|
||||
dma_addr_t addr = page_pool_get_dma_addr(au->page);
|
||||
dma_addr_t addr = page_pool_get_dma_addr(page);
|
||||
|
||||
dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len,
|
||||
rq->buff.map_dir);
|
||||
page_ref_inc(au->page);
|
||||
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
|
||||
au->page, frag_offset, len, truesize);
|
||||
page, frag_offset, len, truesize);
|
||||
}
|
||||
|
||||
static inline void
|
||||
|
@ -496,30 +499,36 @@ mlx5e_copy_skb_header(struct mlx5e_rq *rq, struct sk_buff *skb,
|
|||
}
|
||||
|
||||
static void
|
||||
mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle)
|
||||
mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi)
|
||||
{
|
||||
union mlx5e_alloc_unit *alloc_units = wi->alloc_units;
|
||||
bool no_xdp_xmit;
|
||||
int i;
|
||||
|
||||
/* A common case for AF_XDP. */
|
||||
if (bitmap_full(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe))
|
||||
if (bitmap_full(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe))
|
||||
return;
|
||||
|
||||
no_xdp_xmit = bitmap_empty(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe);
|
||||
no_xdp_xmit = bitmap_empty(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe);
|
||||
|
||||
if (rq->xsk_pool) {
|
||||
/* The `recycle` parameter is ignored, and the page is always
|
||||
* put into the Reuse Ring, because there is no way to return
|
||||
* the page to the userspace when the interface goes down.
|
||||
struct xdp_buff **xsk_buffs = wi->alloc_units.xsk_buffs;
|
||||
|
||||
/* The page is always put into the Reuse Ring, because there
|
||||
* is no way to return the page to userspace when the interface
|
||||
* goes down.
|
||||
*/
|
||||
for (i = 0; i < rq->mpwqe.pages_per_wqe; i++)
|
||||
if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap))
|
||||
xsk_buff_free(alloc_units[i].xsk);
|
||||
if (no_xdp_xmit || !test_bit(i, wi->skip_release_bitmap))
|
||||
xsk_buff_free(xsk_buffs[i]);
|
||||
} else {
|
||||
for (i = 0; i < rq->mpwqe.pages_per_wqe; i++)
|
||||
if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap))
|
||||
mlx5e_page_release_dynamic(rq, alloc_units[i].page, recycle);
|
||||
for (i = 0; i < rq->mpwqe.pages_per_wqe; i++) {
|
||||
if (no_xdp_xmit || !test_bit(i, wi->skip_release_bitmap)) {
|
||||
struct mlx5e_frag_page *frag_page;
|
||||
|
||||
frag_page = &wi->alloc_units.frag_pages[i];
|
||||
mlx5e_page_release_fragmented(rq, frag_page);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -583,7 +592,8 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
|
|||
struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
|
||||
u16 entries, pi, header_offset, err, wqe_bbs, new_entries;
|
||||
u32 lkey = rq->mdev->mlx5e_res.hw_objs.mkey;
|
||||
struct page *page = shampo->last_page;
|
||||
u16 page_index = shampo->curr_page_index;
|
||||
struct mlx5e_frag_page *frag_page;
|
||||
u64 addr = shampo->last_addr;
|
||||
struct mlx5e_dma_info *dma_info;
|
||||
struct mlx5e_umr_wqe *umr_wqe;
|
||||
|
@ -597,6 +607,8 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
|
|||
umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
|
||||
build_klm_umr(sq, umr_wqe, shampo->key, index, entries, wqe_bbs);
|
||||
|
||||
frag_page = &shampo->pages[page_index];
|
||||
|
||||
for (i = 0; i < entries; i++, index++) {
|
||||
dma_info = &shampo->info[index];
|
||||
if (i >= klm_entries || (index < shampo->pi && shampo->pi - index <
|
||||
|
@ -605,16 +617,20 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
|
|||
header_offset = (index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) <<
|
||||
MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE;
|
||||
if (!(header_offset & (PAGE_SIZE - 1))) {
|
||||
union mlx5e_alloc_unit au;
|
||||
page_index = (page_index + 1) & (shampo->hd_per_wq - 1);
|
||||
frag_page = &shampo->pages[page_index];
|
||||
|
||||
err = mlx5e_page_alloc_pool(rq, &au);
|
||||
err = mlx5e_page_alloc_fragmented(rq, frag_page);
|
||||
if (unlikely(err))
|
||||
goto err_unmap;
|
||||
page = dma_info->page = au.page;
|
||||
addr = dma_info->addr = page_pool_get_dma_addr(au.page);
|
||||
|
||||
addr = page_pool_get_dma_addr(frag_page->page);
|
||||
|
||||
dma_info->addr = addr;
|
||||
dma_info->frag_page = frag_page;
|
||||
} else {
|
||||
dma_info->addr = addr + header_offset;
|
||||
dma_info->page = page;
|
||||
dma_info->frag_page = frag_page;
|
||||
}
|
||||
|
||||
update_klm:
|
||||
|
@ -632,7 +648,7 @@ update_klm:
|
|||
};
|
||||
|
||||
shampo->pi = (shampo->pi + new_entries) & (shampo->hd_per_wq - 1);
|
||||
shampo->last_page = page;
|
||||
shampo->curr_page_index = page_index;
|
||||
shampo->last_addr = addr;
|
||||
sq->pc += wqe_bbs;
|
||||
sq->doorbell_cseg = &umr_wqe->ctrl;
|
||||
|
@ -644,7 +660,7 @@ err_unmap:
|
|||
dma_info = &shampo->info[--index];
|
||||
if (!(i & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1))) {
|
||||
dma_info->addr = ALIGN_DOWN(dma_info->addr, PAGE_SIZE);
|
||||
mlx5e_page_release_dynamic(rq, dma_info->page, true);
|
||||
mlx5e_page_release_fragmented(rq, dma_info->frag_page);
|
||||
}
|
||||
}
|
||||
rq->stats->buff_alloc_err++;
|
||||
|
@ -693,8 +709,8 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq)
|
|||
static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
|
||||
{
|
||||
struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
|
||||
union mlx5e_alloc_unit *au = &wi->alloc_units[0];
|
||||
struct mlx5e_icosq *sq = rq->icosq;
|
||||
struct mlx5e_frag_page *frag_page;
|
||||
struct mlx5_wq_cyc *wq = &sq->wq;
|
||||
struct mlx5e_umr_wqe *umr_wqe;
|
||||
u32 offset; /* 17-bit value with MTT. */
|
||||
|
@ -712,13 +728,15 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
|
|||
umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
|
||||
memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe));
|
||||
|
||||
for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, au++) {
|
||||
frag_page = &wi->alloc_units.frag_pages[0];
|
||||
|
||||
for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, frag_page++) {
|
||||
dma_addr_t addr;
|
||||
|
||||
err = mlx5e_page_alloc_pool(rq, au);
|
||||
err = mlx5e_page_alloc_fragmented(rq, frag_page);
|
||||
if (unlikely(err))
|
||||
goto err_unmap;
|
||||
addr = page_pool_get_dma_addr(au->page);
|
||||
addr = page_pool_get_dma_addr(frag_page->page);
|
||||
umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
|
||||
.ptag = cpu_to_be64(addr | MLX5_EN_WR),
|
||||
};
|
||||
|
@ -735,7 +753,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
|
|||
sizeof(*umr_wqe->inline_mtts) * pad);
|
||||
}
|
||||
|
||||
bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe);
|
||||
bitmap_zero(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe);
|
||||
wi->consumed_strides = 0;
|
||||
|
||||
umr_wqe->ctrl.opmod_idx_opcode =
|
||||
|
@ -759,8 +777,8 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
|
|||
|
||||
err_unmap:
|
||||
while (--i >= 0) {
|
||||
au--;
|
||||
mlx5e_page_release_dynamic(rq, au->page, true);
|
||||
frag_page--;
|
||||
mlx5e_page_release_fragmented(rq, frag_page);
|
||||
}
|
||||
|
||||
err:
|
||||
|
@ -778,8 +796,8 @@ err:
|
|||
void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close)
|
||||
{
|
||||
struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
|
||||
struct mlx5e_frag_page *deleted_page = NULL;
|
||||
int hd_per_wq = shampo->hd_per_wq;
|
||||
struct page *deleted_page = NULL;
|
||||
struct mlx5e_dma_info *hd_info;
|
||||
int i, index = start;
|
||||
|
||||
|
@ -792,10 +810,12 @@ void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close
|
|||
|
||||
hd_info = &shampo->info[index];
|
||||
hd_info->addr = ALIGN_DOWN(hd_info->addr, PAGE_SIZE);
|
||||
if (hd_info->page != deleted_page) {
|
||||
deleted_page = hd_info->page;
|
||||
mlx5e_page_release_dynamic(rq, hd_info->page, false);
|
||||
if (hd_info->frag_page && hd_info->frag_page != deleted_page) {
|
||||
deleted_page = hd_info->frag_page;
|
||||
mlx5e_page_release_fragmented(rq, hd_info->frag_page);
|
||||
}
|
||||
|
||||
hd_info->frag_page = NULL;
|
||||
}
|
||||
|
||||
if (start + len > hd_per_wq) {
|
||||
|
@ -810,8 +830,8 @@ void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close
|
|||
static void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
|
||||
{
|
||||
struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
|
||||
/* Don't recycle, this function is called on rq/netdev close */
|
||||
mlx5e_free_rx_mpwqe(rq, wi, false);
|
||||
/* This function is called on rq/netdev close. */
|
||||
mlx5e_free_rx_mpwqe(rq, wi);
|
||||
}
|
||||
|
||||
INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
|
||||
|
@ -838,17 +858,20 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
|
|||
*/
|
||||
wqe_bulk -= (head + wqe_bulk) & rq->wqe.info.wqe_index_mask;
|
||||
|
||||
if (!rq->xsk_pool)
|
||||
count = mlx5e_alloc_rx_wqes(rq, head, wqe_bulk);
|
||||
else if (likely(!rq->xsk_pool->dma_need_sync))
|
||||
if (!rq->xsk_pool) {
|
||||
count = mlx5e_refill_rx_wqes(rq, head, wqe_bulk);
|
||||
} else if (likely(!rq->xsk_pool->dma_need_sync)) {
|
||||
mlx5e_xsk_free_rx_wqes(rq, head, wqe_bulk);
|
||||
count = mlx5e_xsk_alloc_rx_wqes_batched(rq, head, wqe_bulk);
|
||||
else
|
||||
} else {
|
||||
mlx5e_xsk_free_rx_wqes(rq, head, wqe_bulk);
|
||||
/* If dma_need_sync is true, it's more efficient to call
|
||||
* xsk_buff_alloc in a loop, rather than xsk_buff_alloc_batch,
|
||||
* because the latter does the same check and returns only one
|
||||
* frame.
|
||||
*/
|
||||
count = mlx5e_xsk_alloc_rx_wqes(rq, head, wqe_bulk);
|
||||
}
|
||||
|
||||
mlx5_wq_cyc_push_n(wq, count);
|
||||
if (unlikely(count != wqe_bulk)) {
|
||||
|
@ -1029,6 +1052,11 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
|
|||
head = rq->mpwqe.actual_wq_head;
|
||||
i = missing;
|
||||
do {
|
||||
struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, head);
|
||||
|
||||
/* Deferred free for better page pool cache usage. */
|
||||
mlx5e_free_rx_mpwqe(rq, wi);
|
||||
|
||||
alloc_err = rq->xsk_pool ? mlx5e_xsk_alloc_rx_mpwqe(rq, head) :
|
||||
mlx5e_alloc_rx_mpwqe(rq, head);
|
||||
|
||||
|
@ -1133,7 +1161,7 @@ static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index)
|
|||
struct mlx5e_dma_info *last_head = &rq->mpwqe.shampo->info[header_index];
|
||||
u16 head_offset = (last_head->addr & (PAGE_SIZE - 1)) + rq->buff.headroom;
|
||||
|
||||
return page_address(last_head->page) + head_offset;
|
||||
return page_address(last_head->frag_page->page) + head_offset;
|
||||
}
|
||||
|
||||
static void mlx5e_shampo_update_ipv4_udp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4)
|
||||
|
@@ -1586,7 +1614,7 @@ static struct sk_buff *
 mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
 			  struct mlx5_cqe64 *cqe, u32 cqe_bcnt)
 {
-	union mlx5e_alloc_unit *au = wi->au;
+	struct mlx5e_frag_page *frag_page = wi->frag_page;
 	u16 rx_headroom = rq->buff.headroom;
 	struct bpf_prog *prog;
 	struct sk_buff *skb;
@@ -1595,11 +1623,11 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
 	dma_addr_t addr;
 	u32 frag_size;
 
-	va = page_address(au->page) + wi->offset;
+	va = page_address(frag_page->page) + wi->offset;
 	data = va + rx_headroom;
 	frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
 
-	addr = page_pool_get_dma_addr(au->page);
+	addr = page_pool_get_dma_addr(frag_page->page);
 	dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset,
 				      frag_size, rq->buff.map_dir);
 	net_prefetch(data);
@@ -1623,7 +1651,8 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
 		return NULL;
 
 	/* queue up for recycling/reuse */
-	page_ref_inc(au->page);
+	skb_mark_for_recycle(skb);
+	frag_page->frags++;
 
 	return skb;
 }
@@ -1634,8 +1663,8 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
 {
 	struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
 	struct mlx5e_wqe_frag_info *head_wi = wi;
-	union mlx5e_alloc_unit *au = wi->au;
 	u16 rx_headroom = rq->buff.headroom;
+	struct mlx5e_frag_page *frag_page;
 	struct skb_shared_info *sinfo;
 	struct mlx5e_xdp_buff mxbuf;
 	u32 frag_consumed_bytes;
@@ -1645,10 +1674,12 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
 	u32 truesize;
 	void *va;
 
-	va = page_address(au->page) + wi->offset;
+	frag_page = wi->frag_page;
+
+	va = page_address(frag_page->page) + wi->offset;
 	frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt);
 
-	addr = page_pool_get_dma_addr(au->page);
+	addr = page_pool_get_dma_addr(frag_page->page);
 	dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset,
 				      rq->buff.frame0_sz, rq->buff.map_dir);
 	net_prefetchw(va); /* xdp_frame data area */
@@ -1665,11 +1696,11 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
 	while (cqe_bcnt) {
 		skb_frag_t *frag;
 
-		au = wi->au;
+		frag_page = wi->frag_page;
 
 		frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt);
 
-		addr = page_pool_get_dma_addr(au->page);
+		addr = page_pool_get_dma_addr(frag_page->page);
 		dma_sync_single_for_cpu(rq->pdev, addr + wi->offset,
 					frag_consumed_bytes, rq->buff.map_dir);
 
@@ -1683,11 +1714,12 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
 		}
 
 		frag = &sinfo->frags[sinfo->nr_frags++];
-		__skb_frag_set_page(frag, au->page);
+
+		__skb_frag_set_page(frag, frag_page->page);
 		skb_frag_off_set(frag, wi->offset);
 		skb_frag_size_set(frag, frag_consumed_bytes);
 
-		if (page_is_pfmemalloc(au->page))
+		if (page_is_pfmemalloc(frag_page->page))
 			xdp_buff_set_frag_pfmemalloc(&mxbuf.xdp);
 
 		sinfo->xdp_frags_size += frag_consumed_bytes;
@@ -1704,7 +1736,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
 				int i;
 
 				for (i = wi - head_wi; i < rq->wqe.info.num_frags; i++)
-					mlx5e_put_rx_frag(rq, &head_wi[i], true);
+					mlx5e_put_rx_frag(rq, &head_wi[i]);
 			}
 			return NULL; /* page/packet was consumed by XDP */
 		}
@@ -1716,21 +1748,17 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
 	if (unlikely(!skb))
 		return NULL;
 
-	page_ref_inc(head_wi->au->page);
+	skb_mark_for_recycle(skb);
+	head_wi->frag_page->frags++;
 
 	if (xdp_buff_has_frags(&mxbuf.xdp)) {
-		int i;
-
 		/* sinfo->nr_frags is reset by build_skb, calculate again. */
 		xdp_update_skb_shared_info(skb, wi - head_wi - 1,
 					   sinfo->xdp_frags_size, truesize,
 					   xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp));
 
-		for (i = 0; i < sinfo->nr_frags; i++) {
-			skb_frag_t *frag = &sinfo->frags[i];
-
-			page_ref_inc(skb_frag_page(frag));
-		}
+		for (struct mlx5e_wqe_frag_info *pwi = head_wi + 1; pwi < wi; pwi++)
+			pwi->frag_page->frags++;
 	}
 
 	return skb;
@@ -1768,7 +1796,7 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 
 	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
 		mlx5e_handle_rx_err_cqe(rq, cqe);
-		goto free_wqe;
+		goto wq_cyc_pop;
 	}
 
 	skb = INDIRECT_CALL_3(rq->wqe.skb_from_cqe,
@@ -1782,9 +1810,9 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 			/* do not return page to cache,
 			 * it will be returned on XDP_TX completion.
 			 */
-			goto wq_cyc_pop;
+			wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
 		}
-		goto free_wqe;
+		goto wq_cyc_pop;
 	}
 
 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
@@ -1792,13 +1820,11 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 	if (mlx5e_cqe_regb_chain(cqe))
 		if (!mlx5e_tc_update_skb_nic(cqe, skb)) {
 			dev_kfree_skb_any(skb);
-			goto free_wqe;
+			goto wq_cyc_pop;
 		}
 
 	napi_gro_receive(rq->cq.napi, skb);
 
-free_wqe:
-	mlx5e_free_rx_wqe(rq, wi, true);
 wq_cyc_pop:
 	mlx5_wq_cyc_pop(wq);
 }
@@ -1822,7 +1848,7 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 
 	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
 		mlx5e_handle_rx_err_cqe(rq, cqe);
-		goto free_wqe;
+		goto wq_cyc_pop;
 	}
 
 	skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
@@ -1835,9 +1861,9 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 			/* do not return page to cache,
 			 * it will be returned on XDP_TX completion.
 			 */
-			goto wq_cyc_pop;
+			wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
 		}
-		goto free_wqe;
+		goto wq_cyc_pop;
 	}
 
 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
@@ -1847,8 +1873,6 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 
 	mlx5e_rep_tc_receive(cqe, rq, skb);
 
-free_wqe:
-	mlx5e_free_rx_wqe(rq, wi, true);
 wq_cyc_pop:
 	mlx5_wq_cyc_pop(wq);
 }
@@ -1901,7 +1925,6 @@ mpwrq_cqe_out:
 
 	wq = &rq->mpwqe.wq;
 	wqe = mlx5_wq_ll_get_wqe(wq, wqe_id);
-	mlx5e_free_rx_mpwqe(rq, wi, true);
 	mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
 }
 
@@ -1913,7 +1936,8 @@ const struct mlx5e_rx_handlers mlx5e_rx_handlers_rep = {
 
 static void
 mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq,
-		    union mlx5e_alloc_unit *au, u32 data_bcnt, u32 data_offset)
+		    struct mlx5e_frag_page *frag_page,
+		    u32 data_bcnt, u32 data_offset)
 {
 	net_prefetchw(skb->data);
 
@@ -1927,12 +1951,13 @@ mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq,
 		else
 			truesize = ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz));
 
-		mlx5e_add_skb_frag(rq, skb, au, data_offset,
+		frag_page->frags++;
+		mlx5e_add_skb_frag(rq, skb, frag_page->page, data_offset,
 				   pg_consumed_bytes, truesize);
 
 		data_bcnt -= pg_consumed_bytes;
 		data_offset = 0;
-		au++;
+		frag_page++;
 	}
 }
 
@@ -1941,11 +1966,11 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
 				   struct mlx5_cqe64 *cqe, u16 cqe_bcnt, u32 head_offset,
 				   u32 page_idx)
 {
-	union mlx5e_alloc_unit *au = &wi->alloc_units[page_idx];
+	struct mlx5e_frag_page *frag_page = &wi->alloc_units.frag_pages[page_idx];
 	u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt);
+	struct mlx5e_frag_page *head_page = frag_page;
 	u32 frag_offset = head_offset + headlen;
 	u32 byte_cnt = cqe_bcnt - headlen;
-	union mlx5e_alloc_unit *head_au = au;
 	struct sk_buff *skb;
 	dma_addr_t addr;
 
@@ -1960,14 +1985,15 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
 
 	/* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */
 	if (unlikely(frag_offset >= PAGE_SIZE)) {
-		au++;
+		frag_page++;
 		frag_offset -= PAGE_SIZE;
 	}
 
-	mlx5e_fill_skb_data(skb, rq, au, byte_cnt, frag_offset);
+	skb_mark_for_recycle(skb);
+	mlx5e_fill_skb_data(skb, rq, frag_page, byte_cnt, frag_offset);
 	/* copy header */
-	addr = page_pool_get_dma_addr(head_au->page);
-	mlx5e_copy_skb_header(rq, skb, head_au->page, addr,
+	addr = page_pool_get_dma_addr(head_page->page);
+	mlx5e_copy_skb_header(rq, skb, head_page->page, addr,
 			      head_offset, head_offset, headlen);
 	/* skb linear part was allocated with headlen and aligned to long */
 	skb->tail += headlen;
@@ -1981,7 +2007,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
 				struct mlx5_cqe64 *cqe, u16 cqe_bcnt, u32 head_offset,
 				u32 page_idx)
 {
-	union mlx5e_alloc_unit *au = &wi->alloc_units[page_idx];
+	struct mlx5e_frag_page *frag_page = &wi->alloc_units.frag_pages[page_idx];
 	u16 rx_headroom = rq->buff.headroom;
 	struct bpf_prog *prog;
 	struct sk_buff *skb;
@@ -1996,11 +2022,11 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
 		return NULL;
 	}
 
-	va = page_address(au->page) + head_offset;
+	va = page_address(frag_page->page) + head_offset;
 	data = va + rx_headroom;
 	frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
 
-	addr = page_pool_get_dma_addr(au->page);
+	addr = page_pool_get_dma_addr(frag_page->page);
 	dma_sync_single_range_for_cpu(rq->pdev, addr, head_offset,
 				      frag_size, rq->buff.map_dir);
 	net_prefetch(data);
@@ -2013,7 +2039,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
 		mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, cqe_bcnt, &mxbuf);
 		if (mlx5e_xdp_handle(rq, prog, &mxbuf)) {
 			if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
-				__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
+				__set_bit(page_idx, wi->skip_release_bitmap); /* non-atomic */
 			return NULL; /* page/packet was consumed by XDP */
 		}
 
@@ -2027,7 +2053,8 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
 		return NULL;
 
 	/* queue up for recycling/reuse */
-	page_ref_inc(au->page);
+	skb_mark_for_recycle(skb);
+	frag_page->frags++;
 
 	return skb;
 }
@@ -2044,7 +2071,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
 	void *hdr, *data;
 	u32 frag_size;
 
-	hdr = page_address(head->page) + head_offset;
+	hdr = page_address(head->frag_page->page) + head_offset;
 	data = hdr + rx_headroom;
 	frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + head_size);
 
@@ -2058,9 +2085,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
 		if (unlikely(!skb))
 			return NULL;
 
-		/* queue up for recycling/reuse */
-		page_ref_inc(head->page);
-
+		head->frag_page->frags++;
 	} else {
 		/* allocate SKB and copy header for large header */
 		rq->stats->gro_large_hds++;
@@ -2072,13 +2097,17 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
 		}
 
 		prefetchw(skb->data);
-		mlx5e_copy_skb_header(rq, skb, head->page, head->addr,
+		mlx5e_copy_skb_header(rq, skb, head->frag_page->page, head->addr,
 				      head_offset + rx_headroom,
 				      rx_headroom, head_size);
 		/* skb linear part was allocated with headlen and aligned to long */
 		skb->tail += head_size;
 		skb->len += head_size;
 	}
+
+	/* queue up for recycling/reuse */
+	skb_mark_for_recycle(skb);
+
 	return skb;
 }
 
@@ -2123,8 +2152,10 @@ mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index)
 	u64 addr = shampo->info[header_index].addr;
 
 	if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) {
-		shampo->info[header_index].addr = ALIGN_DOWN(addr, PAGE_SIZE);
-		mlx5e_page_release_dynamic(rq, shampo->info[header_index].page, true);
+		struct mlx5e_dma_info *dma_info = &shampo->info[header_index];
+
+		dma_info->addr = ALIGN_DOWN(addr, PAGE_SIZE);
+		mlx5e_page_release_fragmented(rq, dma_info->frag_page);
 	}
 	bitmap_clear(shampo->bitmap, header_index, 1);
 }
@@ -2145,7 +2176,6 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq
 	bool match = cqe->shampo.match;
 	struct mlx5e_rq_stats *stats = rq->stats;
 	struct mlx5e_rx_wqe_ll *wqe;
-	union mlx5e_alloc_unit *au;
 	struct mlx5e_mpw_info *wi;
 	struct mlx5_wq_ll *wq;
 
@@ -2195,8 +2225,10 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq
 	}
 
 	if (likely(head_size)) {
-		au = &wi->alloc_units[page_idx];
-		mlx5e_fill_skb_data(*skb, rq, au, data_bcnt, data_offset);
+		struct mlx5e_frag_page *frag_page;
+
+		frag_page = &wi->alloc_units.frag_pages[page_idx];
+		mlx5e_fill_skb_data(*skb, rq, frag_page, data_bcnt, data_offset);
 	}
 
 	mlx5e_shampo_complete_rx_cqe(rq, cqe, cqe_bcnt, *skb);
@@ -2210,7 +2242,6 @@ mpwrq_cqe_out:
 
 	wq = &rq->mpwqe.wq;
 	wqe = mlx5_wq_ll_get_wqe(wq, wqe_id);
-	mlx5e_free_rx_mpwqe(rq, wi, true);
 	mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
 }
 
@@ -2270,7 +2301,6 @@ mpwrq_cqe_out:
 
 	wq = &rq->mpwqe.wq;
 	wqe = mlx5_wq_ll_get_wqe(wq, wqe_id);
-	mlx5e_free_rx_mpwqe(rq, wi, true);
 	mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
 }
 
@@ -2489,7 +2519,7 @@ static void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 
 	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
 		rq->stats->wqe_err++;
-		goto wq_free_wqe;
+		goto wq_cyc_pop;
 	}
 
 	skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
@@ -2497,17 +2527,16 @@ static void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 			      mlx5e_skb_from_cqe_nonlinear,
 			      rq, wi, cqe, cqe_bcnt);
 	if (!skb)
-		goto wq_free_wqe;
+		goto wq_cyc_pop;
 
 	mlx5i_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
 	if (unlikely(!skb->dev)) {
 		dev_kfree_skb_any(skb);
-		goto wq_free_wqe;
+		goto wq_cyc_pop;
 	}
 	napi_gro_receive(rq->cq.napi, skb);
 
-wq_free_wqe:
-	mlx5e_free_rx_wqe(rq, wi, true);
+wq_cyc_pop:
 	mlx5_wq_cyc_pop(wq);
 }
 
@@ -2582,12 +2611,12 @@ static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe
 
 	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
 		rq->stats->wqe_err++;
-		goto free_wqe;
+		goto wq_cyc_pop;
 	}
 
 	skb = mlx5e_skb_from_cqe_nonlinear(rq, wi, cqe, cqe_bcnt);
 	if (!skb)
-		goto free_wqe;
+		goto wq_cyc_pop;
 
 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
 	skb_push(skb, ETH_HLEN);
@@ -2596,8 +2625,7 @@ static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe
 				 rq->netdev->devlink_port);
 	dev_kfree_skb_any(skb);
 
-free_wqe:
-	mlx5e_free_rx_wqe(rq, wi, false);
+wq_cyc_pop:
 	mlx5_wq_cyc_pop(wq);
 }
 
@@ -179,11 +179,6 @@ static const struct counter_desc sw_stats_desc[] = {
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_congst_umr) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_err) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_recover) },
@@ -358,11 +353,6 @@ static void mlx5e_stats_grp_sw_update_stats_rq_stats(struct mlx5e_sw_stats *s,
 	s->rx_buff_alloc_err += rq_stats->buff_alloc_err;
 	s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks;
 	s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts;
-	s->rx_cache_reuse += rq_stats->cache_reuse;
-	s->rx_cache_full += rq_stats->cache_full;
-	s->rx_cache_empty += rq_stats->cache_empty;
-	s->rx_cache_busy += rq_stats->cache_busy;
-	s->rx_cache_waive += rq_stats->cache_waive;
 	s->rx_congst_umr += rq_stats->congst_umr;
 	s->rx_arfs_err += rq_stats->arfs_err;
 	s->rx_recover += rq_stats->recover;
@@ -1978,11 +1968,6 @@ static const struct counter_desc rq_stats_desc[] = {
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, congst_umr) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_err) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, recover) },
@@ -2163,11 +2148,6 @@ static const struct counter_desc ptp_rq_stats_desc[] = {
 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
-	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_reuse) },
-	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_full) },
-	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_empty) },
-	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_busy) },
-	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_waive) },
 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, congst_umr) },
 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, arfs_err) },
 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, recover) },
@@ -193,11 +193,6 @@ struct mlx5e_sw_stats {
 	u64 rx_buff_alloc_err;
 	u64 rx_cqe_compress_blks;
 	u64 rx_cqe_compress_pkts;
-	u64 rx_cache_reuse;
-	u64 rx_cache_full;
-	u64 rx_cache_empty;
-	u64 rx_cache_busy;
-	u64 rx_cache_waive;
 	u64 rx_congst_umr;
 	u64 rx_arfs_err;
 	u64 rx_recover;
@@ -362,11 +357,6 @@ struct mlx5e_rq_stats {
 	u64 buff_alloc_err;
 	u64 cqe_compress_blks;
 	u64 cqe_compress_pkts;
-	u64 cache_reuse;
-	u64 cache_full;
-	u64 cache_empty;
-	u64 cache_busy;
-	u64 cache_waive;
 	u64 congst_umr;
 	u64 arfs_err;
 	u64 recover;
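
The en_rx.c hunks above call mlx5e_page_release_fragmented() and bump the frag_page->frags counter, but the definitions of those helpers live in other patches of this series and are not visible here. The following standalone sketch only illustrates the general page_pool fragment-counting pattern they follow; the my_* names and the bias constant are invented for the example (they are not mlx5e symbols), and the real driver code may differ in detail.

/* Illustrative sketch only; not part of the patch series. */
#include <linux/limits.h>
#include <net/page_pool.h>

#define MY_PAGECNT_BIAS_MAX USHRT_MAX

struct my_frag_page {
	struct page *page;
	u16 frags;		/* fragments handed out from this page */
};

static int my_page_alloc_fragmented(struct page_pool *pool,
				    struct my_frag_page *fp)
{
	struct page *page = page_pool_dev_alloc_pages(pool);

	if (unlikely(!page))
		return -ENOMEM;

	/* Pre-charge a large fragment bias so that handing out a fragment
	 * is a plain fp->frags++ instead of an atomic page refcount bump.
	 */
	page_pool_fragment_page(page, MY_PAGECNT_BIAS_MAX);
	fp->page = page;
	fp->frags = 0;
	return 0;
}

static void my_page_release_fragmented(struct page_pool *pool,
				       struct my_frag_page *fp)
{
	u16 drain_count = MY_PAGECNT_BIAS_MAX - fp->frags;

	/* Return the unused part of the bias; once the fragment count
	 * drops to zero the page goes back to the pool for recycling.
	 */
	if (page_pool_defrag_page(fp->page, drain_count) == 0)
		page_pool_put_defragged_page(pool, fp->page, -1, true);
}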