Merge branch 'xdp: recycle Page Pool backed skbs built from XDP frames'
Alexander Lobakin says:

====================

Yeah, I still remember that "Who needs cpumap nowadays" (c), but anyway.

__xdp_build_skb_from_frame() missed the moment when the networking stack became able to recycle skb pages backed by a page_pool. This was making e.g. cpumap redirect even less effective than a plain %XDP_PASS. veth was also affected in some scenarios.

A lot of drivers use skb_mark_for_recycle() already; it has been almost two years, and there seem to be no issues with using it in the generic code as well. {__,}xdp_release_frame() can then be removed, as it has lost its last user.

Page Pool then becomes zero-alloc (or almost) in the above-mentioned cases, too. Other memory type models (who needs them at this point?) see no changes.

Some numbers from one Xeon Platinum core bombarded with 27 Mpps of 64-byte IPv6 UDP, iavf with XDP [0] (CONFIG_PAGE_POOL_STATS enabled):

                                          src cpu Rx     drops  dst cpu Rx
Plain %XDP_PASS on baseline,
Page Pool driver:                           2.1 Mpps       N/A    2.1 Mpps
cpumap redirect (cross-core, without
leaving the NUMA node) on baseline:         6.8 Mpps  5.0 Mpps    1.8 Mpps
cpumap redirect with skb PP recycling:      7.9 Mpps  5.7 Mpps    2.2 Mpps
                                                                 +22% vs cpumap redirect on baseline

[0] https://github.com/alobakin/linux/commits/iavf-xdp

====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
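For context, the cpumap path being benchmarked boils down to an XDP program like the following. This is a minimal illustrative sketch, not code from the series; the map size and target CPU index are assumptions:

// SPDX-License-Identifier: GPL-2.0
/* Minimal cpumap redirect sketch (illustrative, not from this series).
 * Frames redirected through the cpumap are converted to skbs on the
 * destination CPU via __xdp_build_skb_from_frame(), which is exactly
 * the spot this series teaches to mark pages for recycling.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_CPUMAP);
	__uint(max_entries, 64);		/* assumed size */
	__type(key, __u32);
	__type(value, struct bpf_cpumap_val);
} cpu_map SEC(".maps");

SEC("xdp")
int redirect_to_cpu(struct xdp_md *ctx)
{
	/* Queue every frame to CPU 1 (assumed index); userspace must
	 * have populated cpu_map[1] with a queue size beforehand.
	 */
	return bpf_redirect_map(&cpu_map, 1, 0);
}

char _license[] SEC("license") = "GPL";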
This commit is contained in:
commit 5584d9e63e
include/linux/skbuff.h
@@ -5069,12 +5069,12 @@ static inline u64 skb_get_kcov_handle(struct sk_buff *skb)
 #endif
 }
 
-#ifdef CONFIG_PAGE_POOL
 static inline void skb_mark_for_recycle(struct sk_buff *skb)
 {
+#ifdef CONFIG_PAGE_POOL
 	skb->pp_recycle = 1;
-}
 #endif
+}
 
 #endif	/* __KERNEL__ */
 #endif	/* _LINUX_SKBUFF_H */
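The effect of moving the #ifdef inside the helper: generic code can now call skb_mark_for_recycle() unconditionally, since it compiles to an empty body when CONFIG_PAGE_POOL is off. A sketch of such a caller (the wrapper is hypothetical; build_skb() is the real kernel API):

/* Hypothetical generic helper: note there is no #ifdef CONFIG_PAGE_POOL
 * at the call site; skb_mark_for_recycle() is now always defined and is
 * a no-op when page_pool support is compiled out.
 */
static struct sk_buff *build_recyclable_skb(void *data, unsigned int frag_size)
{
	struct sk_buff *skb = build_skb(data, frag_size);

	if (skb)
		skb_mark_for_recycle(skb);

	return skb;
}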
include/net/xdp.h
@@ -317,35 +317,6 @@ void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq);
 void xdp_return_frame_bulk(struct xdp_frame *xdpf,
 			   struct xdp_frame_bulk *bq);
 
-/* When sending xdp_frame into the network stack, then there is no
- * return point callback, which is needed to release e.g. DMA-mapping
- * resources with page_pool. Thus, have explicit function to release
- * frame resources.
- */
-void __xdp_release_frame(void *data, struct xdp_mem_info *mem);
-static inline void xdp_release_frame(struct xdp_frame *xdpf)
-{
-	struct xdp_mem_info *mem = &xdpf->mem;
-	struct skb_shared_info *sinfo;
-	int i;
-
-	/* Curr only page_pool needs this */
-	if (mem->type != MEM_TYPE_PAGE_POOL)
-		return;
-
-	if (likely(!xdp_frame_has_frags(xdpf)))
-		goto out;
-
-	sinfo = xdp_get_shared_info_from_frame(xdpf);
-	for (i = 0; i < sinfo->nr_frags; i++) {
-		struct page *page = skb_frag_page(&sinfo->frags[i]);
-
-		__xdp_release_frame(page_address(page), mem);
-	}
-out:
-	__xdp_release_frame(xdpf->data, mem);
-}
-
 static __always_inline unsigned int xdp_get_frame_len(struct xdp_frame *xdpf)
 {
 	struct skb_shared_info *sinfo;
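The deleted comment carries the rationale for the removal: it claims the stack has no return-point callback for page_pool-backed frames, which is no longer true. A simplified model of the free path that supersedes this helper, loosely based on the kernel's skb_pp_recycle() and page_pool_return_skb_page() (a sketch, not the verbatim implementation):

/* Simplified model of why xdp_release_frame() is obsolete: when an skb
 * with skb->pp_recycle set frees its head or frags, the page is handed
 * back to its page_pool (which also deals with DMA unmapping) instead
 * of going through put_page().
 */
static bool try_pp_recycle(struct sk_buff *skb, void *data)
{
	if (!skb->pp_recycle)
		return false;	/* regular put_page() path */

	return page_pool_return_skb_page(virt_to_head_page(data));
}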
net/core/xdp.c
@@ -531,21 +531,6 @@ out:
 }
 EXPORT_SYMBOL_GPL(xdp_return_buff);
 
-/* Only called for MEM_TYPE_PAGE_POOL see xdp.h */
-void __xdp_release_frame(void *data, struct xdp_mem_info *mem)
-{
-	struct xdp_mem_allocator *xa;
-	struct page *page;
-
-	rcu_read_lock();
-	xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
-	page = virt_to_head_page(data);
-	if (xa)
-		page_pool_release_page(xa->page_pool, page);
-	rcu_read_unlock();
-}
-EXPORT_SYMBOL_GPL(__xdp_release_frame);
-
 void xdp_attachment_setup(struct xdp_attachment_info *info,
 			  struct netdev_bpf *bpf)
 {
@@ -658,8 +643,8 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
 	 * - RX ring dev queue index	(skb_record_rx_queue)
 	 */
 
-	/* Until page_pool get SKB return path, release DMA here */
-	xdp_release_frame(xdpf);
+	if (xdpf->mem.type == MEM_TYPE_PAGE_POOL)
+		skb_mark_for_recycle(skb);
 
 	/* Allow SKB to reuse area used by xdp_frame */
 	xdp_scrub_frame(xdpf);
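This is the conversion point that cpumap and veth hit; for context, paraphrasing the loop in kernel/bpf/cpumap.c's kthread (simplified, surrounding setup trimmed):

/* Paraphrased from the cpumap kthread: every queued xdp_frame becomes
 * an skb on the destination CPU. After this patch, a page_pool-backed
 * frame yields an skb whose pages are recycled on free.
 */
for (i = 0; i < n; i++) {
	struct xdp_frame *xdpf = frames[i];
	struct sk_buff *skb = skbs[i];

	skb = __xdp_build_skb_from_frame(xdpf, skb, xdpf->dev_rx);
	if (!skb) {
		xdp_return_frame(xdpf);
		continue;
	}

	list_add_tail(&skb->list, &list);
}
netif_receive_skb_list(&list);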
tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
@@ -4,6 +4,19 @@
 
 #define ETH_ALEN 6
 #define HDR_SZ (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct udphdr))
+
+/**
+ * enum frame_mark - magics to distinguish page/packet paths
+ * @MARK_XMIT: page was recycled due to the frame being "xmitted" by the NIC.
+ * @MARK_IN: frame is being processed by the input XDP prog.
+ * @MARK_SKB: frame did hit the TC ingress hook as an skb.
+ */
+enum frame_mark {
+	MARK_XMIT	= 0U,
+	MARK_IN		= 0x42,
+	MARK_SKB	= 0x45,
+};
+
 const volatile int ifindex_out;
 const volatile int ifindex_in;
 const volatile __u8 expect_dst[ETH_ALEN];
@@ -34,10 +47,10 @@ int xdp_redirect(struct xdp_md *xdp)
 	if (*metadata != 0x42)
 		return XDP_ABORTED;
 
-	if (*payload == 0) {
-		*payload = 0x42;
+	if (*payload == MARK_XMIT)
 		pkts_seen_zero++;
-	}
+
+	*payload = MARK_IN;
 
 	if (bpf_xdp_adjust_meta(xdp, 4))
 		return XDP_ABORTED;
@@ -51,7 +64,7 @@ int xdp_redirect(struct xdp_md *xdp)
 	return ret;
 }
 
-static bool check_pkt(void *data, void *data_end)
+static bool check_pkt(void *data, void *data_end, const __u32 mark)
 {
 	struct ipv6hdr *iph = data + sizeof(struct ethhdr);
 	__u8 *payload = data + HDR_SZ;
@@ -59,13 +72,13 @@ static bool check_pkt(void *data, void *data_end)
 	if (payload + 1 > data_end)
 		return false;
 
-	if (iph->nexthdr != IPPROTO_UDP || *payload != 0x42)
+	if (iph->nexthdr != IPPROTO_UDP || *payload != MARK_IN)
 		return false;
 
 	/* reset the payload so the same packet doesn't get counted twice when
 	 * it cycles back through the kernel path and out the dst veth
 	 */
-	*payload = 0;
+	*payload = mark;
 	return true;
 }
 
@@ -75,11 +88,11 @@ int xdp_count_pkts(struct xdp_md *xdp)
 	void *data = (void *)(long)xdp->data;
 	void *data_end = (void *)(long)xdp->data_end;
 
-	if (check_pkt(data, data_end))
+	if (check_pkt(data, data_end, MARK_XMIT))
 		pkts_seen_xdp++;
 
-	/* Return XDP_DROP to make sure the data page is recycled, like when it
-	 * exits a physical NIC. Recycled pages will be counted in the
+	/* Return %XDP_DROP to recycle the data page with %MARK_XMIT, like
+	 * it exited a physical NIC. Those pages will be counted in the
 	 * pkts_seen_zero counter above.
 	 */
 	return XDP_DROP;
@@ -91,9 +104,12 @@ int tc_count_pkts(struct __sk_buff *skb)
 	void *data = (void *)(long)skb->data;
 	void *data_end = (void *)(long)skb->data_end;
 
-	if (check_pkt(data, data_end))
+	if (check_pkt(data, data_end, MARK_SKB))
 		pkts_seen_tc++;
 
+	/* Will be either recycled or freed, %MARK_SKB makes sure it won't
+	 * hit any of the counters above.
+	 */
 	return 0;
 }
 
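Putting the selftest changes together, the marks trace one frame through the test as follows. This is an interpretive summary of the diff above; the veth topology itself is set up by the userspace half of the selftest, which is not part of this diff:

/* Interpretive walk-through of the mark transitions:
 *
 *  - a fresh or recycled page carries MARK_XMIT (0); the input XDP prog
 *    counts it in pkts_seen_zero, then stamps MARK_IN and redirects;
 *  - xdp_count_pkts() sees MARK_IN, bumps pkts_seen_xdp, stamps
 *    MARK_XMIT and returns XDP_DROP, so the recycled page is counted
 *    in pkts_seen_zero on its next trip;
 *  - tc_count_pkts() sees MARK_IN, bumps pkts_seen_tc and stamps
 *    MARK_SKB, so skb-path packets never hit the other counters.
 */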