From 7ba7aeabbaba484347cc98fbe9045769ca0d118d Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Fri, 7 Jun 2019 21:20:34 +0200
Subject: [PATCH 1/7] net: Don't disable interrupts in napi_alloc_frag()

netdev_alloc_frag() can be used from any context and is used by NAPI
and non-NAPI drivers. Non-NAPI drivers use it in interrupt context and
NAPI drivers use it during initial allocation (->ndo_open() or
->ndo_change_mtu()). Some NAPI drivers share the same function for the
initial allocation and the allocation in their NAPI callback.

The interrupts are disabled in order to ensure locked access from every
context to `netdev_alloc_cache'.

Let netdev_alloc_frag() check if interrupts are disabled. If they are,
use `netdev_alloc_cache'; otherwise disable BH and invoke
__napi_alloc_frag() for the allocation. The IRQ check is cheaper than
disabling & enabling interrupts, and memory allocation with disabled
interrupts does not work on -RT.

Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: David S. Miller
---
 net/core/skbuff.c | 53 ++++++++++++++++++++++-------------------------
 1 file changed, 25 insertions(+), 28 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 23c9bf8fc322..ede23fa9bc5a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -366,34 +366,6 @@ struct napi_alloc_cache {
 static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
 static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
 
-static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
-{
-	struct page_frag_cache *nc;
-	unsigned long flags;
-	void *data;
-
-	local_irq_save(flags);
-	nc = this_cpu_ptr(&netdev_alloc_cache);
-	data = page_frag_alloc(nc, fragsz, gfp_mask);
-	local_irq_restore(flags);
-	return data;
-}
-
-/**
- * netdev_alloc_frag - allocate a page fragment
- * @fragsz: fragment size
- *
- * Allocates a frag from a page for receive buffer.
- * Uses GFP_ATOMIC allocations.
- */
-void *netdev_alloc_frag(unsigned int fragsz)
-{
-	fragsz = SKB_DATA_ALIGN(fragsz);
-
-	return __netdev_alloc_frag(fragsz, GFP_ATOMIC);
-}
-EXPORT_SYMBOL(netdev_alloc_frag);
-
 static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
 	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
@@ -409,6 +381,31 @@ void *napi_alloc_frag(unsigned int fragsz)
 }
 EXPORT_SYMBOL(napi_alloc_frag);
 
+/**
+ * netdev_alloc_frag - allocate a page fragment
+ * @fragsz: fragment size
+ *
+ * Allocates a frag from a page for receive buffer.
+ * Uses GFP_ATOMIC allocations.
+ */
+void *netdev_alloc_frag(unsigned int fragsz)
+{
+	struct page_frag_cache *nc;
+	void *data;
+
+	fragsz = SKB_DATA_ALIGN(fragsz);
+	if (in_irq() || irqs_disabled()) {
+		nc = this_cpu_ptr(&netdev_alloc_cache);
+		data = page_frag_alloc(nc, fragsz, GFP_ATOMIC);
+	} else {
+		local_bh_disable();
+		data = __napi_alloc_frag(fragsz, GFP_ATOMIC);
+		local_bh_enable();
+	}
+	return data;
+}
+EXPORT_SYMBOL(netdev_alloc_frag);
+
 /**
  * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
  * @dev: network device to receive on
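The effect is easiest to see from the caller's side. A minimal sketch,
assuming a hypothetical non-NAPI driver (the foo_* names and
FOO_RX_FRAG_SIZE are illustrative, not from this series): both call
sites reach the same netdev_alloc_frag(), but the interrupt handler now
takes the IRQ-safe `netdev_alloc_cache' path while ->ndo_open() takes
the cheaper BH-protected one.

	#include <linux/interrupt.h>
	#include <linux/netdevice.h>
	#include <linux/skbuff.h>

	#define FOO_RX_FRAG_SIZE 2048	/* illustrative fragment size */

	struct foo_priv {
		void *rx_buf;
	};

	/* Hard-IRQ context: in_irq() is true, so netdev_alloc_frag()
	 * picks the IRQ-safe per-CPU cache. */
	static irqreturn_t foo_rx_irq(int irq, void *dev_id)
	{
		struct foo_priv *priv = dev_id;

		priv->rx_buf = netdev_alloc_frag(FOO_RX_FRAG_SIZE);
		return priv->rx_buf ? IRQ_HANDLED : IRQ_NONE;
	}

	/* Process context via ->ndo_open(): interrupts are enabled, so
	 * the BH-protected NAPI cache is used instead. */
	static int foo_open(struct net_device *ndev)
	{
		struct foo_priv *priv = netdev_priv(ndev);

		priv->rx_buf = netdev_alloc_frag(FOO_RX_FRAG_SIZE);
		return priv->rx_buf ? 0 : -ENOMEM;
	}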
From 92dcabd7a0ea0fd88d414f39092132f848652772 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Fri, 7 Jun 2019 21:20:35 +0200
Subject: [PATCH 2/7] net: Don't disable interrupts in __netdev_alloc_skb()

__netdev_alloc_skb() can be used from any context and is used by NAPI
and non-NAPI drivers. Non-NAPI drivers use it in interrupt context and
NAPI drivers use it during initial allocation (->ndo_open() or
->ndo_change_mtu()). Some NAPI drivers share the same function for the
initial allocation and the allocation in their NAPI callback.

The interrupts are disabled in order to ensure locked access from every
context to `netdev_alloc_cache'.

Let __netdev_alloc_skb() check if interrupts are disabled. If they are,
use `netdev_alloc_cache'; otherwise disable BH and use
`napi_alloc_cache.page'. The IRQ check is cheaper than disabling &
enabling interrupts, and memory allocation with disabled interrupts
does not work on -RT.

Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: David S. Miller
---
 net/core/skbuff.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ede23fa9bc5a..bab9484f1631 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -423,7 +423,6 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
 				   gfp_t gfp_mask)
 {
 	struct page_frag_cache *nc;
-	unsigned long flags;
 	struct sk_buff *skb;
 	bool pfmemalloc;
 	void *data;
@@ -444,13 +443,17 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
 	if (sk_memalloc_socks())
 		gfp_mask |= __GFP_MEMALLOC;
 
-	local_irq_save(flags);
-
-	nc = this_cpu_ptr(&netdev_alloc_cache);
-	data = page_frag_alloc(nc, len, gfp_mask);
-	pfmemalloc = nc->pfmemalloc;
-
-	local_irq_restore(flags);
+	if (in_irq() || irqs_disabled()) {
+		nc = this_cpu_ptr(&netdev_alloc_cache);
+		data = page_frag_alloc(nc, len, gfp_mask);
+		pfmemalloc = nc->pfmemalloc;
+	} else {
+		local_bh_disable();
+		nc = this_cpu_ptr(&napi_alloc_cache.page);
+		data = page_frag_alloc(nc, len, gfp_mask);
+		pfmemalloc = nc->pfmemalloc;
+		local_bh_enable();
+	}
 
 	if (unlikely(!data))
 		return NULL;
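Both patches rest on the same invariant: per-CPU data that is only ever
touched from process and softirq context is fully serialised by
local_bh_disable(), which also disables preemption and thereby keeps
this_cpu_ptr() on one CPU. A minimal, self-contained sketch of that
pattern; frag_state and frag_state_advance() are illustrative names,
not kernel symbols:

	#include <linux/bottom_half.h>
	#include <linux/percpu.h>

	struct frag_state {
		unsigned int offset;
	};

	static DEFINE_PER_CPU(struct frag_state, frag_state);

	/* Called from process context; the same state is also touched
	 * from softirq (NAPI) context on this CPU. Disabling BH keeps
	 * the softirq from running in the middle of the update, so
	 * BH-off is as safe as IRQs-off for data that is never touched
	 * from hard-IRQ context -- and it is cheaper. */
	static void frag_state_advance(unsigned int len)
	{
		struct frag_state *st;

		local_bh_disable();
		st = this_cpu_ptr(&frag_state);
		st->offset += len;
		local_bh_enable();
	}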
From f9dae5554aed4d55ab82a344f9fa698423ffac95 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Fri, 7 Jun 2019 21:20:36 +0200
Subject: [PATCH 3/7] dpaa2-eth: Remove preempt_disable() from seed_pool()

According to the comment, the preempt_disable() statement is required
due to synchronisation in napi_alloc_frag(). The awful truth is that
local_bh_disable() is required because otherwise the NAPI poll callback
can be invoked while the open function is setting up the buffers. This
is not unlikely, since dpaa2 provides multiple devices.

The usage of napi_alloc_frag() was removed in commit 27c874867c4e9
("dpaa2-eth: Use a single page per Rx buffer"), which means that the
comment is no longer accurate and the preempt_disable() statement is no
longer required. Remove the outdated comment and the preempt_disable()
statement.

Cc: Ioana Radulescu
Acked-by: Ioana Radulescu
Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 753957ec72be..27ecd06a63b7 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -997,13 +997,6 @@ static int seed_pool(struct dpaa2_eth_priv *priv, u16 bpid)
 	int i, j;
 	int new_count;
 
-	/* This is the lazy seeding of Rx buffer pools.
-	 * dpaa2_add_bufs() is also used on the Rx hotpath and calls
-	 * napi_alloc_frag(). The trouble with that is that it in turn ends up
-	 * calling this_cpu_ptr(), which mandates execution in atomic context.
-	 * Rather than splitting up the code, do a one-off preempt disable.
-	 */
-	preempt_disable();
 	for (j = 0; j < priv->num_channels; j++) {
 		for (i = 0; i < DPAA2_ETH_NUM_BUFS;
 		     i += DPAA2_ETH_BUFS_PER_CMD) {
@@ -1011,12 +1004,10 @@ static int seed_pool(struct dpaa2_eth_priv *priv, u16 bpid)
 			priv->channel[j]->buf_count += new_count;
 
 			if (new_count < DPAA2_ETH_BUFS_PER_CMD) {
-				preempt_enable();
 				return -ENOMEM;
 			}
 		}
 	}
-	preempt_enable();
 
 	return 0;
 }

From 90bc6d4b5466009d4a5c8d3a18dbf72b0f5ec807 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Fri, 7 Jun 2019 21:20:37 +0200
Subject: [PATCH 4/7] dpaa2-eth: Use napi_alloc_frag()

The driver uses netdev_alloc_frag() for an allocation in the
->ndo_start_xmit() path. That path is always invoked in a BH-disabled
region, so napi_alloc_frag() can be used as well.

Use napi_alloc_frag() for the allocation.

Cc: Ioana Radulescu
Acked-by: Ioana Radulescu
Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 27ecd06a63b7..8a438331e942 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -555,7 +555,7 @@ static int build_sg_fd(struct dpaa2_eth_priv *priv,
 	/* Prepare the HW SGT structure */
 	sgt_buf_size = priv->tx_data_offset +
 		       sizeof(struct dpaa2_sg_entry) * num_dma_bufs;
-	sgt_buf = netdev_alloc_frag(sgt_buf_size + DPAA2_ETH_TX_BUF_ALIGN);
+	sgt_buf = napi_alloc_frag(sgt_buf_size + DPAA2_ETH_TX_BUF_ALIGN);
 	if (unlikely(!sgt_buf)) {
 		err = -ENOMEM;
 		goto sgt_buf_alloc_failed;

From 3a89aae4b49e2df99fff112cdd055f1a67971623 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Fri, 7 Jun 2019 21:20:38 +0200
Subject: [PATCH 5/7] bnx2x: Use napi_alloc_frag()

The atomic SKB allocation via bnx2x_frag_alloc() is always performed in
NAPI context. A preemptible caller passes GFP_KERNEL, in which case
bnx2x_frag_alloc() uses __get_free_page() for the allocation instead.

Use napi_alloc_frag() for the memory allocation.

Cc: Ariel Elior
Cc: Sudarsana Kalluru
Cc: GR-everest-linux-l2@marvell.com
Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 008ad0ca89ba..c4986b519191 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -684,7 +684,7 @@ static void *bnx2x_frag_alloc(const struct bnx2x_fastpath *fp, gfp_t gfp_mask)
 		if (unlikely(gfpflags_allow_blocking(gfp_mask)))
 			return (void *)__get_free_page(gfp_mask);
 
-		return netdev_alloc_frag(fp->rx_frag_size);
+		return napi_alloc_frag(fp->rx_frag_size);
 	}
 
 	return kmalloc(fp->rx_buf_size + NET_SKB_PAD, gfp_mask);

From 49eef82dcd5498718ad49305514c56a2c3b7dee0 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Fri, 7 Jun 2019 21:20:39 +0200
Subject: [PATCH 6/7] tg3: Use napi_alloc_frag()

tg3_alloc_rx_data() uses netdev_alloc_frag() for the skb allocation.
All callers of tg3_alloc_rx_data() either hold tp->lock (which is
acquired with BH disabled) or run in NAPI context.

Use napi_alloc_frag() for the skb allocations.

Cc: Siva Reddy Kallam
Cc: Prashant Sreedharan
Cc: Michael Chan
Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/broadcom/tg3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 6d1f9c822548..4c404d2213f9 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -6710,7 +6710,7 @@ static int tg3_alloc_rx_data(struct tg3 *tp, struct tg3_rx_prodring_set *tpr,
 	skb_size = SKB_DATA_ALIGN(data_size + TG3_RX_OFFSET(tp)) +
 		   SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 	if (skb_size <= PAGE_SIZE) {
-		data = netdev_alloc_frag(skb_size);
+		data = napi_alloc_frag(skb_size);
 		*frag_size = skb_size;
 	} else {
 		data = kmalloc(skb_size, GFP_ATOMIC);
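Patches 4-6 all exploit the same guarantee: code that already runs with
BH disabled (a NAPI poll callback, ->ndo_start_xmit(), or a section
under a BH-disabled lock) may call napi_alloc_frag() directly and skip
the context check that netdev_alloc_frag() now performs. A minimal
sketch of such a poll callback, with an illustrative foo_poll name and
a fixed fragment size, not taken from any of these drivers:

	#include <linux/netdevice.h>
	#include <linux/skbuff.h>

	/* NAPI poll runs in softirq context with BH disabled, so the
	 * per-CPU NAPI fragment cache needs no further protection. */
	static int foo_poll(struct napi_struct *napi, int budget)
	{
		int done = 0;

		while (done < budget) {
			void *frag = napi_alloc_frag(2048);

			if (!frag)
				break;
			/* refill one Rx descriptor with the fragment */
			done++;
		}
		if (done < budget)
			napi_complete_done(napi, done);
		return done;
	}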
From 6dcdd884e2a4bb57b0ed3654ff28974ae17d2a08 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Fri, 7 Jun 2019 21:20:40 +0200
Subject: [PATCH 7/7] net: hwbm: Make the hwbm_pool lock a mutex

Based on review, `lock' is only acquired in hwbm_pool_add(), which is
invoked via ->probe(), ->resume() and ->ndo_change_mtu(). Given that,
the lock can become a mutex and there is no need to disable interrupts
during the procedure.

Now that the lock is a mutex, hwbm_pool_add() no longer invokes
hwbm_pool_refill() in an atomic context, so we can pass GFP_KERNEL to
hwbm_pool_refill() and remove the `gfp' argument from hwbm_pool_add().

Cc: Thomas Petazzoni
Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/marvell/mvneta.c    |  2 +-
 drivers/net/ethernet/marvell/mvneta_bm.c |  4 ++--
 include/net/hwbm.h                       |  6 +++---
 net/core/hwbm.c                          | 15 +++++++--------
 4 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 94dc0a272644..895bfed26a8a 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -1119,7 +1119,7 @@ static void mvneta_bm_update_mtu(struct mvneta_port *pp, int mtu)
 		SKB_DATA_ALIGN(MVNETA_RX_BUF_SIZE(bm_pool->pkt_size));
 
 	/* Fill entire long pool */
-	num = hwbm_pool_add(hwbm_pool, hwbm_pool->size, GFP_ATOMIC);
+	num = hwbm_pool_add(hwbm_pool, hwbm_pool->size);
 	if (num != hwbm_pool->size) {
 		WARN(1, "pool %d: %d of %d allocated\n",
 		     bm_pool->id, num, hwbm_pool->size);
diff --git a/drivers/net/ethernet/marvell/mvneta_bm.c b/drivers/net/ethernet/marvell/mvneta_bm.c
index de468e1bdba9..82ee2bcca6fd 100644
--- a/drivers/net/ethernet/marvell/mvneta_bm.c
+++ b/drivers/net/ethernet/marvell/mvneta_bm.c
@@ -190,7 +190,7 @@ struct mvneta_bm_pool *mvneta_bm_pool_use(struct mvneta_bm *priv, u8 pool_id,
 			SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 		hwbm_pool->construct = mvneta_bm_construct;
 		hwbm_pool->priv = new_pool;
-		spin_lock_init(&hwbm_pool->lock);
+		mutex_init(&hwbm_pool->buf_lock);
 
 		/* Create new pool */
 		err = mvneta_bm_pool_create(priv, new_pool);
@@ -201,7 +201,7 @@ struct mvneta_bm_pool *mvneta_bm_pool_use(struct mvneta_bm *priv, u8 pool_id,
 		}
 
 		/* Allocate buffers for this pool */
-		num = hwbm_pool_add(hwbm_pool, hwbm_pool->size, GFP_ATOMIC);
+		num = hwbm_pool_add(hwbm_pool, hwbm_pool->size);
 		if (num != hwbm_pool->size) {
 			WARN(1, "pool %d: %d of %d allocated\n",
 			     new_pool->id, num, hwbm_pool->size);
diff --git a/include/net/hwbm.h b/include/net/hwbm.h
index 89085e2e2da5..81643cf8a1c4 100644
--- a/include/net/hwbm.h
+++ b/include/net/hwbm.h
@@ -12,18 +12,18 @@ struct hwbm_pool {
 	/* constructor called during alocation */
 	int (*construct)(struct hwbm_pool *bm_pool, void *buf);
 	/* protect acces to the buffer counter*/
-	spinlock_t lock;
+	struct mutex buf_lock;
 	/* private data */
 	void *priv;
 };
 
 #ifdef CONFIG_HWBM
 void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf);
 int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp);
-int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num, gfp_t gfp);
+int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num);
 #else
 void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf) {}
 int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp) { return 0; }
-int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num, gfp_t gfp)
+int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num)
 { return 0; }
 #endif /* CONFIG_HWBM */
 #endif /* _HWBM_H */
diff --git a/net/core/hwbm.c b/net/core/hwbm.c
index fd822ca5a245..ac1a66df9adc 100644
--- a/net/core/hwbm.c
+++ b/net/core/hwbm.c
@@ -43,34 +43,33 @@ int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp)
 }
 EXPORT_SYMBOL_GPL(hwbm_pool_refill);
 
-int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num, gfp_t gfp)
+int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num)
 {
 	int err, i;
-	unsigned long flags;
 
-	spin_lock_irqsave(&bm_pool->lock, flags);
+	mutex_lock(&bm_pool->buf_lock);
 	if (bm_pool->buf_num == bm_pool->size) {
 		pr_warn("pool already filled\n");
-		spin_unlock_irqrestore(&bm_pool->lock, flags);
+		mutex_unlock(&bm_pool->buf_lock);
 		return bm_pool->buf_num;
 	}
 
 	if (buf_num + bm_pool->buf_num > bm_pool->size) {
 		pr_warn("cannot allocate %d buffers for pool\n", buf_num);
-		spin_unlock_irqrestore(&bm_pool->lock, flags);
+		mutex_unlock(&bm_pool->buf_lock);
 		return 0;
 	}
 
 	if ((buf_num + bm_pool->buf_num) < bm_pool->buf_num) {
 		pr_warn("Adding %d buffers to the %d current buffers will overflow\n",
 			buf_num, bm_pool->buf_num);
-		spin_unlock_irqrestore(&bm_pool->lock, flags);
+		mutex_unlock(&bm_pool->buf_lock);
 		return 0;
 	}
 
 	for (i = 0; i < buf_num; i++) {
-		err = hwbm_pool_refill(bm_pool, gfp);
+		err = hwbm_pool_refill(bm_pool, GFP_KERNEL);
 		if (err < 0)
 			break;
 	}
@@ -79,7 +78,7 @@ int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num, gfp_t gfp)
 
 	bm_pool->buf_num += i;
 	pr_debug("hwpm pool: %d of %d buffers added\n", i, buf_num);
-	spin_unlock_irqrestore(&bm_pool->lock, flags);
+	mutex_unlock(&bm_pool->buf_lock);
 
 	return i;
 }
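With the `gfp' argument gone, callers simply rely on being in sleepable
context. A minimal sketch of a caller after this change, assuming a
hypothetical driver; foo_priv, foo_get_pool() and foo_change_mtu() are
illustrative names, not part of the hwbm API:

	#include <linux/netdevice.h>
	#include <net/hwbm.h>

	struct foo_priv {
		struct hwbm_pool pool;
	};

	/* Hypothetical helper: look up the driver's buffer-manager pool. */
	static struct hwbm_pool *foo_get_pool(struct net_device *ndev)
	{
		struct foo_priv *priv = netdev_priv(ndev);

		return &priv->pool;
	}

	/* ->ndo_change_mtu() runs in process context under rtnl_lock and
	 * may sleep, which is exactly what lets hwbm_pool_add() take a
	 * mutex and pass GFP_KERNEL to hwbm_pool_refill() internally. */
	static int foo_change_mtu(struct net_device *ndev, int new_mtu)
	{
		struct hwbm_pool *pool = foo_get_pool(ndev);
		int num;

		ndev->mtu = new_mtu;
		num = hwbm_pool_add(pool, pool->size);	/* no gfp argument */
		return num == pool->size ? 0 : -ENOMEM;
	}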