From 625788b5844511cf4c30cffa7fa0bc3a69cebc82 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Mar 2022 21:14:20 -0800 Subject: [PATCH] net: add per-cpu storage and net->core_stats Before adding yet another possibly contended atomic_long_t, it is time to add per-cpu storage for existing ones: dev->tx_dropped, dev->rx_dropped, and dev->rx_nohandler. Because many devices do not have to increment such counters, allocate the per-cpu storage on demand, so that dev_get_stats() does not have to spend considerable time folding zero counters. Note that some drivers have abused these counters which were supposed to be only used by core networking stack. v4: should use per_cpu_ptr() in dev_get_stats() (Jakub) v3: added a READ_ONCE() in netdev_core_stats_alloc() (Paolo) v2: add a missing include (reported by kernel test robot) Change in netdev_core_stats_alloc() (Jakub) Signed-off-by: Eric Dumazet Cc: jeffreyji Reviewed-by: Brian Vazquez Reviewed-by: Jakub Kicinski Acked-by: Paolo Abeni Link: https://lore.kernel.org/r/20220311051420.2608812-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 +- .../net/ethernet/hisilicon/hns/hns_ethtool.c | 4 +- .../ethernet/qualcomm/rmnet/rmnet_handlers.c | 2 +- drivers/net/ipvlan/ipvlan_core.c | 2 +- drivers/net/macvlan.c | 2 +- drivers/net/net_failover.c | 2 +- drivers/net/tun.c | 16 +++--- drivers/net/vxlan/vxlan_core.c | 2 +- include/linux/netdevice.h | 46 +++++++++++++---- include/net/bonding.h | 2 +- net/core/dev.c | 51 +++++++++++++++---- net/core/gro_cells.c | 2 +- net/hsr/hsr_device.c | 2 +- net/xfrm/xfrm_device.c | 2 +- 15 files changed, 101 insertions(+), 40 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 55e0ba2a163d..15eddca7b4b6 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -5120,7 +5120,7 @@ static netdev_tx_t bond_xmit_broadcast(struct 
sk_buff *skb, if (xmit_suc) return NETDEV_TX_OK; - atomic_long_inc(&bond_dev->tx_dropped); + dev_core_stats_tx_dropped_inc(bond_dev); return NET_XMIT_DROP; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 2de02950086f..92a1a43b3bee 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -370,7 +370,7 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) i = skb_get_queue_mapping(skb); if (unlikely(i >= bp->tx_nr_rings)) { dev_kfree_skb_any(skb); - atomic_long_inc(&dev->tx_dropped); + dev_core_stats_tx_dropped_inc(dev); return NETDEV_TX_OK; } @@ -646,7 +646,7 @@ tx_kick_pending: if (txr->kick_pending) bnxt_txr_db_kick(bp, txr, txr->tx_prod); txr->tx_buf_ring[txr->tx_prod].skb = NULL; - atomic_long_inc(&dev->tx_dropped); + dev_core_stats_tx_dropped_inc(dev); return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index d7a27c244d48..54faf0f2d1d8 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -887,8 +887,8 @@ static void hns_get_ethtool_stats(struct net_device *netdev, p[21] = net_stats->rx_compressed; p[22] = net_stats->tx_compressed; - p[23] = netdev->rx_dropped.counter; - p[24] = netdev->tx_dropped.counter; + p[23] = 0; /* was netdev->rx_dropped.counter */ + p[24] = 0; /* was netdev->tx_dropped.counter */ p[25] = priv->tx_timeout_count; diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index bfbd7847f946..a313242a762e 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -207,7 +207,7 @@ rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb) dev = skb->dev; port = rmnet_get_port_rcu(dev); if (unlikely(!port)) { - 
atomic_long_inc(&skb->dev->rx_nohandler); + dev_core_stats_rx_nohandler_inc(skb->dev); kfree_skb(skb); goto done; } diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index c613900c3811..6ffb27419e64 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -555,7 +555,7 @@ static void ipvlan_multicast_enqueue(struct ipvl_port *port, schedule_work(&port->wq); } else { spin_unlock(&port->backlog.lock); - atomic_long_inc(&skb->dev->rx_dropped); + dev_core_stats_rx_dropped_inc(skb->dev); kfree_skb(skb); } } diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 33753a2fde29..4b77819e9328 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -371,7 +371,7 @@ static void macvlan_broadcast_enqueue(struct macvlan_port *port, free_nskb: kfree_skb(nskb); err: - atomic_long_inc(&skb->dev->rx_dropped); + dev_core_stats_rx_dropped_inc(skb->dev); } static void macvlan_flush_sources(struct macvlan_port *port, diff --git a/drivers/net/net_failover.c b/drivers/net/net_failover.c index 86ec5aae4289..21a0435c02de 100644 --- a/drivers/net/net_failover.c +++ b/drivers/net/net_failover.c @@ -89,7 +89,7 @@ static int net_failover_close(struct net_device *dev) static netdev_tx_t net_failover_drop_xmit(struct sk_buff *skb, struct net_device *dev) { - atomic_long_inc(&dev->tx_dropped); + dev_core_stats_tx_dropped_inc(dev); dev_kfree_skb_any(skb); return NETDEV_TX_OK; } diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 2b9a22669a12..276a0e42ca8e 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1135,7 +1135,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; drop: - atomic_long_inc(&dev->tx_dropped); + dev_core_stats_tx_dropped_inc(dev); skb_tx_error(skb); kfree_skb_reason(skb, drop_reason); rcu_read_unlock(); @@ -1291,7 +1291,7 @@ resample: void *frame = tun_xdp_to_ptr(xdp); if (__ptr_ring_produce(&tfile->tx_ring, frame)) { - 
atomic_long_inc(&dev->tx_dropped); + dev_core_stats_tx_dropped_inc(dev); break; } nxmit++; @@ -1626,7 +1626,7 @@ static int tun_xdp_act(struct tun_struct *tun, struct bpf_prog *xdp_prog, trace_xdp_exception(tun->dev, xdp_prog, act); fallthrough; case XDP_DROP: - atomic_long_inc(&tun->dev->rx_dropped); + dev_core_stats_rx_dropped_inc(tun->dev); break; } @@ -1797,7 +1797,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, */ skb = tun_build_skb(tun, tfile, from, &gso, len, &skb_xdp); if (IS_ERR(skb)) { - atomic_long_inc(&tun->dev->rx_dropped); + dev_core_stats_rx_dropped_inc(tun->dev); return PTR_ERR(skb); } if (!skb) @@ -1826,7 +1826,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (IS_ERR(skb)) { if (PTR_ERR(skb) != -EAGAIN) - atomic_long_inc(&tun->dev->rx_dropped); + dev_core_stats_rx_dropped_inc(tun->dev); if (frags) mutex_unlock(&tfile->napi_mutex); return PTR_ERR(skb); @@ -1841,7 +1841,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, err = -EFAULT; drop_reason = SKB_DROP_REASON_SKB_UCOPY_FAULT; drop: - atomic_long_inc(&tun->dev->rx_dropped); + dev_core_stats_rx_dropped_inc(tun->dev); kfree_skb_reason(skb, drop_reason); if (frags) { tfile->napi.skb = NULL; @@ -1876,7 +1876,7 @@ drop: pi.proto = htons(ETH_P_IPV6); break; default: - atomic_long_inc(&tun->dev->rx_dropped); + dev_core_stats_rx_dropped_inc(tun->dev); kfree_skb(skb); return -EINVAL; } @@ -1956,7 +1956,7 @@ drop: skb_headlen(skb)); if (unlikely(headlen > skb_headlen(skb))) { - atomic_long_inc(&tun->dev->rx_dropped); + dev_core_stats_rx_dropped_inc(tun->dev); napi_free_frags(&tfile->napi); rcu_read_unlock(); mutex_unlock(&tfile->napi_mutex); diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 3872f76ea1d3..de97ff98d36e 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1760,7 +1760,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) 
if (unlikely(!(vxlan->dev->flags & IFF_UP))) { rcu_read_unlock(); - atomic_long_inc(&vxlan->dev->rx_dropped); + dev_core_stats_rx_dropped_inc(vxlan->dev); vxlan_vnifilter_count(vxlan, vni, vninode, VXLAN_VNI_STATS_RX_DROPS, 0); goto drop; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index acd3cf69b61f..0d994710b335 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -28,6 +28,7 @@ #include #include #include +#include <asm/local.h> #include #include @@ -194,6 +195,14 @@ struct net_device_stats { unsigned long tx_compressed; }; +/* per-cpu stats, allocated on demand. + * Try to fit them in a single cache line, for dev_get_stats() sake. + */ +struct net_device_core_stats { + local_t rx_dropped; + local_t tx_dropped; + local_t rx_nohandler; +} __aligned(4 * sizeof(local_t)); #include #include @@ -1735,12 +1744,8 @@ enum netdev_ml_priv_type { * @stats: Statistics struct, which was left as a legacy, use * rtnl_link_stats64 instead * - * @rx_dropped: Dropped packets by core network, + * @core_stats: core networking counters, * do not use this in drivers - * @tx_dropped: Dropped packets by core network, - * do not use this in drivers - * @rx_nohandler: nohandler dropped packets by core network on - * inactive devices, do not use this in drivers * @carrier_up_count: Number of times the carrier has been up * @carrier_down_count: Number of times the carrier has been down * @@ -2023,9 +2028,7 @@ struct net_device { struct net_device_stats stats; /* not used by modern drivers */ - atomic_long_t rx_dropped; - atomic_long_t tx_dropped; - atomic_long_t rx_nohandler; + struct net_device_core_stats __percpu *core_stats; /* Stats to monitor link on/off, flapping */ atomic_t carrier_up_count; @@ -3839,13 +3842,38 @@ static __always_inline bool __is_skb_forwardable(const struct net_device *dev, return false; } +struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev); + +static inline struct net_device_core_stats *dev_core_stats(struct 
net_device *dev) +{ + /* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */ + struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats); + + if (likely(p)) + return this_cpu_ptr(p); + + return netdev_core_stats_alloc(dev); +} + +#define DEV_CORE_STATS_INC(FIELD) \ +static inline void dev_core_stats_##FIELD##_inc(struct net_device *dev) \ +{ \ + struct net_device_core_stats *p = dev_core_stats(dev); \ + \ + if (p) \ + local_inc(&p->FIELD); \ +} +DEV_CORE_STATS_INC(rx_dropped) +DEV_CORE_STATS_INC(tx_dropped) +DEV_CORE_STATS_INC(rx_nohandler) + static __always_inline int ____dev_forward_skb(struct net_device *dev, struct sk_buff *skb, const bool check_mtu) { if (skb_orphan_frags(skb, GFP_ATOMIC) || unlikely(!__is_skb_forwardable(dev, skb, check_mtu))) { - atomic_long_inc(&dev->rx_dropped); + dev_core_stats_rx_dropped_inc(dev); kfree_skb(skb); return NET_RX_DROP; } diff --git a/include/net/bonding.h b/include/net/bonding.h index d0dfe727e0b1..b14f4c0b4e9e 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -770,7 +770,7 @@ extern const struct sysfs_ops slave_sysfs_ops; static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *skb) { - atomic_long_inc(&dev->tx_dropped); + dev_core_stats_tx_dropped_inc(dev); dev_kfree_skb_any(skb); return NET_XMIT_DROP; } diff --git a/net/core/dev.c b/net/core/dev.c index 7ed27c178a1f..8d25ec5b3af7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3633,7 +3633,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device out_kfree_skb: kfree_skb(skb); out_null: - atomic_long_inc(&dev->tx_dropped); + dev_core_stats_tx_dropped_inc(dev); return NULL; } @@ -4184,7 +4184,7 @@ recursion_alert: rc = -ENETDOWN; rcu_read_unlock_bh(); - atomic_long_inc(&dev->tx_dropped); + dev_core_stats_tx_dropped_inc(dev); kfree_skb_list(skb); return rc; out: @@ -4236,7 +4236,7 @@ int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id) local_bh_enable(); return ret; 
drop: - atomic_long_inc(&dev->tx_dropped); + dev_core_stats_tx_dropped_inc(dev); kfree_skb_list(skb); return NET_XMIT_DROP; } @@ -4602,7 +4602,7 @@ drop: sd->dropped++; rps_unlock_irq_restore(sd, &flags); - atomic_long_inc(&skb->dev->rx_dropped); + dev_core_stats_rx_dropped_inc(skb->dev); kfree_skb_reason(skb, reason); return NET_RX_DROP; } @@ -5357,10 +5357,10 @@ check_vlan_id: } else { drop: if (!deliver_exact) { - atomic_long_inc(&skb->dev->rx_dropped); + dev_core_stats_rx_dropped_inc(skb->dev); kfree_skb_reason(skb, SKB_DROP_REASON_PTYPE_ABSENT); } else { - atomic_long_inc(&skb->dev->rx_nohandler); + dev_core_stats_rx_nohandler_inc(skb->dev); kfree_skb(skb); } /* Jamal, now you will not able to escape explaining @@ -10280,6 +10280,25 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, } EXPORT_SYMBOL(netdev_stats_to_stats64); +struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev) +{ + struct net_device_core_stats __percpu *p; + + p = alloc_percpu_gfp(struct net_device_core_stats, + GFP_ATOMIC | __GFP_NOWARN); + + if (p && cmpxchg(&dev->core_stats, NULL, p)) + free_percpu(p); + + /* This READ_ONCE() pairs with the cmpxchg() above */ + p = READ_ONCE(dev->core_stats); + if (!p) + return NULL; + + return this_cpu_ptr(p); +} +EXPORT_SYMBOL(netdev_core_stats_alloc); + /** * dev_get_stats - get network device statistics * @dev: device to get statistics from @@ -10294,6 +10313,7 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, struct rtnl_link_stats64 *storage) { const struct net_device_ops *ops = dev->netdev_ops; + const struct net_device_core_stats __percpu *p; if (ops->ndo_get_stats64) { memset(storage, 0, sizeof(*storage)); @@ -10303,9 +10323,20 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, } else { netdev_stats_to_stats64(storage, &dev->stats); } - storage->rx_dropped += (unsigned long)atomic_long_read(&dev->rx_dropped); - storage->tx_dropped += (unsigned 
long)atomic_long_read(&dev->tx_dropped); - storage->rx_nohandler += (unsigned long)atomic_long_read(&dev->rx_nohandler); + + /* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */ + p = READ_ONCE(dev->core_stats); + if (p) { + const struct net_device_core_stats *core_stats; + int i; + + for_each_possible_cpu(i) { + core_stats = per_cpu_ptr(p, i); + storage->rx_dropped += local_read(&core_stats->rx_dropped); + storage->tx_dropped += local_read(&core_stats->tx_dropped); + storage->rx_nohandler += local_read(&core_stats->rx_nohandler); + } + } return storage; } EXPORT_SYMBOL(dev_get_stats); @@ -10567,6 +10598,8 @@ void free_netdev(struct net_device *dev) free_percpu(dev->pcpu_refcnt); dev->pcpu_refcnt = NULL; #endif + free_percpu(dev->core_stats); + dev->core_stats = NULL; free_percpu(dev->xdp_bulkq); dev->xdp_bulkq = NULL; diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index 8462f926ab45..541c7a72a28a 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -28,7 +28,7 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) { drop: - atomic_long_inc(&dev->rx_dropped); + dev_core_stats_rx_dropped_inc(dev); kfree_skb(skb); res = NET_RX_DROP; goto unlock; diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 7f250216433d..6ffef47e9be5 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -221,7 +221,7 @@ static netdev_tx_t hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev) skb_reset_mac_len(skb); hsr_forward_skb(skb, master); } else { - atomic_long_inc(&dev->tx_dropped); + dev_core_stats_tx_dropped_inc(dev); dev_kfree_skb_any(skb); } return NETDEV_TX_OK; diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 39bce5d764de..3e3448ada1bb 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -143,7 +143,7 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur segs = skb_gso_segment(skb, 
esp_features); if (IS_ERR(segs)) { kfree_skb(skb); - atomic_long_inc(&dev->tx_dropped); + dev_core_stats_tx_dropped_inc(dev); return NULL; } else { consume_skb(skb);