From 9c90c9b3e50e16d03c7f87d63e9db373974781e0 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Mon, 23 May 2022 22:05:24 +0200 Subject: [PATCH 01/80] Revert "net: af_key: add check for pfkey_broadcast in function pfkey_process" This reverts commit 4dc2a5a8f6754492180741facf2a8787f2c415d7. A non-zero return value from pfkey_broadcast() does not necessarily mean an error occurred as this function returns -ESRCH when no registered listener received the message. In particular, a call with BROADCAST_PROMISC_ONLY flag and null one_sk argument can never return zero so that this commit in fact prevents processing any PF_KEY message. One visible effect is that racoon daemon fails to find encryption algorithms like aes and refuses to start. Excluding -ESRCH return value would fix this but it's not obvious that we really want to bail out here and most other callers of pfkey_broadcast() also ignore the return value. Also, as pointed out by Steffen Klassert, PF_KEY is kind of deprecated and newer userspace code should use netlink instead so that we should only disturb the code for really important fixes. v2: add a comment explaining why is the return value ignored Signed-off-by: Michal Kubecek Signed-off-by: Steffen Klassert --- net/key/af_key.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index 339d95df19d3..d93bde657359 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2826,10 +2826,12 @@ static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb void *ext_hdrs[SADB_EXT_MAX]; int err; - err = pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, - BROADCAST_PROMISC_ONLY, NULL, sock_net(sk)); - if (err) - return err; + /* Non-zero return value of pfkey_broadcast() does not always signal + * an error and even on an actual error we may still want to process + * the message so rather ignore the return value. + */ + pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, + BROADCAST_PROMISC_ONLY, NULL, sock_net(sk)); memset(ext_hdrs, 0, sizeof(ext_hdrs)); err = parse_exthdrs(skb, hdr, ext_hdrs); From 6821ad8770340825f17962cf5ef64ebaffee7fd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Wed, 18 May 2022 14:05:48 -0700 Subject: [PATCH 02/80] xfrm: do not set IPv4 DF flag when encapsulating IPv6 frames <= 1280 bytes. One may want to have DF set on large packets to support discovering path mtu and limiting the size of generated packets (hence not setting the XFRM_STATE_NOPMTUDISC tunnel flag), while still supporting networks that are incapable of carrying even minimal sized IPv6 frames (post encapsulation). Having IPv4 Don't Frag bit set on encapsulated IPv6 frames that are not larger than the minimum IPv6 mtu of 1280 isn't useful, because the resulting ICMP Fragmentation Required error isn't actionable (even assuming you receive it) because IPv6 will not drop it's path mtu below 1280 anyway. While the IPv4 stack could prefrag the packets post encap, this requires the ICMP error to be successfully delivered and causes a loss of the original IPv6 frame (thus requiring a retransmit and latency hit). Luckily with IPv4 if we simply don't set the DF flag, we'll just make further fragmenting the packets some other router's problems. We'll still learn the correct IPv4 path mtu through encapsulation of larger IPv6 frames. I'm still not convinced this patch is entirely sufficient to make everything happy... but I don't see how it could possibly make things worse. See also recent: 4ff2980b6bd2 'xfrm: fix tunnel model fragmentation behavior' and friends Cc: Lorenzo Colitti Cc: Eric Dumazet Cc: Lina Wang Cc: Steffen Klassert Signed-off-by: Maciej Zenczykowski Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_output.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index d4935b3b9983..555ab35cd119 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -273,6 +273,7 @@ static int xfrm4_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb) */ static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb) { + bool small_ipv6 = (skb->protocol == htons(ETH_P_IPV6)) && (skb->len <= IPV6_MIN_MTU); struct dst_entry *dst = skb_dst(skb); struct iphdr *top_iph; int flags; @@ -303,7 +304,7 @@ static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb) if (flags & XFRM_STATE_NOECN) IP_ECN_clear(top_iph); - top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? + top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) || small_ipv6 ? 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst)); From 4934609dda03ec90ca5052deecbe455b09a44e21 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 23 May 2022 16:17:06 +0000 Subject: [PATCH 03/80] amt: fix typo in amt AMT_MSG_TEARDOWM is defined, But it should be AMT_MSG_TEARDOWN. Fixes: b9022b53adad ("amt: add control plane of amt interface") Signed-off-by: Taehee Yoo Signed-off-by: Jakub Kicinski --- drivers/net/amt.c | 2 +- include/net/amt.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/amt.c b/drivers/net/amt.c index de4ea518c793..f41668ddd94a 100644 --- a/drivers/net/amt.c +++ b/drivers/net/amt.c @@ -57,7 +57,7 @@ static char *type_str[] = { "AMT_MSG_MEMBERSHIP_QUERY", "AMT_MSG_MEMBERSHIP_UPDATE", "AMT_MSG_MULTICAST_DATA", - "AMT_MSG_TEARDOWM", + "AMT_MSG_TEARDOWN", }; static char *action_str[] = { diff --git a/include/net/amt.h b/include/net/amt.h index 7a4db8b903ee..0e40c3d64fcf 100644 --- a/include/net/amt.h +++ b/include/net/amt.h @@ -15,7 +15,7 @@ enum amt_msg_type { AMT_MSG_MEMBERSHIP_QUERY, AMT_MSG_MEMBERSHIP_UPDATE, AMT_MSG_MULTICAST_DATA, - AMT_MSG_TEARDOWM, + AMT_MSG_TEARDOWN, __AMT_MSG_MAX, }; From ac1dbf55981b88d64312858ea06e3e63001f085d Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 23 May 2022 16:17:07 +0000 Subject: [PATCH 04/80] amt: fix return value of amt_update_handler() If a relay receives an update message, it lookup a tunnel. and if there is no tunnel for that message, it should be treated as an error, not a success. But amt_update_handler() returns false, which means success. Fixes: cbc21dc1cfe9 ("amt: add data plane of amt interface") Signed-off-by: Taehee Yoo Signed-off-by: Jakub Kicinski --- drivers/net/amt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/amt.c b/drivers/net/amt.c index f41668ddd94a..635de07b2e40 100644 --- a/drivers/net/amt.c +++ b/drivers/net/amt.c @@ -2423,7 +2423,7 @@ static bool amt_update_handler(struct amt_dev *amt, struct sk_buff *skb) } } - return false; + return true; report: iph = ip_hdr(skb); From 1a1a0e80e005cbdc2c250fc858e1d8570f4e4acb Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 23 May 2022 16:17:08 +0000 Subject: [PATCH 05/80] amt: fix possible memory leak in amt_rcv() If an amt receives packets and it finds socket. If it can't find a socket, it should free a received skb. But it doesn't. So, a memory leak would possibly occur. Fixes: cbc21dc1cfe9 ("amt: add data plane of amt interface") Signed-off-by: Taehee Yoo Signed-off-by: Jakub Kicinski --- drivers/net/amt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/amt.c b/drivers/net/amt.c index 635de07b2e40..ebee5f07a208 100644 --- a/drivers/net/amt.c +++ b/drivers/net/amt.c @@ -2679,7 +2679,7 @@ static int amt_rcv(struct sock *sk, struct sk_buff *skb) amt = rcu_dereference_sk_user_data(sk); if (!amt) { err = true; - goto out; + goto drop; } skb->dev = amt->dev; From 0b7180072a9df5e18af5b58410fec38230848a8d Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Tue, 24 May 2022 11:55:58 +0530 Subject: [PATCH 06/80] net: ethernet: ti: am65-cpsw: Fix fwnode passed to phylink_create() am65-cpsw-nuss driver incorrectly uses fwnode member of common ethernet device's "struct device_node" instead of using fwnode member of the port's "struct device_node" in phylink_create(). This results in all ports having the same phy data when there are multiple ports with their phy properties populated in their respective nodes rather than the common ethernet device node. Fix it here by using fwnode member of the port's node. Fixes: e8609e69470f ("net: ethernet: ti: am65-cpsw: Convert to PHYLINK") Signed-off-by: Siddharth Vadapalli Link: https://lore.kernel.org/r/20220524062558.19296-1-s-vadapalli@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 34197c67f8d9..77bdda97b2b0 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -1981,7 +1982,9 @@ am65_cpsw_nuss_init_port_ndev(struct am65_cpsw_common *common, u32 port_idx) phy_interface_set_rgmii(port->slave.phylink_config.supported_interfaces); - phylink = phylink_create(&port->slave.phylink_config, dev->fwnode, port->slave.phy_if, + phylink = phylink_create(&port->slave.phylink_config, + of_node_to_fwnode(port->slave.phy_node), + port->slave.phy_if, &am65_cpsw_phylink_mac_ops); if (IS_ERR(phylink)) return PTR_ERR(phylink); From d0bbe0328fe552ff3211b3cbbf6117a885a3d5b2 Mon Sep 17 00:00:00 2001 From: Min Li Date: Tue, 24 May 2022 10:45:01 -0400 Subject: [PATCH 07/80] ptp: ptp_clockmatrix: fix is_single_shot is_single_shot should return false for the power_of_2 mask Fixes: bec67592521e ("ptp: ptp_clockmatrix: Add PTP_CLK_REQ_EXTTS support") Signed-off-by: Min Li Link: https://lore.kernel.org/r/1653403501-12621-1-git-send-email-min.li.xe@renesas.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_clockmatrix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ptp/ptp_clockmatrix.c b/drivers/ptp/ptp_clockmatrix.c index cb258e1448d5..c9d451bf89e2 100644 --- a/drivers/ptp/ptp_clockmatrix.c +++ b/drivers/ptp/ptp_clockmatrix.c @@ -267,7 +267,7 @@ static int arm_tod_read_trig_sel_refclk(struct idtcm_channel *channel, u8 ref) static bool is_single_shot(u8 mask) { /* Treat single bit ToD masks as continuous trigger */ - return mask <= 8 && is_power_of_2(mask); + return !(mask <= 8 && is_power_of_2(mask)); } static int idtcm_extts_enable(struct idtcm_channel *channel, From 215cd9897afbce9a964d4afbeec58c086d6cb170 Mon Sep 17 00:00:00 2001 From: luyun Date: Wed, 25 May 2022 11:18:19 +0800 Subject: [PATCH 08/80] selftests/net: enable lo.accept_local in psock_snd test The psock_snd test sends and receives packets over loopback, and the test results depend on parameter settings: Set rp_filter=0, or set rp_filter=1 and accept_local=1 so that the test will pass. Otherwise, this test will fail with Resource temporarily unavailable: sudo ./psock_snd.sh dgram tx: 128 rx: 142 ./psock_snd: recv: Resource temporarily unavailable For most distro kernel releases(like Ubuntu or Centos), the parameter rp_filter is enabled by default, so it's necessary to enable the parameter lo.accept_local in psock_snd test. And this test runs inside a netns, changing a sysctl is fine. Signed-off-by: luyun Reviewed-by: Jackie Liu Tested-by: Hangbin Liu Acked-by: Willem de Bruijn Link: https://lore.kernel.org/r/20220525031819.866684-1-luyun_611@163.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/psock_snd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/net/psock_snd.c b/tools/testing/selftests/net/psock_snd.c index 7d15e10a9fb6..edf1e6f80d41 100644 --- a/tools/testing/selftests/net/psock_snd.c +++ b/tools/testing/selftests/net/psock_snd.c @@ -389,6 +389,8 @@ int main(int argc, char **argv) error(1, errno, "ip link set mtu"); if (system("ip addr add dev lo 172.17.0.1/24")) error(1, errno, "ip addr add"); + if (system("sysctl -w net.ipv4.conf.lo.accept_local=1")) + error(1, errno, "sysctl lo.accept_local"); run_test(); From b3b1a17538d3ef6a9667b2271216fd16d7678ab5 Mon Sep 17 00:00:00 2001 From: liuyacan Date: Wed, 25 May 2022 16:54:08 +0800 Subject: [PATCH 09/80] net/smc: set ini->smcrv2.ib_dev_v2 to NULL if SMC-Rv2 is unavailable In the process of checking whether RDMAv2 is available, the current implementation first sets ini->smcrv2.ib_dev_v2, and then allocates smc buf desc and register rmb, but the latter may fail. In this case, the pointer should be reset. Fixes: e49300a6bf62 ("net/smc: add listen processing for SMC-Rv2") Signed-off-by: liuyacan Reviewed-by: Karsten Graul Link: https://lore.kernel.org/r/20220525085408.812273-1-liuyacan@corp.netease.com Signed-off-by: Jakub Kicinski --- net/smc/af_smc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index a201bf29af98..433bb5a7df31 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -2161,6 +2161,7 @@ static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc, not_found: ini->smcr_version &= ~SMC_V2; + ini->smcrv2.ib_dev_v2 = NULL; ini->check_smcrv2 = false; } From ed6cd6a17896561b9f51ab4c0d9bbb29e762b597 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 25 May 2022 00:56:18 +0200 Subject: [PATCH 10/80] net, neigh: Set lower cap for neigh_managed_work rearming Yuwei reported that plain reuse of DELAY_PROBE_TIME to rearm work queue in neigh_managed_work is problematic if user explicitly configures the DELAY_PROBE_TIME to 0 for a neighbor table. Such misconfig can then hog CPU to 100% processing the system work queue. Instead, set lower interval bound to HZ which is totally sufficient. Yuwei is additionally looking into making the interval separately configurable from DELAY_PROBE_TIME. Reported-by: Yuwei Wang Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/netdev/797c3c53-ce1b-9f60-e253-cda615788f4a@iogearbox.net Reviewed-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/3b8c5aa906c52c3a8c995d1b2e8ccf650ea7c716.1653432794.git.daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- net/core/neighbour.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 47b6c1f0fdbb..54625287ee5b 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1579,7 +1579,7 @@ static void neigh_managed_work(struct work_struct *work) list_for_each_entry(neigh, &tbl->managed_list, managed_list) neigh_event_send_probe(neigh, NULL, false); queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, - NEIGH_VAR(&tbl->parms, DELAY_PROBE_TIME)); + max(NEIGH_VAR(&tbl->parms, DELAY_PROBE_TIME), HZ)); write_unlock_bh(&tbl->lock); } From 6c465408a7709cf180cde7569e141191b67a175c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 24 May 2022 16:11:53 +0200 Subject: [PATCH 11/80] dt-bindings: net: adin: Fix adi,phy-output-clock description syntax "make dt_binding_check": Documentation/devicetree/bindings/net/adi,adin.yaml:40:77: [error] syntax error: mapping values are not allowed here (syntax) The first line of the description ends with a colon, hence the block needs to be marked with a "|". Fixes: 1f77204e11f8b9e5 ("dt-bindings: net: adin: document phy clock output properties") Signed-off-by: Geert Uytterhoeven Acked-by: Rob Herring Link: https://lore.kernel.org/r/6fcef2665a6cd86a021509a84c5956ec2efd93ed.1653401420.git.geert+renesas@glider.be Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/adi,adin.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/adi,adin.yaml b/Documentation/devicetree/bindings/net/adi,adin.yaml index 77750df0c2c4..88611720545d 100644 --- a/Documentation/devicetree/bindings/net/adi,adin.yaml +++ b/Documentation/devicetree/bindings/net/adi,adin.yaml @@ -37,7 +37,8 @@ properties: default: 8 adi,phy-output-clock: - description: Select clock output on GP_CLK pin. Two clocks are available: + description: | + Select clock output on GP_CLK pin. Two clocks are available: A 25MHz reference and a free-running 125MHz. The phy can alternatively automatically switch between the reference and the 125MHz clocks based on its internal state. From 520778042ccca019f3ffa136dd0ca565c486cedd Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 25 May 2022 10:36:38 +0200 Subject: [PATCH 12/80] netfilter: nf_tables: disallow non-stateful expression in sets earlier Since 3e135cd499bf ("netfilter: nft_dynset: dynamic stateful expression instantiation"), it is possible to attach stateful expressions to set elements. cd5125d8f518 ("netfilter: nf_tables: split set destruction in deactivate and destroy phase") introduces conditional destruction on the object to accomodate transaction semantics. nft_expr_init() calls expr->ops->init() first, then check for NFT_STATEFUL_EXPR, this stills allows to initialize a non-stateful lookup expressions which points to a set, which might lead to UAF since the set is not properly detached from the set->binding for this case. Anyway, this combination is non-sense from nf_tables perspective. This patch fixes this problem by checking for NFT_STATEFUL_EXPR before expr->ops->init() is called. The reporter provides a KASAN splat and a poc reproducer (similar to those autogenerated by syzbot to report use-after-free errors). It is unknown to me if they are using syzbot or if they use similar automated tool to locate the bug that they are reporting. For the record, this is the KASAN splat. [ 85.431824] ================================================================== [ 85.432901] BUG: KASAN: use-after-free in nf_tables_bind_set+0x81b/0xa20 [ 85.433825] Write of size 8 at addr ffff8880286f0e98 by task poc/776 [ 85.434756] [ 85.434999] CPU: 1 PID: 776 Comm: poc Tainted: G W 5.18.0+ #2 [ 85.436023] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 Fixes: 0b2d8a7b638b ("netfilter: nf_tables: add helper functions for expression handling") Reported-and-tested-by: Aaron Adams Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 12fc9cda4a2c..f296dfe86b62 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2873,27 +2873,31 @@ static struct nft_expr *nft_expr_init(const struct nft_ctx *ctx, err = nf_tables_expr_parse(ctx, nla, &expr_info); if (err < 0) - goto err1; + goto err_expr_parse; + + err = -EOPNOTSUPP; + if (!(expr_info.ops->type->flags & NFT_EXPR_STATEFUL)) + goto err_expr_stateful; err = -ENOMEM; expr = kzalloc(expr_info.ops->size, GFP_KERNEL_ACCOUNT); if (expr == NULL) - goto err2; + goto err_expr_stateful; err = nf_tables_newexpr(ctx, &expr_info, expr); if (err < 0) - goto err3; + goto err_expr_new; return expr; -err3: +err_expr_new: kfree(expr); -err2: +err_expr_stateful: owner = expr_info.ops->type->owner; if (expr_info.ops->type->release_ops) expr_info.ops->type->release_ops(expr_info.ops); module_put(owner); -err1: +err_expr_parse: return ERR_PTR(err); } @@ -5413,9 +5417,6 @@ struct nft_expr *nft_set_elem_expr_alloc(const struct nft_ctx *ctx, return expr; err = -EOPNOTSUPP; - if (!(expr->ops->type->flags & NFT_EXPR_STATEFUL)) - goto err_set_elem_expr; - if (expr->ops->type->flags & NFT_EXPR_GC) { if (set->flags & NFT_SET_TIMEOUT) goto err_set_elem_expr; From 558254b0b602b8605d7246a10cfeb584b1fcabfc Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Tue, 24 May 2022 14:50:01 +0200 Subject: [PATCH 13/80] netfilter: nft_limit: Clone packet limits' cost value When cloning a packet-based limit expression, copy the cost value as well. Otherwise the new limit is not functional anymore. Fixes: 3b9e2ea6c11bf ("netfilter: nft_limit: move stateful fields out of expression data") Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_limit.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index 04ea8b9bf202..981addb2d051 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -213,6 +213,8 @@ static int nft_limit_pkts_clone(struct nft_expr *dst, const struct nft_expr *src struct nft_limit_priv_pkts *priv_dst = nft_expr_priv(dst); struct nft_limit_priv_pkts *priv_src = nft_expr_priv(src); + priv_dst->cost = priv_src->cost; + return nft_limit_clone(&priv_dst->limit, &priv_src->limit); } From b58cdd4388b1d8f5bee9f5a3897a7e780d1eaa48 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Thu, 26 May 2022 01:12:39 +0200 Subject: [PATCH 14/80] net: lan966x: check devm_of_phy_get() for -EDEFER_PROBE At the moment, if devm_of_phy_get() returns an error the serdes simply isn't set. While it is bad to ignore an error in general, there is a particular bug that network isn't working if the serdes driver is compiled as a module. In that case, devm_of_phy_get() returns -EDEFER_PROBE and the error is silently ignored. The serdes is optional, it is not there if the port is using RGMII, in which case devm_of_phy_get() returns -ENODEV. Rearrange the error handling so that -ENODEV will be handled but other error codes will abort the probing. Fixes: d28d6d2e37d1 ("net: lan966x: add port module support") Signed-off-by: Michael Walle Link: https://lore.kernel.org/r/20220525231239.1307298-1-michael@walle.cc Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan966x/lan966x_main.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 6ad68b422129..5784c4161e5e 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -1120,8 +1120,13 @@ static int lan966x_probe(struct platform_device *pdev) lan966x->ports[p]->fwnode = fwnode_handle_get(portnp); serdes = devm_of_phy_get(lan966x->dev, to_of_node(portnp), NULL); - if (!IS_ERR(serdes)) - lan966x->ports[p]->serdes = serdes; + if (PTR_ERR(serdes) == -ENODEV) + serdes = NULL; + if (IS_ERR(serdes)) { + err = PTR_ERR(serdes); + goto cleanup_ports; + } + lan966x->ports[p]->serdes = serdes; lan966x_port_init(lan966x->ports[p]); } From a54ce3703613e41fe1d98060b62ec09a3984dc28 Mon Sep 17 00:00:00 2001 From: Vincent Ray Date: Wed, 25 May 2022 17:17:46 -0700 Subject: [PATCH 15/80] net: sched: fixed barrier to prevent skbuff sticking in qdisc backlog In qdisc_run_begin(), smp_mb__before_atomic() used before test_bit() does not provide any ordering guarantee as test_bit() is not an atomic operation. This, added to the fact that the spin_trylock() call at the beginning of qdisc_run_begin() does not guarantee acquire semantics if it does not grab the lock, makes it possible for the following statement : if (test_bit(__QDISC_STATE_MISSED, &qdisc->state)) to be executed before an enqueue operation called before qdisc_run_begin(). As a result the following race can happen : CPU 1 CPU 2 qdisc_run_begin() qdisc_run_begin() /* true */ set(MISSED) . /* returns false */ . . /* sees MISSED = 1 */ . /* so qdisc not empty */ . __qdisc_run() . . . pfifo_fast_dequeue() ----> /* may be done here */ . | . clear(MISSED) | . . | . smp_mb __after_atomic(); | . . | . /* recheck the queue */ | . /* nothing => exit */ | enqueue(skb1) | . | qdisc_run_begin() | . | spin_trylock() /* fail */ | . | smp_mb__before_atomic() /* not enough */ | . ---- if (test_bit(MISSED)) return false; /* exit */ In the above scenario, CPU 1 and CPU 2 both try to grab the qdisc->seqlock at the same time. Only CPU 2 succeeds and enters the bypass code path, where it emits its skb then calls __qdisc_run(). CPU1 fails, sets MISSED and goes down the traditionnal enqueue() + dequeue() code path. But when executing qdisc_run_begin() for the second time, after enqueuing its skbuff, it sees the MISSED bit still set (by itself) and consequently chooses to exit early without setting it again nor trying to grab the spinlock again. Meanwhile CPU2 has seen MISSED = 1, cleared it, checked the queue and found it empty, so it returned. At the end of the sequence, we end up with skb1 enqueued in the backlog, both CPUs out of __dev_xmit_skb(), the MISSED bit not set, and no __netif_schedule() called made. skb1 will now linger in the qdisc until somebody later performs a full __qdisc_run(). Associated to the bypass capacity of the qdisc, and the ability of the TCP layer to avoid resending packets which it knows are still in the qdisc, this can lead to serious traffic "holes" in a TCP connection. We fix this by replacing the smp_mb__before_atomic() / test_bit() / set_bit() / smp_mb__after_atomic() sequence inside qdisc_run_begin() by a single test_and_set_bit() call, which is more concise and enforces the needed memory barriers. Fixes: 89837eb4b246 ("net: sched: add barrier to ensure correct ordering for lockless qdisc") Signed-off-by: Vincent Ray Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20220526001746.2437669-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/net/sch_generic.h | 36 ++++++++---------------------------- 1 file changed, 8 insertions(+), 28 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 9bab396c1f3b..80973ce820f3 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -187,37 +187,17 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) if (spin_trylock(&qdisc->seqlock)) return true; - /* Paired with smp_mb__after_atomic() to make sure - * STATE_MISSED checking is synchronized with clearing - * in pfifo_fast_dequeue(). + /* No need to insist if the MISSED flag was already set. + * Note that test_and_set_bit() also gives us memory ordering + * guarantees wrt potential earlier enqueue() and below + * spin_trylock(), both of which are necessary to prevent races */ - smp_mb__before_atomic(); - - /* If the MISSED flag is set, it means other thread has - * set the MISSED flag before second spin_trylock(), so - * we can return false here to avoid multi cpus doing - * the set_bit() and second spin_trylock() concurrently. - */ - if (test_bit(__QDISC_STATE_MISSED, &qdisc->state)) + if (test_and_set_bit(__QDISC_STATE_MISSED, &qdisc->state)) return false; - /* Set the MISSED flag before the second spin_trylock(), - * if the second spin_trylock() return false, it means - * other cpu holding the lock will do dequeuing for us - * or it will see the MISSED flag set after releasing - * lock and reschedule the net_tx_action() to do the - * dequeuing. - */ - set_bit(__QDISC_STATE_MISSED, &qdisc->state); - - /* spin_trylock() only has load-acquire semantic, so use - * smp_mb__after_atomic() to ensure STATE_MISSED is set - * before doing the second spin_trylock(). - */ - smp_mb__after_atomic(); - - /* Retry again in case other CPU may not see the new flag - * after it releases the lock at the end of qdisc_run_end(). + /* Try to take the lock again to make sure that we will either + * grab it or the CPU that still has it will see MISSED set + * when testing it in qdisc_run_end() */ return spin_trylock(&qdisc->seqlock); } From e7e7104e2d5ddf3806a28695670f21bef471f1e1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 26 May 2022 11:02:42 +0300 Subject: [PATCH 16/80] net: ethernet: mtk_eth_soc: out of bounds read in mtk_hwlro_get_fdir_entry() The "fsp->location" variable comes from user via ethtool_get_rxnfc(). Check that it is valid to prevent an out of bounds read. Fixes: 7aab747e5563 ("net: ethernet: mediatek: add ethtool functions to configure RX flows of HW LRO") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index a9d4fd8945bb..b3b3c079a0fa 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2212,6 +2212,9 @@ static int mtk_hwlro_get_fdir_entry(struct net_device *dev, struct ethtool_rx_flow_spec *fsp = (struct ethtool_rx_flow_spec *)&cmd->fs; + if (fsp->location >= ARRAY_SIZE(mac->hwlro_ip)) + return -EINVAL; + /* only tcp dst ipv4 is meaningful, others are meaningless */ fsp->flow_type = TCP_V4_FLOW; fsp->h_u.tcp_ip4_spec.ip4dst = ntohl(mac->hwlro_ip[fsp->location]); From 5dd89d2fc438457811cbbec07999ce0d80051ff5 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 26 May 2022 12:52:08 +0400 Subject: [PATCH 17/80] net: ethernet: ti: am65-cpsw-nuss: Fix some refcount leaks of_get_child_by_name() returns a node pointer with refcount incremented, we should use of_node_put() on it when not need anymore. am65_cpsw_init_cpts() and am65_cpsw_nuss_probe() don't release the refcount in error case. Add missing of_node_put() to avoid refcount leak. Fixes: b1f66a5bee07 ("net: ethernet: ti: am65-cpsw-nuss: enable packet timestamping support") Fixes: 93a76530316a ("net: ethernet: ti: introduce am65x/j721e gigabit eth subsystem driver") Signed-off-by: Miaoqian Lin Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 77bdda97b2b0..fb92d4c1547d 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1789,6 +1789,7 @@ static int am65_cpsw_init_cpts(struct am65_cpsw_common *common) if (IS_ERR(cpts)) { int ret = PTR_ERR(cpts); + of_node_put(node); if (ret == -EOPNOTSUPP) { dev_info(dev, "cpts disabled\n"); return 0; @@ -2665,9 +2666,9 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) if (!node) return -ENOENT; common->port_num = of_get_child_count(node); + of_node_put(node); if (common->port_num < 1 || common->port_num > AM65_CPSW_MAX_PORTS) return -ENOENT; - of_node_put(node); common->rx_flow_id_base = -1; init_completion(&common->tdown_complete); From 02ded5a173619b11728b8bf75a3fd995a2c1ff28 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 26 May 2022 18:52:08 +0400 Subject: [PATCH 18/80] net: dsa: mv88e6xxx: Fix refcount leak in mv88e6xxx_mdios_register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit of_get_child_by_name() returns a node pointer with refcount incremented, we should use of_node_put() on it when done. mv88e6xxx_mdio_register() pass the device node to of_mdiobus_register(). We don't need the device node after it. Add missing of_node_put() to avoid refcount leak. Fixes: a3c53be55c95 ("net: dsa: mv88e6xxx: Support multiple MDIO busses") Signed-off-by: Miaoqian Lin Reviewed-by: Marek Behún Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 5d2c57a7c708..0b49d243e00b 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3960,6 +3960,7 @@ static int mv88e6xxx_mdios_register(struct mv88e6xxx_chip *chip, */ child = of_get_child_by_name(np, "mdio"); err = mv88e6xxx_mdio_register(chip, child, false); + of_node_put(child); if (err) return err; From ffd219efd9ee1ceccc7ccfa9361fd40705680fc3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 20 May 2022 00:02:03 +0200 Subject: [PATCH 19/80] netfilter: nfnetlink: fix warn in nfnetlink_unbind syzbot reports following warn: WARNING: CPU: 0 PID: 3600 at net/netfilter/nfnetlink.c:703 nfnetlink_unbind+0x357/0x3b0 net/netfilter/nfnetlink.c:694 The syzbot generated program does this: socket(AF_NETLINK, SOCK_RAW, NETLINK_NETFILTER) = 3 setsockopt(3, SOL_NETLINK, NETLINK_DROP_MEMBERSHIP, [1], 4) = 0 ... which triggers 'WARN_ON_ONCE(nfnlnet->ctnetlink_listeners == 0)' check. Instead of counting, just enable reporting for every bind request and check if we still have listeners on unbind. While at it, also add the needed bounds check on nfnl_group2type[] access. Reported-by: Reported-by: Fixes: 2794cdb0b97b ("netfilter: nfnetlink: allow to detect if ctnetlink listeners exist") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink.c | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index ad3bbe34ca88..2f7c477fc9e7 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -45,7 +45,6 @@ MODULE_DESCRIPTION("Netfilter messages via netlink socket"); static unsigned int nfnetlink_pernet_id __read_mostly; struct nfnl_net { - unsigned int ctnetlink_listeners; struct sock *nfnl; }; @@ -673,18 +672,8 @@ static int nfnetlink_bind(struct net *net, int group) #ifdef CONFIG_NF_CONNTRACK_EVENTS if (type == NFNL_SUBSYS_CTNETLINK) { - struct nfnl_net *nfnlnet = nfnl_pernet(net); - nfnl_lock(NFNL_SUBSYS_CTNETLINK); - - if (WARN_ON_ONCE(nfnlnet->ctnetlink_listeners == UINT_MAX)) { - nfnl_unlock(NFNL_SUBSYS_CTNETLINK); - return -EOVERFLOW; - } - - nfnlnet->ctnetlink_listeners++; - if (nfnlnet->ctnetlink_listeners == 1) - WRITE_ONCE(net->ct.ctnetlink_has_listener, true); + WRITE_ONCE(net->ct.ctnetlink_has_listener, true); nfnl_unlock(NFNL_SUBSYS_CTNETLINK); } #endif @@ -694,15 +683,12 @@ static int nfnetlink_bind(struct net *net, int group) static void nfnetlink_unbind(struct net *net, int group) { #ifdef CONFIG_NF_CONNTRACK_EVENTS - int type = nfnl_group2type[group]; - - if (type == NFNL_SUBSYS_CTNETLINK) { - struct nfnl_net *nfnlnet = nfnl_pernet(net); + if (group <= NFNLGRP_NONE || group > NFNLGRP_MAX) + return; + if (nfnl_group2type[group] == NFNL_SUBSYS_CTNETLINK) { nfnl_lock(NFNL_SUBSYS_CTNETLINK); - WARN_ON_ONCE(nfnlnet->ctnetlink_listeners == 0); - nfnlnet->ctnetlink_listeners--; - if (nfnlnet->ctnetlink_listeners == 0) + if (!nfnetlink_has_listeners(net, group)) WRITE_ONCE(net->ct.ctnetlink_has_listener, false); nfnl_unlock(NFNL_SUBSYS_CTNETLINK); } From 56b14ecec97f39118bf85c9ac2438c5a949509ed Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 20 May 2022 00:02:04 +0200 Subject: [PATCH 20/80] netfilter: conntrack: re-fetch conntrack after insertion In case the conntrack is clashing, insertion can free skb->_nfct and set skb->_nfct to the already-confirmed entry. This wasn't found before because the conntrack entry and the extension space used to free'd after an rcu grace period, plus the race needs events enabled to trigger. Reported-by: Fixes: 71d8c47fc653 ("netfilter: conntrack: introduce clash resolution on insertion race") Fixes: 2ad9d7747c10 ("netfilter: conntrack: free extension area immediately") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_core.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 6406cfee34c2..37866c8386e2 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -58,8 +58,13 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb) int ret = NF_ACCEPT; if (ct) { - if (!nf_ct_is_confirmed(ct)) + if (!nf_ct_is_confirmed(ct)) { ret = __nf_conntrack_confirm(skb); + + if (ret == NF_ACCEPT) + ct = (struct nf_conn *)skb_nfct(skb); + } + if (ret == NF_ACCEPT && nf_ct_ecache_exist(ct)) nf_ct_deliver_cached_events(ct); } From aeed55a08d0b2bf2e3986b110b04d2a5aca192a6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 20 May 2022 00:02:05 +0200 Subject: [PATCH 21/80] netfilter: cttimeout: fix slab-out-of-bounds read in cttimeout_net_exit syzbot reports: BUG: KASAN: slab-out-of-bounds in __list_del_entry_valid+0xcc/0xf0 lib/list_debug.c:42 [..] list_del include/linux/list.h:148 [inline] cttimeout_net_exit+0x211/0x540 net/netfilter/nfnetlink_cttimeout.c:617 No reproducer so far. Looking at recent changes in this area its clear that the free_head must not be at the end of the structure because nf_ct_timeout structure has variable size. Reported-by: Fixes: 78222bacfca9 ("netfilter: cttimeout: decouple unlink and free on netns destruction") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_cttimeout.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index f069c24c6146..af15102bc696 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -35,12 +35,13 @@ static unsigned int nfct_timeout_id __read_mostly; struct ctnl_timeout { struct list_head head; + struct list_head free_head; struct rcu_head rcu_head; refcount_t refcnt; char name[CTNL_TIMEOUT_NAME_MAX]; - struct nf_ct_timeout timeout; - struct list_head free_head; + /* must be at the end */ + struct nf_ct_timeout timeout; }; struct nfct_timeout_pernet { From b53c116642502b0c85ecef78bff4f826a7dd4145 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 20 May 2022 00:02:06 +0200 Subject: [PATCH 22/80] netfilter: nf_tables: set element extended ACK reporting support Report the element that causes problems via netlink extended ACK for set element commands. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f296dfe86b62..f4f1d0a2da43 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5348,8 +5348,10 @@ static int nf_tables_getsetelem(struct sk_buff *skb, nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { err = nft_get_set_elem(&ctx, set, attr); - if (err < 0) + if (err < 0) { + NL_SET_BAD_ATTR(extack, attr); break; + } } return err; @@ -6126,8 +6128,10 @@ static int nf_tables_newsetelem(struct sk_buff *skb, nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { err = nft_add_set_elem(&ctx, set, attr, info->nlh->nlmsg_flags); - if (err < 0) + if (err < 0) { + NL_SET_BAD_ATTR(extack, attr); return err; + } } if (nft_net->validate_state == NFT_VALIDATE_DO) @@ -6397,8 +6401,10 @@ static int nf_tables_delsetelem(struct sk_buff *skb, nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { err = nft_del_setelem(&ctx, set, attr); - if (err < 0) + if (err < 0) { + NL_SET_BAD_ATTR(extack, attr); break; + } } return err; } From 79dfeb2916d7deb7ed11e2fb782d109d445a0cd4 Mon Sep 17 00:00:00 2001 From: Raju Lakkaraju Date: Fri, 27 May 2022 09:47:28 +0530 Subject: [PATCH 23/80] net: lan743x: PCI11010 / PCI11414 fix Fix the MDIO interface declarations to reflect what is currently supported by the PCI11010 / PCI11414 devices (C22 for RGMII and C22_C45 for SGMII) Signed-off-by: Raju Lakkaraju Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/lan743x_main.c | 32 +++++++++++++------ 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index efbddf24ba31..af81236b4b4e 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -1164,9 +1164,14 @@ static int lan743x_phy_open(struct lan743x_adapter *adapter) if (!phydev) goto return_error; - ret = phy_connect_direct(netdev, phydev, - lan743x_phy_link_status_change, - PHY_INTERFACE_MODE_GMII); + if (adapter->is_pci11x1x) + ret = phy_connect_direct(netdev, phydev, + lan743x_phy_link_status_change, + PHY_INTERFACE_MODE_RGMII); + else + ret = phy_connect_direct(netdev, phydev, + lan743x_phy_link_status_change, + PHY_INTERFACE_MODE_GMII); if (ret) goto return_error; } @@ -2936,20 +2941,27 @@ static int lan743x_mdiobus_init(struct lan743x_adapter *adapter) lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl); netif_dbg(adapter, drv, adapter->netdev, "SGMII operation\n"); + adapter->mdiobus->probe_capabilities = MDIOBUS_C22_C45; + adapter->mdiobus->read = lan743x_mdiobus_c45_read; + adapter->mdiobus->write = lan743x_mdiobus_c45_write; + adapter->mdiobus->name = "lan743x-mdiobus-c45"; + netif_dbg(adapter, drv, adapter->netdev, + "lan743x-mdiobus-c45\n"); } else { sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL); sgmii_ctl &= ~SGMII_CTL_SGMII_ENABLE_; sgmii_ctl |= SGMII_CTL_SGMII_POWER_DN_; lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl); netif_dbg(adapter, drv, adapter->netdev, - "(R)GMII operation\n"); + "RGMII operation\n"); + // Only C22 support when RGMII I/F + adapter->mdiobus->probe_capabilities = MDIOBUS_C22; + adapter->mdiobus->read = lan743x_mdiobus_read; + adapter->mdiobus->write = lan743x_mdiobus_write; + adapter->mdiobus->name = "lan743x-mdiobus"; + netif_dbg(adapter, drv, adapter->netdev, + "lan743x-mdiobus\n"); } - - adapter->mdiobus->probe_capabilities = MDIOBUS_C22_C45; - adapter->mdiobus->read = lan743x_mdiobus_c45_read; - adapter->mdiobus->write = lan743x_mdiobus_c45_write; - adapter->mdiobus->name = "lan743x-mdiobus-c45"; - netif_dbg(adapter, drv, adapter->netdev, "lan743x-mdiobus-c45\n"); } else { adapter->mdiobus->read = lan743x_mdiobus_read; adapter->mdiobus->write = lan743x_mdiobus_write; From 2c262b21de6dc93ac4d8c7a4cea0da4226b451fb Mon Sep 17 00:00:00 2001 From: Carlo Lobrano Date: Fri, 27 May 2022 10:29:06 +0200 Subject: [PATCH 24/80] net: usb: qmi_wwan: add Telit 0x1250 composition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for Telit LN910Cx 0x1250 composition 0x1250: rmnet, tty, tty, tty, tty Signed-off-by: Carlo Lobrano Acked-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 79f8bd849b1a..a659d6fb0b12 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1366,6 +1366,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1201, 2)}, /* Telit LE920, LE920A4 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1230, 2)}, /* Telit LE910Cx */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1250, 0)}, /* Telit LE910Cx */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1260, 2)}, /* Telit LE910Cx */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1261, 2)}, /* Telit LE910Cx */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1900, 1)}, /* Telit LN940 series */ From 59ed76fe2f981bccde37bdddb465f260a96a2404 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 26 May 2022 12:16:08 -0700 Subject: [PATCH 25/80] selftests/bpf: fix stacktrace_build_id with missing kprobe/urandom_read Kernel function urandom_read is replaced with urandom_read_iter. Therefore, kprobe on urandom_read is not working any more: [root@eth50-1 bpf]# ./test_progs -n 161 test_stacktrace_build_id:PASS:skel_open_and_load 0 nsec libbpf: kprobe perf_event_open() failed: No such file or directory libbpf: prog 'oncpu': failed to create kprobe 'urandom_read+0x0' \ perf event: No such file or directory libbpf: prog 'oncpu': failed to auto-attach: -2 test_stacktrace_build_id:FAIL:attach_tp err -2 161 stacktrace_build_id:FAIL Fix this by replacing urandom_read with urandom_read_iter in the test. Fixes: 1b388e7765f2 ("random: convert to using fops->read_iter()") Reported-by: Mykola Lysenko Signed-off-by: Song Liu Acked-by: David Vernet Link: https://lore.kernel.org/r/20220526191608.2364049-1-song@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c index 6c62bfb8bb6f..0c4426592a26 100644 --- a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c +++ b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c @@ -39,7 +39,7 @@ struct { __type(value, stack_trace_t); } stack_amap SEC(".maps"); -SEC("kprobe/urandom_read") +SEC("kprobe/urandom_read_iter") int oncpu(struct pt_regs *args) { __u32 max_len = sizeof(struct bpf_stack_build_id) From caff1fa4118cec4dfd4336521ebd22a6408a1e3e Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Tue, 24 May 2022 10:12:27 +0800 Subject: [PATCH 26/80] bpf: Fix probe read error in ___bpf_prog_run() I think there is something wrong with BPF_PROBE_MEM in ___bpf_prog_run() in big-endian machine. Let's make a test and see what will happen if we want to load a 'u16' with BPF_PROBE_MEM. Let's make the src value '0x0001', the value of dest register will become 0x0001000000000000, as the value will be loaded to the first 2 byte of DST with following code: bpf_probe_read_kernel(&DST, SIZE, (const void *)(long) (SRC + insn->off)); Obviously, the value in DST is not correct. In fact, we can compare BPF_PROBE_MEM with LDX_MEM_H: DST = *(SIZE *)(unsigned long) (SRC + insn->off); If the memory load is done by LDX_MEM_H, the value in DST will be 0x1 now. And I think this error results in the test case 'test_bpf_sk_storage_map' failing: test_bpf_sk_storage_map:PASS:bpf_iter_bpf_sk_storage_map__open_and_load 0 nsec test_bpf_sk_storage_map:PASS:socket 0 nsec test_bpf_sk_storage_map:PASS:map_update 0 nsec test_bpf_sk_storage_map:PASS:socket 0 nsec test_bpf_sk_storage_map:PASS:map_update 0 nsec test_bpf_sk_storage_map:PASS:socket 0 nsec test_bpf_sk_storage_map:PASS:map_update 0 nsec test_bpf_sk_storage_map:PASS:attach_iter 0 nsec test_bpf_sk_storage_map:PASS:create_iter 0 nsec test_bpf_sk_storage_map:PASS:read 0 nsec test_bpf_sk_storage_map:FAIL:ipv6_sk_count got 0 expected 3 $10/26 bpf_iter/bpf_sk_storage_map:FAIL The code of the test case is simply, it will load sk->sk_family to the register with BPF_PROBE_MEM and check if it is AF_INET6. With this patch, now the test case 'bpf_iter' can pass: $10 bpf_iter:OK Fixes: 2a02759ef5f8 ("bpf: Add support for BTF pointers to interpreter") Signed-off-by: Menglong Dong Signed-off-by: Daniel Borkmann Reviewed-by: Jiang Biao Reviewed-by: Hao Peng Cc: Ilya Leoshkevich Link: https://lore.kernel.org/bpf/20220524021228.533216-1-imagedong@tencent.com --- kernel/bpf/core.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index cacd8684c3c4..5f6f3f829b36 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1953,6 +1953,11 @@ out: CONT; \ LDX_MEM_##SIZEOP: \ DST = *(SIZE *)(unsigned long) (SRC + insn->off); \ + CONT; \ + LDX_PROBE_MEM_##SIZEOP: \ + bpf_probe_read_kernel(&DST, sizeof(SIZE), \ + (const void *)(long) (SRC + insn->off)); \ + DST = *((SIZE *)&DST); \ CONT; LDST(B, u8) @@ -1960,15 +1965,6 @@ out: LDST(W, u32) LDST(DW, u64) #undef LDST -#define LDX_PROBE(SIZEOP, SIZE) \ - LDX_PROBE_MEM_##SIZEOP: \ - bpf_probe_read_kernel(&DST, SIZE, (const void *)(long) (SRC + insn->off)); \ - CONT; - LDX_PROBE(B, 1) - LDX_PROBE(H, 2) - LDX_PROBE(W, 4) - LDX_PROBE(DW, 8) -#undef LDX_PROBE #define ATOMIC_ALU_OP(BOP, KOP) \ case BOP: \ From 4dc160a52da1330d54d561f9dac8a9e0ef333895 Mon Sep 17 00:00:00 2001 From: Alexandru Tachici Date: Thu, 26 May 2022 17:13:18 +0300 Subject: [PATCH 27/80] dt-bindings: net: Update ADIN PHY maintainers Update the dt-bindings maintainers section. Signed-off-by: Alexandru Tachici Link: https://lore.kernel.org/r/20220526141318.77146-1-alexandru.tachici@analog.com Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/adi,adin.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/adi,adin.yaml b/Documentation/devicetree/bindings/net/adi,adin.yaml index 88611720545d..929cf8c0b0fd 100644 --- a/Documentation/devicetree/bindings/net/adi,adin.yaml +++ b/Documentation/devicetree/bindings/net/adi,adin.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Analog Devices ADIN1200/ADIN1300 PHY maintainers: - - Alexandru Ardelean + - Alexandru Tachici description: | Bindings for Analog Devices Industrial Ethernet PHYs From 155c0c90bca918de6e4327275dfc1d97fd604115 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 26 May 2022 10:23:13 -0500 Subject: [PATCH 28/80] net: ipa: fix page free in ipa_endpoint_trans_release() Currently the (possibly compound) page used for receive buffers are freed using __free_pages(). But according to this comment above the definition of that function, that's wrong: If you want to use the page's reference count to decide when to free the allocation, you should allocate a compound page, and use put_page() instead of __free_pages(). Convert the call to __free_pages() in ipa_endpoint_trans_release() to use put_page() instead. Fixes: ed23f02680caa ("net: ipa: define per-endpoint receive buffer size") Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_endpoint.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index 385aa63ab4bb..e92aa9447f6e 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -1418,11 +1418,8 @@ void ipa_endpoint_trans_release(struct ipa_endpoint *endpoint, } else { struct page *page = trans->data; - if (page) { - u32 buffer_size = endpoint->config.rx.buffer_size; - - __free_pages(page, get_order(buffer_size)); - } + if (page) + put_page(page); } } From 70132763d5d2e94cd185e3aa92ac6a3ba89068fa Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 26 May 2022 10:23:14 -0500 Subject: [PATCH 29/80] net: ipa: fix page free in ipa_endpoint_replenish_one() Currently the (possibly compound) pages used for receive buffers are freed using __free_pages(). But according to this comment above the definition of that function, that's wrong: If you want to use the page's reference count to decide when to free the allocation, you should allocate a compound page, and use put_page() instead of __free_pages(). Convert the call to __free_pages() in ipa_endpoint_replenish_one() to use put_page() instead. Fixes: 6a606b90153b8 ("net: ipa: allocate transaction in replenish loop") Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_endpoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index e92aa9447f6e..d3b3255ac3d1 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -1095,7 +1095,7 @@ static int ipa_endpoint_replenish_one(struct ipa_endpoint *endpoint, ret = gsi_trans_page_add(trans, page, len, offset); if (ret) - __free_pages(page, get_order(buffer_size)); + put_page(page); else trans->data = page; /* transaction owns page now */ From e225c9a5a74b12e9ef8516f30a3db2c7eb866ee1 Mon Sep 17 00:00:00 2001 From: Guangguan Wang Date: Sat, 28 May 2022 14:54:57 +0800 Subject: [PATCH 30/80] net/smc: fixes for converting from "struct smc_cdc_tx_pend **" to "struct smc_wr_tx_pend_priv *" "struct smc_cdc_tx_pend **" can not directly convert to "struct smc_wr_tx_pend_priv *". Fixes: 2bced6aefa3d ("net/smc: put slot when connection is killed") Signed-off-by: Guangguan Wang Signed-off-by: David S. Miller --- net/smc/smc_cdc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 5c731f27996e..53f63bfbaf5f 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -82,7 +82,7 @@ int smc_cdc_get_free_slot(struct smc_connection *conn, /* abnormal termination */ if (!rc) smc_wr_tx_put_slot(link, - (struct smc_wr_tx_pend_priv *)pend); + (struct smc_wr_tx_pend_priv *)(*pend)); rc = -EPIPE; } return rc; From 2f1de254a25bee55600287e2eb2cc12ea389c129 Mon Sep 17 00:00:00 2001 From: Ke Liu Date: Sat, 28 May 2022 04:54:37 +0000 Subject: [PATCH 31/80] net: phy: Directly use ida_alloc()/free() Use ida_alloc()/ida_free() instead of deprecated ida_simple_get()/ida_simple_remove(). Signed-off-by: Ke Liu Signed-off-by: David S. Miller --- drivers/net/phy/fixed_phy.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index c65fb5f5d2dc..03abe6233bbb 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -180,7 +180,7 @@ static void fixed_phy_del(int phy_addr) if (fp->link_gpiod) gpiod_put(fp->link_gpiod); kfree(fp); - ida_simple_remove(&phy_fixed_ida, phy_addr); + ida_free(&phy_fixed_ida, phy_addr); return; } } @@ -244,13 +244,13 @@ static struct phy_device *__fixed_phy_register(unsigned int irq, } /* Get the next available PHY address, up to PHY_MAX_ADDR */ - phy_addr = ida_simple_get(&phy_fixed_ida, 0, PHY_MAX_ADDR, GFP_KERNEL); + phy_addr = ida_alloc_max(&phy_fixed_ida, PHY_MAX_ADDR - 1, GFP_KERNEL); if (phy_addr < 0) return ERR_PTR(phy_addr); ret = fixed_phy_add_gpiod(irq, phy_addr, status, gpiod); if (ret < 0) { - ida_simple_remove(&phy_fixed_ida, phy_addr); + ida_free(&phy_fixed_ida, phy_addr); return ERR_PTR(ret); } From 11825765291a93d8e7f44230da67b9f607c777bf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 May 2022 14:28:29 -0700 Subject: [PATCH 32/80] tcp: fix tcp_mtup_probe_success vs wrong snd_cwnd syzbot got a new report [1] finally pointing to a very old bug, added in initial support for MTU probing. tcp_mtu_probe() has checks about starting an MTU probe if tcp_snd_cwnd(tp) >= 11. But nothing prevents tcp_snd_cwnd(tp) to be reduced later and before the MTU probe succeeds. This bug would lead to potential zero-divides. Debugging added in commit 40570375356c ("tcp: add accessors to read/set tp->snd_cwnd") has paid off :) While we are at it, address potential overflows in this code. [1] WARNING: CPU: 1 PID: 14132 at include/net/tcp.h:1219 tcp_mtup_probe_success+0x366/0x570 net/ipv4/tcp_input.c:2712 Modules linked in: CPU: 1 PID: 14132 Comm: syz-executor.2 Not tainted 5.18.0-syzkaller-07857-gbabf0bb978e3 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:tcp_snd_cwnd_set include/net/tcp.h:1219 [inline] RIP: 0010:tcp_mtup_probe_success+0x366/0x570 net/ipv4/tcp_input.c:2712 Code: 74 08 48 89 ef e8 da 80 17 f9 48 8b 45 00 65 48 ff 80 80 03 00 00 48 83 c4 30 5b 41 5c 41 5d 41 5e 41 5f 5d c3 e8 aa b0 c5 f8 <0f> 0b e9 16 fe ff ff 48 8b 4c 24 08 80 e1 07 38 c1 0f 8c c7 fc ff RSP: 0018:ffffc900079e70f8 EFLAGS: 00010287 RAX: ffffffff88c0f7f6 RBX: ffff8880756e7a80 RCX: 0000000000040000 RDX: ffffc9000c6c4000 RSI: 0000000000031f9e RDI: 0000000000031f9f RBP: 0000000000000000 R08: ffffffff88c0f606 R09: ffffc900079e7520 R10: ffffed101011226d R11: 1ffff1101011226c R12: 1ffff1100eadcf50 R13: ffff8880756e72c0 R14: 1ffff1100eadcf89 R15: dffffc0000000000 FS: 00007f643236e700(0000) GS:ffff8880b9b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f1ab3f1e2a0 CR3: 0000000064fe7000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: tcp_clean_rtx_queue+0x223a/0x2da0 net/ipv4/tcp_input.c:3356 tcp_ack+0x1962/0x3c90 net/ipv4/tcp_input.c:3861 tcp_rcv_established+0x7c8/0x1ac0 net/ipv4/tcp_input.c:5973 tcp_v6_do_rcv+0x57b/0x1210 net/ipv6/tcp_ipv6.c:1476 sk_backlog_rcv include/net/sock.h:1061 [inline] __release_sock+0x1d8/0x4c0 net/core/sock.c:2849 release_sock+0x5d/0x1c0 net/core/sock.c:3404 sk_stream_wait_memory+0x700/0xdc0 net/core/stream.c:145 tcp_sendmsg_locked+0x111d/0x3fc0 net/ipv4/tcp.c:1410 tcp_sendmsg+0x2c/0x40 net/ipv4/tcp.c:1448 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg net/socket.c:734 [inline] __sys_sendto+0x439/0x5c0 net/socket.c:2119 __do_sys_sendto net/socket.c:2131 [inline] __se_sys_sendto net/socket.c:2127 [inline] __x64_sys_sendto+0xda/0xf0 net/socket.c:2127 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7f6431289109 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f643236e168 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00007f643139c100 RCX: 00007f6431289109 RDX: 00000000d0d0c2ac RSI: 0000000020000080 RDI: 000000000000000a RBP: 00007f64312e308d R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000000 R13: 00007fff372533af R14: 00007f643236e300 R15: 0000000000022000 Fixes: 5d424d5a674f ("[TCP]: MTU probing") Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3231af73e430..2e2a9ece9af2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2706,12 +2706,15 @@ static void tcp_mtup_probe_success(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + u64 val; - /* FIXME: breaks with very large cwnd */ tp->prior_ssthresh = tcp_current_ssthresh(sk); - tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) * - tcp_mss_to_mtu(sk, tp->mss_cache) / - icsk->icsk_mtup.probe_size); + + val = (u64)tcp_snd_cwnd(tp) * tcp_mss_to_mtu(sk, tp->mss_cache); + do_div(val, icsk->icsk_mtup.probe_size); + DEBUG_NET_WARN_ON_ONCE((u32)val != val); + tcp_snd_cwnd_set(tp, max_t(u32, 1U, val)); + tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_jiffies32; tp->snd_ssthresh = tcp_current_ssthresh(sk); From d8064c10560d53a841b08ab2e11dbd5d28873955 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 27 May 2022 16:37:47 -0400 Subject: [PATCH 33/80] net: dpaa: Convert to SPDX identifiers This converts these files to use SPDX idenfifiers instead of license text. Signed-off-by: Sean Anderson Reviewed-by: Madalin Bucur Signed-off-by: David S. Miller --- .../net/ethernet/freescale/dpaa/dpaa_eth.c | 31 ++---------------- .../net/ethernet/freescale/dpaa/dpaa_eth.h | 31 ++---------------- .../ethernet/freescale/dpaa/dpaa_eth_sysfs.c | 32 ++----------------- .../ethernet/freescale/dpaa/dpaa_eth_trace.h | 32 ++----------------- .../ethernet/freescale/dpaa/dpaa_ethtool.c | 32 ++----------------- 5 files changed, 15 insertions(+), 143 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index c78883c3a2c8..45634579adb6 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -1,32 +1,7 @@ -/* Copyright 2008 - 2016 Freescale Semiconductor Inc. +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later +/* + * Copyright 2008 - 2016 Freescale Semiconductor Inc. * Copyright 2020 NXP - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Freescale Semiconductor nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * ALTERNATIVELY, this software may be distributed under the terms of the - * GNU General Public License ("GPL") as published by the Free Software - * Foundation, either version 2 of that License or (at your option) any - * later version. - * - * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h index daf894a97050..35b8cea7f886 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h @@ -1,31 +1,6 @@ -/* Copyright 2008 - 2016 Freescale Semiconductor Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Freescale Semiconductor nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * ALTERNATIVELY, this software may be distributed under the terms of the - * GNU General Public License ("GPL") as published by the Free Software - * Foundation, either version 2 of that License or (at your option) any - * later version. - * - * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later */ +/* + * Copyright 2008 - 2016 Freescale Semiconductor Inc. */ #ifndef __DPAA_H diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_sysfs.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_sysfs.c index ee62d25cac81..4fee74c024bd 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_sysfs.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_sysfs.c @@ -1,32 +1,6 @@ -/* Copyright 2008-2016 Freescale Semiconductor Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Freescale Semiconductor nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * - * ALTERNATIVELY, this software may be distributed under the terms of the - * GNU General Public License ("GPL") as published by the Free Software - * Foundation, either version 2 of that License or (at your option) any - * later version. - * - * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later +/* + * Copyright 2008 - 2016 Freescale Semiconductor Inc. */ #include diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_trace.h b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_trace.h index 409c1dc39430..889f89df9930 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_trace.h +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_trace.h @@ -1,32 +1,6 @@ -/* Copyright 2013-2015 Freescale Semiconductor Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Freescale Semiconductor nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * - * ALTERNATIVELY, this software may be distributed under the terms of the - * GNU General Public License ("GPL") as published by the Free Software - * Foundation, either version 2 of that License or (at your option) any - * later version. - * - * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later */ +/* + * Copyright 2013-2015 Freescale Semiconductor Inc. */ #undef TRACE_SYSTEM diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c index 5750f9a56393..73f07881ce2d 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c @@ -1,32 +1,6 @@ -/* Copyright 2008-2016 Freescale Semiconductor, Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Freescale Semiconductor nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * - * ALTERNATIVELY, this software may be distributed under the terms of the - * GNU General Public License ("GPL") as published by the Free Software - * Foundation, either version 2 of that License or (at your option) any - * later version. - * - * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later +/* + * Copyright 2008 - 2016 Freescale Semiconductor Inc. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt From 0649e4d63420ebc8cbebef3e9d39e12ffc5eb9fa Mon Sep 17 00:00:00 2001 From: Yu Xiao Date: Fri, 27 May 2022 20:24:24 +0200 Subject: [PATCH 34/80] nfp: only report pause frame configuration for physical device Only report pause frame configuration for physical device. Logical port of both PCI PF and PCI VF do not support it. Fixes: 9fdc5d85a8fe ("nfp: update ethtool reporting of pauseframe control") Signed-off-by: Yu Xiao Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 61c8b450aafb..df0afd271a21 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -289,8 +289,6 @@ nfp_net_get_link_ksettings(struct net_device *netdev, /* Init to unknowns */ ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); - ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); - ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause); cmd->base.port = PORT_OTHER; cmd->base.speed = SPEED_UNKNOWN; cmd->base.duplex = DUPLEX_UNKNOWN; @@ -298,6 +296,8 @@ nfp_net_get_link_ksettings(struct net_device *netdev, port = nfp_port_from_netdev(netdev); eth_port = nfp_port_get_eth_port(port); if (eth_port) { + ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); + ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause); cmd->base.autoneg = eth_port->aneg != NFP_ANEG_DISABLED ? AUTONEG_ENABLE : AUTONEG_DISABLE; nfp_net_set_fec_link_mode(eth_port, cmd); From 911799172d2f703cc14e9000c55d124d5e0a91a2 Mon Sep 17 00:00:00 2001 From: keliu Date: Fri, 27 May 2022 06:42:25 +0000 Subject: [PATCH 35/80] net: nfc: Directly use ida_alloc()/free() Use ida_alloc()/ida_free() instead of deprecated ida_simple_get()/ida_simple_remove() . Signed-off-by: keliu Signed-off-by: David S. Miller --- net/nfc/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/nfc/core.c b/net/nfc/core.c index 6ff3e10ff8e3..eb2c0959e5b6 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -975,7 +975,7 @@ static void nfc_release(struct device *d) kfree(se); } - ida_simple_remove(&nfc_index_ida, dev->idx); + ida_free(&nfc_index_ida, dev->idx); kfree(dev); } @@ -1066,7 +1066,7 @@ struct nfc_dev *nfc_allocate_device(const struct nfc_ops *ops, if (!dev) return NULL; - rc = ida_simple_get(&nfc_index_ida, 0, 0, GFP_KERNEL); + rc = ida_alloc(&nfc_index_ida, GFP_KERNEL); if (rc < 0) goto err_free_dev; dev->idx = rc; From 5e1eeef69c0fef6249b794bda5d68f95a65d062f Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 27 May 2022 14:44:39 +0800 Subject: [PATCH 36/80] bonding: NS target should accept link local address When setting bond NS target, we use bond_is_ip6_target_ok() to check if the address valid. The link local address was wrongly rejected in bond_changelink(), as most time the user just set the ARP/NS target to gateway, while the IPv6 gateway is always a link local address when user set up interface via SLAAC. So remove the link local addr check when setting bond NS target. Fixes: 129e3c1bab24 ("bonding: add new option ns_ip6_target") Reported-by: Li Liang Signed-off-by: Hangbin Liu Reviewed-by: Jonathan Toppins Acked-by: Jay Vosburgh Signed-off-by: David S. Miller --- drivers/net/bonding/bond_netlink.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index f427fa1737c7..6f404f9c34e3 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -290,11 +290,6 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], addr6 = nla_get_in6_addr(attr); - if (ipv6_addr_type(&addr6) & IPV6_ADDR_LINKLOCAL) { - NL_SET_ERR_MSG(extack, "Invalid IPv6 addr6"); - return -EINVAL; - } - bond_opt_initextra(&newval, &addr6, sizeof(addr6)); err = __bond_opt_set(bond, BOND_OPT_NS_TARGETS, &newval); From 18eeb4dea65cb587057740d6b7b71a13bcfececd Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 27 May 2022 11:25:47 +0200 Subject: [PATCH 37/80] net: enetc: Use pci_release_region() to release some resources Some resources are allocated using pci_request_region(). It is more straightforward to release them with pci_release_region(). Fixes: 231ece36f50d ("enetc: Add mdio bus driver for the PCIe MDIO endpoint") Signed-off-by: Christophe JAILLET Reviewed-by: Claudiu Manoil Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c b/drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c index 15f37c5b8dc1..dafb26f81f95 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c @@ -69,7 +69,7 @@ static int enetc_pci_mdio_probe(struct pci_dev *pdev, return 0; err_mdiobus_reg: - pci_release_mem_regions(pdev); + pci_release_region(pdev, 0); err_pci_mem_reg: pci_disable_device(pdev); err_pci_enable: @@ -88,7 +88,7 @@ static void enetc_pci_mdio_remove(struct pci_dev *pdev) mdiobus_unregister(bus); mdio_priv = bus->priv; iounmap(mdio_priv->hw->port); - pci_release_mem_regions(pdev); + pci_release_region(pdev, 0); pci_disable_device(pdev); } From 2e102b53f8a778f872dc137f4c7ac548705817aa Mon Sep 17 00:00:00 2001 From: Martin Habets Date: Fri, 27 May 2022 10:05:28 +0200 Subject: [PATCH 38/80] sfc: fix considering that all channels have TX queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Normally, all channels have RX and TX queues, but this is not true if modparam efx_separate_tx_channels=1 is used. In that cases, some channels only have RX queues and others only TX queues (or more preciselly, they have them allocated, but not initialized). Fix efx_channel_has_tx_queues to return the correct value for this case too. Messages shown at probe time before the fix: sfc 0000:03:00.0 ens6f0np0: MC command 0x82 inlen 544 failed rc=-22 (raw=0) arg=0 ------------[ cut here ]------------ netdevice: ens6f0np0: failed to initialise TXQ -1 WARNING: CPU: 1 PID: 626 at drivers/net/ethernet/sfc/ef10.c:2393 efx_ef10_tx_init+0x201/0x300 [sfc] [...] stripped RIP: 0010:efx_ef10_tx_init+0x201/0x300 [sfc] [...] stripped Call Trace: efx_init_tx_queue+0xaa/0xf0 [sfc] efx_start_channels+0x49/0x120 [sfc] efx_start_all+0x1f8/0x430 [sfc] efx_net_open+0x5a/0xe0 [sfc] __dev_open+0xd0/0x190 __dev_change_flags+0x1b3/0x220 dev_change_flags+0x21/0x60 [...] stripped Messages shown at remove time before the fix: sfc 0000:03:00.0 ens6f0np0: failed to flush 10 queues sfc 0000:03:00.0 ens6f0np0: failed to flush queues Fixes: 8700aff08984 ("sfc: fix channel allocation with brute force") Reported-by: Tianhao Zhao Signed-off-by: Martin Habets Tested-by: Íñigo Huguet Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/net_driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 318db906a154..723bbeea5d0c 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -1530,7 +1530,7 @@ static inline bool efx_channel_is_xdp_tx(struct efx_channel *channel) static inline bool efx_channel_has_tx_queues(struct efx_channel *channel) { - return true; + return channel && channel->channel >= channel->efx->tx_channel_offset; } static inline unsigned int efx_channel_num_tx_queues(struct efx_channel *channel) From c308dfd1b43ef0d4c3e57b741bb3462eb7a7f4a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= Date: Fri, 27 May 2022 10:05:29 +0200 Subject: [PATCH 39/80] sfc: fix wrong tx channel offset with efx_separate_tx_channels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tx_channel_offset is calculated in efx_allocate_msix_channels, but it is also calculated again in efx_set_channels because it was originally done there, and when efx_allocate_msix_channels was introduced it was forgotten to be removed from efx_set_channels. Moreover, the old calculation is wrong when using efx_separate_tx_channels because now we can have XDP channels after the TX channels, so n_channels - n_tx_channels doesn't point to the first TX channel. Remove the old calculation from efx_set_channels, and add the initialization of this variable if MSI or legacy interrupts are used, next to the initialization of the rest of the related variables, where it was missing. Fixes: 3990a8fffbda ("sfc: allocate channels for XDP tx queues") Reported-by: Tianhao Zhao Signed-off-by: Íñigo Huguet Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/efx_channels.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c index f4919e7ee77b..032b8c0bd788 100644 --- a/drivers/net/ethernet/sfc/efx_channels.c +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -298,6 +298,7 @@ int efx_probe_interrupts(struct efx_nic *efx) efx->n_channels = 1; efx->n_rx_channels = 1; efx->n_tx_channels = 1; + efx->tx_channel_offset = 0; efx->n_xdp_channels = 0; efx->xdp_channel_offset = efx->n_channels; rc = pci_enable_msi(efx->pci_dev); @@ -318,6 +319,7 @@ int efx_probe_interrupts(struct efx_nic *efx) efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0); efx->n_rx_channels = 1; efx->n_tx_channels = 1; + efx->tx_channel_offset = 1; efx->n_xdp_channels = 0; efx->xdp_channel_offset = efx->n_channels; efx->legacy_irq = efx->pci_dev->irq; @@ -954,10 +956,6 @@ int efx_set_channels(struct efx_nic *efx) struct efx_channel *channel; int rc; - efx->tx_channel_offset = - efx_separate_tx_channels ? - efx->n_channels - efx->n_tx_channels : 0; - if (efx->xdp_tx_queue_count) { EFX_WARN_ON_PARANOID(efx->xdp_tx_queues); From d944e09ea839033476e43fe03db0121b7be5154e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 23 May 2022 18:02:01 +0200 Subject: [PATCH 40/80] wifi: libertas: use variable-size data in assoc req/resp cmd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The firmware has a 512 limit here, but we use less, so gcc starts complaining about it: drivers/net/wireless/marvell/libertas/cfg.c:1198:63: warning: array subscript ‘struct cmd_ds_802_11_associate_response[0]’ is partly outside array bounds of ‘unsigned char[203]’ [-Warray-bounds] 1198 | "aid 0x%04x\n", status, le16_to_cpu(resp->statuscode), | ^~ Since we size the command and response buffer per our needs and not per the firmware maximum, change to a variable size data array and put the 512 only into a comment. In the end, that's actually what the code always wanted, and it simplifies the code that used to subtract the fixed size buffer size in two places. Reported-by: Jakub Kicinski Signed-off-by: Johannes Berg Acked-by: Kalle Valo Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220523180200.115fa27fbece.Ie66d874b047e7afad63900aa2df70f031711147e@changeid --- drivers/net/wireless/marvell/libertas/cfg.c | 4 +--- drivers/net/wireless/marvell/libertas/host.h | 6 ++++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/marvell/libertas/cfg.c b/drivers/net/wireless/marvell/libertas/cfg.c index 4e3de684928b..b0b3f59dabc6 100644 --- a/drivers/net/wireless/marvell/libertas/cfg.c +++ b/drivers/net/wireless/marvell/libertas/cfg.c @@ -1053,7 +1053,6 @@ static int lbs_set_authtype(struct lbs_private *priv, */ #define LBS_ASSOC_MAX_CMD_SIZE \ (sizeof(struct cmd_ds_802_11_associate) \ - - 512 /* cmd_ds_802_11_associate.iebuf */ \ + LBS_MAX_SSID_TLV_SIZE \ + LBS_MAX_CHANNEL_TLV_SIZE \ + LBS_MAX_CF_PARAM_TLV_SIZE \ @@ -1130,8 +1129,7 @@ static int lbs_associate(struct lbs_private *priv, if (sme->ie && sme->ie_len) pos += lbs_add_wpa_tlv(pos, sme->ie, sme->ie_len); - len = (sizeof(*cmd) - sizeof(cmd->iebuf)) + - (u16)(pos - (u8 *) &cmd->iebuf); + len = sizeof(*cmd) + (u16)(pos - (u8 *) &cmd->iebuf); cmd->hdr.size = cpu_to_le16(len); lbs_deb_hex(LBS_DEB_ASSOC, "ASSOC_CMD", (u8 *) cmd, diff --git a/drivers/net/wireless/marvell/libertas/host.h b/drivers/net/wireless/marvell/libertas/host.h index ceff4b92e7a1..a202b716ad5d 100644 --- a/drivers/net/wireless/marvell/libertas/host.h +++ b/drivers/net/wireless/marvell/libertas/host.h @@ -528,7 +528,8 @@ struct cmd_ds_802_11_associate { __le16 listeninterval; __le16 bcnperiod; u8 dtimperiod; - u8 iebuf[512]; /* Enough for required and most optional IEs */ + /* 512 permitted - enough for required and most optional IEs */ + u8 iebuf[]; } __packed; struct cmd_ds_802_11_associate_response { @@ -537,7 +538,8 @@ struct cmd_ds_802_11_associate_response { __le16 capability; __le16 statuscode; __le16 aid; - u8 iebuf[512]; + /* max 512 */ + u8 iebuf[]; } __packed; struct cmd_ds_802_11_set_wep { From 9c6837311f437701b398bfaed963458aa292e9d7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 23 May 2022 22:03:01 +0200 Subject: [PATCH 41/80] wifi: iwlwifi: pcie: rename CAUSE macro At least mips64 has ist own CAUSE macro, so rename ours to IWL_CAUSE to fix build issues. Reported-by: Jakub Kicinski Fixes: c1918196427b ("iwlwifi: pcie: simplify MSI-X cause mapping") Signed-off-by: Johannes Berg Acked-by: Gregory Greenman Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220523220300.682be2029361.I283200b18da589a975a284073dca8ed001ee107a@changeid --- .../net/wireless/intel/iwlwifi/pcie/trans.c | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 6fc69c42f36e..bd50f52a1aad 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -1090,7 +1090,7 @@ struct iwl_causes_list { u8 addr; }; -#define CAUSE(reg, mask) \ +#define IWL_CAUSE(reg, mask) \ { \ .mask_reg = reg, \ .bit = ilog2(mask), \ @@ -1101,28 +1101,28 @@ struct iwl_causes_list { } static const struct iwl_causes_list causes_list_common[] = { - CAUSE(CSR_MSIX_FH_INT_MASK_AD, MSIX_FH_INT_CAUSES_D2S_CH0_NUM), - CAUSE(CSR_MSIX_FH_INT_MASK_AD, MSIX_FH_INT_CAUSES_D2S_CH1_NUM), - CAUSE(CSR_MSIX_FH_INT_MASK_AD, MSIX_FH_INT_CAUSES_S2D), - CAUSE(CSR_MSIX_FH_INT_MASK_AD, MSIX_FH_INT_CAUSES_FH_ERR), - CAUSE(CSR_MSIX_HW_INT_MASK_AD, MSIX_HW_INT_CAUSES_REG_ALIVE), - CAUSE(CSR_MSIX_HW_INT_MASK_AD, MSIX_HW_INT_CAUSES_REG_WAKEUP), - CAUSE(CSR_MSIX_HW_INT_MASK_AD, MSIX_HW_INT_CAUSES_REG_RESET_DONE), - CAUSE(CSR_MSIX_HW_INT_MASK_AD, MSIX_HW_INT_CAUSES_REG_CT_KILL), - CAUSE(CSR_MSIX_HW_INT_MASK_AD, MSIX_HW_INT_CAUSES_REG_RF_KILL), - CAUSE(CSR_MSIX_HW_INT_MASK_AD, MSIX_HW_INT_CAUSES_REG_PERIODIC), - CAUSE(CSR_MSIX_HW_INT_MASK_AD, MSIX_HW_INT_CAUSES_REG_SCD), - CAUSE(CSR_MSIX_HW_INT_MASK_AD, MSIX_HW_INT_CAUSES_REG_FH_TX), - CAUSE(CSR_MSIX_HW_INT_MASK_AD, MSIX_HW_INT_CAUSES_REG_HW_ERR), - CAUSE(CSR_MSIX_HW_INT_MASK_AD, MSIX_HW_INT_CAUSES_REG_HAP), + IWL_CAUSE(CSR_MSIX_FH_INT_MASK_AD, MSIX_FH_INT_CAUSES_D2S_CH0_NUM), + IWL_CAUSE(CSR_MSIX_FH_INT_MASK_AD, MSIX_FH_INT_CAUSES_D2S_CH1_NUM), + IWL_CAUSE(CSR_MSIX_FH_INT_MASK_AD, MSIX_FH_INT_CAUSES_S2D), + IWL_CAUSE(CSR_MSIX_FH_INT_MASK_AD, MSIX_FH_INT_CAUSES_FH_ERR), + IWL_CAUSE(CSR_MSIX_HW_INT_MASK_AD, MSIX_HW_INT_CAUSES_REG_ALIVE), + IWL_CAUSE(CSR_MSIX_HW_INT_MASK_AD, MSIX_HW_INT_CAUSES_REG_WAKEUP), + IWL_CAUSE(CSR_MSIX_HW_INT_MASK_AD, MSIX_HW_INT_CAUSES_REG_RESET_DONE), + IWL_CAUSE(CSR_MSIX_HW_INT_MASK_AD, MSIX_HW_INT_CAUSES_REG_CT_KILL), + IWL_CAUSE(CSR_MSIX_HW_INT_MASK_AD, MSIX_HW_INT_CAUSES_REG_RF_KILL), + IWL_CAUSE(CSR_MSIX_HW_INT_MASK_AD, MSIX_HW_INT_CAUSES_REG_PERIODIC), + IWL_CAUSE(CSR_MSIX_HW_INT_MASK_AD, MSIX_HW_INT_CAUSES_REG_SCD), + IWL_CAUSE(CSR_MSIX_HW_INT_MASK_AD, MSIX_HW_INT_CAUSES_REG_FH_TX), + IWL_CAUSE(CSR_MSIX_HW_INT_MASK_AD, MSIX_HW_INT_CAUSES_REG_HW_ERR), + IWL_CAUSE(CSR_MSIX_HW_INT_MASK_AD, MSIX_HW_INT_CAUSES_REG_HAP), }; static const struct iwl_causes_list causes_list_pre_bz[] = { - CAUSE(CSR_MSIX_HW_INT_MASK_AD, MSIX_HW_INT_CAUSES_REG_SW_ERR), + IWL_CAUSE(CSR_MSIX_HW_INT_MASK_AD, MSIX_HW_INT_CAUSES_REG_SW_ERR), }; static const struct iwl_causes_list causes_list_bz[] = { - CAUSE(CSR_MSIX_HW_INT_MASK_AD, MSIX_HW_INT_CAUSES_REG_SW_ERR_BZ), + IWL_CAUSE(CSR_MSIX_HW_INT_MASK_AD, MSIX_HW_INT_CAUSES_REG_SW_ERR_BZ), }; static void iwl_pcie_map_list(struct iwl_trans *trans, From 7711fe713a4987695ac6c68c9f175cb2d1d06e83 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Thu, 26 May 2022 13:12:51 +0800 Subject: [PATCH 42/80] wifi: rtw88: add a work to correct atomic scheduling warning of ::set_tim MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The set_tim is supposed to be atomic, but we should download beacon context to firmware with a mutex lock. To avoid warning, do the thing in another work. BUG: scheduling while atomic: swapper/1/0/0x00000700 Modules linked in: CPU: 1 PID: 0 Comm: swapper/1 Tainted: G W 5.18.0-rc7-00703-g33b5ee09a0c1 #4 Hardware name: Pine64 RK3566 Quartz64-A Board (DT) Call trace: dump_backtrace.part.0+0xc4/0xd0 show_stack+0x14/0x60 dump_stack_lvl+0x60/0x78 dump_stack+0x14/0x2c __schedule_bug+0x5c/0x70 __schedule+0x5c4/0x630 schedule+0x44/0xb0 schedule_preempt_disabled+0xc/0x14 __mutex_lock.constprop.0+0x538/0x56c __mutex_lock_slowpath+0x10/0x20 mutex_lock+0x54/0x60 rtw_ops_set_tim+0x20/0x40 __sta_info_recalc_tim+0x150/0x250 sta_info_recalc_tim+0x10/0x20 invoke_tx_handlers_early+0x4e4/0x5c0 ieee80211_tx+0x78/0x110 ieee80211_xmit+0x94/0xc0 __ieee80211_subif_start_xmit+0x818/0xd20 ieee80211_subif_start_xmit+0x44/0x2d0 dev_hard_start_xmit+0xd0/0x150 __dev_queue_xmit+0x250/0xb30 dev_queue_xmit+0x10/0x20 br_dev_queue_push_xmit+0x94/0x174 br_forward_finish+0x90/0xa0 __br_forward+0xc0/0x13c br_forward+0x108/0x134 br_dev_xmit+0x1cc/0x3a4 dev_hard_start_xmit+0xd0/0x150 __dev_queue_xmit+0x250/0xb30 dev_queue_xmit+0x10/0x20 arp_xmit+0x6c/0x7c arp_send_dst+0x8c/0xc0 arp_solicit+0xd4/0x1e0 neigh_probe+0x58/0xa0 neigh_timer_handler+0x27c/0x380 call_timer_fn.constprop.0+0x20/0x80 __run_timers.part.0+0x230/0x280 run_timer_softirq+0x38/0x70 _stext+0x104/0x278 __irq_exit_rcu+0xa4/0xdc irq_exit_rcu+0xc/0x14 el1_interrupt+0x34/0x50 el1h_64_irq_handler+0x14/0x20 el1h_64_irq+0x64/0x68 arch_cpu_idle+0x14/0x20 do_idle+0x208/0x290 cpu_startup_entry+0x20/0x30 secondary_start_kernel+0x130/0x144 __secondary_switched+0x54/0x58 Fixes: f2217968ffda ("rtw88: Add update beacon flow for AP mode") Reported-by: Ondřej Jirman Signed-off-by: Ping-Ke Shih Tested-by: Ondřej Jirman Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220526051251.281905-1-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw88/fw.c | 10 ++++++++++ drivers/net/wireless/realtek/rtw88/fw.h | 1 + drivers/net/wireless/realtek/rtw88/mac80211.c | 4 +--- drivers/net/wireless/realtek/rtw88/main.c | 2 ++ drivers/net/wireless/realtek/rtw88/main.h | 1 + 5 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw88/fw.c b/drivers/net/wireless/realtek/rtw88/fw.c index 090610e48d08..c3ae631c2264 100644 --- a/drivers/net/wireless/realtek/rtw88/fw.c +++ b/drivers/net/wireless/realtek/rtw88/fw.c @@ -1602,6 +1602,16 @@ free: return ret; } +void rtw_fw_update_beacon_work(struct work_struct *work) +{ + struct rtw_dev *rtwdev = container_of(work, struct rtw_dev, + update_beacon_work); + + mutex_lock(&rtwdev->mutex); + rtw_fw_download_rsvd_page(rtwdev); + mutex_unlock(&rtwdev->mutex); +} + static void rtw_fw_read_fifo_page(struct rtw_dev *rtwdev, u32 offset, u32 size, u32 *buf, u32 residue, u16 start_pg) { diff --git a/drivers/net/wireless/realtek/rtw88/fw.h b/drivers/net/wireless/realtek/rtw88/fw.h index 734113fba184..7a37675c61e8 100644 --- a/drivers/net/wireless/realtek/rtw88/fw.h +++ b/drivers/net/wireless/realtek/rtw88/fw.h @@ -809,6 +809,7 @@ void rtw_add_rsvd_page_pno(struct rtw_dev *rtwdev, void rtw_add_rsvd_page_sta(struct rtw_dev *rtwdev, struct rtw_vif *rtwvif); int rtw_fw_download_rsvd_page(struct rtw_dev *rtwdev); +void rtw_fw_update_beacon_work(struct work_struct *work); void rtw_send_rsvd_page_h2c(struct rtw_dev *rtwdev); int rtw_dump_drv_rsvd_page(struct rtw_dev *rtwdev, u32 offset, u32 size, u32 *buf); diff --git a/drivers/net/wireless/realtek/rtw88/mac80211.c b/drivers/net/wireless/realtek/rtw88/mac80211.c index 30903c567cd9..4310362dc333 100644 --- a/drivers/net/wireless/realtek/rtw88/mac80211.c +++ b/drivers/net/wireless/realtek/rtw88/mac80211.c @@ -493,9 +493,7 @@ static int rtw_ops_set_tim(struct ieee80211_hw *hw, struct ieee80211_sta *sta, { struct rtw_dev *rtwdev = hw->priv; - mutex_lock(&rtwdev->mutex); - rtw_fw_download_rsvd_page(rtwdev); - mutex_unlock(&rtwdev->mutex); + ieee80211_queue_work(hw, &rtwdev->update_beacon_work); return 0; } diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c index 14289f83feb5..efabd5b1bf5b 100644 --- a/drivers/net/wireless/realtek/rtw88/main.c +++ b/drivers/net/wireless/realtek/rtw88/main.c @@ -1442,6 +1442,7 @@ void rtw_core_stop(struct rtw_dev *rtwdev) mutex_unlock(&rtwdev->mutex); cancel_work_sync(&rtwdev->c2h_work); + cancel_work_sync(&rtwdev->update_beacon_work); cancel_delayed_work_sync(&rtwdev->watch_dog_work); cancel_delayed_work_sync(&coex->bt_relink_work); cancel_delayed_work_sync(&coex->bt_reenable_work); @@ -1998,6 +1999,7 @@ int rtw_core_init(struct rtw_dev *rtwdev) INIT_WORK(&rtwdev->c2h_work, rtw_c2h_work); INIT_WORK(&rtwdev->ips_work, rtw_ips_work); INIT_WORK(&rtwdev->fw_recovery_work, rtw_fw_recovery_work); + INIT_WORK(&rtwdev->update_beacon_work, rtw_fw_update_beacon_work); INIT_WORK(&rtwdev->ba_work, rtw_txq_ba_work); skb_queue_head_init(&rtwdev->c2h_queue); skb_queue_head_init(&rtwdev->coex.queue); diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h index 0baaf5a32e82..c02be4ac159e 100644 --- a/drivers/net/wireless/realtek/rtw88/main.h +++ b/drivers/net/wireless/realtek/rtw88/main.h @@ -2008,6 +2008,7 @@ struct rtw_dev { struct work_struct c2h_work; struct work_struct ips_work; struct work_struct fw_recovery_work; + struct work_struct update_beacon_work; /* used to protect txqs list */ spinlock_t txq_lock; From 3a2cd89bfbeb10012eb90857ce641d34f0151c4c Mon Sep 17 00:00:00 2001 From: huhai Date: Thu, 26 May 2022 18:12:13 +0800 Subject: [PATCH 43/80] net: ipv4: Avoid bounds check warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following build warning when CONFIG_IPV6 is not set: In function ‘fortify_memcpy_chk’, inlined from ‘tcp_md5_do_add’ at net/ipv4/tcp_ipv4.c:1210:2: ./include/linux/fortify-string.h:328:4: error: call to ‘__write_overflow_field’ declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Werror=attribute-warning] 328 | __write_overflow_field(p_size_field, size); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Suggested-by: Paolo Abeni Signed-off-by: huhai Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20220526101213.2392980-1-zhanggenjian@kylinos.cn Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_ipv4.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index dac2650f3863..fe8f23b95d32 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1207,8 +1207,8 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, key->l3index = l3index; key->flags = flags; memcpy(&key->addr, addr, - (family == AF_INET6) ? sizeof(struct in6_addr) : - sizeof(struct in_addr)); + (IS_ENABLED(CONFIG_IPV6) && family == AF_INET6) ? sizeof(struct in6_addr) : + sizeof(struct in_addr)); hlist_add_head_rcu(&key->node, &md5sig->head); return 0; } From d7cd5e06c9dd70a82f1461c7b5f676bc03f5cd61 Mon Sep 17 00:00:00 2001 From: Viorel Suman Date: Fri, 27 May 2022 11:49:34 +0300 Subject: [PATCH 44/80] net: phy: at803x: disable WOL at probe Before 7beecaf7d507b ("net: phy: at803x: improve the WOL feature") patch "at803x_get_wol" implementation used AT803X_INTR_ENABLE_WOL value to set WAKE_MAGIC flag, and now AT803X_WOL_EN value is used for the same purpose. The problem here is that the values of these two bits are different after hardware reset: AT803X_INTR_ENABLE_WOL=0 after hardware reset, but AT803X_WOL_EN=1. So now, if called right after boot, "at803x_get_wol" will set WAKE_MAGIC flag, even if WOL function is not enabled by calling "at803x_set_wol" function. The patch disables WOL function on probe thus the behavior is consistent. Fixes: 7beecaf7d507b ("net: phy: at803x: improve the WOL feature") Signed-off-by: Viorel Suman Link: https://lore.kernel.org/r/20220527084935.235274-1-viorel.suman@oss.nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/at803x.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 73926006d319..6a467e7817a6 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -433,20 +433,21 @@ static void at803x_context_restore(struct phy_device *phydev, static int at803x_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol) { - struct net_device *ndev = phydev->attached_dev; - const u8 *mac; int ret, irq_enabled; - unsigned int i; - static const unsigned int offsets[] = { - AT803X_LOC_MAC_ADDR_32_47_OFFSET, - AT803X_LOC_MAC_ADDR_16_31_OFFSET, - AT803X_LOC_MAC_ADDR_0_15_OFFSET, - }; - - if (!ndev) - return -ENODEV; if (wol->wolopts & WAKE_MAGIC) { + struct net_device *ndev = phydev->attached_dev; + const u8 *mac; + unsigned int i; + static const unsigned int offsets[] = { + AT803X_LOC_MAC_ADDR_32_47_OFFSET, + AT803X_LOC_MAC_ADDR_16_31_OFFSET, + AT803X_LOC_MAC_ADDR_0_15_OFFSET, + }; + + if (!ndev) + return -ENODEV; + mac = (const u8 *) ndev->dev_addr; if (!is_valid_ether_addr(mac)) @@ -857,6 +858,9 @@ static int at803x_probe(struct phy_device *phydev) if (phydev->drv->phy_id == ATH8031_PHY_ID) { int ccr = phy_read(phydev, AT803X_REG_CHIP_CONFIG); int mode_cfg; + struct ethtool_wolinfo wol = { + .wolopts = 0, + }; if (ccr < 0) goto err; @@ -872,6 +876,13 @@ static int at803x_probe(struct phy_device *phydev) priv->is_fiber = true; break; } + + /* Disable WOL by default */ + ret = at803x_set_wol(phydev, &wol); + if (ret < 0) { + phydev_err(phydev, "failed to disable WOL on probe: %d\n", ret); + goto err; + } } return 0; From 4a1f14df55d1e9ecdfa797a87a80131207cbd66f Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 30 May 2022 14:26:39 +0800 Subject: [PATCH 45/80] bonding: show NS IPv6 targets in proc master info When adding bond new parameter ns_targets. I forgot to print this in bond master proc info. After updating, the bond master info will look like: ARP IP target/s (n.n.n.n form): 192.168.1.254 NS IPv6 target/s (XX::XX form): 2022::1, 2022::2 Fixes: 4e24be018eb9 ("bonding: add new parameter ns_targets") Reported-by: Li Liang Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20220530062639.37179-1-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/bonding/bond_procfs.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c index cfe37be42be4..43be458422b3 100644 --- a/drivers/net/bonding/bond_procfs.c +++ b/drivers/net/bonding/bond_procfs.c @@ -129,6 +129,21 @@ static void bond_info_show_master(struct seq_file *seq) printed = 1; } seq_printf(seq, "\n"); + +#if IS_ENABLED(CONFIG_IPV6) + printed = 0; + seq_printf(seq, "NS IPv6 target/s (xx::xx form):"); + + for (i = 0; (i < BOND_MAX_NS_TARGETS); i++) { + if (ipv6_addr_any(&bond->params.ns_targets[i])) + break; + if (printed) + seq_printf(seq, ","); + seq_printf(seq, " %pI6c", &bond->params.ns_targets[i]); + printed = 1; + } + seq_printf(seq, "\n"); +#endif } if (BOND_MODE(bond) == BOND_MODE_8023AD) { From 3e0b8f529c10037ae0b369fc892e524eae5a5485 Mon Sep 17 00:00:00 2001 From: Arun Ajith S Date: Mon, 30 May 2022 10:14:14 +0000 Subject: [PATCH 46/80] net/ipv6: Expand and rename accept_unsolicited_na to accept_untracked_na RFC 9131 changes default behaviour of handling RX of NA messages when the corresponding entry is absent in the neighbour cache. The current implementation is limited to accept just unsolicited NAs. However, the RFC is more generic where it also accepts solicited NAs. Both types should result in adding a STALE entry for this case. Expand accept_untracked_na behaviour to also accept solicited NAs to be compliant with the RFC and rename the sysctl knob to accept_untracked_na. Fixes: f9a2fb73318e ("net/ipv6: Introduce accept_unsolicited_na knob to implement router-side changes for RFC9131") Signed-off-by: Arun Ajith S Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20220530101414.65439-1-aajith@arista.com Signed-off-by: Paolo Abeni --- Documentation/networking/ip-sysctl.rst | 23 ++++------ include/linux/ipv6.h | 2 +- include/uapi/linux/ipv6.h | 2 +- net/ipv6/addrconf.c | 6 +-- net/ipv6/ndisc.c | 42 +++++++++++-------- .../net/ndisc_unsolicited_na_test.sh | 23 +++++----- 6 files changed, 50 insertions(+), 48 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index b882d4238581..04216564a03c 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -2474,21 +2474,16 @@ drop_unsolicited_na - BOOLEAN By default this is turned off. -accept_unsolicited_na - BOOLEAN - Add a new neighbour cache entry in STALE state for routers on receiving an - unsolicited neighbour advertisement with target link-layer address option - specified. This is as per router-side behavior documented in RFC9131. - This has lower precedence than drop_unsolicited_na. +accept_untracked_na - BOOLEAN + Add a new neighbour cache entry in STALE state for routers on receiving a + neighbour advertisement (either solicited or unsolicited) with target + link-layer address option specified if no neighbour entry is already + present for the advertised IPv6 address. Without this knob, NAs received + for untracked addresses (absent in neighbour cache) are silently ignored. - ==== ====== ====== ============================================== - drop accept fwding behaviour - ---- ------ ------ ---------------------------------------------- - 1 X X Drop NA packet and don't pass up the stack - 0 0 X Pass NA packet up the stack, don't update NC - 0 1 0 Pass NA packet up the stack, don't update NC - 0 1 1 Pass NA packet up the stack, and add a STALE - NC entry - ==== ====== ====== ============================================== + This is as per router-side behaviour documented in RFC9131. + + This has lower precedence than drop_unsolicited_na. This will optimize the return path for the initial off-link communication that is initiated by a directly connected host, by ensuring that diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 38c8203d52cb..37dfdcfcdd54 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -61,7 +61,7 @@ struct ipv6_devconf { __s32 suppress_frag_ndisc; __s32 accept_ra_mtu; __s32 drop_unsolicited_na; - __s32 accept_unsolicited_na; + __s32 accept_untracked_na; struct ipv6_stable_secret { bool initialized; struct in6_addr secret; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 549ddeaf788b..03cdbe798fe3 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -194,7 +194,7 @@ enum { DEVCONF_IOAM6_ID, DEVCONF_IOAM6_ID_WIDE, DEVCONF_NDISC_EVICT_NOCARRIER, - DEVCONF_ACCEPT_UNSOLICITED_NA, + DEVCONF_ACCEPT_UNTRACKED_NA, DEVCONF_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ca0aa744593e..1b1932502e9e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5586,7 +5586,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_IOAM6_ID] = cnf->ioam6_id; array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide; array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier; - array[DEVCONF_ACCEPT_UNSOLICITED_NA] = cnf->accept_unsolicited_na; + array[DEVCONF_ACCEPT_UNTRACKED_NA] = cnf->accept_untracked_na; } static inline size_t inet6_ifla6_size(void) @@ -7038,8 +7038,8 @@ static const struct ctl_table addrconf_sysctl[] = { .extra2 = (void *)SYSCTL_ONE, }, { - .procname = "accept_unsolicited_na", - .data = &ipv6_devconf.accept_unsolicited_na, + .procname = "accept_untracked_na", + .data = &ipv6_devconf.accept_untracked_na, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 254addad0dd3..b0dfe97ea4ee 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -979,7 +979,7 @@ static void ndisc_recv_na(struct sk_buff *skb) struct inet6_dev *idev = __in6_dev_get(dev); struct inet6_ifaddr *ifp; struct neighbour *neigh; - bool create_neigh; + u8 new_state; if (skb->len < sizeof(struct nd_msg)) { ND_PRINTK(2, warn, "NA: packet too short\n"); @@ -1000,7 +1000,7 @@ static void ndisc_recv_na(struct sk_buff *skb) /* For some 802.11 wireless deployments (and possibly other networks), * there will be a NA proxy and unsolicitd packets are attacks * and thus should not be accepted. - * drop_unsolicited_na takes precedence over accept_unsolicited_na + * drop_unsolicited_na takes precedence over accept_untracked_na */ if (!msg->icmph.icmp6_solicited && idev && idev->cnf.drop_unsolicited_na) @@ -1041,25 +1041,33 @@ static void ndisc_recv_na(struct sk_buff *skb) in6_ifa_put(ifp); return; } + + neigh = neigh_lookup(&nd_tbl, &msg->target, dev); + /* RFC 9131 updates original Neighbour Discovery RFC 4861. - * An unsolicited NA can now create a neighbour cache entry - * on routers if it has Target LL Address option. + * NAs with Target LL Address option without a corresponding + * entry in the neighbour cache can now create a STALE neighbour + * cache entry on routers. + * + * entry accept fwding solicited behaviour + * ------- ------ ------ --------- ---------------------- + * present X X 0 Set state to STALE + * present X X 1 Set state to REACHABLE + * absent 0 X X Do nothing + * absent 1 0 X Do nothing + * absent 1 1 X Add a new STALE entry * - * drop accept fwding behaviour - * ---- ------ ------ ---------------------------------------------- - * 1 X X Drop NA packet and don't pass up the stack - * 0 0 X Pass NA packet up the stack, don't update NC - * 0 1 0 Pass NA packet up the stack, don't update NC - * 0 1 1 Pass NA packet up the stack, and add a STALE - * NC entry * Note that we don't do a (daddr == all-routers-mcast) check. */ - create_neigh = !msg->icmph.icmp6_solicited && lladdr && - idev && idev->cnf.forwarding && - idev->cnf.accept_unsolicited_na; - neigh = __neigh_lookup(&nd_tbl, &msg->target, dev, create_neigh); + new_state = msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE; + if (!neigh && lladdr && + idev && idev->cnf.forwarding && + idev->cnf.accept_untracked_na) { + neigh = neigh_create(&nd_tbl, &msg->target, dev); + new_state = NUD_STALE; + } - if (neigh) { + if (neigh && !IS_ERR(neigh)) { u8 old_flags = neigh->flags; struct net *net = dev_net(dev); @@ -1079,7 +1087,7 @@ static void ndisc_recv_na(struct sk_buff *skb) } ndisc_update(dev, neigh, lladdr, - msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE, + new_state, NEIGH_UPDATE_F_WEAK_OVERRIDE| (msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)| NEIGH_UPDATE_F_OVERRIDE_ISROUTER| diff --git a/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh index f508657ee126..86e621b7b9c7 100755 --- a/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh +++ b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh @@ -1,15 +1,14 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -# This test is for the accept_unsolicited_na feature to +# This test is for the accept_untracked_na feature to # enable RFC9131 behaviour. The following is the test-matrix. # drop accept fwding behaviour # ---- ------ ------ ---------------------------------------------- -# 1 X X Drop NA packet and don't pass up the stack -# 0 0 X Pass NA packet up the stack, don't update NC -# 0 1 0 Pass NA packet up the stack, don't update NC -# 0 1 1 Pass NA packet up the stack, and add a STALE -# NC entry +# 1 X X Don't update NC +# 0 0 X Don't update NC +# 0 1 0 Don't update NC +# 0 1 1 Add a STALE NC entry ret=0 # Kselftest framework requirement - SKIP code is 4. @@ -72,7 +71,7 @@ setup() set -e local drop_unsolicited_na=$1 - local accept_unsolicited_na=$2 + local accept_untracked_na=$2 local forwarding=$3 # Setup two namespaces and a veth tunnel across them. @@ -93,7 +92,7 @@ setup() ${IP_ROUTER_EXEC} sysctl -qw \ ${ROUTER_CONF}.drop_unsolicited_na=${drop_unsolicited_na} ${IP_ROUTER_EXEC} sysctl -qw \ - ${ROUTER_CONF}.accept_unsolicited_na=${accept_unsolicited_na} + ${ROUTER_CONF}.accept_untracked_na=${accept_untracked_na} ${IP_ROUTER_EXEC} sysctl -qw ${ROUTER_CONF}.disable_ipv6=0 ${IP_ROUTER} addr add ${ROUTER_ADDR_WITH_MASK} dev ${ROUTER_INTF} @@ -144,13 +143,13 @@ link_up() { verify_ndisc() { local drop_unsolicited_na=$1 - local accept_unsolicited_na=$2 + local accept_untracked_na=$2 local forwarding=$3 neigh_show_output=$(${IP_ROUTER} neigh show \ to ${HOST_ADDR} dev ${ROUTER_INTF} nud stale) if [ ${drop_unsolicited_na} -eq 0 ] && \ - [ ${accept_unsolicited_na} -eq 1 ] && \ + [ ${accept_untracked_na} -eq 1 ] && \ [ ${forwarding} -eq 1 ]; then # Neighbour entry expected to be present for 011 case [[ ${neigh_show_output} ]] @@ -179,14 +178,14 @@ test_unsolicited_na_combination() { test_unsolicited_na_common $1 $2 $3 test_msg=("test_unsolicited_na: " "drop_unsolicited_na=$1 " - "accept_unsolicited_na=$2 " + "accept_untracked_na=$2 " "forwarding=$3") log_test $? 0 "${test_msg[*]}" cleanup } test_unsolicited_na_combinations() { - # Args: drop_unsolicited_na accept_unsolicited_na forwarding + # Args: drop_unsolicited_na accept_untracked_na forwarding # Expect entry test_unsolicited_na_combination 0 1 1 From 09e545f7381459c015b6fa0cd0ac6f010ef8cc25 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 30 May 2022 13:34:59 +0200 Subject: [PATCH 47/80] xen/netback: fix incorrect usage of RING_HAS_UNCONSUMED_REQUESTS() Commit 6fac592cca60 ("xen: update ring.h") missed to fix one use case of RING_HAS_UNCONSUMED_REQUESTS(). Reported-by: Jan Beulich Fixes: 6fac592cca60 ("xen: update ring.h") Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Acked-by: Wei Liu Link: https://lore.kernel.org/r/20220530113459.20124-1-jgross@suse.com Signed-off-by: Paolo Abeni --- drivers/net/xen-netback/netback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 0f7fd159f0f2..d93814c14a23 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -828,7 +828,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, break; } - work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&queue->tx); + work_to_do = XEN_RING_NR_UNCONSUMED_REQUESTS(&queue->tx); if (!work_to_do) break; From 80b2bd737d0e833e6a2b77e482e5a714a79c86a4 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 24 May 2022 15:59:27 +0300 Subject: [PATCH 48/80] net/mlx5: Don't use already freed action pointer The call to mlx5dr_action_destroy() releases "action" memory. That pointer is set to miss_action later and generates the following smatch error: drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c:53 set_miss_action() warn: 'action' was already freed. Make sure that the pointer is always valid by setting NULL after destroy. Fixes: 6a48faeeca10 ("net/mlx5: Add direct rule fs_cmd implementation") Reported-by: Dan Carpenter Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index 728f81882589..6a9abba92df6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -44,11 +44,10 @@ static int set_miss_action(struct mlx5_flow_root_namespace *ns, err = mlx5dr_table_set_miss_action(ft->fs_dr_table.dr_table, action); if (err && action) { err = mlx5dr_action_destroy(action); - if (err) { - action = NULL; - mlx5_core_err(ns->dev, "Failed to destroy action (%d)\n", - err); - } + if (err) + mlx5_core_err(ns->dev, + "Failed to destroy action (%d)\n", err); + action = NULL; } ft->fs_dr_table.miss_action = action; if (old_miss_action) { From 66cb64e292d21588bdb831f08a7ec0ff04d6380d Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Mon, 2 May 2022 10:51:30 +0300 Subject: [PATCH 49/80] net/mlx5e: TC NIC mode, fix tc chains miss table The cited commit changed promisc table to be created on demand with the highest priority in the NIC table replacing the vlan table, this caused tc NIC tables miss flow to skip the prmoisc table because it use vlan table as miss table. OVS offload in NIC mode use promisc by default so any unicast packet which will be handled by tc NIC tables miss flow will skip the promisc rule and will be dropped. Fix this by adding new empty table in new tc level with low priority and point the nic tc chain miss to it, the new table is managed so it will point to vlan table if promisc is disabled and to promisc table if enabled. Fixes: 1c46d7409f30 ("net/mlx5e: Optimize promiscuous mode") Signed-off-by: Maor Dickman Reviewed-by: Paul Blakey Reviewed-by: Ariel Levkovich Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/fs.h | 2 + .../net/ethernet/mellanox/mlx5/core/en_tc.c | 38 ++++++++++++++++++- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 2 +- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index 4130a871de61..6e3a90a959e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -12,6 +12,7 @@ struct mlx5e_post_act; enum { MLX5E_TC_FT_LEVEL = 0, MLX5E_TC_TTC_FT_LEVEL, + MLX5E_TC_MISS_LEVEL, }; struct mlx5e_tc_table { @@ -20,6 +21,7 @@ struct mlx5e_tc_table { */ struct mutex t_lock; struct mlx5_flow_table *t; + struct mlx5_flow_table *miss_t; struct mlx5_fs_chains *chains; struct mlx5e_post_act *post_act; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 49dea02a12d2..34bf11cdf90f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -4714,6 +4714,33 @@ static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev) return tc_tbl_size; } +static int mlx5e_tc_nic_create_miss_table(struct mlx5e_priv *priv) +{ + struct mlx5_flow_table **ft = &priv->fs.tc.miss_t; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns; + int err = 0; + + ft_attr.max_fte = 1; + ft_attr.autogroup.max_num_groups = 1; + ft_attr.level = MLX5E_TC_MISS_LEVEL; + ft_attr.prio = 0; + ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL); + + *ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(*ft)) { + err = PTR_ERR(*ft); + netdev_err(priv->netdev, "failed to create tc nic miss table err=%d\n", err); + } + + return err; +} + +static void mlx5e_tc_nic_destroy_miss_table(struct mlx5e_priv *priv) +{ + mlx5_destroy_flow_table(priv->fs.tc.miss_t); +} + int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { struct mlx5e_tc_table *tc = &priv->fs.tc; @@ -4746,19 +4773,23 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) } tc->mapping = chains_mapping; + err = mlx5e_tc_nic_create_miss_table(priv); + if (err) + goto err_chains; + if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED | MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED; attr.ns = MLX5_FLOW_NAMESPACE_KERNEL; attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev); attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS; - attr.default_ft = mlx5e_vlan_get_flowtable(priv->fs.vlan); + attr.default_ft = priv->fs.tc.miss_t; attr.mapping = chains_mapping; tc->chains = mlx5_chains_create(dev, &attr); if (IS_ERR(tc->chains)) { err = PTR_ERR(tc->chains); - goto err_chains; + goto err_miss; } tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL); @@ -4781,6 +4812,8 @@ err_reg: mlx5_tc_ct_clean(tc->ct); mlx5e_tc_post_act_destroy(tc->post_act); mlx5_chains_destroy(tc->chains); +err_miss: + mlx5e_tc_nic_destroy_miss_table(priv); err_chains: mapping_destroy(chains_mapping); err_mapping: @@ -4821,6 +4854,7 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) mlx5e_tc_post_act_destroy(tc->post_act); mapping_destroy(tc->mapping); mlx5_chains_destroy(tc->chains); + mlx5e_tc_nic_destroy_miss_table(priv); } int mlx5e_tc_ht_init(struct rhashtable *tc_ht) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 84caffe4c278..fdcf7f529330 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -114,7 +114,7 @@ #define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1) #define KERNEL_NIC_TC_NUM_PRIOS 1 -#define KERNEL_NIC_TC_NUM_LEVELS 2 +#define KERNEL_NIC_TC_NUM_LEVELS 3 #define ANCHOR_NUM_LEVELS 1 #define ANCHOR_NUM_PRIOS 1 From 1f2856cde64baa78475e6d3c601fb7b7f693a161 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Mon, 23 May 2022 19:12:21 +0300 Subject: [PATCH 50/80] net/mlx5: CT: Fix header-rewrite re-use for tupels Tuple entries that don't have nat configured for them which are added to the ct nat table will always create a new modify header, as we don't check for possible re-use on them. The same for tuples that have nat configured for them but are added to ct table. Fix the above by only avoiding wasteful re-use lookup for actually natted entries in ct nat table. Fixes: 7fac5c2eced3 ("net/mlx5: CT: Avoid reusing modify header context for natted entries") Signed-off-by: Paul Blakey Reviewed-by: Ariel Levkovich Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/tc_ct.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index bceea7a1589e..25f51f80a9b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -715,7 +715,7 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_flow_attr *attr, struct flow_rule *flow_rule, struct mlx5e_mod_hdr_handle **mh, - u8 zone_restore_id, bool nat) + u8 zone_restore_id, bool nat_table, bool has_nat) { DECLARE_MOD_HDR_ACTS_ACTIONS(actions_arr, MLX5_CT_MIN_MOD_ACTS); DECLARE_MOD_HDR_ACTS(mod_acts, actions_arr); @@ -731,11 +731,12 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, &attr->ct_attr.ct_labels_id); if (err) return -EOPNOTSUPP; - if (nat) { - err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, - &mod_acts); - if (err) - goto err_mapping; + if (nat_table) { + if (has_nat) { + err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, &mod_acts); + if (err) + goto err_mapping; + } ct_state |= MLX5_CT_STATE_NAT_BIT; } @@ -750,7 +751,7 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, if (err) goto err_mapping; - if (nat) { + if (nat_table && has_nat) { attr->modify_hdr = mlx5_modify_header_alloc(ct_priv->dev, ct_priv->ns_type, mod_acts.num_actions, mod_acts.actions); @@ -818,7 +819,9 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule, &zone_rule->mh, - zone_restore_id, nat); + zone_restore_id, + nat, + mlx5_tc_ct_entry_has_nat(entry)); if (err) { ct_dbg("Failed to create ct entry mod hdr"); goto err_mod_hdr; From 2e642afb61b24401a7ec819d27ddcd69c7c29784 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Fri, 15 Apr 2022 16:19:15 +0300 Subject: [PATCH 51/80] net/mlx5e: Disable softirq in mlx5e_activate_rq to avoid race condition When the driver activates the channels, it assumes NAPI isn't running yet. mlx5e_activate_rq posts a NOP WQE to ICOSQ to trigger a hardware interrupt and start NAPI, which will run mlx5e_alloc_rx_mpwqe and post UMR WQEs to ICOSQ to be able to receive packets with striding RQ. Unfortunately, a race condition is possible if NAPI is triggered by something else (for example, TX) at a bad timing, before mlx5e_activate_rq finishes. In this case, mlx5e_alloc_rx_mpwqe may post UMR WQEs to ICOSQ, and with the bad timing, the wqe_info of the first UMR may be overwritten by the wqe_info of the NOP posted by mlx5e_activate_rq. The consequence is that icosq->db.wqe_info[0].num_wqebbs will be changed from MLX5E_UMR_WQEBBS to 1, disrupting the integrity of the array-based linked list in wqe_info[]. mlx5e_poll_ico_cq will hang in an infinite loop after processing wqe_info[0], because after the corruption, the next item to be processed will be wqe_info[1], which is filled with zeros, and `sqcc += wi->num_wqebbs` will never move further. This commit fixes this race condition by using async_icosq to post the NOP and trigger the interrupt. async_icosq is always protected with a spinlock, eliminating the race condition. Fixes: bc77b240b3c5 ("net/mlx5e: Add fragmented memory support for RX multi packet WQE") Signed-off-by: Maxim Mikityanskiy Reported-by: Karsten Nielsen Reviewed-by: Tariq Toukan Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 4 ++++ .../net/ethernet/mellanox/mlx5/core/en/ptp.c | 1 + .../mellanox/mlx5/core/en/reporter_rx.c | 6 +++++ .../net/ethernet/mellanox/mlx5/core/en/trap.c | 1 + .../ethernet/mellanox/mlx5/core/en/xsk/pool.c | 1 + .../mellanox/mlx5/core/en/xsk/setup.c | 5 +--- .../net/ethernet/mellanox/mlx5/core/en_main.c | 24 +++++++++++++------ 7 files changed, 31 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 65d3c4865abf..b6c15efe92ad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -764,6 +764,7 @@ struct mlx5e_rq { u8 wq_type; u32 rqn; struct mlx5_core_dev *mdev; + struct mlx5e_channel *channel; u32 umr_mkey; struct mlx5e_dma_info wqe_overflow; @@ -1076,6 +1077,9 @@ void mlx5e_close_cq(struct mlx5e_cq *cq); int mlx5e_open_locked(struct net_device *netdev); int mlx5e_close_locked(struct net_device *netdev); +void mlx5e_trigger_napi_icosq(struct mlx5e_channel *c); +void mlx5e_trigger_napi_sched(struct napi_struct *napi); + int mlx5e_open_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs); void mlx5e_close_channels(struct mlx5e_channels *chs); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 335b20b6383b..047f88f09203 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -736,6 +736,7 @@ void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c) if (test_bit(MLX5E_PTP_STATE_RX, c->state)) { mlx5e_ptp_rx_set_fs(c->priv); mlx5e_activate_rq(&c->rq); + mlx5e_trigger_napi_sched(&c->napi); } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 2684e9da9f41..fc366e66d0b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -123,6 +123,8 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) xskrq->stats->recover++; } + mlx5e_trigger_napi_icosq(icosq->channel); + mutex_unlock(&icosq->channel->icosq_recovery_lock); return 0; @@ -166,6 +168,10 @@ static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx) clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state); mlx5e_activate_rq(rq); rq->stats->recover++; + if (rq->channel) + mlx5e_trigger_napi_icosq(rq->channel); + else + mlx5e_trigger_napi_sched(rq->cq.napi); return 0; out: clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c index 857840ab1e91..11f2a7fb72a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -179,6 +179,7 @@ static void mlx5e_activate_trap(struct mlx5e_trap *trap) { napi_enable(&trap->napi); mlx5e_activate_rq(&trap->rq); + mlx5e_trigger_napi_sched(&trap->napi); } void mlx5e_deactivate_trap(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c index 279cd8f4e79f..2c520394aa1d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c @@ -117,6 +117,7 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, goto err_remove_pool; mlx5e_activate_xsk(c); + mlx5e_trigger_napi_icosq(c); /* Don't wait for WQEs, because the newer xdpsock sample doesn't provide * any Fill Ring entries at the setup stage. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 3ad7f1301fa8..98ed9ef3a6bd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -64,6 +64,7 @@ static int mlx5e_init_xsk_rq(struct mlx5e_channel *c, rq->clock = &mdev->clock; rq->icosq = &c->icosq; rq->ix = c->ix; + rq->channel = c; rq->mdev = mdev; rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); rq->xdpsq = &c->rq_xdpsq; @@ -179,10 +180,6 @@ void mlx5e_activate_xsk(struct mlx5e_channel *c) mlx5e_reporter_icosq_resume_recovery(c); /* TX queue is created active. */ - - spin_lock_bh(&c->async_icosq_lock); - mlx5e_trigger_irq(&c->async_icosq); - spin_unlock_bh(&c->async_icosq_lock); } void mlx5e_deactivate_xsk(struct mlx5e_channel *c) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 05c015515cce..930a5402c817 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -475,6 +475,7 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param rq->clock = &mdev->clock; rq->icosq = &c->icosq; rq->ix = c->ix; + rq->channel = c; rq->mdev = mdev; rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); rq->xdpsq = &c->rq_xdpsq; @@ -1066,13 +1067,6 @@ err_free_rq: void mlx5e_activate_rq(struct mlx5e_rq *rq) { set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); - if (rq->icosq) { - mlx5e_trigger_irq(rq->icosq); - } else { - local_bh_disable(); - napi_schedule(rq->cq.napi); - local_bh_enable(); - } } void mlx5e_deactivate_rq(struct mlx5e_rq *rq) @@ -2227,6 +2221,20 @@ static int mlx5e_channel_stats_alloc(struct mlx5e_priv *priv, int ix, int cpu) return 0; } +void mlx5e_trigger_napi_icosq(struct mlx5e_channel *c) +{ + spin_lock_bh(&c->async_icosq_lock); + mlx5e_trigger_irq(&c->async_icosq); + spin_unlock_bh(&c->async_icosq_lock); +} + +void mlx5e_trigger_napi_sched(struct napi_struct *napi) +{ + local_bh_disable(); + napi_schedule(napi); + local_bh_enable(); +} + static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_params *params, struct mlx5e_channel_param *cparam, @@ -2308,6 +2316,8 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c) if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) mlx5e_activate_xsk(c); + + mlx5e_trigger_napi_icosq(c); } static void mlx5e_deactivate_channel(struct mlx5e_channel *c) From 3fc2a9e89b3508a5cc0c324f26d7b4740ba8c456 Mon Sep 17 00:00:00 2001 From: Changcheng Liu Date: Tue, 26 Apr 2022 21:28:14 +0800 Subject: [PATCH 52/80] net/mlx5: correct ECE offset in query qp output ECE field should be after opt_param_mask in query qp output. Fixes: 6b646a7e4af6 ("net/mlx5: Add ability to read and write ECE options") Signed-off-by: Changcheng Liu Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 78b3d3465dd7..2cd7d611e7b3 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -5176,12 +5176,11 @@ struct mlx5_ifc_query_qp_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x20]; - u8 ece[0x20]; + u8 reserved_at_40[0x40]; u8 opt_param_mask[0x20]; - u8 reserved_at_a0[0x20]; + u8 ece[0x20]; struct mlx5_ifc_qpc_bits qpc; From f6279f113ad593971999c877eb69dc3d36a75894 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Mon, 23 May 2022 15:39:13 +0300 Subject: [PATCH 53/80] net/mlx5e: Update netdev features after changing XDP state Some features (LRO, HW GRO) conflict with XDP. If there is an attempt to enable such features while XDP is active, they will be set to `off [requested on]`. In order to activate these features after XDP is turned off, the driver needs to call netdev_update_features(). This commit adds this missing call after XDP state changes. Fixes: cf6e34c8c22f ("net/mlx5e: Properly block LRO when XDP is enabled") Fixes: b0617e7b3500 ("net/mlx5e: Properly block HW GRO when XDP is enabled") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 930a5402c817..087952b84ccb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4569,6 +4569,11 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) unlock: mutex_unlock(&priv->state_lock); + + /* Need to fix some features. */ + if (!err) + netdev_update_features(netdev); + return err; } From 1c5de097bea31760c3f0467ac0c84ba0dc3525d5 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 19 Feb 2021 23:10:47 -0800 Subject: [PATCH 54/80] net/mlx5: Fix mlx5_get_next_dev() peer device matching In some use-cases, mlx5 instances will need to search for their peer device (the other port on the same HCA). For that, mlx5 device matching mechanism relied on auxiliary_find_device() to search, and used a bad matching callback function. This approach has two issues: 1) next_phys_dev() the matching function, assumed all devices are of the type mlx5_adev (mlx5 auxiliary device) which is wrong and could lead to crashes, this worked for a while, since only lately other drivers started registering auxiliary devices. 2) using the auxiliary class bus (auxiliary_find_device) to search for mlx5_core_dev devices, who are actually PCIe device instances, is wrong. This works since mlx5_core always has at least one mlx5_adev instance hanging around in the aux bus. As suggested by others we can fix 1. by comparing device names prefixes if they have the string "mlx5_core" in them, which is not a best practice ! but even with that fixed, still 2. needs fixing, we are trying to match pcie device peers so we should look in the right bus (pci bus), hence this fix. The fix: 1) search the pci bus for mlx5 peer devices, instead of the aux bus 2) to validated devices are the same type "mlx5_core_dev" compare if they have the same driver, which is bulletproof. This wouldn't have worked with the aux bus since the various mlx5 aux device types don't share the same driver, even if they share the same device wrapper struct (mlx5_adev) "which helped to find the parent device" Fixes: a925b5e309c9 ("net/mlx5: Register mlx5 devices to auxiliary virtual bus") Reported-by: Alexander Lobakin Reported-by: Maher Sanalla Signed-off-by: Saeed Mahameed Reviewed-by: Leon Romanovsky Reviewed-by: Mark Bloch Reviewed-by: Maher Sanalla --- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 34 +++++++++++++------ 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 11f7c03ae81b..0eb9d74547f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -571,18 +571,32 @@ static int _next_phys_dev(struct mlx5_core_dev *mdev, return 1; } +static void *pci_get_other_drvdata(struct device *this, struct device *other) +{ + if (this->driver != other->driver) + return NULL; + + return pci_get_drvdata(to_pci_dev(other)); +} + static int next_phys_dev(struct device *dev, const void *data) { - struct mlx5_adev *madev = container_of(dev, struct mlx5_adev, adev.dev); - struct mlx5_core_dev *mdev = madev->mdev; + struct mlx5_core_dev *mdev, *this = (struct mlx5_core_dev *)data; + + mdev = pci_get_other_drvdata(this->device, dev); + if (!mdev) + return 0; return _next_phys_dev(mdev, data); } static int next_phys_dev_lag(struct device *dev, const void *data) { - struct mlx5_adev *madev = container_of(dev, struct mlx5_adev, adev.dev); - struct mlx5_core_dev *mdev = madev->mdev; + struct mlx5_core_dev *mdev, *this = (struct mlx5_core_dev *)data; + + mdev = pci_get_other_drvdata(this->device, dev); + if (!mdev) + return 0; if (!MLX5_CAP_GEN(mdev, vport_group_manager) || !MLX5_CAP_GEN(mdev, lag_master) || @@ -596,19 +610,17 @@ static int next_phys_dev_lag(struct device *dev, const void *data) static struct mlx5_core_dev *mlx5_get_next_dev(struct mlx5_core_dev *dev, int (*match)(struct device *dev, const void *data)) { - struct auxiliary_device *adev; - struct mlx5_adev *madev; + struct device *next; if (!mlx5_core_is_pf(dev)) return NULL; - adev = auxiliary_find_device(NULL, dev, match); - if (!adev) + next = bus_find_device(&pci_bus_type, NULL, dev, match); + if (!next) return NULL; - madev = container_of(adev, struct mlx5_adev, adev); - put_device(&adev->dev); - return madev->mdev; + put_device(next); + return pci_get_drvdata(to_pci_dev(next)); } /* Must be called with intf_mutex held */ From fecf31ee395b0295f2d7260aa29946b7605f7c85 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 27 May 2022 09:56:18 +0200 Subject: [PATCH 55/80] netfilter: nf_tables: sanitize nft_set_desc_concat_parse() Add several sanity checks for nft_set_desc_concat_parse(): - validate desc->field_count not larger than desc->field_len array. - field length cannot be larger than desc->field_len (ie. U8_MAX) - total length of the concatenation cannot be larger than register array. Joint work with Florian Westphal. Fixes: f3a2181e16f1 ("netfilter: nf_tables: Support for sets with multiple ranged fields") Reported-by: Reviewed-by: Stefano Brivio Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f4f1d0a2da43..dcefb5f36b3a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4246,6 +4246,9 @@ static int nft_set_desc_concat_parse(const struct nlattr *attr, u32 len; int err; + if (desc->field_count >= ARRAY_SIZE(desc->field_len)) + return -E2BIG; + err = nla_parse_nested_deprecated(tb, NFTA_SET_FIELD_MAX, attr, nft_concat_policy, NULL); if (err < 0) @@ -4255,9 +4258,8 @@ static int nft_set_desc_concat_parse(const struct nlattr *attr, return -EINVAL; len = ntohl(nla_get_be32(tb[NFTA_SET_FIELD_LEN])); - - if (len * BITS_PER_BYTE / 32 > NFT_REG32_COUNT) - return -E2BIG; + if (!len || len > U8_MAX) + return -EINVAL; desc->field_len[desc->field_count++] = len; @@ -4268,7 +4270,8 @@ static int nft_set_desc_concat(struct nft_set_desc *desc, const struct nlattr *nla) { struct nlattr *attr; - int rem, err; + u32 num_regs = 0; + int rem, err, i; nla_for_each_nested(attr, nla, rem) { if (nla_type(attr) != NFTA_LIST_ELEM) @@ -4279,6 +4282,12 @@ static int nft_set_desc_concat(struct nft_set_desc *desc, return err; } + for (i = 0; i < desc->field_count; i++) + num_regs += DIV_ROUND_UP(desc->field_len[i], sizeof(u32)); + + if (num_regs > NFT_REG32_COUNT) + return -E2BIG; + return 0; } From 3923b1e4406680d57da7e873da77b1683035d83f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 30 May 2022 18:24:05 +0200 Subject: [PATCH 56/80] netfilter: nf_tables: hold mutex on netns pre_exit path clean_net() runs in workqueue while walking over the lists, grab mutex. Fixes: 767d1216bff8 ("netfilter: nftables: fix possible UAF over chains from packet path in netns") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index dcefb5f36b3a..f77414e13de1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9896,7 +9896,11 @@ static int __net_init nf_tables_init_net(struct net *net) static void __net_exit nf_tables_pre_exit_net(struct net *net) { + struct nftables_pernet *nft_net = nft_pernet(net); + + mutex_lock(&nft_net->commit_mutex); __nft_release_hooks(net); + mutex_unlock(&nft_net->commit_mutex); } static void __net_exit nf_tables_exit_net(struct net *net) From f9a43007d3f7ba76d5e7f9421094f00f2ef202f8 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 30 May 2022 18:24:06 +0200 Subject: [PATCH 57/80] netfilter: nf_tables: double hook unregistration in netns path __nft_release_hooks() is called from pre_netns exit path which unregisters the hooks, then the NETDEV_UNREGISTER event is triggered which unregisters the hooks again. [ 565.221461] WARNING: CPU: 18 PID: 193 at net/netfilter/core.c:495 __nf_unregister_net_hook+0x247/0x270 [...] [ 565.246890] CPU: 18 PID: 193 Comm: kworker/u64:1 Tainted: G E 5.18.0-rc7+ #27 [ 565.253682] Workqueue: netns cleanup_net [ 565.257059] RIP: 0010:__nf_unregister_net_hook+0x247/0x270 [...] [ 565.297120] Call Trace: [ 565.300900] [ 565.304683] nf_tables_flowtable_event+0x16a/0x220 [nf_tables] [ 565.308518] raw_notifier_call_chain+0x63/0x80 [ 565.312386] unregister_netdevice_many+0x54f/0xb50 Unregister and destroy netdev hook from netns pre_exit via kfree_rcu so the NETDEV_UNREGISTER path see unregistered hooks. Fixes: 767d1216bff8 ("netfilter: nftables: fix possible UAF over chains from packet path in netns") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 54 ++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f77414e13de1..746be13438ef 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -222,12 +222,18 @@ err_register: } static void nft_netdev_unregister_hooks(struct net *net, - struct list_head *hook_list) + struct list_head *hook_list, + bool release_netdev) { - struct nft_hook *hook; + struct nft_hook *hook, *next; - list_for_each_entry(hook, hook_list, list) + list_for_each_entry_safe(hook, next, hook_list, list) { nf_unregister_net_hook(net, &hook->ops); + if (release_netdev) { + list_del(&hook->list); + kfree_rcu(hook, rcu); + } + } } static int nf_tables_register_hook(struct net *net, @@ -253,9 +259,10 @@ static int nf_tables_register_hook(struct net *net, return nf_register_net_hook(net, &basechain->ops); } -static void nf_tables_unregister_hook(struct net *net, - const struct nft_table *table, - struct nft_chain *chain) +static void __nf_tables_unregister_hook(struct net *net, + const struct nft_table *table, + struct nft_chain *chain, + bool release_netdev) { struct nft_base_chain *basechain; const struct nf_hook_ops *ops; @@ -270,11 +277,19 @@ static void nf_tables_unregister_hook(struct net *net, return basechain->type->ops_unregister(net, ops); if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) - nft_netdev_unregister_hooks(net, &basechain->hook_list); + nft_netdev_unregister_hooks(net, &basechain->hook_list, + release_netdev); else nf_unregister_net_hook(net, &basechain->ops); } +static void nf_tables_unregister_hook(struct net *net, + const struct nft_table *table, + struct nft_chain *chain) +{ + return __nf_tables_unregister_hook(net, table, chain, false); +} + static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans) { struct nftables_pernet *nft_net = nft_pernet(net); @@ -7307,13 +7322,25 @@ static void nft_unregister_flowtable_hook(struct net *net, FLOW_BLOCK_UNBIND); } +static void __nft_unregister_flowtable_net_hooks(struct net *net, + struct list_head *hook_list, + bool release_netdev) +{ + struct nft_hook *hook, *next; + + list_for_each_entry_safe(hook, next, hook_list, list) { + nf_unregister_net_hook(net, &hook->ops); + if (release_netdev) { + list_del(&hook->list); + kfree_rcu(hook); + } + } +} + static void nft_unregister_flowtable_net_hooks(struct net *net, struct list_head *hook_list) { - struct nft_hook *hook; - - list_for_each_entry(hook, hook_list, list) - nf_unregister_net_hook(net, &hook->ops); + __nft_unregister_flowtable_net_hooks(net, hook_list, false); } static int nft_register_flowtable_net_hooks(struct net *net, @@ -9755,9 +9782,10 @@ static void __nft_release_hook(struct net *net, struct nft_table *table) struct nft_chain *chain; list_for_each_entry(chain, &table->chains, list) - nf_tables_unregister_hook(net, table, chain); + __nf_tables_unregister_hook(net, table, chain, true); list_for_each_entry(flowtable, &table->flowtables, list) - nft_unregister_flowtable_net_hooks(net, &flowtable->hook_list); + __nft_unregister_flowtable_net_hooks(net, &flowtable->hook_list, + true); } static void __nft_release_hooks(struct net *net) From f1896d45fee90a868dfd0b3cc119e4cc1bbd7ca2 Mon Sep 17 00:00:00 2001 From: wenxu Date: Wed, 25 May 2022 21:25:45 -0400 Subject: [PATCH 58/80] netfilter: flowtable: fix missing FLOWI_FLAG_ANYSRC flag The nf_flow_table gets route through ip_route_output_key. If the saddr is not local one, then FLOWI_FLAG_ANYSRC flag should be set. Without this flag, the route lookup for other_dst will fail. Fixes: 3412e1641828 (netfilter: flowtable: nft_flow_route use more data for reverse route) Signed-off-by: wenxu Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_flow_offload.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index a16cf47199b7..20e5f372dd76 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -237,6 +237,7 @@ static int nft_flow_route(const struct nft_pktinfo *pkt, fl.u.ip4.flowi4_iif = this_dst->dev->ifindex; fl.u.ip4.flowi4_tos = RT_TOS(ip_hdr(pkt->skb)->tos); fl.u.ip4.flowi4_mark = pkt->skb->mark; + fl.u.ip4.flowi4_flags = FLOWI_FLAG_ANYSRC; break; case NFPROTO_IPV6: fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6; @@ -245,6 +246,7 @@ static int nft_flow_route(const struct nft_pktinfo *pkt, fl.u.ip6.flowi6_iif = this_dst->dev->ifindex; fl.u.ip6.flowlabel = ip6_flowinfo(ipv6_hdr(pkt->skb)); fl.u.ip6.flowi6_mark = pkt->skb->mark; + fl.u.ip6.flowi6_flags = FLOWI_FLAG_ANYSRC; break; } From 97629b237a8cb7ac655c3969b8d5e57300ff6598 Mon Sep 17 00:00:00 2001 From: wenxu Date: Wed, 25 May 2022 21:25:46 -0400 Subject: [PATCH 59/80] netfilter: flowtable: fix nft_flow_route source address for nat case For snat and dnat cases, the saddr should be taken from reverse tuple. Fixes: 3412e1641828 (netfilter: flowtable: nft_flow_route use more data for reverse route) Signed-off-by: wenxu Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_flow_offload.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 20e5f372dd76..a25c88bc8b75 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -232,7 +232,7 @@ static int nft_flow_route(const struct nft_pktinfo *pkt, switch (nft_pf(pkt)) { case NFPROTO_IPV4: fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip; - fl.u.ip4.saddr = ct->tuplehash[dir].tuple.dst.u3.ip; + fl.u.ip4.saddr = ct->tuplehash[!dir].tuple.src.u3.ip; fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex; fl.u.ip4.flowi4_iif = this_dst->dev->ifindex; fl.u.ip4.flowi4_tos = RT_TOS(ip_hdr(pkt->skb)->tos); @@ -241,7 +241,7 @@ static int nft_flow_route(const struct nft_pktinfo *pkt, break; case NFPROTO_IPV6: fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6; - fl.u.ip6.saddr = ct->tuplehash[dir].tuple.dst.u3.in6; + fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.src.u3.in6; fl.u.ip6.flowi6_oif = nft_in(pkt)->ifindex; fl.u.ip6.flowi6_iif = this_dst->dev->ifindex; fl.u.ip6.flowlabel = ip6_flowinfo(ipv6_hdr(pkt->skb)); From 2e8728c955ce0624b958eee6e030a37aca3a5d86 Mon Sep 17 00:00:00 2001 From: Guoju Fang Date: Sat, 28 May 2022 18:16:28 +0800 Subject: [PATCH 60/80] net: sched: add barrier to fix packet stuck problem for lockless qdisc In qdisc_run_end(), the spin_unlock() only has store-release semantic, which guarantees all earlier memory access are visible before it. But the subsequent test_bit() has no barrier semantics so may be reordered ahead of the spin_unlock(). The store-load reordering may cause a packet stuck problem. The concurrent operations can be described as below, CPU 0 | CPU 1 qdisc_run_end() | qdisc_run_begin() . | . ----> /* may be reorderd here */ | . | . | . | spin_unlock() | set_bit() | . | smp_mb__after_atomic() ---- test_bit() | spin_trylock() . | . Consider the following sequence of events: CPU 0 reorder test_bit() ahead and see MISSED = 0 CPU 1 calls set_bit() CPU 1 calls spin_trylock() and return fail CPU 0 executes spin_unlock() At the end of the sequence, CPU 0 calls spin_unlock() and does nothing because it see MISSED = 0. The skb on CPU 1 has beed enqueued but no one take it, until the next cpu pushing to the qdisc (if ever ...) will notice and dequeue it. This patch fix this by adding one explicit barrier. As spin_unlock() and test_bit() ordering is a store-load ordering, a full memory barrier smp_mb() is needed here. Fixes: a90c57f2cedd ("net: sched: fix packet stuck problem for lockless qdisc") Signed-off-by: Guoju Fang Link: https://lore.kernel.org/r/20220528101628.120193-1-gjfang@linux.alibaba.com Signed-off-by: Jakub Kicinski --- include/net/sch_generic.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 80973ce820f3..d6cf5116b5f9 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -209,6 +209,12 @@ static inline void qdisc_run_end(struct Qdisc *qdisc) if (qdisc->flags & TCQ_F_NOLOCK) { spin_unlock(&qdisc->seqlock); + /* spin_unlock() only has store-release semantic. The unlock + * and test_bit() ordering is a store-load ordering, so a full + * memory barrier is needed here. + */ + smp_mb(); + if (unlikely(test_bit(__QDISC_STATE_MISSED, &qdisc->state))) __netif_schedule(qdisc); From 0a375c822497ed6ad6b5da0792a12a6f1af10c0b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 May 2022 14:37:13 -0700 Subject: [PATCH 61/80] tcp: tcp_rtx_synack() can be called from process context Laurent reported the enclosed report [1] This bug triggers with following coditions: 0) Kernel built with CONFIG_DEBUG_PREEMPT=y 1) A new passive FastOpen TCP socket is created. This FO socket waits for an ACK coming from client to be a complete ESTABLISHED one. 2) A socket operation on this socket goes through lock_sock() release_sock() dance. 3) While the socket is owned by the user in step 2), a retransmit of the SYN is received and stored in socket backlog. 4) At release_sock() time, the socket backlog is processed while in process context. 5) A SYNACK packet is cooked in response of the SYN retransmit. 6) -> tcp_rtx_synack() is called in process context. Before blamed commit, tcp_rtx_synack() was always called from BH handler, from a timer handler. Fix this by using TCP_INC_STATS() & NET_INC_STATS() which do not assume caller is in non preemptible context. [1] BUG: using __this_cpu_add() in preemptible [00000000] code: epollpep/2180 caller is tcp_rtx_synack.part.0+0x36/0xc0 CPU: 10 PID: 2180 Comm: epollpep Tainted: G OE 5.16.0-0.bpo.4-amd64 #1 Debian 5.16.12-1~bpo11+1 Hardware name: Supermicro SYS-5039MC-H8TRF/X11SCD-F, BIOS 1.7 11/23/2021 Call Trace: dump_stack_lvl+0x48/0x5e check_preemption_disabled+0xde/0xe0 tcp_rtx_synack.part.0+0x36/0xc0 tcp_rtx_synack+0x8d/0xa0 ? kmem_cache_alloc+0x2e0/0x3e0 ? apparmor_file_alloc_security+0x3b/0x1f0 inet_rtx_syn_ack+0x16/0x30 tcp_check_req+0x367/0x610 tcp_rcv_state_process+0x91/0xf60 ? get_nohz_timer_target+0x18/0x1a0 ? lock_timer_base+0x61/0x80 ? preempt_count_add+0x68/0xa0 tcp_v4_do_rcv+0xbd/0x270 __release_sock+0x6d/0xb0 release_sock+0x2b/0x90 sock_setsockopt+0x138/0x1140 ? __sys_getsockname+0x7e/0xc0 ? aa_sk_perm+0x3e/0x1a0 __sys_setsockopt+0x198/0x1e0 __x64_sys_setsockopt+0x21/0x30 do_syscall_64+0x38/0xc0 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: 168a8f58059a ("tcp: TCP Fast Open Server - main code path") Signed-off-by: Eric Dumazet Reported-by: Laurent Fasnacht Acked-by: Neal Cardwell Link: https://lore.kernel.org/r/20220530213713.601888-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b4b2284ed4a2..1c054431e358 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -4115,8 +4115,8 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL, NULL); if (!res) { - __TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); if (unlikely(tcp_passive_fastopen(sk))) tcp_sk(sk)->total_retrans++; trace_tcp_retransmit_synack(sk, req); From c4caa500ffebf64795d1c0f6f9d6f179b502c6b7 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 31 May 2022 14:37:27 +0800 Subject: [PATCH 62/80] bonding: guard ns_targets by CONFIG_IPV6 Guard ns_targets in struct bond_params by CONFIG_IPV6, which could save 256 bytes if IPv6 not configed. Also add this protection for function bond_is_ip6_target_ok() and bond_get_targets_ip6(). Remove the IS_ENABLED() check for bond_opts[] as this will make BOND_OPT_NS_TARGETS uninitialized if CONFIG_IPV6 not enabled. Add a dummy bond_option_ns_ip6_targets_set() for this situation. Fixes: 4e24be018eb9 ("bonding: add new parameter ns_targets") Signed-off-by: Hangbin Liu Acked-by: Jonathan Toppins Link: https://lore.kernel.org/r/20220531063727.224043-1-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/bonding/bond_main.c | 2 ++ drivers/net/bonding/bond_options.c | 10 ++++++---- include/net/bonding.h | 6 ++++++ 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 3b7baaeae82c..f85372adf042 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -6159,7 +6159,9 @@ static int bond_check_params(struct bond_params *params) strscpy_pad(params->primary, primary, sizeof(params->primary)); memcpy(params->arp_targets, arp_target, sizeof(arp_target)); +#if IS_ENABLED(CONFIG_IPV6) memset(params->ns_targets, 0, sizeof(struct in6_addr) * BOND_MAX_NS_TARGETS); +#endif return 0; } diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 64f7db2627ce..1f8323ad5282 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -34,10 +34,8 @@ static int bond_option_arp_ip_target_add(struct bonding *bond, __be32 target); static int bond_option_arp_ip_target_rem(struct bonding *bond, __be32 target); static int bond_option_arp_ip_targets_set(struct bonding *bond, const struct bond_opt_value *newval); -#if IS_ENABLED(CONFIG_IPV6) static int bond_option_ns_ip6_targets_set(struct bonding *bond, const struct bond_opt_value *newval); -#endif static int bond_option_arp_validate_set(struct bonding *bond, const struct bond_opt_value *newval); static int bond_option_arp_all_targets_set(struct bonding *bond, @@ -299,7 +297,6 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = { .flags = BOND_OPTFLAG_RAWVAL, .set = bond_option_arp_ip_targets_set }, -#if IS_ENABLED(CONFIG_IPV6) [BOND_OPT_NS_TARGETS] = { .id = BOND_OPT_NS_TARGETS, .name = "ns_ip6_target", @@ -307,7 +304,6 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = { .flags = BOND_OPTFLAG_RAWVAL, .set = bond_option_ns_ip6_targets_set }, -#endif [BOND_OPT_DOWNDELAY] = { .id = BOND_OPT_DOWNDELAY, .name = "downdelay", @@ -1254,6 +1250,12 @@ static int bond_option_ns_ip6_targets_set(struct bonding *bond, return 0; } +#else +static int bond_option_ns_ip6_targets_set(struct bonding *bond, + const struct bond_opt_value *newval) +{ + return -EPERM; +} #endif static int bond_option_arp_validate_set(struct bonding *bond, diff --git a/include/net/bonding.h b/include/net/bonding.h index b14f4c0b4e9e..cb904d356e31 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -149,7 +149,9 @@ struct bond_params { struct reciprocal_value reciprocal_packets_per_slave; u16 ad_actor_sys_prio; u16 ad_user_port_key; +#if IS_ENABLED(CONFIG_IPV6) struct in6_addr ns_targets[BOND_MAX_NS_TARGETS]; +#endif /* 2 bytes of padding : see ether_addr_equal_64bits() */ u8 ad_actor_system[ETH_ALEN + 2]; @@ -503,12 +505,14 @@ static inline int bond_is_ip_target_ok(__be32 addr) return !ipv4_is_lbcast(addr) && !ipv4_is_zeronet(addr); } +#if IS_ENABLED(CONFIG_IPV6) static inline int bond_is_ip6_target_ok(struct in6_addr *addr) { return !ipv6_addr_any(addr) && !ipv6_addr_loopback(addr) && !ipv6_addr_is_multicast(addr); } +#endif /* Get the oldest arp which we've received on this slave for bond's * arp_targets. @@ -746,6 +750,7 @@ static inline int bond_get_targets_ip(__be32 *targets, __be32 ip) return -1; } +#if IS_ENABLED(CONFIG_IPV6) static inline int bond_get_targets_ip6(struct in6_addr *targets, struct in6_addr *ip) { int i; @@ -758,6 +763,7 @@ static inline int bond_get_targets_ip6(struct in6_addr *targets, struct in6_addr return -1; } +#endif /* exported from bond_main.c */ extern unsigned int bond_net_id; From 2965c4cdf7ad9ce0796fac5e57debb9519ea721e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Jun 2022 09:19:36 +0200 Subject: [PATCH 63/80] wifi: mac80211: fix use-after-free in chanctx code In ieee80211_vif_use_reserved_context(), when we have an old context and the new context's replace_state is set to IEEE80211_CHANCTX_REPLACE_NONE, we free the old context in ieee80211_vif_use_reserved_reassign(). Therefore, we cannot check the old_ctx anymore, so we should set it to NULL after this point. However, since the new_ctx replace state is clearly not IEEE80211_CHANCTX_REPLACES_OTHER, we're not going to do anything else in this function and can just return to avoid accessing the freed old_ctx. Cc: stable@vger.kernel.org Fixes: 5bcae31d9cb1 ("mac80211: implement multi-vif in-place reservations") Signed-off-by: Johannes Berg Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220601091926.df419d91b165.I17a9b3894ff0b8323ce2afdb153b101124c821e5@changeid --- net/mac80211/chan.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index e3452445b363..d8246e00a10b 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -1749,12 +1749,9 @@ int ieee80211_vif_use_reserved_context(struct ieee80211_sub_if_data *sdata) if (new_ctx->replace_state == IEEE80211_CHANCTX_REPLACE_NONE) { if (old_ctx) - err = ieee80211_vif_use_reserved_reassign(sdata); - else - err = ieee80211_vif_use_reserved_assign(sdata); + return ieee80211_vif_use_reserved_reassign(sdata); - if (err) - return err; + return ieee80211_vif_use_reserved_assign(sdata); } /* From f3d671c711097a133bc36bd2bde52f1fcca783a6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 31 May 2022 10:28:45 +0300 Subject: [PATCH 64/80] octeontx2-af: fix error code in is_valid_offset() The is_valid_offset() function returns success/true if the call to validate_and_get_cpt_blkaddr() fails. Fixes: ecad2ce8c48f ("octeontx2-af: cn10k: Add mailbox to configure reassembly timeout") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/YpXDrTPb8qV01JSP@kili Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c index a79201a9a6f0..a9da85e418a4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -579,7 +579,7 @@ static bool is_valid_offset(struct rvu *rvu, struct cpt_rd_wr_reg_msg *req) blkaddr = validate_and_get_cpt_blkaddr(req->blkaddr); if (blkaddr < 0) - return blkaddr; + return false; /* Registers that can be accessed from PF/VF */ if ((offset & 0xFF000) == CPT_AF_LFX_CTL(0) || From 196a888ca6571deb344468e1d7138e3273206335 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Tue, 31 May 2022 15:45:00 +0800 Subject: [PATCH 65/80] macsec: fix UAF bug for real_dev Create a new macsec device but not get reference to real_dev. That can not ensure that real_dev is freed after macsec. That will trigger the UAF bug for real_dev as following: ================================================================== BUG: KASAN: use-after-free in macsec_get_iflink+0x5f/0x70 drivers/net/macsec.c:3662 Call Trace: ... macsec_get_iflink+0x5f/0x70 drivers/net/macsec.c:3662 dev_get_iflink+0x73/0xe0 net/core/dev.c:637 default_operstate net/core/link_watch.c:42 [inline] rfc2863_policy+0x233/0x2d0 net/core/link_watch.c:54 linkwatch_do_dev+0x2a/0x150 net/core/link_watch.c:161 Allocated by task 22209: ... alloc_netdev_mqs+0x98/0x1100 net/core/dev.c:10549 rtnl_create_link+0x9d7/0xc00 net/core/rtnetlink.c:3235 veth_newlink+0x20e/0xa90 drivers/net/veth.c:1748 Freed by task 8: ... kfree+0xd6/0x4d0 mm/slub.c:4552 kvfree+0x42/0x50 mm/util.c:615 device_release+0x9f/0x240 drivers/base/core.c:2229 kobject_cleanup lib/kobject.c:673 [inline] kobject_release lib/kobject.c:704 [inline] kref_put include/linux/kref.h:65 [inline] kobject_put+0x1c8/0x540 lib/kobject.c:721 netdev_run_todo+0x72e/0x10b0 net/core/dev.c:10327 After commit faab39f63c1f ("net: allow out-of-order netdev unregistration") and commit e5f80fcf869a ("ipv6: give an IPv6 dev to blackhole_netdev"), we can add dev_hold_track() in macsec_dev_init() and dev_put_track() in macsec_free_netdev() to fix the problem. Fixes: 2bce1ebed17d ("macsec: fix refcnt leak in module exit routine") Reported-by: syzbot+d0e94b65ac259c29ce7a@syzkaller.appspotmail.com Signed-off-by: Ziyang Xuan Link: https://lore.kernel.org/r/20220531074500.1272846-1-william.xuanziyang@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/macsec.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 832f09ac075e..817577e713d7 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -99,6 +99,7 @@ struct pcpu_secy_stats { * struct macsec_dev - private data * @secy: SecY config * @real_dev: pointer to underlying netdevice + * @dev_tracker: refcount tracker for @real_dev reference * @stats: MACsec device stats * @secys: linked list of SecY's on the underlying device * @gro_cells: pointer to the Generic Receive Offload cell @@ -107,6 +108,7 @@ struct pcpu_secy_stats { struct macsec_dev { struct macsec_secy secy; struct net_device *real_dev; + netdevice_tracker dev_tracker; struct pcpu_secy_stats __percpu *stats; struct list_head secys; struct gro_cells gro_cells; @@ -3459,6 +3461,9 @@ static int macsec_dev_init(struct net_device *dev) if (is_zero_ether_addr(dev->broadcast)) memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len); + /* Get macsec's reference to real_dev */ + dev_hold_track(real_dev, &macsec->dev_tracker, GFP_KERNEL); + return 0; } @@ -3704,6 +3709,8 @@ static void macsec_free_netdev(struct net_device *dev) free_percpu(macsec->stats); free_percpu(macsec->secy.tx_sc.stats); + /* Get rid of the macsec's reference to real_dev */ + dev_put_track(macsec->real_dev, &macsec->dev_tracker); } static void macsec_setup(struct net_device *dev) From e6652a8ef3e64d953168a95878fe29b934ad78ac Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 31 May 2022 11:45:44 +0300 Subject: [PATCH 66/80] net: ping6: Fix ping -6 with interface name When passing interface parameter to ping -6: $ ping -6 ::11:141:84:9 -I eth2 Results in: PING ::11:141:84:10(::11:141:84:10) from ::11:141:84:9 eth2: 56 data bytes ping: sendmsg: Invalid argument ping: sendmsg: Invalid argument Initialize the fl6's outgoing interface (OIF) before triggering ip6_datagram_send_ctl. Don't wipe fl6 after ip6_datagram_send_ctl() as changes in fl6 that may happen in the function are overwritten explicitly. Update comment accordingly. Fixes: 13651224c00b ("net: ping6: support setting basic SOL_IPV6 options via cmsg") Signed-off-by: Aya Levin Reviewed-by: Gal Pressman Reviewed-by: Saeed Mahameed Signed-off-by: Tariq Toukan Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20220531084544.15126-1-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- net/ipv6/ping.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index ff033d16549e..ecf3a553a0dc 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -101,6 +101,9 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc6.sockc.tsflags = sk->sk_tsflags; ipc6.sockc.mark = sk->sk_mark; + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_oif = oif; + if (msg->msg_controllen) { struct ipv6_txoptions opt = {}; @@ -112,17 +115,14 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return err; /* Changes to txoptions and flow info are not implemented, yet. - * Drop the options, fl6 is wiped below. + * Drop the options. */ ipc6.opt = NULL; } - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_ICMPV6; fl6.saddr = np->saddr; fl6.daddr = *daddr; - fl6.flowi6_oif = oif; fl6.flowi6_mark = ipc6.sockc.mark; fl6.flowi6_uid = sk->sk_uid; fl6.fl6_icmp_type = user_icmph.icmp6_type; From 86360030cc5117596626bef1d937277cd2bebe05 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 31 May 2022 15:10:05 +0300 Subject: [PATCH 67/80] net/sched: act_api: fix error code in tcf_ct_flow_table_fill_tuple_ipv6() The tcf_ct_flow_table_fill_tuple_ipv6() function is supposed to return false on failure. It should not return negatives because that means succes/true. Fixes: fcb6aa86532c ("act_ct: Support GRE offload") Signed-off-by: Dan Carpenter Acked-by: Toshiaki Makita Link: https://lore.kernel.org/r/YpYFnbDxFl6tQ3Bn@kili Signed-off-by: Paolo Abeni --- net/sched/act_ct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 8af9d6e5ba61..e013253b10d1 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -548,7 +548,7 @@ tcf_ct_flow_table_fill_tuple_ipv6(struct sk_buff *skb, break; #endif default: - return -1; + return false; } if (ip6h->hop_limit <= 1) From 8d3398ba2a0d1e25690f830192b7834acab003ec Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 31 May 2022 11:43:45 +0200 Subject: [PATCH 68/80] socket: Don't use u8 type in uapi socket.h Use plain 255 instead, which also avoid introducing an additional header dependency on Fixes: 26859240e4ee ("txhash: Add socket option to control TX hash rethink behavior") Signed-off-by: Tobias Klauser Link: https://lore.kernel.org/r/20220531094345.13801-1-tklauser@distanz.ch Signed-off-by: Jakub Kicinski --- include/uapi/linux/socket.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h index 51d6bb2f6765..d3fcd3b5ec53 100644 --- a/include/uapi/linux/socket.h +++ b/include/uapi/linux/socket.h @@ -31,7 +31,7 @@ struct __kernel_sockaddr_storage { #define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK) -#define SOCK_TXREHASH_DEFAULT ((u8)-1) +#define SOCK_TXREHASH_DEFAULT 255 #define SOCK_TXREHASH_DISABLED 0 #define SOCK_TXREHASH_ENABLED 1 From 183614bff5fc1682240ab7604201e18afb118092 Mon Sep 17 00:00:00 2001 From: Martin Habets Date: Wed, 1 Jun 2022 08:36:02 +0200 Subject: [PATCH 69/80] sfc/siena: fix considering that all channels have TX queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Normally, all channels have RX and TX queues, but this is not true if modparam efx_separate_tx_channels=1 is used. In that cases, some channels only have RX queues and others only TX queues (or more preciselly, they have them allocated, but not initialized). Fix efx_channel_has_tx_queues to return the correct value for this case too. This has been already done for sfc, do it also for sfc_siena. Messages shown at probe time before the fix: sfc 0000:03:00.0 ens6f0np0: MC command 0x82 inlen 544 failed rc=-22 (raw=0) arg=0 ------------[ cut here ]------------ netdevice: ens6f0np0: failed to initialise TXQ -1 WARNING: CPU: 1 PID: 626 at drivers/net/ethernet/sfc/ef10.c:2393 efx_ef10_tx_init+0x201/0x300 [sfc] [...] stripped RIP: 0010:efx_ef10_tx_init+0x201/0x300 [sfc] [...] stripped Call Trace: efx_init_tx_queue+0xaa/0xf0 [sfc] efx_start_channels+0x49/0x120 [sfc] efx_start_all+0x1f8/0x430 [sfc] efx_net_open+0x5a/0xe0 [sfc] __dev_open+0xd0/0x190 __dev_change_flags+0x1b3/0x220 dev_change_flags+0x21/0x60 [...] stripped Messages shown at remove time before the fix: sfc 0000:03:00.0 ens6f0np0: failed to flush 10 queues sfc 0000:03:00.0 ens6f0np0: failed to flush queues Fixes: 8700aff08984 ("sfc: fix channel allocation with brute force") Reported-by: Tianhao Zhao Signed-off-by: Martin Habets Tested-by: Íñigo Huguet Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/siena/net_driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/siena/net_driver.h b/drivers/net/ethernet/sfc/siena/net_driver.h index a8f6c3699c8b..c4a97fbf4672 100644 --- a/drivers/net/ethernet/sfc/siena/net_driver.h +++ b/drivers/net/ethernet/sfc/siena/net_driver.h @@ -1529,7 +1529,7 @@ static inline bool efx_channel_is_xdp_tx(struct efx_channel *channel) static inline bool efx_channel_has_tx_queues(struct efx_channel *channel) { - return true; + return channel && channel->channel >= channel->efx->tx_channel_offset; } static inline unsigned int efx_channel_num_tx_queues(struct efx_channel *channel) From 25bde571b4a83a73bb37beacb833bbd9c38b43c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= Date: Wed, 1 Jun 2022 08:36:03 +0200 Subject: [PATCH 70/80] sfc/siena: fix wrong tx channel offset with efx_separate_tx_channels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tx_channel_offset is calculated in efx_allocate_msix_channels, but it is also calculated again in efx_set_channels because it was originally done there, and when efx_allocate_msix_channels was introduced it was forgotten to be removed from efx_set_channels. Moreover, the old calculation is wrong when using efx_separate_tx_channels because now we can have XDP channels after the TX channels, so n_channels - n_tx_channels doesn't point to the first TX channel. Remove the old calculation from efx_set_channels, and add the initialization of this variable if MSI or legacy interrupts are used, next to the initialization of the rest of the related variables, where it was missing. This has been already done for sfc, do it also for sfc_siena. Fixes: 3990a8fffbda ("sfc: allocate channels for XDP tx queues") Reported-by: Tianhao Zhao Signed-off-by: Íñigo Huguet Acked-by: Martin Habets Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/siena/efx_channels.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/sfc/siena/efx_channels.c b/drivers/net/ethernet/sfc/siena/efx_channels.c index 2465cf4d505c..017212a40df3 100644 --- a/drivers/net/ethernet/sfc/siena/efx_channels.c +++ b/drivers/net/ethernet/sfc/siena/efx_channels.c @@ -299,6 +299,7 @@ int efx_siena_probe_interrupts(struct efx_nic *efx) efx->n_channels = 1; efx->n_rx_channels = 1; efx->n_tx_channels = 1; + efx->tx_channel_offset = 0; efx->n_xdp_channels = 0; efx->xdp_channel_offset = efx->n_channels; rc = pci_enable_msi(efx->pci_dev); @@ -319,6 +320,7 @@ int efx_siena_probe_interrupts(struct efx_nic *efx) efx->n_channels = 1 + (efx_siena_separate_tx_channels ? 1 : 0); efx->n_rx_channels = 1; efx->n_tx_channels = 1; + efx->tx_channel_offset = 1; efx->n_xdp_channels = 0; efx->xdp_channel_offset = efx->n_channels; efx->legacy_irq = efx->pci_dev->irq; @@ -958,10 +960,6 @@ int efx_siena_set_channels(struct efx_nic *efx) struct efx_channel *channel; int rc; - efx->tx_channel_offset = - efx_siena_separate_tx_channels ? - efx->n_channels - efx->n_tx_channels : 0; - if (efx->xdp_tx_queue_count) { EFX_WARN_ON_PARANOID(efx->xdp_tx_queues); From 9f4fc18bf285f20c1498f8fcfb586fa70a070fb5 Mon Sep 17 00:00:00 2001 From: Slark Xiao Date: Wed, 1 Jun 2022 12:05:31 +0800 Subject: [PATCH 71/80] net: usb: qmi_wwan: Add support for Cinterion MV31 with new baseline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding support for Cinterion device MV31 with Qualcomm new baseline. Use different PIDs to separate it from previous base line products. All interfaces settings keep same as previous. T: Bus=03 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 7 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=1e2d ProdID=00b9 Rev=04.14 S: Manufacturer=Cinterion S: Product=Cinterion PID 0x00B9 USB Mobile Broadband S: SerialNumber=90418e79 C: #Ifs= 4 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#=0x0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan I: If#=0x1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option I: If#=0x2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=option I: If#=0x3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option Signed-off-by: Slark Xiao Acked-by: Bjørn Mork Link: https://lore.kernel.org/r/20220601040531.6016-1-slark_xiao@163.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index a659d6fb0b12..571a399c195d 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1389,6 +1389,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1e2d, 0x0083, 4)}, /* Cinterion PHxx,PXxx (1 RmNet + USB Audio)*/ {QMI_QUIRK_SET_DTR(0x1e2d, 0x00b0, 4)}, /* Cinterion CLS8 */ {QMI_FIXED_INTF(0x1e2d, 0x00b7, 0)}, /* Cinterion MV31 RmNet */ + {QMI_FIXED_INTF(0x1e2d, 0x00b9, 0)}, /* Cinterion MV31 RmNet based on new baseline */ {QMI_FIXED_INTF(0x413c, 0x81a2, 8)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */ {QMI_FIXED_INTF(0x413c, 0x81a3, 8)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */ {QMI_FIXED_INTF(0x413c, 0x81a4, 8)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */ From 7d8a3a477b3e25ada8dc71d22048c2ea417209a0 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Mon, 30 May 2022 23:21:58 +0800 Subject: [PATCH 72/80] ax25: Fix ax25 session cleanup problems There are session cleanup problems in ax25_release() and ax25_disconnect(). If we setup a session and then disconnect, the disconnected session is still in "LISTENING" state that is shown below. Active AX.25 sockets Dest Source Device State Vr/Vs Send-Q Recv-Q DL9SAU-4 DL9SAU-3 ??? LISTENING 000/000 0 0 DL9SAU-3 DL9SAU-4 ??? LISTENING 000/000 0 0 The first reason is caused by del_timer_sync() in ax25_release(). The timers of ax25 are used for correct session cleanup. If we use ax25_release() to close ax25 sessions and ax25_dev is not null, the del_timer_sync() functions in ax25_release() will execute. As a result, the sessions could not be cleaned up correctly, because the timers have stopped. In order to solve this problem, this patch adds a device_up flag in ax25_dev in order to judge whether the device is up. If there are sessions to be cleaned up, the del_timer_sync() in ax25_release() will not execute. What's more, we add ax25_cb_del() in ax25_kill_by_device(), because the timers have been stopped and there are no functions that could delete ax25_cb if we do not call ax25_release(). Finally, we reorder the position of ax25_list_lock in ax25_cb_del() in order to synchronize among different functions that call ax25_cb_del(). The second reason is caused by improper check in ax25_disconnect(). The incoming ax25 sessions which ax25->sk is null will close heartbeat timer, because the check "if(!ax25->sk || ..)" is satisfied. As a result, the session could not be cleaned up properly. In order to solve this problem, this patch changes the improper check to "if(ax25->sk && ..)" in ax25_disconnect(). What`s more, the ax25_disconnect() may be called twice, which is not necessary. For example, ax25_kill_by_device() calls ax25_disconnect() and sets ax25->state to AX25_STATE_0, but ax25_release() calls ax25_disconnect() again. In order to solve this problem, this patch add a check in ax25_release(). If the flag of ax25->sk equals to SOCK_DEAD, the ax25_disconnect() in ax25_release() should not be executed. Fixes: 82e31755e55f ("ax25: Fix UAF bugs in ax25 timers") Fixes: 8a367e74c012 ("ax25: Fix segfault after sock connection timeout") Reported-and-tested-by: Thomas Osterried Signed-off-by: Duoming Zhou Link: https://lore.kernel.org/r/20220530152158.108619-1-duoming@zju.edu.cn Signed-off-by: Paolo Abeni --- include/net/ax25.h | 1 + net/ax25/af_ax25.c | 27 +++++++++++++++++---------- net/ax25/ax25_dev.c | 1 + net/ax25/ax25_subr.c | 2 +- 4 files changed, 20 insertions(+), 11 deletions(-) diff --git a/include/net/ax25.h b/include/net/ax25.h index 0f9790c455bb..a427a05672e2 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -228,6 +228,7 @@ typedef struct ax25_dev { ax25_dama_info dama; #endif refcount_t refcount; + bool device_up; } ax25_dev; typedef struct ax25_cb { diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 116481e4da82..95393bb2760b 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -62,12 +62,12 @@ static void ax25_free_sock(struct sock *sk) */ static void ax25_cb_del(ax25_cb *ax25) { + spin_lock_bh(&ax25_list_lock); if (!hlist_unhashed(&ax25->ax25_node)) { - spin_lock_bh(&ax25_list_lock); hlist_del_init(&ax25->ax25_node); - spin_unlock_bh(&ax25_list_lock); ax25_cb_put(ax25); } + spin_unlock_bh(&ax25_list_lock); } /* @@ -81,6 +81,7 @@ static void ax25_kill_by_device(struct net_device *dev) if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) return; + ax25_dev->device_up = false; spin_lock_bh(&ax25_list_lock); again: @@ -91,6 +92,7 @@ again: spin_unlock_bh(&ax25_list_lock); ax25_disconnect(s, ENETUNREACH); s->ax25_dev = NULL; + ax25_cb_del(s); spin_lock_bh(&ax25_list_lock); goto again; } @@ -103,6 +105,7 @@ again: dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker); ax25_dev_put(ax25_dev); } + ax25_cb_del(s); release_sock(sk); spin_lock_bh(&ax25_list_lock); sock_put(sk); @@ -995,9 +998,11 @@ static int ax25_release(struct socket *sock) if (sk->sk_type == SOCK_SEQPACKET) { switch (ax25->state) { case AX25_STATE_0: - release_sock(sk); - ax25_disconnect(ax25, 0); - lock_sock(sk); + if (!sock_flag(ax25->sk, SOCK_DEAD)) { + release_sock(sk); + ax25_disconnect(ax25, 0); + lock_sock(sk); + } ax25_destroy_socket(ax25); break; @@ -1053,11 +1058,13 @@ static int ax25_release(struct socket *sock) ax25_destroy_socket(ax25); } if (ax25_dev) { - del_timer_sync(&ax25->timer); - del_timer_sync(&ax25->t1timer); - del_timer_sync(&ax25->t2timer); - del_timer_sync(&ax25->t3timer); - del_timer_sync(&ax25->idletimer); + if (!ax25_dev->device_up) { + del_timer_sync(&ax25->timer); + del_timer_sync(&ax25->t1timer); + del_timer_sync(&ax25->t2timer); + del_timer_sync(&ax25->t3timer); + del_timer_sync(&ax25->idletimer); + } dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker); ax25_dev_put(ax25_dev); } diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index b80fccbac62a..95a76d571c44 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -62,6 +62,7 @@ void ax25_dev_device_up(struct net_device *dev) ax25_dev->dev = dev; dev_hold_track(dev, &ax25_dev->dev_tracker, GFP_ATOMIC); ax25_dev->forward = NULL; + ax25_dev->device_up = true; ax25_dev->values[AX25_VALUES_IPDEFMODE] = AX25_DEF_IPDEFMODE; ax25_dev->values[AX25_VALUES_AXDEFMODE] = AX25_DEF_AXDEFMODE; diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index 3a476e4f6cd0..9ff98f46dc6b 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -268,7 +268,7 @@ void ax25_disconnect(ax25_cb *ax25, int reason) del_timer_sync(&ax25->t3timer); del_timer_sync(&ax25->idletimer); } else { - if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY)) + if (ax25->sk && !sock_flag(ax25->sk, SOCK_DESTROY)) ax25_stop_heartbeat(ax25); ax25_stop_t1timer(ax25); ax25_stop_t2timer(ax25); From c6fbbf1eae8f35e10966826960e154c9596c86dc Mon Sep 17 00:00:00 2001 From: Fei Qin Date: Wed, 1 Jun 2022 10:34:49 +0200 Subject: [PATCH 73/80] nfp: remove padding in nfp_nfdk_tx_desc NFDK firmware supports 48-bit dma addressing and parses 16 high bits of dma addresses. In nfp_nfdk_tx_desc, dma related structure and tso related structure are union. When "mss" be filled with nonzero value due to enable tso, the memory used by "padding" may be also filled. Then, firmware may parse wrong dma addresses which causes TX watchdog timeout problem. This patch removes padding and unifies the dma_addr_hi bits with the one in firmware. nfp_nfdk_tx_desc_set_dma_addr is also added to match this change. Fixes: c10d12e3dce8 ("nfp: add support for NFDK data path") Signed-off-by: Fei Qin Signed-off-by: Yinjun Zhang Signed-off-by: Louis Peens Signed-off-by: Simon Horman Link: https://lore.kernel.org/r/20220601083449.50556-1-simon.horman@corigine.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/netronome/nfp/nfdk/dp.c | 12 ++++++------ drivers/net/ethernet/netronome/nfp/nfdk/nfdk.h | 3 +-- drivers/net/ethernet/netronome/nfp/nfp_net.h | 11 ++++++++++- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c index e3da9ac20e57..e509d6dcba5c 100644 --- a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c +++ b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c @@ -314,7 +314,7 @@ netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev) FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type); txd->dma_len_type = cpu_to_le16(dlen_type); - nfp_desc_set_dma_addr(txd, dma_addr); + nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr); /* starts at bit 0 */ BUILD_BUG_ON(!(NFDK_DESC_TX_DMA_LEN_HEAD & 1)); @@ -339,7 +339,7 @@ netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev) dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len); txd->dma_len_type = cpu_to_le16(dlen_type); - nfp_desc_set_dma_addr(txd, dma_addr); + nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr); dma_len -= dlen_type; dma_addr += dlen_type + 1; @@ -929,7 +929,7 @@ nfp_nfdk_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring, FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type); txd->dma_len_type = cpu_to_le16(dlen_type); - nfp_desc_set_dma_addr(txd, dma_addr); + nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr); tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD; dma_len -= tmp_dlen; @@ -940,7 +940,7 @@ nfp_nfdk_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring, dma_len -= 1; dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len); txd->dma_len_type = cpu_to_le16(dlen_type); - nfp_desc_set_dma_addr(txd, dma_addr); + nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr); dlen_type &= NFDK_DESC_TX_DMA_LEN; dma_len -= dlen_type; @@ -1332,7 +1332,7 @@ nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type); txd->dma_len_type = cpu_to_le16(dlen_type); - nfp_desc_set_dma_addr(txd, dma_addr); + nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr); tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD; dma_len -= tmp_dlen; @@ -1343,7 +1343,7 @@ nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, dma_len -= 1; dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len); txd->dma_len_type = cpu_to_le16(dlen_type); - nfp_desc_set_dma_addr(txd, dma_addr); + nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr); dlen_type &= NFDK_DESC_TX_DMA_LEN; dma_len -= dlen_type; diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/nfdk.h b/drivers/net/ethernet/netronome/nfp/nfdk/nfdk.h index c41e0975eb73..0ea51d9f2325 100644 --- a/drivers/net/ethernet/netronome/nfp/nfdk/nfdk.h +++ b/drivers/net/ethernet/netronome/nfp/nfdk/nfdk.h @@ -46,8 +46,7 @@ struct nfp_nfdk_tx_desc { union { struct { - u8 dma_addr_hi; /* High bits of host buf address */ - u8 padding; /* Must be zero */ + __le16 dma_addr_hi; /* High bits of host buf address */ __le16 dma_len_type; /* Length to DMA for this desc */ __le32 dma_addr_lo; /* Low 32bit of host buf addr */ }; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 428783b7018b..3dd3a92d2e7f 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -117,13 +117,22 @@ struct nfp_nfdk_tx_buf; /* Convenience macro for writing dma address into RX/TX descriptors */ #define nfp_desc_set_dma_addr(desc, dma_addr) \ do { \ - __typeof(desc) __d = (desc); \ + __typeof__(desc) __d = (desc); \ dma_addr_t __addr = (dma_addr); \ \ __d->dma_addr_lo = cpu_to_le32(lower_32_bits(__addr)); \ __d->dma_addr_hi = upper_32_bits(__addr) & 0xff; \ } while (0) +#define nfp_nfdk_tx_desc_set_dma_addr(desc, dma_addr) \ + do { \ + __typeof__(desc) __d = (desc); \ + dma_addr_t __addr = (dma_addr); \ + \ + __d->dma_addr_hi = cpu_to_le16(upper_32_bits(__addr) & 0xff); \ + __d->dma_addr_lo = cpu_to_le32(lower_32_bits(__addr)); \ + } while (0) + /** * struct nfp_net_tx_ring - TX ring structure * @r_vec: Back pointer to ring vector structure From 6e1ff618737a2ffa12191dcd99f83ef07fac2e45 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 1 Jun 2022 12:59:24 +0200 Subject: [PATCH 74/80] ice: fix access-beyond-end in the switch code Global `-Warray-bounds` enablement revealed some problems, one of which is the way we define and use AQC rules messages. In fact, they have a shared header, followed by the actual message, which can be of one of several different formats. So it is straightforward enough to define that header as a separate struct and then embed it into message structures as needed, but currently all the formats reside in one union coupled with the header. Then, the code allocates only the memory needed for a particular message format, leaving the union potentially incomplete. There are no actual reads or writes beyond the end of an allocated chunk, but at the same time, the whole implementation is fragile and backed by an equilibrium rather than strong type and memory checks. Define the structures the other way around: one for the common header and the rest for the actual formats with the header embedded. There are no places where several union members would be used at the same time anyway. This allows to use proper struct_size() and let the compiler know what is going to be done. Finally, unsilence `-Warray-bounds` back for ice_switch.c. Other little things worth mentioning: * &ice_sw_rule_vsi_list_query is not used anywhere, remove it. It's weird anyway to talk to hardware with purely kernel types (bitmaps); * expand the ICE_SW_RULE_*_SIZE() macros to pass a structure variable name to struct_size() to let it do strict typechecking; * rename ice_sw_rule_lkup_rx_tx::hdr to ::hdr_data to keep ::hdr for the header structure to have the same name for it constistenly everywhere; * drop the duplicate of %ICE_SW_RULE_RX_TX_NO_HDR_SIZE residing in ice_switch.h. Fixes: 9daf8208dd4d ("ice: Add support for switch filter programming") Fixes: 66486d8943ba ("ice: replace single-element array used for C struct hack") Signed-off-by: Alexander Lobakin Reviewed-by: Marcin Szycik Acked-by: Tony Nguyen Link: https://lore.kernel.org/r/20220601105924.2841410-1-alexandr.lobakin@intel.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/intel/ice/Makefile | 5 - .../net/ethernet/intel/ice/ice_adminq_cmd.h | 58 +++--- drivers/net/ethernet/intel/ice/ice_switch.c | 188 +++++++++--------- drivers/net/ethernet/intel/ice/ice_switch.h | 3 - 4 files changed, 115 insertions(+), 139 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index 46f439641441..9183d480b70b 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -47,8 +47,3 @@ ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o ice-$(CONFIG_RFS_ACCEL) += ice_arfs.o ice-$(CONFIG_XDP_SOCKETS) += ice_xsk.o ice-$(CONFIG_ICE_SWITCHDEV) += ice_eswitch.o - -# FIXME: temporarily silence -Warray-bounds on non W=1+ builds -ifndef KBUILD_EXTRA_WARN -CFLAGS_ice_switch.o += -Wno-array-bounds -endif diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index b25e27c4d887..05cb9dd7035a 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -601,12 +601,30 @@ struct ice_aqc_sw_rules { __le32 addr_low; }; +/* Add switch rule response: + * Content of return buffer is same as the input buffer. The status field and + * LUT index are updated as part of the response + */ +struct ice_aqc_sw_rules_elem_hdr { + __le16 type; /* Switch rule type, one of T_... */ +#define ICE_AQC_SW_RULES_T_LKUP_RX 0x0 +#define ICE_AQC_SW_RULES_T_LKUP_TX 0x1 +#define ICE_AQC_SW_RULES_T_LG_ACT 0x2 +#define ICE_AQC_SW_RULES_T_VSI_LIST_SET 0x3 +#define ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR 0x4 +#define ICE_AQC_SW_RULES_T_PRUNE_LIST_SET 0x5 +#define ICE_AQC_SW_RULES_T_PRUNE_LIST_CLEAR 0x6 + __le16 status; +} __packed __aligned(sizeof(__le16)); + /* Add/Update/Get/Remove lookup Rx/Tx command/response entry * This structures describes the lookup rules and associated actions. "index" * is returned as part of a response to a successful Add command, and can be * used to identify the rule for Update/Get/Remove commands. */ struct ice_sw_rule_lkup_rx_tx { + struct ice_aqc_sw_rules_elem_hdr hdr; + __le16 recipe_id; #define ICE_SW_RECIPE_LOGICAL_PORT_FWD 10 /* Source port for LOOKUP_RX and source VSI in case of LOOKUP_TX */ @@ -683,14 +701,16 @@ struct ice_sw_rule_lkup_rx_tx { * lookup-type */ __le16 hdr_len; - u8 hdr[]; -}; + u8 hdr_data[]; +} __packed __aligned(sizeof(__le16)); /* Add/Update/Remove large action command/response entry * "index" is returned as part of a response to a successful Add command, and * can be used to identify the action for Update/Get/Remove commands. */ struct ice_sw_rule_lg_act { + struct ice_aqc_sw_rules_elem_hdr hdr; + __le16 index; /* Index in large action table */ __le16 size; /* Max number of large actions */ @@ -744,45 +764,19 @@ struct ice_sw_rule_lg_act { #define ICE_LG_ACT_STAT_COUNT_S 3 #define ICE_LG_ACT_STAT_COUNT_M (0x7F << ICE_LG_ACT_STAT_COUNT_S) __le32 act[]; /* array of size for actions */ -}; +} __packed __aligned(sizeof(__le16)); /* Add/Update/Remove VSI list command/response entry * "index" is returned as part of a response to a successful Add command, and * can be used to identify the VSI list for Update/Get/Remove commands. */ struct ice_sw_rule_vsi_list { + struct ice_aqc_sw_rules_elem_hdr hdr; + __le16 index; /* Index of VSI/Prune list */ __le16 number_vsi; __le16 vsi[]; /* Array of number_vsi VSI numbers */ -}; - -/* Query VSI list command/response entry */ -struct ice_sw_rule_vsi_list_query { - __le16 index; - DECLARE_BITMAP(vsi_list, ICE_MAX_VSI); -} __packed; - -/* Add switch rule response: - * Content of return buffer is same as the input buffer. The status field and - * LUT index are updated as part of the response - */ -struct ice_aqc_sw_rules_elem { - __le16 type; /* Switch rule type, one of T_... */ -#define ICE_AQC_SW_RULES_T_LKUP_RX 0x0 -#define ICE_AQC_SW_RULES_T_LKUP_TX 0x1 -#define ICE_AQC_SW_RULES_T_LG_ACT 0x2 -#define ICE_AQC_SW_RULES_T_VSI_LIST_SET 0x3 -#define ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR 0x4 -#define ICE_AQC_SW_RULES_T_PRUNE_LIST_SET 0x5 -#define ICE_AQC_SW_RULES_T_PRUNE_LIST_CLEAR 0x6 - __le16 status; - union { - struct ice_sw_rule_lkup_rx_tx lkup_tx_rx; - struct ice_sw_rule_lg_act lg_act; - struct ice_sw_rule_vsi_list vsi_list; - struct ice_sw_rule_vsi_list_query vsi_list_query; - } __packed pdata; -}; +} __packed __aligned(sizeof(__le16)); /* Query PFC Mode (direct 0x0302) * Set PFC Mode (direct 0x0303) diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 9f0a4dfb4818..8d8f3eec79ee 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -1282,18 +1282,13 @@ static const struct ice_dummy_pkt_profile ice_dummy_pkt_profiles[] = { ICE_PKT_PROFILE(tcp, 0), }; -#define ICE_SW_RULE_RX_TX_ETH_HDR_SIZE \ - (offsetof(struct ice_aqc_sw_rules_elem, pdata.lkup_tx_rx.hdr) + \ - (DUMMY_ETH_HDR_LEN * \ - sizeof(((struct ice_sw_rule_lkup_rx_tx *)0)->hdr[0]))) -#define ICE_SW_RULE_RX_TX_NO_HDR_SIZE \ - (offsetof(struct ice_aqc_sw_rules_elem, pdata.lkup_tx_rx.hdr)) -#define ICE_SW_RULE_LG_ACT_SIZE(n) \ - (offsetof(struct ice_aqc_sw_rules_elem, pdata.lg_act.act) + \ - ((n) * sizeof(((struct ice_sw_rule_lg_act *)0)->act[0]))) -#define ICE_SW_RULE_VSI_LIST_SIZE(n) \ - (offsetof(struct ice_aqc_sw_rules_elem, pdata.vsi_list.vsi) + \ - ((n) * sizeof(((struct ice_sw_rule_vsi_list *)0)->vsi[0]))) +#define ICE_SW_RULE_RX_TX_HDR_SIZE(s, l) struct_size((s), hdr_data, (l)) +#define ICE_SW_RULE_RX_TX_ETH_HDR_SIZE(s) \ + ICE_SW_RULE_RX_TX_HDR_SIZE((s), DUMMY_ETH_HDR_LEN) +#define ICE_SW_RULE_RX_TX_NO_HDR_SIZE(s) \ + ICE_SW_RULE_RX_TX_HDR_SIZE((s), 0) +#define ICE_SW_RULE_LG_ACT_SIZE(s, n) struct_size((s), act, (n)) +#define ICE_SW_RULE_VSI_LIST_SIZE(s, n) struct_size((s), vsi, (n)) /* this is a recipe to profile association bitmap */ static DECLARE_BITMAP(recipe_to_profile[ICE_MAX_NUM_RECIPES], @@ -2376,7 +2371,8 @@ static void ice_fill_sw_info(struct ice_hw *hw, struct ice_fltr_info *fi) */ static void ice_fill_sw_rule(struct ice_hw *hw, struct ice_fltr_info *f_info, - struct ice_aqc_sw_rules_elem *s_rule, enum ice_adminq_opc opc) + struct ice_sw_rule_lkup_rx_tx *s_rule, + enum ice_adminq_opc opc) { u16 vlan_id = ICE_MAX_VLAN_ID + 1; u16 vlan_tpid = ETH_P_8021Q; @@ -2388,15 +2384,14 @@ ice_fill_sw_rule(struct ice_hw *hw, struct ice_fltr_info *f_info, u8 q_rgn; if (opc == ice_aqc_opc_remove_sw_rules) { - s_rule->pdata.lkup_tx_rx.act = 0; - s_rule->pdata.lkup_tx_rx.index = - cpu_to_le16(f_info->fltr_rule_id); - s_rule->pdata.lkup_tx_rx.hdr_len = 0; + s_rule->act = 0; + s_rule->index = cpu_to_le16(f_info->fltr_rule_id); + s_rule->hdr_len = 0; return; } eth_hdr_sz = sizeof(dummy_eth_header); - eth_hdr = s_rule->pdata.lkup_tx_rx.hdr; + eth_hdr = s_rule->hdr_data; /* initialize the ether header with a dummy header */ memcpy(eth_hdr, dummy_eth_header, eth_hdr_sz); @@ -2481,14 +2476,14 @@ ice_fill_sw_rule(struct ice_hw *hw, struct ice_fltr_info *f_info, break; } - s_rule->type = (f_info->flag & ICE_FLTR_RX) ? + s_rule->hdr.type = (f_info->flag & ICE_FLTR_RX) ? cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX) : cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_TX); /* Recipe set depending on lookup type */ - s_rule->pdata.lkup_tx_rx.recipe_id = cpu_to_le16(f_info->lkup_type); - s_rule->pdata.lkup_tx_rx.src = cpu_to_le16(f_info->src); - s_rule->pdata.lkup_tx_rx.act = cpu_to_le32(act); + s_rule->recipe_id = cpu_to_le16(f_info->lkup_type); + s_rule->src = cpu_to_le16(f_info->src); + s_rule->act = cpu_to_le32(act); if (daddr) ether_addr_copy(eth_hdr + ICE_ETH_DA_OFFSET, daddr); @@ -2502,7 +2497,7 @@ ice_fill_sw_rule(struct ice_hw *hw, struct ice_fltr_info *f_info, /* Create the switch rule with the final dummy Ethernet header */ if (opc != ice_aqc_opc_update_sw_rules) - s_rule->pdata.lkup_tx_rx.hdr_len = cpu_to_le16(eth_hdr_sz); + s_rule->hdr_len = cpu_to_le16(eth_hdr_sz); } /** @@ -2519,7 +2514,8 @@ static int ice_add_marker_act(struct ice_hw *hw, struct ice_fltr_mgmt_list_entry *m_ent, u16 sw_marker, u16 l_id) { - struct ice_aqc_sw_rules_elem *lg_act, *rx_tx; + struct ice_sw_rule_lkup_rx_tx *rx_tx; + struct ice_sw_rule_lg_act *lg_act; /* For software marker we need 3 large actions * 1. FWD action: FWD TO VSI or VSI LIST * 2. GENERIC VALUE action to hold the profile ID @@ -2540,18 +2536,18 @@ ice_add_marker_act(struct ice_hw *hw, struct ice_fltr_mgmt_list_entry *m_ent, * 1. Large Action * 2. Look up Tx Rx */ - lg_act_size = (u16)ICE_SW_RULE_LG_ACT_SIZE(num_lg_acts); - rules_size = lg_act_size + ICE_SW_RULE_RX_TX_ETH_HDR_SIZE; + lg_act_size = (u16)ICE_SW_RULE_LG_ACT_SIZE(lg_act, num_lg_acts); + rules_size = lg_act_size + ICE_SW_RULE_RX_TX_ETH_HDR_SIZE(rx_tx); lg_act = devm_kzalloc(ice_hw_to_dev(hw), rules_size, GFP_KERNEL); if (!lg_act) return -ENOMEM; - rx_tx = (struct ice_aqc_sw_rules_elem *)((u8 *)lg_act + lg_act_size); + rx_tx = (typeof(rx_tx))((u8 *)lg_act + lg_act_size); /* Fill in the first switch rule i.e. large action */ - lg_act->type = cpu_to_le16(ICE_AQC_SW_RULES_T_LG_ACT); - lg_act->pdata.lg_act.index = cpu_to_le16(l_id); - lg_act->pdata.lg_act.size = cpu_to_le16(num_lg_acts); + lg_act->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LG_ACT); + lg_act->index = cpu_to_le16(l_id); + lg_act->size = cpu_to_le16(num_lg_acts); /* First action VSI forwarding or VSI list forwarding depending on how * many VSIs @@ -2563,13 +2559,13 @@ ice_add_marker_act(struct ice_hw *hw, struct ice_fltr_mgmt_list_entry *m_ent, act |= (id << ICE_LG_ACT_VSI_LIST_ID_S) & ICE_LG_ACT_VSI_LIST_ID_M; if (m_ent->vsi_count > 1) act |= ICE_LG_ACT_VSI_LIST; - lg_act->pdata.lg_act.act[0] = cpu_to_le32(act); + lg_act->act[0] = cpu_to_le32(act); /* Second action descriptor type */ act = ICE_LG_ACT_GENERIC; act |= (1 << ICE_LG_ACT_GENERIC_VALUE_S) & ICE_LG_ACT_GENERIC_VALUE_M; - lg_act->pdata.lg_act.act[1] = cpu_to_le32(act); + lg_act->act[1] = cpu_to_le32(act); act = (ICE_LG_ACT_GENERIC_OFF_RX_DESC_PROF_IDX << ICE_LG_ACT_GENERIC_OFFSET_S) & ICE_LG_ACT_GENERIC_OFFSET_M; @@ -2579,24 +2575,22 @@ ice_add_marker_act(struct ice_hw *hw, struct ice_fltr_mgmt_list_entry *m_ent, act |= (sw_marker << ICE_LG_ACT_GENERIC_VALUE_S) & ICE_LG_ACT_GENERIC_VALUE_M; - lg_act->pdata.lg_act.act[2] = cpu_to_le32(act); + lg_act->act[2] = cpu_to_le32(act); /* call the fill switch rule to fill the lookup Tx Rx structure */ ice_fill_sw_rule(hw, &m_ent->fltr_info, rx_tx, ice_aqc_opc_update_sw_rules); /* Update the action to point to the large action ID */ - rx_tx->pdata.lkup_tx_rx.act = - cpu_to_le32(ICE_SINGLE_ACT_PTR | - ((l_id << ICE_SINGLE_ACT_PTR_VAL_S) & - ICE_SINGLE_ACT_PTR_VAL_M)); + rx_tx->act = cpu_to_le32(ICE_SINGLE_ACT_PTR | + ((l_id << ICE_SINGLE_ACT_PTR_VAL_S) & + ICE_SINGLE_ACT_PTR_VAL_M)); /* Use the filter rule ID of the previously created rule with single * act. Once the update happens, hardware will treat this as large * action */ - rx_tx->pdata.lkup_tx_rx.index = - cpu_to_le16(m_ent->fltr_info.fltr_rule_id); + rx_tx->index = cpu_to_le16(m_ent->fltr_info.fltr_rule_id); status = ice_aq_sw_rules(hw, lg_act, rules_size, 2, ice_aqc_opc_update_sw_rules, NULL); @@ -2658,7 +2652,7 @@ ice_update_vsi_list_rule(struct ice_hw *hw, u16 *vsi_handle_arr, u16 num_vsi, u16 vsi_list_id, bool remove, enum ice_adminq_opc opc, enum ice_sw_lkup_type lkup_type) { - struct ice_aqc_sw_rules_elem *s_rule; + struct ice_sw_rule_vsi_list *s_rule; u16 s_rule_size; u16 rule_type; int status; @@ -2681,7 +2675,7 @@ ice_update_vsi_list_rule(struct ice_hw *hw, u16 *vsi_handle_arr, u16 num_vsi, else return -EINVAL; - s_rule_size = (u16)ICE_SW_RULE_VSI_LIST_SIZE(num_vsi); + s_rule_size = (u16)ICE_SW_RULE_VSI_LIST_SIZE(s_rule, num_vsi); s_rule = devm_kzalloc(ice_hw_to_dev(hw), s_rule_size, GFP_KERNEL); if (!s_rule) return -ENOMEM; @@ -2691,13 +2685,13 @@ ice_update_vsi_list_rule(struct ice_hw *hw, u16 *vsi_handle_arr, u16 num_vsi, goto exit; } /* AQ call requires hw_vsi_id(s) */ - s_rule->pdata.vsi_list.vsi[i] = + s_rule->vsi[i] = cpu_to_le16(ice_get_hw_vsi_num(hw, vsi_handle_arr[i])); } - s_rule->type = cpu_to_le16(rule_type); - s_rule->pdata.vsi_list.number_vsi = cpu_to_le16(num_vsi); - s_rule->pdata.vsi_list.index = cpu_to_le16(vsi_list_id); + s_rule->hdr.type = cpu_to_le16(rule_type); + s_rule->number_vsi = cpu_to_le16(num_vsi); + s_rule->index = cpu_to_le16(vsi_list_id); status = ice_aq_sw_rules(hw, s_rule, s_rule_size, 1, opc, NULL); @@ -2745,13 +2739,14 @@ ice_create_pkt_fwd_rule(struct ice_hw *hw, struct ice_fltr_list_entry *f_entry) { struct ice_fltr_mgmt_list_entry *fm_entry; - struct ice_aqc_sw_rules_elem *s_rule; + struct ice_sw_rule_lkup_rx_tx *s_rule; enum ice_sw_lkup_type l_type; struct ice_sw_recipe *recp; int status; s_rule = devm_kzalloc(ice_hw_to_dev(hw), - ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, GFP_KERNEL); + ICE_SW_RULE_RX_TX_ETH_HDR_SIZE(s_rule), + GFP_KERNEL); if (!s_rule) return -ENOMEM; fm_entry = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*fm_entry), @@ -2772,17 +2767,16 @@ ice_create_pkt_fwd_rule(struct ice_hw *hw, ice_fill_sw_rule(hw, &fm_entry->fltr_info, s_rule, ice_aqc_opc_add_sw_rules); - status = ice_aq_sw_rules(hw, s_rule, ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, 1, + status = ice_aq_sw_rules(hw, s_rule, + ICE_SW_RULE_RX_TX_ETH_HDR_SIZE(s_rule), 1, ice_aqc_opc_add_sw_rules, NULL); if (status) { devm_kfree(ice_hw_to_dev(hw), fm_entry); goto ice_create_pkt_fwd_rule_exit; } - f_entry->fltr_info.fltr_rule_id = - le16_to_cpu(s_rule->pdata.lkup_tx_rx.index); - fm_entry->fltr_info.fltr_rule_id = - le16_to_cpu(s_rule->pdata.lkup_tx_rx.index); + f_entry->fltr_info.fltr_rule_id = le16_to_cpu(s_rule->index); + fm_entry->fltr_info.fltr_rule_id = le16_to_cpu(s_rule->index); /* The book keeping entries will get removed when base driver * calls remove filter AQ command @@ -2807,20 +2801,22 @@ ice_create_pkt_fwd_rule_exit: static int ice_update_pkt_fwd_rule(struct ice_hw *hw, struct ice_fltr_info *f_info) { - struct ice_aqc_sw_rules_elem *s_rule; + struct ice_sw_rule_lkup_rx_tx *s_rule; int status; s_rule = devm_kzalloc(ice_hw_to_dev(hw), - ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, GFP_KERNEL); + ICE_SW_RULE_RX_TX_ETH_HDR_SIZE(s_rule), + GFP_KERNEL); if (!s_rule) return -ENOMEM; ice_fill_sw_rule(hw, f_info, s_rule, ice_aqc_opc_update_sw_rules); - s_rule->pdata.lkup_tx_rx.index = cpu_to_le16(f_info->fltr_rule_id); + s_rule->index = cpu_to_le16(f_info->fltr_rule_id); /* Update switch rule with new rule set to forward VSI list */ - status = ice_aq_sw_rules(hw, s_rule, ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, 1, + status = ice_aq_sw_rules(hw, s_rule, + ICE_SW_RULE_RX_TX_ETH_HDR_SIZE(s_rule), 1, ice_aqc_opc_update_sw_rules, NULL); devm_kfree(ice_hw_to_dev(hw), s_rule); @@ -3104,17 +3100,17 @@ static int ice_remove_vsi_list_rule(struct ice_hw *hw, u16 vsi_list_id, enum ice_sw_lkup_type lkup_type) { - struct ice_aqc_sw_rules_elem *s_rule; + struct ice_sw_rule_vsi_list *s_rule; u16 s_rule_size; int status; - s_rule_size = (u16)ICE_SW_RULE_VSI_LIST_SIZE(0); + s_rule_size = (u16)ICE_SW_RULE_VSI_LIST_SIZE(s_rule, 0); s_rule = devm_kzalloc(ice_hw_to_dev(hw), s_rule_size, GFP_KERNEL); if (!s_rule) return -ENOMEM; - s_rule->type = cpu_to_le16(ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR); - s_rule->pdata.vsi_list.index = cpu_to_le16(vsi_list_id); + s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR); + s_rule->index = cpu_to_le16(vsi_list_id); /* Free the vsi_list resource that we allocated. It is assumed that the * list is empty at this point. @@ -3274,10 +3270,10 @@ ice_remove_rule_internal(struct ice_hw *hw, u8 recp_id, if (remove_rule) { /* Remove the lookup rule */ - struct ice_aqc_sw_rules_elem *s_rule; + struct ice_sw_rule_lkup_rx_tx *s_rule; s_rule = devm_kzalloc(ice_hw_to_dev(hw), - ICE_SW_RULE_RX_TX_NO_HDR_SIZE, + ICE_SW_RULE_RX_TX_NO_HDR_SIZE(s_rule), GFP_KERNEL); if (!s_rule) { status = -ENOMEM; @@ -3288,8 +3284,8 @@ ice_remove_rule_internal(struct ice_hw *hw, u8 recp_id, ice_aqc_opc_remove_sw_rules); status = ice_aq_sw_rules(hw, s_rule, - ICE_SW_RULE_RX_TX_NO_HDR_SIZE, 1, - ice_aqc_opc_remove_sw_rules, NULL); + ICE_SW_RULE_RX_TX_NO_HDR_SIZE(s_rule), + 1, ice_aqc_opc_remove_sw_rules, NULL); /* Remove a book keeping from the list */ devm_kfree(ice_hw_to_dev(hw), s_rule); @@ -3437,7 +3433,7 @@ bool ice_vlan_fltr_exist(struct ice_hw *hw, u16 vlan_id, u16 vsi_handle) */ int ice_add_mac(struct ice_hw *hw, struct list_head *m_list) { - struct ice_aqc_sw_rules_elem *s_rule, *r_iter; + struct ice_sw_rule_lkup_rx_tx *s_rule, *r_iter; struct ice_fltr_list_entry *m_list_itr; struct list_head *rule_head; u16 total_elem_left, s_rule_size; @@ -3501,7 +3497,7 @@ int ice_add_mac(struct ice_hw *hw, struct list_head *m_list) rule_head = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules; /* Allocate switch rule buffer for the bulk update for unicast */ - s_rule_size = ICE_SW_RULE_RX_TX_ETH_HDR_SIZE; + s_rule_size = ICE_SW_RULE_RX_TX_ETH_HDR_SIZE(s_rule); s_rule = devm_kcalloc(ice_hw_to_dev(hw), num_unicast, s_rule_size, GFP_KERNEL); if (!s_rule) { @@ -3517,8 +3513,7 @@ int ice_add_mac(struct ice_hw *hw, struct list_head *m_list) if (is_unicast_ether_addr(mac_addr)) { ice_fill_sw_rule(hw, &m_list_itr->fltr_info, r_iter, ice_aqc_opc_add_sw_rules); - r_iter = (struct ice_aqc_sw_rules_elem *) - ((u8 *)r_iter + s_rule_size); + r_iter = (typeof(s_rule))((u8 *)r_iter + s_rule_size); } } @@ -3527,7 +3522,7 @@ int ice_add_mac(struct ice_hw *hw, struct list_head *m_list) /* Call AQ switch rule in AQ_MAX chunk */ for (total_elem_left = num_unicast; total_elem_left > 0; total_elem_left -= elem_sent) { - struct ice_aqc_sw_rules_elem *entry = r_iter; + struct ice_sw_rule_lkup_rx_tx *entry = r_iter; elem_sent = min_t(u8, total_elem_left, (ICE_AQ_MAX_BUF_LEN / s_rule_size)); @@ -3536,7 +3531,7 @@ int ice_add_mac(struct ice_hw *hw, struct list_head *m_list) NULL); if (status) goto ice_add_mac_exit; - r_iter = (struct ice_aqc_sw_rules_elem *) + r_iter = (typeof(s_rule)) ((u8 *)r_iter + (elem_sent * s_rule_size)); } @@ -3548,8 +3543,7 @@ int ice_add_mac(struct ice_hw *hw, struct list_head *m_list) struct ice_fltr_mgmt_list_entry *fm_entry; if (is_unicast_ether_addr(mac_addr)) { - f_info->fltr_rule_id = - le16_to_cpu(r_iter->pdata.lkup_tx_rx.index); + f_info->fltr_rule_id = le16_to_cpu(r_iter->index); f_info->fltr_act = ICE_FWD_TO_VSI; /* Create an entry to track this MAC address */ fm_entry = devm_kzalloc(ice_hw_to_dev(hw), @@ -3565,8 +3559,7 @@ int ice_add_mac(struct ice_hw *hw, struct list_head *m_list) */ list_add(&fm_entry->list_entry, rule_head); - r_iter = (struct ice_aqc_sw_rules_elem *) - ((u8 *)r_iter + s_rule_size); + r_iter = (typeof(s_rule))((u8 *)r_iter + s_rule_size); } } @@ -3865,7 +3858,7 @@ ice_rem_adv_rule_info(struct ice_hw *hw, struct list_head *rule_head) */ int ice_cfg_dflt_vsi(struct ice_hw *hw, u16 vsi_handle, bool set, u8 direction) { - struct ice_aqc_sw_rules_elem *s_rule; + struct ice_sw_rule_lkup_rx_tx *s_rule; struct ice_fltr_info f_info; enum ice_adminq_opc opcode; u16 s_rule_size; @@ -3876,8 +3869,8 @@ int ice_cfg_dflt_vsi(struct ice_hw *hw, u16 vsi_handle, bool set, u8 direction) return -EINVAL; hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle); - s_rule_size = set ? ICE_SW_RULE_RX_TX_ETH_HDR_SIZE : - ICE_SW_RULE_RX_TX_NO_HDR_SIZE; + s_rule_size = set ? ICE_SW_RULE_RX_TX_ETH_HDR_SIZE(s_rule) : + ICE_SW_RULE_RX_TX_NO_HDR_SIZE(s_rule); s_rule = devm_kzalloc(ice_hw_to_dev(hw), s_rule_size, GFP_KERNEL); if (!s_rule) @@ -3915,7 +3908,7 @@ int ice_cfg_dflt_vsi(struct ice_hw *hw, u16 vsi_handle, bool set, u8 direction) if (status || !(f_info.flag & ICE_FLTR_TX_RX)) goto out; if (set) { - u16 index = le16_to_cpu(s_rule->pdata.lkup_tx_rx.index); + u16 index = le16_to_cpu(s_rule->index); if (f_info.flag & ICE_FLTR_TX) { hw->port_info->dflt_tx_vsi_num = hw_vsi_id; @@ -5641,7 +5634,7 @@ ice_find_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, */ static int ice_fill_adv_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, - struct ice_aqc_sw_rules_elem *s_rule, + struct ice_sw_rule_lkup_rx_tx *s_rule, const struct ice_dummy_pkt_profile *profile) { u8 *pkt; @@ -5650,7 +5643,7 @@ ice_fill_adv_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, /* Start with a packet with a pre-defined/dummy content. Then, fill * in the header values to be looked up or matched. */ - pkt = s_rule->pdata.lkup_tx_rx.hdr; + pkt = s_rule->hdr_data; memcpy(pkt, profile->pkt, profile->pkt_len); @@ -5740,7 +5733,7 @@ ice_fill_adv_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, } } - s_rule->pdata.lkup_tx_rx.hdr_len = cpu_to_le16(profile->pkt_len); + s_rule->hdr_len = cpu_to_le16(profile->pkt_len); return 0; } @@ -5963,7 +5956,7 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, struct ice_rule_query_data *added_entry) { struct ice_adv_fltr_mgmt_list_entry *m_entry, *adv_fltr = NULL; - struct ice_aqc_sw_rules_elem *s_rule = NULL; + struct ice_sw_rule_lkup_rx_tx *s_rule = NULL; const struct ice_dummy_pkt_profile *profile; u16 rid = 0, i, rule_buf_sz, vsi_handle; struct list_head *rule_head; @@ -6040,7 +6033,7 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, } return status; } - rule_buf_sz = ICE_SW_RULE_RX_TX_NO_HDR_SIZE + profile->pkt_len; + rule_buf_sz = ICE_SW_RULE_RX_TX_HDR_SIZE(s_rule, profile->pkt_len); s_rule = kzalloc(rule_buf_sz, GFP_KERNEL); if (!s_rule) return -ENOMEM; @@ -6089,16 +6082,15 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, * by caller) */ if (rinfo->rx) { - s_rule->type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX); - s_rule->pdata.lkup_tx_rx.src = - cpu_to_le16(hw->port_info->lport); + s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX); + s_rule->src = cpu_to_le16(hw->port_info->lport); } else { - s_rule->type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_TX); - s_rule->pdata.lkup_tx_rx.src = cpu_to_le16(rinfo->sw_act.src); + s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_TX); + s_rule->src = cpu_to_le16(rinfo->sw_act.src); } - s_rule->pdata.lkup_tx_rx.recipe_id = cpu_to_le16(rid); - s_rule->pdata.lkup_tx_rx.act = cpu_to_le32(act); + s_rule->recipe_id = cpu_to_le16(rid); + s_rule->act = cpu_to_le32(act); status = ice_fill_adv_dummy_packet(lkups, lkups_cnt, s_rule, profile); if (status) @@ -6107,7 +6099,7 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, if (rinfo->tun_type != ICE_NON_TUN && rinfo->tun_type != ICE_SW_TUN_AND_NON_TUN) { status = ice_fill_adv_packet_tun(hw, rinfo->tun_type, - s_rule->pdata.lkup_tx_rx.hdr, + s_rule->hdr_data, profile->offsets); if (status) goto err_ice_add_adv_rule; @@ -6135,8 +6127,7 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, adv_fltr->lkups_cnt = lkups_cnt; adv_fltr->rule_info = *rinfo; - adv_fltr->rule_info.fltr_rule_id = - le16_to_cpu(s_rule->pdata.lkup_tx_rx.index); + adv_fltr->rule_info.fltr_rule_id = le16_to_cpu(s_rule->index); sw = hw->switch_info; sw->recp_list[rid].adv_rule = true; rule_head = &sw->recp_list[rid].filt_rules; @@ -6384,17 +6375,16 @@ ice_rem_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, } mutex_unlock(rule_lock); if (remove_rule) { - struct ice_aqc_sw_rules_elem *s_rule; + struct ice_sw_rule_lkup_rx_tx *s_rule; u16 rule_buf_sz; - rule_buf_sz = ICE_SW_RULE_RX_TX_NO_HDR_SIZE; + rule_buf_sz = ICE_SW_RULE_RX_TX_NO_HDR_SIZE(s_rule); s_rule = kzalloc(rule_buf_sz, GFP_KERNEL); if (!s_rule) return -ENOMEM; - s_rule->pdata.lkup_tx_rx.act = 0; - s_rule->pdata.lkup_tx_rx.index = - cpu_to_le16(list_elem->rule_info.fltr_rule_id); - s_rule->pdata.lkup_tx_rx.hdr_len = 0; + s_rule->act = 0; + s_rule->index = cpu_to_le16(list_elem->rule_info.fltr_rule_id); + s_rule->hdr_len = 0; status = ice_aq_sw_rules(hw, (struct ice_aqc_sw_rules *)s_rule, rule_buf_sz, 1, ice_aqc_opc_remove_sw_rules, NULL); diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index ecac75e71395..eb641e5512d2 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -23,9 +23,6 @@ #define ICE_PROFID_IPV6_GTPU_TEID 46 #define ICE_PROFID_IPV6_GTPU_IPV6_TCP_INNER 70 -#define ICE_SW_RULE_RX_TX_NO_HDR_SIZE \ - (offsetof(struct ice_aqc_sw_rules_elem, pdata.lkup_tx_rx.hdr)) - /* VSI context structure for add/get/update/free operations */ struct ice_vsi_ctx { u16 vsi_num; From 7f36f798f89bf32c0164049cb0e3fd1af613d0bb Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Thu, 2 Jun 2022 13:30:53 +0700 Subject: [PATCH 75/80] tipc: check attribute length for bearer name syzbot reported uninit-value: ===================================================== BUG: KMSAN: uninit-value in string_nocheck lib/vsprintf.c:644 [inline] BUG: KMSAN: uninit-value in string+0x4f9/0x6f0 lib/vsprintf.c:725 string_nocheck lib/vsprintf.c:644 [inline] string+0x4f9/0x6f0 lib/vsprintf.c:725 vsnprintf+0x2222/0x3650 lib/vsprintf.c:2806 vprintk_store+0x537/0x2150 kernel/printk/printk.c:2158 vprintk_emit+0x28b/0xab0 kernel/printk/printk.c:2256 vprintk_default+0x86/0xa0 kernel/printk/printk.c:2283 vprintk+0x15f/0x180 kernel/printk/printk_safe.c:50 _printk+0x18d/0x1cf kernel/printk/printk.c:2293 tipc_enable_bearer net/tipc/bearer.c:371 [inline] __tipc_nl_bearer_enable+0x2022/0x22a0 net/tipc/bearer.c:1033 tipc_nl_bearer_enable+0x6c/0xb0 net/tipc/bearer.c:1042 genl_family_rcv_msg_doit net/netlink/genetlink.c:731 [inline] - Do sanity check the attribute length for TIPC_NLA_BEARER_NAME. - Do not use 'illegal name' in printing message. Reported-by: syzbot+e820fdc8ce362f2dea51@syzkaller.appspotmail.com Fixes: cb30a63384bc ("tipc: refactor function tipc_enable_bearer()") Acked-by: Jon Maloy Signed-off-by: Hoang Le Link: https://lore.kernel.org/r/20220602063053.5892-1-hoang.h.le@dektech.com.au Signed-off-by: Jakub Kicinski --- net/tipc/bearer.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 6d39ca05f249..932c87b98eca 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -259,9 +259,8 @@ static int tipc_enable_bearer(struct net *net, const char *name, u32 i; if (!bearer_name_validate(name, &b_names)) { - errstr = "illegal name"; NL_SET_ERR_MSG(extack, "Illegal name"); - goto rejected; + return res; } if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) { From 839612d23ffd933174db911ce56dc3f3ca883ec5 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 2 Jun 2022 09:48:40 +0200 Subject: [PATCH 76/80] net: stmmac: use dev_err_probe() for reporting mdio bus registration failure I have a board where these two lines are always printed during boot: imx-dwmac 30bf0000.ethernet: Cannot register the MDIO bus imx-dwmac 30bf0000.ethernet: stmmac_dvr_probe: MDIO bus (id: 1) registration failed It's perfectly fine, and the device is successfully (and silently, as far as the console goes) probed later. Use dev_err_probe() instead, which will demote these messages to debug level (thus removing the alarming messages from the console) when the error is -EPROBE_DEFER, and also has the advantage of including the error code if/when it happens to be something other than -EPROBE_DEFER. While here, add the missing \n to one of the format strings. Signed-off-by: Rasmus Villemoes Link: https://lore.kernel.org/r/20220602074840.1143360-1-linux@rasmusvillemoes.dk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 +++--- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 3b81d4e9dc83..d1a7cf4567bc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -7129,9 +7129,9 @@ int stmmac_dvr_probe(struct device *device, /* MDIO bus Registration */ ret = stmmac_mdio_register(ndev); if (ret < 0) { - dev_err(priv->device, - "%s: MDIO bus (id: %d) registration failed", - __func__, priv->plat->bus_id); + dev_err_probe(priv->device, ret, + "%s: MDIO bus (id: %d) registration failed\n", + __func__, priv->plat->bus_id); goto error_mdio_register; } } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 9bc625fccca0..03d3d1f7aa4b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -482,7 +482,7 @@ int stmmac_mdio_register(struct net_device *ndev) err = of_mdiobus_register(new_bus, mdio_node); if (err != 0) { - dev_err(dev, "Cannot register the MDIO bus\n"); + dev_err_probe(dev, err, "Cannot register the MDIO bus\n"); goto bus_register_fail; } From 83450bbafebdaf90818e77ee368202f03d056cd7 Mon Sep 17 00:00:00 2001 From: Michael Sit Wei Hong Date: Thu, 2 Jun 2022 15:35:07 +0800 Subject: [PATCH 77/80] stmmac: intel: Add RPL-P PCI ID Add PCI ID for Ethernet TSN Controller on RPL-P. Signed-off-by: Michael Sit Wei Hong Link: https://lore.kernel.org/r/20220602073507.3955721-1-michael.wei.hong.sit@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 0b0be0898ac5..f9f80933e0c9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -1161,6 +1161,7 @@ static SIMPLE_DEV_PM_OPS(intel_eth_pm_ops, intel_eth_pci_suspend, #define PCI_DEVICE_ID_INTEL_ADLS_SGMII1G_0 0x7aac #define PCI_DEVICE_ID_INTEL_ADLS_SGMII1G_1 0x7aad #define PCI_DEVICE_ID_INTEL_ADLN_SGMII1G 0x54ac +#define PCI_DEVICE_ID_INTEL_RPLP_SGMII1G 0x51ac static const struct pci_device_id intel_eth_pci_id_table[] = { { PCI_DEVICE_DATA(INTEL, QUARK, &quark_info) }, @@ -1179,6 +1180,7 @@ static const struct pci_device_id intel_eth_pci_id_table[] = { { PCI_DEVICE_DATA(INTEL, ADLS_SGMII1G_0, &adls_sgmii1g_phy0_info) }, { PCI_DEVICE_DATA(INTEL, ADLS_SGMII1G_1, &adls_sgmii1g_phy1_info) }, { PCI_DEVICE_DATA(INTEL, ADLN_SGMII1G, &tgl_sgmii1g_phy0_info) }, + { PCI_DEVICE_DATA(INTEL, RPLP_SGMII1G, &tgl_sgmii1g_phy0_info) }, {} }; MODULE_DEVICE_TABLE(pci, intel_eth_pci_id_table); From eb0b39efb7d908e3950f6f76ee6e3cecb86ec489 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Jun 2022 09:18:57 -0700 Subject: [PATCH 78/80] net: CONFIG_DEBUG_NET depends on CONFIG_NET It makes little sense to debug networking stacks if networking is not compiled in. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- net/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/Kconfig.debug b/net/Kconfig.debug index a5781cf63b16..e6ae11cc2fb7 100644 --- a/net/Kconfig.debug +++ b/net/Kconfig.debug @@ -20,7 +20,7 @@ config NET_NS_REFCNT_TRACKER config DEBUG_NET bool "Add generic networking debug" - depends on DEBUG_KERNEL + depends on DEBUG_KERNEL && NET help Enable extra sanity checks in networking. This is mostly used by fuzzers, but is safe to select. From 22296a5c0cd35aaf62e1af3266f82cdf6b0b9b78 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Jun 2022 09:18:58 -0700 Subject: [PATCH 79/80] net: add debug info to __skb_pull() While analyzing yet another syzbot report, I found the following patch very useful. It allows to better understand what went wrong. This debug info is only enabled if CONFIG_DEBUG_NET=y, which is the case for syzbot builds. Signed-off-by: Eric Dumazet Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index da96f0d3e753..d3d10556f0fa 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2696,7 +2696,14 @@ void *skb_pull(struct sk_buff *skb, unsigned int len); static inline void *__skb_pull(struct sk_buff *skb, unsigned int len) { skb->len -= len; - BUG_ON(skb->len < skb->data_len); + if (unlikely(skb->len < skb->data_len)) { +#if defined(CONFIG_DEBUG_NET) + skb->len += len; + pr_err("__skb_pull(len=%u)\n", len); + skb_dump(KERN_ERR, skb, false); +#endif + BUG(); + } return skb->data += len; } From e9d3f80935b6607dcdc5682b00b1d4b28e0a0c5d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Jun 2022 09:18:59 -0700 Subject: [PATCH 80/80] net/af_packet: make sure to pull mac header GSO assumes skb->head contains link layer headers. tun device in some case can provide base 14 bytes, regardless of VLAN being used or not. After blamed commit, we can end up setting a network header offset of 18+, we better pull the missing bytes to avoid a posible crash in GSO. syzbot report was: kernel BUG at include/linux/skbuff.h:2699! invalid opcode: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 3601 Comm: syz-executor210 Not tainted 5.18.0-syzkaller-11338-g2c5ca23f7414 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__skb_pull include/linux/skbuff.h:2699 [inline] RIP: 0010:skb_mac_gso_segment+0x48f/0x530 net/core/gro.c:136 Code: 00 48 c7 c7 00 96 d4 8a c6 05 cb d3 45 06 01 e8 26 bb d0 01 e9 2f fd ff ff 49 c7 c4 ea ff ff ff e9 f1 fe ff ff e8 91 84 19 fa <0f> 0b 48 89 df e8 97 44 66 fa e9 7f fd ff ff e8 ad 44 66 fa e9 48 RSP: 0018:ffffc90002e2f4b8 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 0000000000000012 RCX: 0000000000000000 RDX: ffff88805bb58000 RSI: ffffffff8760ed0f RDI: 0000000000000004 RBP: 0000000000005dbc R08: 0000000000000004 R09: 0000000000000fe0 R10: 0000000000000fe4 R11: 0000000000000000 R12: 0000000000000fe0 R13: ffff88807194d780 R14: 1ffff920005c5e9b R15: 0000000000000012 FS: 000055555730f300(0000) GS:ffff8880b9d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000200015c0 CR3: 0000000071ff8000 CR4: 0000000000350ee0 Call Trace: __skb_gso_segment+0x327/0x6e0 net/core/dev.c:3411 skb_gso_segment include/linux/netdevice.h:4749 [inline] validate_xmit_skb+0x6bc/0xf10 net/core/dev.c:3669 validate_xmit_skb_list+0xbc/0x120 net/core/dev.c:3719 sch_direct_xmit+0x3d1/0xbe0 net/sched/sch_generic.c:327 __dev_xmit_skb net/core/dev.c:3815 [inline] __dev_queue_xmit+0x14a1/0x3a00 net/core/dev.c:4219 packet_snd net/packet/af_packet.c:3071 [inline] packet_sendmsg+0x21cb/0x5550 net/packet/af_packet.c:3102 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:734 ____sys_sendmsg+0x6eb/0x810 net/socket.c:2492 ___sys_sendmsg+0xf3/0x170 net/socket.c:2546 __sys_sendmsg net/socket.c:2575 [inline] __do_sys_sendmsg net/socket.c:2584 [inline] __se_sys_sendmsg net/socket.c:2582 [inline] __x64_sys_sendmsg+0x132/0x220 net/socket.c:2582 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7f4b95da06c9 Code: 28 c3 e8 4a 15 00 00 66 2e 0f 1f 84 00 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffd7defc4c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007ffd7defc4f0 RCX: 00007f4b95da06c9 RDX: 0000000000000000 RSI: 0000000020000140 RDI: 0000000000000003 RBP: 0000000000000003 R08: bb1414ac00000050 R09: bb1414ac00000050 R10: 0000000000000004 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffd7defc4e0 R14: 00007ffd7defc4d8 R15: 00007ffd7defc4d4 Fixes: dfed913e8b55 ("net/af_packet: add VLAN support for AF_PACKET SOCK_RAW GSO") Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: Hangbin Liu Acked-by: Willem de Bruijn Cc: Michael S. Tsirkin Signed-off-by: Jakub Kicinski --- net/packet/af_packet.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 677f9cfa9660..ca6e92a22923 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1935,8 +1935,10 @@ static void packet_parse_headers(struct sk_buff *skb, struct socket *sock) /* Move network header to the right position for VLAN tagged packets */ if (likely(skb->dev->type == ARPHRD_ETHER) && eth_type_vlan(skb->protocol) && - __vlan_get_protocol(skb, skb->protocol, &depth) != 0) - skb_set_network_header(skb, depth); + __vlan_get_protocol(skb, skb->protocol, &depth) != 0) { + if (pskb_may_pull(skb, depth)) + skb_set_network_header(skb, depth); + } skb_probe_transport_header(skb); }