From 7a8227b2e76be506b2ac64d2beac950ca04892a5 Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Tue, 27 Jun 2023 03:50:00 +0000 Subject: [PATCH 01/70] net: lan743x: Don't sleep in atomic context dev_set_rx_mode() grabs a spin_lock, and the lan743x implementation proceeds subsequently to go to sleep using readx_poll_timeout(). Introduce a helper wrapping the readx_poll_timeout_atomic() function and use it to replace the calls to readx_polL_timeout(). Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver") Cc: stable@vger.kernel.org Cc: Bryan Whitehead Cc: UNGLinuxDriver@microchip.com Signed-off-by: Moritz Fischer Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20230627035000.1295254-1-moritzf@google.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microchip/lan743x_main.c | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 5b0e8b0e0c89..a36f6369f132 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -144,6 +144,18 @@ static int lan743x_csr_light_reset(struct lan743x_adapter *adapter) !(data & HW_CFG_LRST_), 100000, 10000000); } +static int lan743x_csr_wait_for_bit_atomic(struct lan743x_adapter *adapter, + int offset, u32 bit_mask, + int target_value, int udelay_min, + int udelay_max, int count) +{ + u32 data; + + return readx_poll_timeout_atomic(LAN743X_CSR_READ_OP, offset, data, + target_value == !!(data & bit_mask), + udelay_max, udelay_min * count); +} + static int lan743x_csr_wait_for_bit(struct lan743x_adapter *adapter, int offset, u32 bit_mask, int target_value, int usleep_min, @@ -736,8 +748,8 @@ static int lan743x_dp_write(struct lan743x_adapter *adapter, u32 dp_sel; int i; - if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_, - 1, 40, 100, 100)) + if (lan743x_csr_wait_for_bit_atomic(adapter, DP_SEL, DP_SEL_DPRDY_, + 1, 40, 100, 100)) return -EIO; dp_sel = lan743x_csr_read(adapter, DP_SEL); dp_sel &= ~DP_SEL_MASK_; @@ -748,8 +760,9 @@ static int lan743x_dp_write(struct lan743x_adapter *adapter, lan743x_csr_write(adapter, DP_ADDR, addr + i); lan743x_csr_write(adapter, DP_DATA_0, buf[i]); lan743x_csr_write(adapter, DP_CMD, DP_CMD_WRITE_); - if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_, - 1, 40, 100, 100)) + if (lan743x_csr_wait_for_bit_atomic(adapter, DP_SEL, + DP_SEL_DPRDY_, + 1, 40, 100, 100)) return -EIO; } From 6feb37b3b06e9049e20dcf7e23998f92c9c5be9a Mon Sep 17 00:00:00 2001 From: Chengfeng Ye Date: Tue, 27 Jun 2023 12:03:40 +0000 Subject: [PATCH 02/70] sctp: fix potential deadlock on &net->sctp.addr_wq_lock As &net->sctp.addr_wq_lock is also acquired by the timer sctp_addr_wq_timeout_handler() in protocal.c, the same lock acquisition at sctp_auto_asconf_init() seems should disable irq since it is called from sctp_accept() under process context. Possible deadlock scenario: sctp_accept() -> sctp_sock_migrate() -> sctp_auto_asconf_init() -> spin_lock(&net->sctp.addr_wq_lock) -> sctp_addr_wq_timeout_handler() -> spin_lock_bh(&net->sctp.addr_wq_lock); (deadlock here) This flaw was found using an experimental static analysis tool we are developing for irq-related deadlock. The tentative patch fix the potential deadlock by spin_lock_bh(). Signed-off-by: Chengfeng Ye Fixes: 34e5b0118685 ("sctp: delay auto_asconf init until binding the first addr") Acked-by: Xin Long Link: https://lore.kernel.org/r/20230627120340.19432-1-dg573847474@gmail.com Signed-off-by: Paolo Abeni --- net/sctp/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6554a357fe33..9388d98aebc0 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -364,9 +364,9 @@ static void sctp_auto_asconf_init(struct sctp_sock *sp) struct net *net = sock_net(&sp->inet.sk); if (net->sctp.default_auto_asconf) { - spin_lock(&net->sctp.addr_wq_lock); + spin_lock_bh(&net->sctp.addr_wq_lock); list_add_tail(&sp->auto_asconf_list, &net->sctp.auto_asconf_splist); - spin_unlock(&net->sctp.addr_wq_lock); + spin_unlock_bh(&net->sctp.addr_wq_lock); sp->do_auto_asconf = 1; } } From b4ee93380b3c891fea996af8d1d3ca0e36ad31f0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 27 Jun 2023 14:38:11 +0200 Subject: [PATCH 03/70] net/sched: act_ipt: add sanity checks on table name and hook locations Looks like "tc" hard-codes "mangle" as the only supported table name, but on kernel side there are no checks. This is wrong. Not all xtables targets are safe to call from tc. E.g. "nat" targets assume skb has a conntrack object assigned to it. Normally those get called from netfilter nat core which consults the nat table to obtain the address mapping. "tc" userspace either sets PRE or POSTROUTING as hook number, but there is no validation of this on kernel side, so update netlink policy to reject bogus numbers. Some targets may assume skb_dst is set for input/forward hooks, so prevent those from being used. act_ipt uses the hook number in two places: 1. the state hook number, this is fine as-is 2. to set par.hook_mask The latter is a bit mask, so update the assignment to make xt_check_target() to the right thing. Followup patch adds required checks for the skb/packet headers before calling the targets evaluation function. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Florian Westphal Reviewed-by: Simon Horman Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- net/sched/act_ipt.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 5d96ffebd40f..ea7f151e7dd2 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -48,7 +48,7 @@ static int ipt_init_target(struct net *net, struct xt_entry_target *t, par.entryinfo = &e; par.target = target; par.targinfo = t->data; - par.hook_mask = hook; + par.hook_mask = 1 << hook; par.family = NFPROTO_IPV4; ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false); @@ -85,7 +85,8 @@ static void tcf_ipt_release(struct tc_action *a) static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = { [TCA_IPT_TABLE] = { .type = NLA_STRING, .len = IFNAMSIZ }, - [TCA_IPT_HOOK] = { .type = NLA_U32 }, + [TCA_IPT_HOOK] = NLA_POLICY_RANGE(NLA_U32, NF_INET_PRE_ROUTING, + NF_INET_NUMHOOKS), [TCA_IPT_INDEX] = { .type = NLA_U32 }, [TCA_IPT_TARG] = { .len = sizeof(struct xt_entry_target) }, }; @@ -158,15 +159,27 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, return -EEXIST; } } - hook = nla_get_u32(tb[TCA_IPT_HOOK]); - err = -ENOMEM; - tname = kmalloc(IFNAMSIZ, GFP_KERNEL); + err = -EINVAL; + hook = nla_get_u32(tb[TCA_IPT_HOOK]); + switch (hook) { + case NF_INET_PRE_ROUTING: + break; + case NF_INET_POST_ROUTING: + break; + default: + goto err1; + } + + if (tb[TCA_IPT_TABLE]) { + /* mangle only for now */ + if (nla_strcmp(tb[TCA_IPT_TABLE], "mangle")) + goto err1; + } + + tname = kstrdup("mangle", GFP_KERNEL); if (unlikely(!tname)) goto err1; - if (tb[TCA_IPT_TABLE] == NULL || - nla_strscpy(tname, tb[TCA_IPT_TABLE], IFNAMSIZ) >= IFNAMSIZ) - strcpy(tname, "mangle"); t = kmemdup(td, td->u.target_size, GFP_KERNEL); if (unlikely(!t)) From b2dc32dcba08bf55cec600caa76f4afd2e3614df Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 27 Jun 2023 14:38:12 +0200 Subject: [PATCH 04/70] net/sched: act_ipt: add sanity checks on skb before calling target Netfilter targets make assumptions on the skb state, for example iphdr is supposed to be in the linear area. This is normally done by IP stack, but in act_ipt case no such checks are made. Some targets can even assume that skb_dst will be valid. Make a minimum effort to check for this: - Don't call the targets eval function for non-ipv4 skbs. - Don't call the targets eval function for POSTROUTING emulation when the skb has no dst set. v3: use skb_protocol helper (Davide Caratti) Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Florian Westphal Reviewed-by: Simon Horman Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- net/sched/act_ipt.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index ea7f151e7dd2..a6b522b512dc 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -230,6 +230,26 @@ static int tcf_xt_init(struct net *net, struct nlattr *nla, a, &act_xt_ops, tp, flags); } +static bool tcf_ipt_act_check(struct sk_buff *skb) +{ + const struct iphdr *iph; + unsigned int nhoff, len; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + return false; + + nhoff = skb_network_offset(skb); + iph = ip_hdr(skb); + if (iph->ihl < 5 || iph->version != 4) + return false; + + len = skb_ip_totlen(skb); + if (skb->len < nhoff + len || len < (iph->ihl * 4u)) + return false; + + return pskb_may_pull(skb, iph->ihl * 4u); +} + TC_INDIRECT_SCOPE int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) @@ -244,9 +264,22 @@ TC_INDIRECT_SCOPE int tcf_ipt_act(struct sk_buff *skb, .pf = NFPROTO_IPV4, }; + if (skb_protocol(skb, false) != htons(ETH_P_IP)) + return TC_ACT_UNSPEC; + if (skb_unclone(skb, GFP_ATOMIC)) return TC_ACT_UNSPEC; + if (!tcf_ipt_act_check(skb)) + return TC_ACT_UNSPEC; + + if (state.hook == NF_INET_POST_ROUTING) { + if (!skb_dst(skb)) + return TC_ACT_UNSPEC; + + state.out = skb->dev; + } + spin_lock(&ipt->tcf_lock); tcf_lastuse_update(&ipt->tcf_tm); From 93d75d475c5dc3404292976147d063ee4d808592 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 27 Jun 2023 14:38:13 +0200 Subject: [PATCH 05/70] net/sched: act_ipt: zero skb->cb before calling target xtables relies on skb being owned by ip stack, i.e. with ipv4 check in place skb->cb is supposed to be IPCB. I don't see an immediate problem (REJECT target cannot be used anymore now that PRE/POSTROUTING hook validation has been fixed), but better be safe than sorry. A much better patch would be to either mark act_ipt as "depends on BROKEN" or remove it altogether. I plan to do this for -next in the near future. This tc extension is broken in the sense that tc lacks an equivalent of NF_STOLEN verdict. With NF_STOLEN, target function takes complete ownership of skb, caller cannot dereference it anymore. ACT_STOLEN cannot be used for this: it has a different meaning, caller is allowed to dereference the skb. At this time NF_STOLEN won't be returned by any targets as far as I can see, but this may change in the future. It might be possible to work around this via list of allowed target extensions known to only return DROP or ACCEPT verdicts, but this is error prone/fragile. Existing selftest only validates xt_LOG and act_ipt is restricted to ipv4 so I don't think this action is used widely. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Florian Westphal Reviewed-by: Simon Horman Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- net/sched/act_ipt.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index a6b522b512dc..598d6e299152 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -254,6 +255,7 @@ TC_INDIRECT_SCOPE int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { + char saved_cb[sizeof_field(struct sk_buff, cb)]; int ret = 0, result = 0; struct tcf_ipt *ipt = to_ipt(a); struct xt_action_param par; @@ -280,6 +282,8 @@ TC_INDIRECT_SCOPE int tcf_ipt_act(struct sk_buff *skb, state.out = skb->dev; } + memcpy(saved_cb, skb->cb, sizeof(saved_cb)); + spin_lock(&ipt->tcf_lock); tcf_lastuse_update(&ipt->tcf_tm); @@ -292,6 +296,9 @@ TC_INDIRECT_SCOPE int tcf_ipt_act(struct sk_buff *skb, par.state = &state; par.target = ipt->tcfi_t->u.kernel.target; par.targinfo = ipt->tcfi_t->data; + + memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + ret = par.target->target(skb, &par); switch (ret) { @@ -312,6 +319,9 @@ TC_INDIRECT_SCOPE int tcf_ipt_act(struct sk_buff *skb, break; } spin_unlock(&ipt->tcf_lock); + + memcpy(skb->cb, saved_cb, sizeof(skb->cb)); + return result; } From 4fd44b82b7aceaa35c2901c6546d2c4198e0799d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 27 Jun 2023 19:31:12 +0300 Subject: [PATCH 06/70] net: mscc: ocelot: don't report that RX timestamping is enabled by default PTP RX timestamping should be enabled when the user requests it, not by default. If it is enabled by default, it can be problematic when the ocelot driver is a DSA master, and it sidesteps what DSA tries to avoid through __dsa_master_hwtstamp_validate(). Additionally, after the change which made ocelot trap PTP packets only to the CPU at ocelot_hwtstamp_set() time, it is no longer even true that RX timestamping is enabled by default, because until ocelot_hwtstamp_set() is called, the PTP traps are actually not set up. So the rx_filter field of ocelot->hwtstamp_config reflects an incorrect reality. Fixes: 96ca08c05838 ("net: mscc: ocelot: set up traps for PTP packets") Fixes: 4e3b0468e6d7 ("net: mscc: PTP Hardware Clock (PHC) support") Signed-off-by: Vladimir Oltean Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mscc/ocelot_ptp.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c index 2180ae94c744..673bfd70867a 100644 --- a/drivers/net/ethernet/mscc/ocelot_ptp.c +++ b/drivers/net/ethernet/mscc/ocelot_ptp.c @@ -824,11 +824,6 @@ int ocelot_init_timestamp(struct ocelot *ocelot, ocelot_write(ocelot, PTP_CFG_MISC_PTP_EN, PTP_CFG_MISC); - /* There is no device reconfiguration, PTP Rx stamping is always - * enabled. - */ - ocelot->hwtstamp_config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; - return 0; } EXPORT_SYMBOL(ocelot_init_timestamp); From 45d0fcb5bc9558d0bf3d2fa7fabc5d8a88d35439 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 27 Jun 2023 19:31:13 +0300 Subject: [PATCH 07/70] net: mscc: ocelot: don't keep PTP configuration of all ports in single structure In a future change, the driver will need to determine whether PTP RX timestamping is enabled on a port (including whether traps were set up on that port in particular) and that is currently not possible. The driver supports different RX filters (L2, L4) and kinds of TX timestamping (one-step, two-step) on its ports, but it saves all configuration in a single struct hwtstamp_config that is global to the switch. So, the latest timestamping configuration on one port (including a request to disable timestamping) affects what gets reported for all ports, even though the configuration itself is still individual to each port. The port timestamping configurations are only coupled because of the common structure, so replace the hwtstamp_config with a mask of trapped protocols saved per port. We also have the ptp_cmd to distinguish between one-step and two-step PTP timestamping, so with those 2 bits of information we can fully reconstruct a descriptive struct hwtstamp_config for each port, during the SIOCGHWTSTAMP ioctl. Fixes: 4e3b0468e6d7 ("net: mscc: PTP Hardware Clock (PHC) support") Fixes: 96ca08c05838 ("net: mscc: ocelot: set up traps for PTP packets") Signed-off-by: Vladimir Oltean Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mscc/ocelot.c | 1 - drivers/net/ethernet/mscc/ocelot_ptp.c | 61 +++++++++++++++++--------- include/soc/mscc/ocelot.h | 10 +++-- 3 files changed, 48 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 1f5f00b30441..2fa833d041ba 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -2925,7 +2925,6 @@ int ocelot_init(struct ocelot *ocelot) } } - mutex_init(&ocelot->ptp_lock); mutex_init(&ocelot->mact_lock); mutex_init(&ocelot->fwd_domain_lock); mutex_init(&ocelot->tas_lock); diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c index 673bfd70867a..cb32234a5bf1 100644 --- a/drivers/net/ethernet/mscc/ocelot_ptp.c +++ b/drivers/net/ethernet/mscc/ocelot_ptp.c @@ -439,8 +439,12 @@ static int ocelot_ipv6_ptp_trap_del(struct ocelot *ocelot, int port) static int ocelot_setup_ptp_traps(struct ocelot *ocelot, int port, bool l2, bool l4) { + struct ocelot_port *ocelot_port = ocelot->ports[port]; int err; + ocelot_port->trap_proto &= ~(OCELOT_PROTO_PTP_L2 | + OCELOT_PROTO_PTP_L4); + if (l2) err = ocelot_l2_ptp_trap_add(ocelot, port); else @@ -464,6 +468,11 @@ static int ocelot_setup_ptp_traps(struct ocelot *ocelot, int port, if (err) return err; + if (l2) + ocelot_port->trap_proto |= OCELOT_PROTO_PTP_L2; + if (l4) + ocelot_port->trap_proto |= OCELOT_PROTO_PTP_L4; + return 0; err_ipv6: @@ -474,10 +483,38 @@ err_ipv4: return err; } +static int ocelot_traps_to_ptp_rx_filter(unsigned int proto) +{ + if ((proto & OCELOT_PROTO_PTP_L2) && (proto & OCELOT_PROTO_PTP_L4)) + return HWTSTAMP_FILTER_PTP_V2_EVENT; + else if (proto & OCELOT_PROTO_PTP_L2) + return HWTSTAMP_FILTER_PTP_V2_L2_EVENT; + else if (proto & OCELOT_PROTO_PTP_L4) + return HWTSTAMP_FILTER_PTP_V2_L4_EVENT; + + return HWTSTAMP_FILTER_NONE; +} + int ocelot_hwstamp_get(struct ocelot *ocelot, int port, struct ifreq *ifr) { - return copy_to_user(ifr->ifr_data, &ocelot->hwtstamp_config, - sizeof(ocelot->hwtstamp_config)) ? -EFAULT : 0; + struct ocelot_port *ocelot_port = ocelot->ports[port]; + struct hwtstamp_config cfg = {}; + + switch (ocelot_port->ptp_cmd) { + case IFH_REW_OP_TWO_STEP_PTP: + cfg.tx_type = HWTSTAMP_TX_ON; + break; + case IFH_REW_OP_ORIGIN_PTP: + cfg.tx_type = HWTSTAMP_TX_ONESTEP_SYNC; + break; + default: + cfg.tx_type = HWTSTAMP_TX_OFF; + break; + } + + cfg.rx_filter = ocelot_traps_to_ptp_rx_filter(ocelot_port->trap_proto); + + return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; } EXPORT_SYMBOL(ocelot_hwstamp_get); @@ -509,8 +546,6 @@ int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr) return -ERANGE; } - mutex_lock(&ocelot->ptp_lock); - switch (cfg.rx_filter) { case HWTSTAMP_FILTER_NONE: break; @@ -531,28 +566,14 @@ int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr) l4 = true; break; default: - mutex_unlock(&ocelot->ptp_lock); return -ERANGE; } err = ocelot_setup_ptp_traps(ocelot, port, l2, l4); - if (err) { - mutex_unlock(&ocelot->ptp_lock); + if (err) return err; - } - if (l2 && l4) - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; - else if (l2) - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; - else if (l4) - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT; - else - cfg.rx_filter = HWTSTAMP_FILTER_NONE; - - /* Commit back the result & save it */ - memcpy(&ocelot->hwtstamp_config, &cfg, sizeof(cfg)); - mutex_unlock(&ocelot->ptp_lock); + cfg.rx_filter = ocelot_traps_to_ptp_rx_filter(ocelot_port->trap_proto); return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; } diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index cb8fbb241879..22aae505c813 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -730,6 +730,11 @@ enum macaccess_entry_type { ENTRYTYPE_MACv6, }; +enum ocelot_proto { + OCELOT_PROTO_PTP_L2 = BIT(0), + OCELOT_PROTO_PTP_L4 = BIT(1), +}; + #define OCELOT_QUIRK_PCS_PERFORMS_RATE_ADAPTATION BIT(0) #define OCELOT_QUIRK_QSGMII_PORTS_MUST_BE_UP BIT(1) @@ -775,6 +780,8 @@ struct ocelot_port { unsigned int ptp_skbs_in_flight; struct sk_buff_head tx_skbs; + unsigned int trap_proto; + u16 mrp_ring_id; u8 ptp_cmd; @@ -868,12 +875,9 @@ struct ocelot { u8 mm_supported:1; struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_info; - struct hwtstamp_config hwtstamp_config; unsigned int ptp_skbs_in_flight; /* Protects the 2-step TX timestamp ID logic */ spinlock_t ts_id_lock; - /* Protects the PTP interface state */ - struct mutex ptp_lock; /* Protects the PTP clock */ spinlock_t ptp_clock_lock; struct ptp_pin_desc ptp_pins[OCELOT_PTP_PINS_NUM]; From 2edcfcbb3c5946609be1d8875473a240b170673b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 27 Jun 2023 19:31:14 +0300 Subject: [PATCH 08/70] net: dsa: felix: don't drop PTP frames with tag_8021q when RX timestamping is disabled The driver implements a workaround for the fact that it doesn't have an IRQ source to tell it whether PTP frames are available through the extraction registers, for those frames to be processed and passed towards the network stack. That workaround is to configure the switch, through felix_hwtstamp_set() -> felix_update_trapping_destinations(), to create two copies of PTP packets: one sent over Ethernet to the DSA master, and one to be consumed through the aforementioned CPU extraction queue registers. The reason why we want PTP packets to be consumed through the CPU extraction registers in the first place is because we want to see their hardware RX timestamp. With tag_8021q, that is only visible that way, and it isn't visible with the copy of the packet that's transmitted over Ethernet. The problem with the workaround implementation is that it drops the packet received over Ethernet, in expectation of its copy being present in the CPU extraction registers. However, if felix_hwtstamp_set() hasn't run (aka PTP RX timestamping is disabled), the driver will drop the original PTP frame and there will be no copy of it in the CPU extraction registers. So, the network stack will simply not see any PTP frame. Look at the port's trapping configuration to see whether the driver has previously enabled the CPU extraction registers. If it hasn't, just don't RX timestamp the frame and let it be passed up the stack by DSA, which is perfectly fine. Fixes: 0a6f17c6ae21 ("net: dsa: tag_ocelot_8021q: add support for PTP timestamping") Signed-off-by: Vladimir Oltean Signed-off-by: Paolo Abeni --- drivers/net/dsa/ocelot/felix.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 80861ac090ae..70c0e2b1936b 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -1725,6 +1725,18 @@ static bool felix_rxtstamp(struct dsa_switch *ds, int port, u32 tstamp_hi; u64 tstamp; + switch (type & PTP_CLASS_PMASK) { + case PTP_CLASS_L2: + if (!(ocelot->ports[port]->trap_proto & OCELOT_PROTO_PTP_L2)) + return false; + break; + case PTP_CLASS_IPV4: + case PTP_CLASS_IPV6: + if (!(ocelot->ports[port]->trap_proto & OCELOT_PROTO_PTP_L4)) + return false; + break; + } + /* If the "no XTR IRQ" workaround is in use, tell DSA to defer this skb * for RX timestamping. Then free it, and poll for its copy through * MMIO in the CPU port module, and inject that into the stack from From b4638af8885af93cd70351081da1909c59342440 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 27 Jun 2023 12:42:06 +0300 Subject: [PATCH 09/70] net: dsa: sja1105: always enable the INCL_SRCPT option Link-local traffic on bridged SJA1105 ports is sometimes tagged by the hardware with source port information (when the port is under a VLAN aware bridge). The tag_8021q source port identification has become more loose ("imprecise") and will report a plausible rather than exact bridge port, when under a bridge (be it VLAN-aware or VLAN-unaware). But link-local traffic always needs to know the precise source port. Modify the driver logic (and therefore: the tagging protocol itself) to always include the source port information with link-local packets, regardless of whether the port is standalone, under a VLAN-aware or VLAN-unaware bridge. This makes it possible for the tagging driver to give priority to that information over the tag_8021q VLAN header. The big drawback with INCL_SRCPT is that it makes it impossible to distinguish between an original MAC DA of 01:80:C2:XX:YY:ZZ and 01:80:C2:AA:BB:ZZ, because the tagger just patches MAC DA bytes 3 and 4 with zeroes. Only if PTP RX timestamping is enabled, the switch will generate a META follow-up frame containing the RX timestamp and the original bytes 3 and 4 of the MAC DA. Those will be used to patch up the original packet. Nonetheless, in the absence of PTP RX timestamping, we have to live with this limitation, since it is more important to have the more precise source port information for link-local traffic. Fixes: d7f9787a763f ("net: dsa: tag_8021q: add support for imprecise RX based on the VBID") Fixes: 91495f21fcec ("net: dsa: tag_8021q: replace the SVL bridging with VLAN-unaware IVL bridging") Signed-off-by: Vladimir Oltean Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- drivers/net/dsa/sja1105/sja1105_main.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index a55a6436fc05..dd154b2b9680 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -866,11 +866,11 @@ static int sja1105_init_general_params(struct sja1105_private *priv) .hostprio = 7, .mac_fltres1 = SJA1105_LINKLOCAL_FILTER_A, .mac_flt1 = SJA1105_LINKLOCAL_FILTER_A_MASK, - .incl_srcpt1 = false, + .incl_srcpt1 = true, .send_meta1 = false, .mac_fltres0 = SJA1105_LINKLOCAL_FILTER_B, .mac_flt0 = SJA1105_LINKLOCAL_FILTER_B_MASK, - .incl_srcpt0 = false, + .incl_srcpt0 = true, .send_meta0 = false, /* Default to an invalid value */ .mirr_port = priv->ds->num_ports, @@ -2405,11 +2405,6 @@ int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled, general_params->tpid = tpid; /* EtherType used to identify outer tagged (S-tag) VLAN traffic */ general_params->tpid2 = tpid2; - /* When VLAN filtering is on, we need to at least be able to - * decode management traffic through the "backup plan". - */ - general_params->incl_srcpt1 = enabled; - general_params->incl_srcpt0 = enabled; for (port = 0; port < ds->num_ports; port++) { if (dsa_is_unused_port(ds, port)) From c1ae02d876898b1b8ca1e12c6f84d7b406263800 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 27 Jun 2023 12:42:07 +0300 Subject: [PATCH 10/70] net: dsa: tag_sja1105: always prefer source port information from INCL_SRCPT Currently the sja1105 tagging protocol prefers using the source port information from the VLAN header if that is available, falling back to the INCL_SRCPT option if it isn't. The VLAN header is available for all frames except for META frames initiated by the switch (containing RX timestamps), and thus, the "if (is_link_local)" branch is practically dead. The tag_8021q source port identification has become more loose ("imprecise") and will report a plausible rather than exact bridge port, when under a bridge (be it VLAN-aware or VLAN-unaware). But link-local traffic always needs to know the precise source port. With incorrect source port reporting, for example PTP traffic over 2 bridged ports will all be seen on sockets opened on the first such port, which is incorrect. Now that the tagging protocol has been changed to make link-local frames always contain source port information, we can reverse the order of the checks so that we always give precedence to that information (which is always precise) in lieu of the tag_8021q VID which is only precise for a standalone port. Fixes: d7f9787a763f ("net: dsa: tag_8021q: add support for imprecise RX based on the VBID") Fixes: 91495f21fcec ("net: dsa: tag_8021q: replace the SVL bridging with VLAN-unaware IVL bridging") Signed-off-by: Vladimir Oltean Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- net/dsa/tag_sja1105.c | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index a5f3b73da417..92a626a05e82 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -545,10 +545,7 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, is_link_local = sja1105_is_link_local(skb); is_meta = sja1105_is_meta_frame(skb); - if (sja1105_skb_has_tag_8021q(skb)) { - /* Normal traffic path. */ - sja1105_vlan_rcv(skb, &source_port, &switch_id, &vbid, &vid); - } else if (is_link_local) { + if (is_link_local) { /* Management traffic path. Switch embeds the switch ID and * port ID into bytes of the destination MAC, courtesy of * the incl_srcpt options. @@ -562,16 +559,39 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, sja1105_meta_unpack(skb, &meta); source_port = meta.source_port; switch_id = meta.switch_id; - } else { + } + + /* Normal data plane traffic and link-local frames are tagged with + * a tag_8021q VLAN which we have to strip + */ + if (sja1105_skb_has_tag_8021q(skb)) { + int tmp_source_port = -1, tmp_switch_id = -1; + + sja1105_vlan_rcv(skb, &tmp_source_port, &tmp_switch_id, &vbid, + &vid); + /* Preserve the source information from the INCL_SRCPT option, + * if available. This allows us to not overwrite a valid source + * port and switch ID with zeroes when receiving link-local + * frames from a VLAN-unaware bridged port (non-zero vbid) or a + * VLAN-aware bridged port (non-zero vid). + */ + if (source_port == -1) + source_port = tmp_source_port; + if (switch_id == -1) + switch_id = tmp_switch_id; + } else if (source_port == -1 && switch_id == -1) { + /* Packets with no source information have no chance of + * getting accepted, drop them straight away. + */ return NULL; } - if (vbid >= 1) - skb->dev = dsa_tag_8021q_find_port_by_vbid(netdev, vbid); - else if (source_port == -1 || switch_id == -1) - skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid); - else + if (source_port != -1 && switch_id != -1) skb->dev = dsa_master_find_slave(netdev, switch_id, source_port); + else if (vbid >= 1) + skb->dev = dsa_tag_8021q_find_port_by_vbid(netdev, vbid); + else + skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid); if (!skb->dev) { netdev_warn(netdev, "Couldn't decode source port\n"); return NULL; From 046f753da6143ee16452966915087ec8b0de3c70 Mon Sep 17 00:00:00 2001 From: Tobias Heider Date: Wed, 28 Jun 2023 02:13:32 +0200 Subject: [PATCH 11/70] Add MODULE_FIRMWARE() for FIRMWARE_TG357766. Fixes a bug where on the M1 mac mini initramfs-tools fails to include the necessary firmware into the initrd. Fixes: c4dab50697ff ("tg3: Download 57766 EEE service patch firmware") Signed-off-by: Tobias Heider Reviewed-by: Michael Chan Link: https://lore.kernel.org/r/ZJt7LKzjdz8+dClx@tobhe.de Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/tg3.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 5e68a6a4b2af..5ef073a79ce9 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -225,6 +225,7 @@ MODULE_AUTHOR("David S. Miller (davem@redhat.com) and Jeff Garzik (jgarzik@pobox MODULE_DESCRIPTION("Broadcom Tigon3 ethernet driver"); MODULE_LICENSE("GPL"); MODULE_FIRMWARE(FIRMWARE_TG3); +MODULE_FIRMWARE(FIRMWARE_TG357766); MODULE_FIRMWARE(FIRMWARE_TG3TSO); MODULE_FIRMWARE(FIRMWARE_TG3TSO5); From bb23f07cb63975968bbabe314486e2b087234fc5 Mon Sep 17 00:00:00 2001 From: Dan Gora Date: Tue, 9 May 2023 12:51:19 -0700 Subject: [PATCH 12/70] Bluetooth: btrtl: Add missing MODULE_FIRMWARE declarations Add missing MODULE_FIRMWARE declarations for firmware referenced in btrtl.c. Signed-off-by: Dan Gora Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Jakub Kicinski --- drivers/bluetooth/btrtl.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index 2915c82d719d..d978e7cea873 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -1367,14 +1367,30 @@ MODULE_FIRMWARE("rtl_bt/rtl8723cs_vf_fw.bin"); MODULE_FIRMWARE("rtl_bt/rtl8723cs_vf_config.bin"); MODULE_FIRMWARE("rtl_bt/rtl8723cs_xx_fw.bin"); MODULE_FIRMWARE("rtl_bt/rtl8723cs_xx_config.bin"); +MODULE_FIRMWARE("rtl_bt/rtl8723d_fw.bin"); +MODULE_FIRMWARE("rtl_bt/rtl8723d_config.bin"); MODULE_FIRMWARE("rtl_bt/rtl8723ds_fw.bin"); MODULE_FIRMWARE("rtl_bt/rtl8723ds_config.bin"); MODULE_FIRMWARE("rtl_bt/rtl8761a_fw.bin"); MODULE_FIRMWARE("rtl_bt/rtl8761a_config.bin"); +MODULE_FIRMWARE("rtl_bt/rtl8761b_fw.bin"); +MODULE_FIRMWARE("rtl_bt/rtl8761b_config.bin"); +MODULE_FIRMWARE("rtl_bt/rtl8761bu_fw.bin"); +MODULE_FIRMWARE("rtl_bt/rtl8761bu_config.bin"); MODULE_FIRMWARE("rtl_bt/rtl8821a_fw.bin"); MODULE_FIRMWARE("rtl_bt/rtl8821a_config.bin"); +MODULE_FIRMWARE("rtl_bt/rtl8821c_fw.bin"); +MODULE_FIRMWARE("rtl_bt/rtl8821c_config.bin"); +MODULE_FIRMWARE("rtl_bt/rtl8821cs_fw.bin"); +MODULE_FIRMWARE("rtl_bt/rtl8821cs_config.bin"); MODULE_FIRMWARE("rtl_bt/rtl8822b_fw.bin"); MODULE_FIRMWARE("rtl_bt/rtl8822b_config.bin"); +MODULE_FIRMWARE("rtl_bt/rtl8822cs_fw.bin"); +MODULE_FIRMWARE("rtl_bt/rtl8822cs_config.bin"); +MODULE_FIRMWARE("rtl_bt/rtl8822cu_fw.bin"); +MODULE_FIRMWARE("rtl_bt/rtl8822cu_config.bin"); +MODULE_FIRMWARE("rtl_bt/rtl8851bu_fw.bin"); +MODULE_FIRMWARE("rtl_bt/rtl8851bu_config.bin"); MODULE_FIRMWARE("rtl_bt/rtl8852au_fw.bin"); MODULE_FIRMWARE("rtl_bt/rtl8852au_config.bin"); MODULE_FIRMWARE("rtl_bt/rtl8852bs_fw.bin"); @@ -1383,5 +1399,3 @@ MODULE_FIRMWARE("rtl_bt/rtl8852bu_fw.bin"); MODULE_FIRMWARE("rtl_bt/rtl8852bu_config.bin"); MODULE_FIRMWARE("rtl_bt/rtl8852cu_fw.bin"); MODULE_FIRMWARE("rtl_bt/rtl8852cu_config.bin"); -MODULE_FIRMWARE("rtl_bt/rtl8851bu_fw.bin"); -MODULE_FIRMWARE("rtl_bt/rtl8851bu_config.bin"); From 022b6101c28a33fa8073b284423b88dc46b03d87 Mon Sep 17 00:00:00 2001 From: Dan Gora Date: Tue, 9 May 2023 12:45:24 -0700 Subject: [PATCH 13/70] Bluetooth: btusb: Add device 6655:8771 to device tables This device is an Inspire branded BT 5.1 USB dongle with a Realtek RTL8761BU chip using the "Best Buy China" vendor ID. The device table is as follows: T: Bus=01 Lev=01 Prnt=02 Port=09 Cnt=01 Dev#= 7 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=6655 ProdID=8771 Rev=02.00 S: Manufacturer=Realtek S: Product=Bluetooth Radio S: SerialNumber=00E04C239987 C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms Signed-off-by: Dan Gora Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Jakub Kicinski --- drivers/bluetooth/btusb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 2a8e2bb038f5..0b0a02cf0d4a 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -655,6 +655,8 @@ static const struct usb_device_id blacklist_table[] = { BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0x8771), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x6655, 0x8771), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x7392, 0xc611), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x2b89, 0x8761), .driver_info = BTUSB_REALTEK | From 8153b738bc547878a017889d2b1cf8dd2de0e0c6 Mon Sep 17 00:00:00 2001 From: Min-Hua Chen Date: Fri, 19 May 2023 18:43:23 +0800 Subject: [PATCH 14/70] Bluetooth: btqca: use le32_to_cpu for ver.soc_id Use le32_to_cpu for ver.soc_id to fix the following sparse warning. drivers/bluetooth/btqca.c:640:24: sparse: warning: restricted __le32 degrades to integer Signed-off-by: Min-Hua Chen Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Jakub Kicinski --- drivers/bluetooth/btqca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index fd0941fe8608..e7e58a956d15 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -637,7 +637,7 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, snprintf(config.fwname, sizeof(config.fwname), "qca/%s", firmware_name); else if (qca_is_wcn399x(soc_type)) { - if (ver.soc_id == QCA_WCN3991_SOC_ID) { + if (le32_to_cpu(ver.soc_id) == QCA_WCN3991_SOC_ID) { snprintf(config.fwname, sizeof(config.fwname), "qca/crnv%02xu.bin", rom_ver); } else { From f752a0b334bb95fe9b42ecb511e0864e2768046f Mon Sep 17 00:00:00 2001 From: Zhengping Jiang Date: Wed, 24 May 2023 17:04:15 -0700 Subject: [PATCH 15/70] Bluetooth: L2CAP: Fix use-after-free Fix potential use-after-free in l2cap_le_command_rej. Signed-off-by: Zhengping Jiang Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Jakub Kicinski --- net/bluetooth/l2cap_core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index c5e8798e297c..17ca13e8c044 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -6374,9 +6374,14 @@ static inline int l2cap_le_command_rej(struct l2cap_conn *conn, if (!chan) goto done; + chan = l2cap_chan_hold_unless_zero(chan); + if (!chan) + goto done; + l2cap_chan_lock(chan); l2cap_chan_del(chan, ECONNREFUSED); l2cap_chan_unlock(chan); + l2cap_chan_put(chan); done: mutex_unlock(&conn->chan_lock); From 0cb7365850bacb8c2a9975cae672d65714d8daa1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 31 May 2023 11:04:23 +0200 Subject: [PATCH 16/70] Bluetooth: fix invalid-bdaddr quirk for non-persistent setup Devices that lack persistent storage for the device address can indicate this by setting the HCI_QUIRK_INVALID_BDADDR which causes the controller to be marked as unconfigured until user space has set a valid address. Once configured, the device address must be set on every setup for controllers with HCI_QUIRK_NON_PERSISTENT_SETUP to avoid marking the controller as unconfigured and requiring the address to be set again. Fixes: 740011cfe948 ("Bluetooth: Add new quirk for non-persistent setup settings") Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Jakub Kicinski --- net/bluetooth/hci_sync.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 804cde43b4e0..b5b1b610df33 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -4626,23 +4626,17 @@ static int hci_dev_setup_sync(struct hci_dev *hdev) invalid_bdaddr = test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks); if (!ret) { - if (test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks)) { - if (!bacmp(&hdev->public_addr, BDADDR_ANY)) - hci_dev_get_bd_addr_from_property(hdev); + if (test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks) && + !bacmp(&hdev->public_addr, BDADDR_ANY)) + hci_dev_get_bd_addr_from_property(hdev); - if (bacmp(&hdev->public_addr, BDADDR_ANY) && - hdev->set_bdaddr) { - ret = hdev->set_bdaddr(hdev, - &hdev->public_addr); - - /* If setting of the BD_ADDR from the device - * property succeeds, then treat the address - * as valid even if the invalid BD_ADDR - * quirk indicates otherwise. - */ - if (!ret) - invalid_bdaddr = false; - } + if ((invalid_bdaddr || + test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks)) && + bacmp(&hdev->public_addr, BDADDR_ANY) && + hdev->set_bdaddr) { + ret = hdev->set_bdaddr(hdev, &hdev->public_addr); + if (!ret) + invalid_bdaddr = false; } } From 6945795bc81ab7be22750ecfb365056688f2fada Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 31 May 2023 11:04:24 +0200 Subject: [PATCH 17/70] Bluetooth: fix use-bdaddr-property quirk Devices that lack persistent storage for the device address can indicate this by setting the HCI_QUIRK_INVALID_BDADDR which causes the controller to be marked as unconfigured until user space has set a valid address. The related HCI_QUIRK_USE_BDADDR_PROPERTY was later added to similarly indicate that the device lacks a valid address but that one may be specified in the devicetree. As is clear from commit 7a0e5b15ca45 ("Bluetooth: Add quirk for reading BD_ADDR from fwnode property") that added and documented this quirk and commits like de79a9df1692 ("Bluetooth: btqcomsmd: use HCI_QUIRK_USE_BDADDR_PROPERTY"), the device address of controllers with this flag should be treated as invalid until user space has had a chance to configure the controller in case the devicetree property is missing. As it does not make sense to allow controllers with invalid addresses, restore the original semantics, which also makes sure that the implementation is consistent (e.g. get_missing_options() indicates that the address must be set) and matches the documentation (including comments in the code, such as, "In case any of them is set, the controller has to start up as unconfigured."). Fixes: e668eb1e1578 ("Bluetooth: hci_core: Don't stop BT if the BD address missing in dts") Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Jakub Kicinski --- net/bluetooth/hci_sync.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index b5b1b610df33..8561616abbe5 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -4623,16 +4623,14 @@ static int hci_dev_setup_sync(struct hci_dev *hdev) * BD_ADDR invalid before creating the HCI device or in * its setup callback. */ - invalid_bdaddr = test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks); - + invalid_bdaddr = test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) || + test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); if (!ret) { if (test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks) && !bacmp(&hdev->public_addr, BDADDR_ANY)) hci_dev_get_bd_addr_from_property(hdev); - if ((invalid_bdaddr || - test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks)) && - bacmp(&hdev->public_addr, BDADDR_ANY) && + if (invalid_bdaddr && bacmp(&hdev->public_addr, BDADDR_ANY) && hdev->set_bdaddr) { ret = hdev->set_bdaddr(hdev, &hdev->public_addr); if (!ret) From 1728137b33c00d5a2b5110ed7aafb42e7c32e4a1 Mon Sep 17 00:00:00 2001 From: Sungwoo Kim Date: Wed, 31 May 2023 01:39:56 -0400 Subject: [PATCH 18/70] Bluetooth: L2CAP: Fix use-after-free in l2cap_sock_ready_cb l2cap_sock_release(sk) frees sk. However, sk's children are still alive and point to the already free'd sk's address. To fix this, l2cap_sock_release(sk) also cleans sk's children. ================================================================== BUG: KASAN: use-after-free in l2cap_sock_ready_cb+0xb7/0x100 net/bluetooth/l2cap_sock.c:1650 Read of size 8 at addr ffff888104617aa8 by task kworker/u3:0/276 CPU: 0 PID: 276 Comm: kworker/u3:0 Not tainted 6.2.0-00001-gef397bd4d5fb-dirty #59 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 Workqueue: hci2 hci_rx_work Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x72/0x95 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:306 [inline] print_report+0x175/0x478 mm/kasan/report.c:417 kasan_report+0xb1/0x130 mm/kasan/report.c:517 l2cap_sock_ready_cb+0xb7/0x100 net/bluetooth/l2cap_sock.c:1650 l2cap_chan_ready+0x10e/0x1e0 net/bluetooth/l2cap_core.c:1386 l2cap_config_req+0x753/0x9f0 net/bluetooth/l2cap_core.c:4480 l2cap_bredr_sig_cmd net/bluetooth/l2cap_core.c:5739 [inline] l2cap_sig_channel net/bluetooth/l2cap_core.c:6509 [inline] l2cap_recv_frame+0xe2e/0x43c0 net/bluetooth/l2cap_core.c:7788 l2cap_recv_acldata+0x6ed/0x7e0 net/bluetooth/l2cap_core.c:8506 hci_acldata_packet net/bluetooth/hci_core.c:3813 [inline] hci_rx_work+0x66e/0xbc0 net/bluetooth/hci_core.c:4048 process_one_work+0x4ea/0x8e0 kernel/workqueue.c:2289 worker_thread+0x364/0x8e0 kernel/workqueue.c:2436 kthread+0x1b9/0x200 kernel/kthread.c:376 ret_from_fork+0x2c/0x50 arch/x86/entry/entry_64.S:308 Allocated by task 288: kasan_save_stack+0x22/0x50 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 ____kasan_kmalloc mm/kasan/common.c:374 [inline] __kasan_kmalloc+0x82/0x90 mm/kasan/common.c:383 kasan_kmalloc include/linux/kasan.h:211 [inline] __do_kmalloc_node mm/slab_common.c:968 [inline] __kmalloc+0x5a/0x140 mm/slab_common.c:981 kmalloc include/linux/slab.h:584 [inline] sk_prot_alloc+0x113/0x1f0 net/core/sock.c:2040 sk_alloc+0x36/0x3c0 net/core/sock.c:2093 l2cap_sock_alloc.constprop.0+0x39/0x1c0 net/bluetooth/l2cap_sock.c:1852 l2cap_sock_create+0x10d/0x220 net/bluetooth/l2cap_sock.c:1898 bt_sock_create+0x183/0x290 net/bluetooth/af_bluetooth.c:132 __sock_create+0x226/0x380 net/socket.c:1518 sock_create net/socket.c:1569 [inline] __sys_socket_create net/socket.c:1606 [inline] __sys_socket_create net/socket.c:1591 [inline] __sys_socket+0x112/0x200 net/socket.c:1639 __do_sys_socket net/socket.c:1652 [inline] __se_sys_socket net/socket.c:1650 [inline] __x64_sys_socket+0x40/0x50 net/socket.c:1650 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3f/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x72/0xdc Freed by task 288: kasan_save_stack+0x22/0x50 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 kasan_save_free_info+0x2e/0x50 mm/kasan/generic.c:523 ____kasan_slab_free mm/kasan/common.c:236 [inline] ____kasan_slab_free mm/kasan/common.c:200 [inline] __kasan_slab_free+0x10a/0x190 mm/kasan/common.c:244 kasan_slab_free include/linux/kasan.h:177 [inline] slab_free_hook mm/slub.c:1781 [inline] slab_free_freelist_hook mm/slub.c:1807 [inline] slab_free mm/slub.c:3787 [inline] __kmem_cache_free+0x88/0x1f0 mm/slub.c:3800 sk_prot_free net/core/sock.c:2076 [inline] __sk_destruct+0x347/0x430 net/core/sock.c:2168 sk_destruct+0x9c/0xb0 net/core/sock.c:2183 __sk_free+0x82/0x220 net/core/sock.c:2194 sk_free+0x7c/0xa0 net/core/sock.c:2205 sock_put include/net/sock.h:1991 [inline] l2cap_sock_kill+0x256/0x2b0 net/bluetooth/l2cap_sock.c:1257 l2cap_sock_release+0x1a7/0x220 net/bluetooth/l2cap_sock.c:1428 __sock_release+0x80/0x150 net/socket.c:650 sock_close+0x19/0x30 net/socket.c:1368 __fput+0x17a/0x5c0 fs/file_table.c:320 task_work_run+0x132/0x1c0 kernel/task_work.c:179 resume_user_mode_work include/linux/resume_user_mode.h:49 [inline] exit_to_user_mode_loop kernel/entry/common.c:171 [inline] exit_to_user_mode_prepare+0x113/0x120 kernel/entry/common.c:203 __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline] syscall_exit_to_user_mode+0x21/0x50 kernel/entry/common.c:296 do_syscall_64+0x4c/0x90 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x72/0xdc The buggy address belongs to the object at ffff888104617800 which belongs to the cache kmalloc-1k of size 1024 The buggy address is located 680 bytes inside of 1024-byte region [ffff888104617800, ffff888104617c00) The buggy address belongs to the physical page: page:00000000dbca6a80 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff888104614000 pfn:0x104614 head:00000000dbca6a80 order:2 compound_mapcount:0 subpages_mapcount:0 compound_pincount:0 flags: 0x200000000010200(slab|head|node=0|zone=2) raw: 0200000000010200 ffff888100041dc0 ffffea0004212c10 ffffea0004234b10 raw: ffff888104614000 0000000000080002 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888104617980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888104617a00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff888104617a80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff888104617b00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888104617b80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Ack: This bug is found by FuzzBT with a modified Syzkaller. Other contributors are Ruoyu Wu and Hui Peng. Signed-off-by: Sungwoo Kim Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Jakub Kicinski --- net/bluetooth/l2cap_sock.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index eebe256104bc..947ca580bb9a 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -46,6 +46,7 @@ static const struct proto_ops l2cap_sock_ops; static void l2cap_sock_init(struct sock *sk, struct sock *parent); static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio, int kern); +static void l2cap_sock_cleanup_listen(struct sock *parent); bool l2cap_is_socket(struct socket *sock) { @@ -1415,6 +1416,7 @@ static int l2cap_sock_release(struct socket *sock) if (!sk) return 0; + l2cap_sock_cleanup_listen(sk); bt_sock_unlink(&l2cap_sk_list, sk); err = l2cap_sock_shutdown(sock, SHUT_RDWR); From 56b7f325db139c9255b1eb1d1e741576d5f8fa34 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 2 Jun 2023 10:19:12 +0200 Subject: [PATCH 19/70] Bluetooth: hci_bcm: do not mark valid bd_addr as invalid A recent commit restored the original (and still documented) semantics for the HCI_QUIRK_USE_BDADDR_PROPERTY quirk so that the device address is considered invalid unless an address is provided by firmware. This specifically means that this flag must only be set for devices with invalid addresses, but the Broadcom driver has so far been setting this flag unconditionally. Fortunately the driver already checks for invalid addresses during setup and sets the HCI_QUIRK_INVALID_BDADDR flag. Use this flag to indicate when the address can be overridden by firmware (long term, this should probably just always be allowed). Fixes: 6945795bc81a ("Bluetooth: fix use-bdaddr-property quirk") Reported-by: Marek Szyprowski Link: https://lore.kernel.org/lkml/ecef83c8-497f-4011-607b-a63c24764867@samsung.com Signed-off-by: Johan Hovold Tested-by: Marek Szyprowski Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Jakub Kicinski --- drivers/bluetooth/hci_bcm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 83bf5d4330c4..874d23089b39 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -643,7 +643,8 @@ static int bcm_setup(struct hci_uart *hu) * Allow the bootloader to set a valid address through the * device tree. */ - set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hu->hdev->quirks); + if (test_bit(HCI_QUIRK_INVALID_BDADDR, &hu->hdev->quirks)) + set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hu->hdev->quirks); if (!bcm_request_irq(bcm)) err = bcm_setup_sleep(hu); From 6b9545dc9f8ff01d8bc1229103960d9cd265343f Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Thu, 1 Jun 2023 09:34:43 +0300 Subject: [PATCH 20/70] Bluetooth: ISO: use hci_sync for setting CIG parameters When reconfiguring CIG after disconnection of the last CIS, LE Remove CIG shall be sent before LE Set CIG Parameters. Otherwise, it fails because CIG is in the inactive state and not configurable (Core v5.3 Vol 6 Part B Sec. 4.5.14.3). This ordering is currently wrong under suitable timing conditions, because LE Remove CIG is sent via the hci_sync queue and may be delayed, but Set CIG Parameters is via hci_send_cmd. Make the ordering well-defined by sending also Set CIG Parameters via hci_sync. Fixes: 26afbd826ee3 ("Bluetooth: Add initial implementation of CIS connections") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Jakub Kicinski --- net/bluetooth/hci_conn.c | 47 +++++++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 8 deletions(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 1ef952bda97d..2275e0d9f841 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -775,6 +775,11 @@ static void le_conn_timeout(struct work_struct *work) hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM); } +struct iso_cig_params { + struct hci_cp_le_set_cig_params cp; + struct hci_cis_params cis[0x1f]; +}; + struct iso_list_data { union { u8 cig; @@ -786,10 +791,7 @@ struct iso_list_data { u16 sync_handle; }; int count; - struct { - struct hci_cp_le_set_cig_params cp; - struct hci_cis_params cis[0x11]; - } pdu; + struct iso_cig_params pdu; }; static void bis_list(struct hci_conn *conn, void *data) @@ -1764,10 +1766,33 @@ static int hci_le_create_big(struct hci_conn *conn, struct bt_iso_qos *qos) return hci_send_cmd(hdev, HCI_OP_LE_CREATE_BIG, sizeof(cp), &cp); } +static void set_cig_params_complete(struct hci_dev *hdev, void *data, int err) +{ + struct iso_cig_params *pdu = data; + + bt_dev_dbg(hdev, ""); + + if (err) + bt_dev_err(hdev, "Unable to set CIG parameters: %d", err); + + kfree(pdu); +} + +static int set_cig_params_sync(struct hci_dev *hdev, void *data) +{ + struct iso_cig_params *pdu = data; + u32 plen; + + plen = sizeof(pdu->cp) + pdu->cp.num_cis * sizeof(pdu->cis[0]); + return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_CIG_PARAMS, plen, pdu, + HCI_CMD_TIMEOUT); +} + static bool hci_le_set_cig_params(struct hci_conn *conn, struct bt_iso_qos *qos) { struct hci_dev *hdev = conn->hdev; struct iso_list_data data; + struct iso_cig_params *pdu; memset(&data, 0, sizeof(data)); @@ -1837,12 +1862,18 @@ static bool hci_le_set_cig_params(struct hci_conn *conn, struct bt_iso_qos *qos) if (qos->ucast.cis == BT_ISO_QOS_CIS_UNSET || !data.pdu.cp.num_cis) return false; - if (hci_send_cmd(hdev, HCI_OP_LE_SET_CIG_PARAMS, - sizeof(data.pdu.cp) + - (data.pdu.cp.num_cis * sizeof(*data.pdu.cis)), - &data.pdu) < 0) + pdu = kzalloc(sizeof(*pdu), GFP_KERNEL); + if (!pdu) return false; + memcpy(pdu, &data.pdu, sizeof(*pdu)); + + if (hci_cmd_sync_queue(hdev, set_cig_params_sync, pdu, + set_cig_params_complete) < 0) { + kfree(pdu); + return false; + } + return true; } From db9cbcadc16e8b9f0b3ef5870f3a38ebafcbe8e0 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sat, 3 Jun 2023 00:28:12 +0300 Subject: [PATCH 21/70] Bluetooth: hci_event: fix Set CIG Parameters error status handling If the event has error status, return right error code and don't show incorrect "response malformed" messages. Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Jakub Kicinski --- net/bluetooth/hci_event.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 09ba6d8987ee..1d493eefaabe 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3812,7 +3812,8 @@ static u8 hci_cc_le_set_cig_params(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_CIG_PARAMS); - if (!cp || rp->num_handles != cp->num_cis || rp->cig_id != cp->cig_id) { + if (!rp->status && (!cp || rp->num_handles != cp->num_cis || + rp->cig_id != cp->cig_id)) { bt_dev_err(hdev, "unexpected Set CIG Parameters response data"); status = HCI_ERROR_UNSPECIFIED; } From 73f55453ea5236a586a7f1b3d5e2ee051d655351 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 7 Jun 2023 12:33:47 -0700 Subject: [PATCH 22/70] Bluetooth: MGMT: Fix marking SCAN_RSP as not connectable When receiving a scan response there is no way to know if the remote device is connectable or not, so when it cannot be merged don't make any assumption and instead just mark it with a new flag defined as MGMT_DEV_FOUND_SCAN_RSP so userspace can tell it is a standalone SCAN_RSP. Link: https://lore.kernel.org/linux-bluetooth/CABBYNZ+CYMsDSPTxBn09Js3BcdC-x7vZFfyLJ3ppZGGwJKmUTw@mail.gmail.com/ Fixes: c70a7e4cc8d2 ("Bluetooth: Add support for Not Connectable flag for Device Found events") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Jakub Kicinski --- include/net/bluetooth/mgmt.h | 1 + net/bluetooth/hci_event.c | 15 +++++---------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index a5801649f619..5e68b3dd4422 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -979,6 +979,7 @@ struct mgmt_ev_auth_failed { #define MGMT_DEV_FOUND_NOT_CONNECTABLE BIT(2) #define MGMT_DEV_FOUND_INITIATED_CONN BIT(3) #define MGMT_DEV_FOUND_NAME_REQUEST_FAILED BIT(4) +#define MGMT_DEV_FOUND_SCAN_RSP BIT(5) #define MGMT_EV_DEVICE_FOUND 0x0012 struct mgmt_ev_device_found { diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 1d493eefaabe..2383153d5345 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -6317,23 +6317,18 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, return; } - /* When receiving non-connectable or scannable undirected - * advertising reports, this means that the remote device is - * not connectable and then clearly indicate this in the - * device found event. - * - * When receiving a scan response, then there is no way to + /* When receiving a scan response, then there is no way to * know if the remote device is connectable or not. However * since scan responses are merged with a previously seen * advertising report, the flags field from that report * will be used. * - * In the really unlikely case that a controller get confused - * and just sends a scan response event, then it is marked as - * not connectable as well. + * In the unlikely case that a controller just sends a scan + * response event that doesn't match the pending report, then + * it is marked as a standalone SCAN_RSP. */ if (type == LE_ADV_SCAN_RSP) - flags = MGMT_DEV_FOUND_NOT_CONNECTABLE; + flags = MGMT_DEV_FOUND_SCAN_RSP; /* If there's nothing pending either store the data from this * event or send an immediate device found event if the data From 14f0dceca60b2fc4f2388505b25f9e6f71785e05 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 8 Jun 2023 11:12:18 -0700 Subject: [PATCH 23/70] Bluetooth: ISO: Rework sync_interval to be sync_factor This rework sync_interval to be sync_factor as having sync_interval in the order of seconds is sometimes not disarable. Wit sync_factor the application can tell how many SDU intervals it wants to send an announcement with PA, the EA interval is set to 2 times that so a factor of 24 of BIG SDU interval of 10ms would look like the following: < HCI Command: LE Set Extended Advertising Parameters (0x08|0x0036) plen 25 Handle: 0x01 Properties: 0x0000 Min advertising interval: 480.000 msec (0x0300) Max advertising interval: 480.000 msec (0x0300) Channel map: 37, 38, 39 (0x07) Own address type: Random (0x01) Peer address type: Public (0x00) Peer address: 00:00:00:00:00:00 (OUI 00-00-00) Filter policy: Allow Scan Request from Any, Allow Connect Request from Any (0x00) TX power: Host has no preference (0x7f) Primary PHY: LE 1M (0x01) Secondary max skip: 0x00 Secondary PHY: LE 2M (0x02) SID: 0x00 Scan request notifications: Disabled (0x00) < HCI Command: LE Set Periodic Advertising Parameters (0x08|0x003e) plen 7 Handle: 1 Min interval: 240.00 msec (0x00c0) Max interval: 240.00 msec (0x00c0) Properties: 0x0000 Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Jakub Kicinski --- include/net/bluetooth/bluetooth.h | 2 +- net/bluetooth/hci_conn.c | 4 ++-- net/bluetooth/iso.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 1b4230cd42a3..af729859385e 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -185,7 +185,7 @@ struct bt_iso_ucast_qos { struct bt_iso_bcast_qos { __u8 big; __u8 bis; - __u8 sync_interval; + __u8 sync_factor; __u8 packing; __u8 framing; struct bt_iso_io_qos in; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 2275e0d9f841..24407a974b9c 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -2075,10 +2075,10 @@ static int create_big_sync(struct hci_dev *hdev, void *data) flags |= MGMT_ADV_FLAG_SEC_2M; /* Align intervals */ - interval = qos->bcast.out.interval / 1250; + interval = (qos->bcast.out.interval / 1250) * qos->bcast.sync_factor; if (qos->bcast.bis) - sync_interval = qos->bcast.sync_interval * 1600; + sync_interval = interval * 4; err = hci_start_per_adv_sync(hdev, qos->bcast.bis, conn->le_per_adv_data_len, conn->le_per_adv_data, flags, interval, diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 34d55a85d8f6..0e6cc57b3911 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -704,7 +704,7 @@ static struct bt_iso_qos default_qos = { .bcast = { .big = BT_ISO_QOS_BIG_UNSET, .bis = BT_ISO_QOS_BIS_UNSET, - .sync_interval = 0x00, + .sync_factor = 0x01, .packing = 0x00, .framing = 0x00, .in = DEFAULT_IO_QOS, @@ -1213,7 +1213,7 @@ static bool check_ucast_qos(struct bt_iso_qos *qos) static bool check_bcast_qos(struct bt_iso_qos *qos) { - if (qos->bcast.sync_interval > 0x07) + if (qos->bcast.sync_factor == 0x00) return false; if (qos->bcast.packing > 0x01) From d40d6f52d5bbbd7aa7092ba9fa793ad7dc253a35 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Tue, 20 Jun 2023 16:40:52 +0200 Subject: [PATCH 24/70] Bluetooth: hci_sysfs: make bt_class a static const structure Now that the driver core allows for struct class to be in read-only memory, move the bt_class structure to be declared at build time placing it into read-only memory, instead of having to be dynamically allocated at load time. Cc: Marcel Holtmann Cc: Johan Hedberg Cc: Luiz Augusto von Dentz Cc: linux-bluetooth@vger.kernel.org Suggested-by: Greg Kroah-Hartman Signed-off-by: Ivan Orlov Signed-off-by: Greg Kroah-Hartman Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Jakub Kicinski --- net/bluetooth/hci_sysfs.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 2934d7f4d564..15b33579007c 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -6,7 +6,9 @@ #include #include -static struct class *bt_class; +static const struct class bt_class = { + .name = "bluetooth", +}; static void bt_link_release(struct device *dev) { @@ -36,7 +38,7 @@ void hci_conn_init_sysfs(struct hci_conn *conn) BT_DBG("conn %p", conn); conn->dev.type = &bt_link; - conn->dev.class = bt_class; + conn->dev.class = &bt_class; conn->dev.parent = &hdev->dev; device_initialize(&conn->dev); @@ -104,7 +106,7 @@ void hci_init_sysfs(struct hci_dev *hdev) struct device *dev = &hdev->dev; dev->type = &bt_host; - dev->class = bt_class; + dev->class = &bt_class; __module_get(THIS_MODULE); device_initialize(dev); @@ -112,12 +114,10 @@ void hci_init_sysfs(struct hci_dev *hdev) int __init bt_sysfs_init(void) { - bt_class = class_create("bluetooth"); - - return PTR_ERR_OR_ZERO(bt_class); + return class_register(&bt_class); } void bt_sysfs_cleanup(void) { - class_destroy(bt_class); + class_unregister(&bt_class); } From fa01eba11f0e57c767a5eab5291c7a01407a00be Mon Sep 17 00:00:00 2001 From: Matthew Anderson Date: Sat, 24 Jun 2023 12:08:10 -0500 Subject: [PATCH 25/70] Bluetooth: btusb: Add MT7922 bluetooth ID for the Asus Ally Adding the device ID from the Asus Ally gets the bluetooth working on the device. Signed-off-by: Matthew Anderson Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Jakub Kicinski --- drivers/bluetooth/btusb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 0b0a02cf0d4a..5ec4ad0a5c86 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -613,6 +613,9 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0489, 0xe0d9), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, + { USB_DEVICE(0x0489, 0xe0f5), .driver_info = BTUSB_MEDIATEK | + BTUSB_WIDEBAND_SPEECH | + BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x13d3, 0x3568), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, From 5b6d345d1b65d67624349e5de22227492c637576 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Sun, 25 Jun 2023 16:45:13 +0800 Subject: [PATCH 26/70] Bluetooth: hci_conn: Use kmemdup() to replace kzalloc + memcpy Use kmemdup rather than duplicating its implementation. ./net/bluetooth/hci_conn.c:1880:7-14: WARNING opportunity for kmemdup. Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=5597 Signed-off-by: Jiapeng Chong Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Jakub Kicinski --- net/bluetooth/hci_conn.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 24407a974b9c..056f9516e46d 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1862,12 +1862,10 @@ static bool hci_le_set_cig_params(struct hci_conn *conn, struct bt_iso_qos *qos) if (qos->ucast.cis == BT_ISO_QOS_CIS_UNSET || !data.pdu.cp.num_cis) return false; - pdu = kzalloc(sizeof(*pdu), GFP_KERNEL); + pdu = kmemdup(&data.pdu, sizeof(*pdu), GFP_KERNEL); if (!pdu) return false; - memcpy(pdu, &data.pdu, sizeof(*pdu)); - if (hci_cmd_sync_queue(hdev, set_cig_params_sync, pdu, set_cig_params_complete) < 0) { kfree(pdu); From 2be22f1941d5f661aa8043261d1bae5b6696c749 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 20 Jun 2023 15:41:11 -0700 Subject: [PATCH 27/70] Bluetooth: hci_event: Fix parsing of CIS Established Event The ISO Interval on CIS Established Event uses 1.25 ms slots: BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E page 2304: Time = N * 1.25 ms In addition to that this always update the QoS settings based on CIS Established Event. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Jakub Kicinski --- net/bluetooth/hci_event.c | 45 ++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 2383153d5345..95816a938cea 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -6786,6 +6786,7 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data, { struct hci_evt_le_cis_established *ev = data; struct hci_conn *conn; + struct bt_iso_qos *qos; u16 handle = __le16_to_cpu(ev->handle); bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); @@ -6807,21 +6808,39 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data, goto unlock; } - if (conn->role == HCI_ROLE_SLAVE) { - __le32 interval; + qos = &conn->iso_qos; - memset(&interval, 0, sizeof(interval)); + /* Convert ISO Interval (1.25 ms slots) to SDU Interval (us) */ + qos->ucast.in.interval = le16_to_cpu(ev->interval) * 1250; + qos->ucast.out.interval = qos->ucast.in.interval; - memcpy(&interval, ev->c_latency, sizeof(ev->c_latency)); - conn->iso_qos.ucast.in.interval = le32_to_cpu(interval); - memcpy(&interval, ev->p_latency, sizeof(ev->p_latency)); - conn->iso_qos.ucast.out.interval = le32_to_cpu(interval); - conn->iso_qos.ucast.in.latency = le16_to_cpu(ev->interval); - conn->iso_qos.ucast.out.latency = le16_to_cpu(ev->interval); - conn->iso_qos.ucast.in.sdu = le16_to_cpu(ev->c_mtu); - conn->iso_qos.ucast.out.sdu = le16_to_cpu(ev->p_mtu); - conn->iso_qos.ucast.in.phy = ev->c_phy; - conn->iso_qos.ucast.out.phy = ev->p_phy; + switch (conn->role) { + case HCI_ROLE_SLAVE: + /* Convert Transport Latency (us) to Latency (msec) */ + qos->ucast.in.latency = + DIV_ROUND_CLOSEST(get_unaligned_le24(ev->c_latency), + 1000); + qos->ucast.out.latency = + DIV_ROUND_CLOSEST(get_unaligned_le24(ev->p_latency), + 1000); + qos->ucast.in.sdu = le16_to_cpu(ev->c_mtu); + qos->ucast.out.sdu = le16_to_cpu(ev->p_mtu); + qos->ucast.in.phy = ev->c_phy; + qos->ucast.out.phy = ev->p_phy; + break; + case HCI_ROLE_MASTER: + /* Convert Transport Latency (us) to Latency (msec) */ + qos->ucast.out.latency = + DIV_ROUND_CLOSEST(get_unaligned_le24(ev->c_latency), + 1000); + qos->ucast.in.latency = + DIV_ROUND_CLOSEST(get_unaligned_le24(ev->p_latency), + 1000); + qos->ucast.out.sdu = le16_to_cpu(ev->c_mtu); + qos->ucast.in.sdu = le16_to_cpu(ev->p_mtu); + qos->ucast.out.phy = ev->c_phy; + qos->ucast.in.phy = ev->p_phy; + break; } if (!ev->status) { From 915057ae79692d47f9fb3504785855be49abaea4 Mon Sep 17 00:00:00 2001 From: Martin Habets Date: Wed, 28 Jun 2023 13:32:20 +0100 Subject: [PATCH 28/70] sfc: support for devlink port requires MAE access On systems without MAE permission efx->mae is not initialised, and trying to lookup an mport results in a NULL pointer dereference. Fixes: 25414b2a64ae ("sfc: add devlink port support for ef100") Signed-off-by: Martin Habets Reviewed-by: Jacob Keller Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/efx_devlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/sfc/efx_devlink.c b/drivers/net/ethernet/sfc/efx_devlink.c index b82dad50a5b1..3cd750820fdd 100644 --- a/drivers/net/ethernet/sfc/efx_devlink.c +++ b/drivers/net/ethernet/sfc/efx_devlink.c @@ -626,6 +626,9 @@ static struct devlink_port *ef100_set_devlink_port(struct efx_nic *efx, u32 idx) u32 id; int rc; + if (!efx->mae) + return NULL; + if (efx_mae_lookup_mport(efx, idx, &id)) { /* This should not happen. */ if (idx == MAE_MPORT_DESC_VF_IDX_NULL) From 48538ccb825b05544ec308a509e2cc9c013402db Mon Sep 17 00:00:00 2001 From: Nick Child Date: Wed, 28 Jun 2023 13:22:44 -0500 Subject: [PATCH 29/70] ibmvnic: Do not reset dql stats on NON_FATAL err All ibmvnic resets, make a call to netdev_tx_reset_queue() when re-opening the device. netdev_tx_reset_queue() resets the num_queued and num_completed byte counters. These stats are used in Byte Queue Limit (BQL) algorithms. The difference between these two stats tracks the number of bytes currently sitting on the physical NIC. ibmvnic increases the number of queued bytes though calls to netdev_tx_sent_queue() in the drivers xmit function. When, VIOS reports that it is done transmitting bytes, the ibmvnic device increases the number of completed bytes through calls to netdev_tx_completed_queue(). It is important to note that the driver batches its transmit calls and num_queued is increased every time that an skb is added to the next batch, not necessarily when the batch is sent to VIOS for transmission. Unlike other reset types, a NON FATAL reset will not flush the sub crq tx buffers. Therefore, it is possible for the batched skb array to be partially full. So if there is call to netdev_tx_reset_queue() when re-opening the device, the value of num_queued (0) would not account for the skb's that are currently batched. Eventually, when the batch is sent to VIOS, the call to netdev_tx_completed_queue() would increase num_completed to a value greater than the num_queued. This causes a BUG_ON crash: ibmvnic 30000002: Firmware reports error, cause: adapter problem. Starting recovery... ibmvnic 30000002: tx error 600 ibmvnic 30000002: tx error 600 ibmvnic 30000002: tx error 600 ibmvnic 30000002: tx error 600 ------------[ cut here ]------------ kernel BUG at lib/dynamic_queue_limits.c:27! Oops: Exception in kernel mode, sig: 5 [....] NIP dql_completed+0x28/0x1c0 LR ibmvnic_complete_tx.isra.0+0x23c/0x420 [ibmvnic] Call Trace: ibmvnic_complete_tx.isra.0+0x3f8/0x420 [ibmvnic] (unreliable) ibmvnic_interrupt_tx+0x40/0x70 [ibmvnic] __handle_irq_event_percpu+0x98/0x270 ---[ end trace ]--- Therefore, do not reset the dql stats when performing a NON_FATAL reset. Fixes: 0d973388185d ("ibmvnic: Introduce xmit_more support using batched subCRQ hcalls") Signed-off-by: Nick Child Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index c63d3ec9d328..763d613adbcc 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1816,7 +1816,14 @@ static int __ibmvnic_open(struct net_device *netdev) if (prev_state == VNIC_CLOSED) enable_irq(adapter->tx_scrq[i]->irq); enable_scrq_irq(adapter, adapter->tx_scrq[i]); - netdev_tx_reset_queue(netdev_get_tx_queue(netdev, i)); + /* netdev_tx_reset_queue will reset dql stats. During NON_FATAL + * resets, don't reset the stats because there could be batched + * skb's waiting to be sent. If we reset dql stats, we risk + * num_completed being greater than num_queued. This will cause + * a BUG_ON in dql_completed(). + */ + if (adapter->reset_reason != VNIC_RESET_NON_FATAL) + netdev_tx_reset_queue(netdev_get_tx_queue(netdev, i)); } rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP); From 3cf62c8177adb0db9e15c8b898c44f997acf3ebf Mon Sep 17 00:00:00 2001 From: Pawel Dembicki Date: Wed, 28 Jun 2023 21:43:27 +0200 Subject: [PATCH 30/70] net: dsa: vsc73xx: fix MTU configuration Switch in MAXLEN register stores the maximum size of a data frame. The MTU size is 18 bytes smaller than the frame size. The current settings are causing problems with packet forwarding. This patch fixes the MTU settings to proper values. Fixes: fb77ffc6ec86 ("net: dsa: vsc73xx: make the MTU configurable") Reviewed-by: Linus Walleij Signed-off-by: Pawel Dembicki Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20230628194327.1765644-1-paweldembicki@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/vitesse-vsc73xx-core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c index ae55167ce0a6..ef1a4a7c47b2 100644 --- a/drivers/net/dsa/vitesse-vsc73xx-core.c +++ b/drivers/net/dsa/vitesse-vsc73xx-core.c @@ -1025,17 +1025,17 @@ static int vsc73xx_change_mtu(struct dsa_switch *ds, int port, int new_mtu) struct vsc73xx *vsc = ds->priv; return vsc73xx_write(vsc, VSC73XX_BLOCK_MAC, port, - VSC73XX_MAXLEN, new_mtu); + VSC73XX_MAXLEN, new_mtu + ETH_HLEN + ETH_FCS_LEN); } /* According to application not "VSC7398 Jumbo Frames" setting - * up the MTU to 9.6 KB does not affect the performance on standard + * up the frame size to 9.6 KB does not affect the performance on standard * frames. It is clear from the application note that * "9.6 kilobytes" == 9600 bytes. */ static int vsc73xx_get_max_mtu(struct dsa_switch *ds, int port) { - return 9600; + return 9600 - ETH_HLEN - ETH_FCS_LEN; } static const struct dsa_switch_ops vsc73xx_ds_ops = { From 08fc75735fda3be97194bfbf3c899c87abb3d0fe Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Fri, 30 Jun 2023 09:26:47 +0800 Subject: [PATCH 31/70] mlxsw: minimal: fix potential memory leak in mlxsw_m_linecards_init The line cards array is not freed in the error path of mlxsw_m_linecards_init(), which can lead to a memory leak. Fix by freeing the array in the error path, thereby making the error path identical to mlxsw_m_linecards_fini(). Fixes: 01328e23a476 ("mlxsw: minimal: Extend module to port mapping with slot index") Signed-off-by: Zhengchao Shao Reviewed-by: Petr Machata Reviewed-by: Ido Schimmel Link: https://lore.kernel.org/r/20230630012647.1078002-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/minimal.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c index 6b56eadd736e..6b98c3287b49 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c @@ -417,6 +417,7 @@ static int mlxsw_m_linecards_init(struct mlxsw_m *mlxsw_m) err_kmalloc_array: for (i--; i >= 0; i--) kfree(mlxsw_m->line_cards[i]); + kfree(mlxsw_m->line_cards); err_kcalloc: kfree(mlxsw_m->ports); return err; From 1b0fce8c8e69485e49a7d34aac3d4c2a2aa15d62 Mon Sep 17 00:00:00 2001 From: Davide Tronchin Date: Thu, 29 Jun 2023 12:37:36 +0200 Subject: [PATCH 32/70] net: usb: cdc_ether: add u-blox 0x1313 composition. Add CDC-ECM support for LARA-R6 01B. The new LARA-R6 product variant identified by the "01B" string can be configured (by AT interface) in three different USB modes: * Default mode (Vendor ID: 0x1546 Product ID: 0x1311) with 4 serial interfaces * RmNet mode (Vendor ID: 0x1546 Product ID: 0x1312) with 4 serial interfaces and 1 RmNet virtual network interface * CDC-ECM mode (Vendor ID: 0x1546 Product ID: 0x1313) with 4 serial interface and 1 CDC-ECM virtual network interface The first 4 interfaces of all the 3 configurations (default, RmNet, ECM) are the same. In CDC-ECM mode LARA-R6 01B exposes the following interfaces: If 0: Diagnostic If 1: AT parser If 2: AT parser If 3: AT parset/alternative functions If 4: CDC-ECM interface Signed-off-by: Davide Tronchin Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ether.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 80849d115e5d..c00a89b24df9 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -875,6 +875,12 @@ static const struct usb_device_id products[] = { USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, +}, { + /* U-blox LARA-R6 01B */ + USB_DEVICE_AND_INTERFACE_INFO(UBLOX_VENDOR_ID, 0x1313, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, + USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&wwan_info, }, { /* U-blox LARA-L6 */ USB_DEVICE_AND_INTERFACE_INFO(UBLOX_VENDOR_ID, 0x1343, USB_CLASS_COMM, From d5dc39459bdafd18ff1e93e1a86eb3e814ff9f94 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 30 Jun 2023 09:00:25 -0700 Subject: [PATCH 33/70] docs: netdev: broaden mailbot to all MAINTAINERS Reword slightly now that all MAINTAINERS have access to the commands. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- Documentation/process/maintainer-netdev.rst | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst index 83614cec9328..2397b31c0198 100644 --- a/Documentation/process/maintainer-netdev.rst +++ b/Documentation/process/maintainer-netdev.rst @@ -149,8 +149,11 @@ themselves. No email should ever be sent to the list with the main purpose of communicating with the bot, the bot commands should be seen as metadata. The use of the bot is restricted to authors of the patches (the ``From:`` -header on patch submission and command must match!), maintainers themselves -and a handful of senior reviewers. Bot records its activity here: +header on patch submission and command must match!), maintainers of +the modified code according to the MAINTAINERS file (again, ``From:`` +must match the MAINTAINERS entry) and a handful of senior reviewers. + +Bot records its activity here: https://patchwork.hopto.org/pw-bot.html From 26b32974ad2e82811706fd19c33c4ad6b9953663 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Fri, 30 Jun 2023 10:26:17 -0600 Subject: [PATCH 34/70] docs: networking: Update codeaurora references for rmnet source.codeaurora.org is no longer accessible and so the reference link in the documentation is not useful. Use iproute2 instead as it has a rmnet module for configuration. Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") Signed-off-by: Sean Tranchetti Signed-off-by: Subash Abhinov Kasiviswanathan Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- .../networking/device_drivers/cellular/qualcomm/rmnet.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst b/Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst index 4118384cf8eb..289c146a8291 100644 --- a/Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst +++ b/Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst @@ -190,8 +190,7 @@ MAP header|IP Packet|Optional padding|MAP header|Command Packet|Optional pad... 3. Userspace configuration ========================== -rmnet userspace configuration is done through netlink library librmnetctl -and command line utility rmnetcli. Utility is hosted in codeaurora forum git. -The driver uses rtnl_link_ops for communication. +rmnet userspace configuration is done through netlink using iproute2 +https://git.kernel.org/pub/scm/network/iproute2/iproute2.git/ -https://source.codeaurora.org/quic/la/platform/vendor/qcom-opensource/dataservices/tree/rmnetctl +The driver uses rtnl_link_ops for communication. From c97d3fb9e0e716b77d5c0d3051d46e2f41dc6fe4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 29 Jun 2023 22:47:53 +0100 Subject: [PATCH 35/70] nvme-tcp: Fix comma-related oops Fix a comma that should be a semicolon. The comma is at the end of an if-body and thus makes the statement after (a bvec_set_page()) conditional too, resulting in an oops because we didn't fill out the bio_vec[]: BUG: kernel NULL pointer dereference, address: 0000000000000008 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page ... Workqueue: nvme_tcp_wq nvme_tcp_io_work [nvme_tcp] RIP: 0010:skb_splice_from_iter+0xf1/0x370 ... Call Trace: tcp_sendmsg_locked+0x3a6/0xdd0 tcp_sendmsg+0x31/0x50 inet_sendmsg+0x47/0x80 sock_sendmsg+0x99/0xb0 nvme_tcp_try_send_data+0x149/0x490 [nvme_tcp] nvme_tcp_try_send+0x1b7/0x300 [nvme_tcp] nvme_tcp_io_work+0x40/0xc0 [nvme_tcp] process_one_work+0x21c/0x430 worker_thread+0x54/0x3e0 kthread+0xf8/0x130 Fixes: 7769887817c3 ("nvme-tcp: Use sendmsg(MSG_SPLICE_PAGES) rather then sendpage") Reported-by: Aurelien Aptel Link: https://lore.kernel.org/r/253mt0il43o.fsf@mtr-vdi-124.i-did-not-set--mail-host-address--so-tickle-me/ Signed-off-by: David Howells cc: Sagi Grimberg cc: Willem de Bruijn cc: Keith Busch cc: Jens Axboe cc: Christoph Hellwig cc: Chaitanya Kulkarni cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: Jens Axboe cc: Jens Axboe cc: Matthew Wilcox cc: linux-nvme@lists.infradead.org cc: netdev@vger.kernel.org Reviewed-by: Chaitanya Kulkarni Signed-off-by: David S. Miller --- drivers/nvme/host/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 3e7dd6f91832..9ce417cd32a7 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1014,7 +1014,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) msg.msg_flags |= MSG_MORE; if (!sendpage_ok(page)) - msg.msg_flags &= ~MSG_SPLICE_PAGES, + msg.msg_flags &= ~MSG_SPLICE_PAGES; bvec_set_page(&bvec, page, len, offset); iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len); From 4c5a331cacda995e995a7857f0e44e8937d98d2c Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Fri, 30 Jun 2023 11:58:42 +0530 Subject: [PATCH 36/70] octeontx2-af: cn10kb: fix interrupt csr addresses The current design is that, for asynchronous events like link_up and link_down firmware raises the interrupt to kernel. The previous patch which added RPM_USX driver has a bug where it uses old csr addresses for configuring interrupts. Which is resulting in losing interrupts from source firmware. This patch fixes the issue by correcting csr addresses. Fixes: b9d0fedc6234 ("octeontx2-af: cn10kb: Add RPM_USX MAC support") Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Goutham Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rpm.c | 2 +- drivers/net/ethernet/marvell/octeontx2/af/rpm.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c index de0d88dd10d6..a433f92c51ea 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c @@ -47,7 +47,7 @@ static struct mac_ops rpm2_mac_ops = { .int_set_reg = RPM2_CMRX_SW_INT_ENA_W1S, .irq_offset = 1, .int_ena_bit = BIT_ULL(0), - .lmac_fwi = RPM_LMAC_FWI, + .lmac_fwi = RPM2_LMAC_FWI, .non_contiguous_serdes_lane = true, .rx_stats_cnt = 43, .tx_stats_cnt = 34, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h index 22147b4c2137..be294eebab26 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h @@ -94,7 +94,8 @@ /* CN10KB CSR Declaration */ #define RPM2_CMRX_SW_INT 0x1b0 -#define RPM2_CMRX_SW_INT_ENA_W1S 0x1b8 +#define RPM2_CMRX_SW_INT_ENA_W1S 0x1c8 +#define RPM2_LMAC_FWI 0x12 #define RPM2_CMR_CHAN_MSK_OR 0x3120 #define RPM2_CMR_RX_OVR_BP_EN BIT_ULL(2) #define RPM2_CMR_RX_OVR_BP_BP BIT_ULL(1) From 2e7bc57b976bb016c6569a54d95c1b8d88f9450a Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Fri, 30 Jun 2023 11:58:43 +0530 Subject: [PATCH 37/70] octeontx2-af: Fix mapping for NIX block from CGX connection Firmware configures NIX block mapping for all MAC blocks. The current implementation reads the configuration and creates the mapping between RVU PF and NIX blocks. But this configuration is only valid for silicons that support multiple blocks. For all other silicons, all MAC blocks map to NIX0. This patch corrects the mapping by adding a check for the same. Fixes: c5a73b632b90 ("octeontx2-af: Map NIX block from CGX connection") Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Goutham Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 11 +++++++++++ drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index b5a7ee63508c..d4b8d4546de2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -23,6 +23,7 @@ #define PCI_DEVID_OCTEONTX2_LBK 0xA061 /* Subsystem Device ID */ +#define PCI_SUBSYS_DEVID_98XX 0xB100 #define PCI_SUBSYS_DEVID_96XX 0xB200 #define PCI_SUBSYS_DEVID_CN10K_A 0xB900 #define PCI_SUBSYS_DEVID_CNF10K_B 0xBC00 @@ -686,6 +687,16 @@ static inline u16 rvu_nix_chan_cpt(struct rvu *rvu, u8 chan) return rvu->hw->cpt_chan_base + chan; } +static inline bool is_rvu_supports_nix1(struct rvu *rvu) +{ + struct pci_dev *pdev = rvu->pdev; + + if (pdev->subsystem_device == PCI_SUBSYS_DEVID_98XX) + return true; + + return false; +} + /* Function Prototypes * RVU */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 83b342fa8d75..48611e603228 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -114,7 +114,7 @@ static void rvu_map_cgx_nix_block(struct rvu *rvu, int pf, p2x = cgx_lmac_get_p2x(cgx_id, lmac_id); /* Firmware sets P2X_SELECT as either NIX0 or NIX1 */ pfvf->nix_blkaddr = BLKADDR_NIX0; - if (p2x == CMR_P2X_SEL_NIX1) + if (is_rvu_supports_nix1(rvu) && p2x == CMR_P2X_SEL_NIX1) pfvf->nix_blkaddr = BLKADDR_NIX1; } From 79ebb53772c95d3a6ae51b3c65f9985fdd430df6 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Fri, 30 Jun 2023 11:58:44 +0530 Subject: [PATCH 38/70] octeontx2-af: Add validation before accessing cgx and lmac with the addition of new MAC blocks like CN10K RPM and CN10KB RPM_USX, LMACs are noncontiguous and CGX blocks are also noncontiguous. But during RVU driver initialization, the driver is assuming they are contiguous and trying to access cgx or lmac with their id which is resulting in kernel panic. This patch fixes the issue by adding proper checks. [ 23.219150] pc : cgx_lmac_read+0x38/0x70 [ 23.219154] lr : rvu_program_channels+0x3f0/0x498 [ 23.223852] sp : ffff000100d6fc80 [ 23.227158] x29: ffff000100d6fc80 x28: ffff00010009f880 x27: 000000000000005a [ 23.234288] x26: ffff000102586768 x25: 0000000000002500 x24: fffffffffff0f000 Fixes: 91c6945ea1f9 ("octeontx2-af: cn10k: Add RPM MAC support") Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Goutham Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index bd77152bb8d7..f4bdca662d61 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -169,6 +169,9 @@ void cgx_lmac_write(int cgx_id, int lmac_id, u64 offset, u64 val) { struct cgx *cgx_dev = cgx_get_pdata(cgx_id); + /* Software must not access disabled LMAC registers */ + if (!is_lmac_valid(cgx_dev, lmac_id)) + return; cgx_write(cgx_dev, lmac_id, offset, val); } @@ -176,6 +179,10 @@ u64 cgx_lmac_read(int cgx_id, int lmac_id, u64 offset) { struct cgx *cgx_dev = cgx_get_pdata(cgx_id); + /* Software must not access disabled LMAC registers */ + if (!is_lmac_valid(cgx_dev, lmac_id)) + return 0; + return cgx_read(cgx_dev, lmac_id, offset); } From 2e3e94c2f5dc98a8a0e93850407064bc5389c306 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Fri, 30 Jun 2023 11:58:45 +0530 Subject: [PATCH 39/70] octeontx2-af: Reset MAC features in FLR AF driver configures MAC features like internal loopback and PFC upon receiving the request from PF and its VF netdev. But these features are not getting reset in FLR. This patch fixes the issue by resetting the same. Fixes: 23999b30ae67 ("octeontx2-af: Enable or disable CGX internal loopback") Fixes: 1121f6b02e7a ("octeontx2-af: Priority flow control configuration support") Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- .../net/ethernet/marvell/octeontx2/af/cgx.c | 26 ++++++++++++++-- .../net/ethernet/marvell/octeontx2/af/cgx.h | 2 ++ .../marvell/octeontx2/af/lmac_common.h | 3 ++ .../net/ethernet/marvell/octeontx2/af/rpm.c | 30 +++++++++++++++++-- .../net/ethernet/marvell/octeontx2/af/rpm.h | 2 ++ .../net/ethernet/marvell/octeontx2/af/rvu.c | 1 + .../net/ethernet/marvell/octeontx2/af/rvu.h | 1 + .../ethernet/marvell/octeontx2/af/rvu_cgx.c | 18 +++++++++++ 8 files changed, 77 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index f4bdca662d61..592037f4e55b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -537,14 +537,15 @@ static u32 cgx_get_lmac_fifo_len(void *cgxd, int lmac_id) int cgx_lmac_internal_loopback(void *cgxd, int lmac_id, bool enable) { struct cgx *cgx = cgxd; - u8 lmac_type; + struct lmac *lmac; u64 cfg; if (!is_lmac_valid(cgx, lmac_id)) return -ENODEV; - lmac_type = cgx->mac_ops->get_lmac_type(cgx, lmac_id); - if (lmac_type == LMAC_MODE_SGMII || lmac_type == LMAC_MODE_QSGMII) { + lmac = lmac_pdata(lmac_id, cgx); + if (lmac->lmac_type == LMAC_MODE_SGMII || + lmac->lmac_type == LMAC_MODE_QSGMII) { cfg = cgx_read(cgx, lmac_id, CGXX_GMP_PCS_MRX_CTL); if (enable) cfg |= CGXX_GMP_PCS_MRX_CTL_LBK; @@ -1563,6 +1564,23 @@ int cgx_lmac_linkup_start(void *cgxd) return 0; } +int cgx_lmac_reset(void *cgxd, int lmac_id, u8 pf_req_flr) +{ + struct cgx *cgx = cgxd; + u64 cfg; + + if (!is_lmac_valid(cgx, lmac_id)) + return -ENODEV; + + /* Resetting PFC related CSRs */ + cfg = 0xff; + cgx_write(cgxd, lmac_id, CGXX_CMRX_RX_LOGL_XON, cfg); + + if (pf_req_flr) + cgx_lmac_internal_loopback(cgxd, lmac_id, false); + return 0; +} + static int cgx_configure_interrupt(struct cgx *cgx, struct lmac *lmac, int cnt, bool req_free) { @@ -1682,6 +1700,7 @@ static int cgx_lmac_init(struct cgx *cgx) cgx->lmac_idmap[lmac->lmac_id] = lmac; set_bit(lmac->lmac_id, &cgx->lmac_bmap); cgx->mac_ops->mac_pause_frm_config(cgx, lmac->lmac_id, true); + lmac->lmac_type = cgx->mac_ops->get_lmac_type(cgx, lmac->lmac_id); } return cgx_lmac_verify_fwi_version(cgx); @@ -1778,6 +1797,7 @@ static struct mac_ops cgx_mac_ops = { .mac_tx_enable = cgx_lmac_tx_enable, .pfc_config = cgx_lmac_pfc_config, .mac_get_pfc_frm_cfg = cgx_lmac_get_pfc_frm_cfg, + .mac_reset = cgx_lmac_reset, }; static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h index 5a20d93004c7..574114179688 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h @@ -35,6 +35,7 @@ #define CGXX_CMRX_INT_ENA_W1S 0x058 #define CGXX_CMRX_RX_ID_MAP 0x060 #define CGXX_CMRX_RX_STAT0 0x070 +#define CGXX_CMRX_RX_LOGL_XON 0x100 #define CGXX_CMRX_RX_LMACS 0x128 #define CGXX_CMRX_RX_DMAC_CTL0 (0x1F8 + mac_ops->csr_offset) #define CGX_DMAC_CTL0_CAM_ENABLE BIT_ULL(3) @@ -181,4 +182,5 @@ int cgx_lmac_get_pfc_frm_cfg(void *cgxd, int lmac_id, u8 *tx_pause, u8 *rx_pause); int verify_lmac_fc_cfg(void *cgxd, int lmac_id, u8 tx_pause, u8 rx_pause, int pfvf_idx); +int cgx_lmac_reset(void *cgxd, int lmac_id, u8 pf_req_flr); #endif /* CGX_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h index 39aaf0e4467d..0b4cba03f2e8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h @@ -24,6 +24,7 @@ * @cgx: parent cgx port * @mcast_filters_count: Number of multicast filters installed * @lmac_id: lmac port id + * @lmac_type: lmac type like SGMII/XAUI * @cmd_pend: flag set before new command is started * flag cleared after command response is received * @name: lmac port name @@ -43,6 +44,7 @@ struct lmac { struct cgx *cgx; u8 mcast_filters_count; u8 lmac_id; + u8 lmac_type; bool cmd_pend; char *name; }; @@ -125,6 +127,7 @@ struct mac_ops { int (*mac_get_pfc_frm_cfg)(void *cgxd, int lmac_id, u8 *tx_pause, u8 *rx_pause); + int (*mac_reset)(void *cgxd, int lmac_id, u8 pf_req_flr); /* FEC stats */ int (*get_fec_stats)(void *cgxd, int lmac_id, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c index a433f92c51ea..b4fcb20c3f4f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c @@ -37,6 +37,7 @@ static struct mac_ops rpm_mac_ops = { .mac_tx_enable = rpm_lmac_tx_enable, .pfc_config = rpm_lmac_pfc_config, .mac_get_pfc_frm_cfg = rpm_lmac_get_pfc_frm_cfg, + .mac_reset = rpm_lmac_reset, }; static struct mac_ops rpm2_mac_ops = { @@ -68,6 +69,7 @@ static struct mac_ops rpm2_mac_ops = { .mac_tx_enable = rpm_lmac_tx_enable, .pfc_config = rpm_lmac_pfc_config, .mac_get_pfc_frm_cfg = rpm_lmac_get_pfc_frm_cfg, + .mac_reset = rpm_lmac_reset, }; bool is_dev_rpm2(void *rpmd) @@ -537,14 +539,15 @@ u32 rpm2_get_lmac_fifo_len(void *rpmd, int lmac_id) int rpm_lmac_internal_loopback(void *rpmd, int lmac_id, bool enable) { rpm_t *rpm = rpmd; - u8 lmac_type; + struct lmac *lmac; u64 cfg; if (!is_lmac_valid(rpm, lmac_id)) return -ENODEV; - lmac_type = rpm->mac_ops->get_lmac_type(rpm, lmac_id); - if (lmac_type == LMAC_MODE_QSGMII || lmac_type == LMAC_MODE_SGMII) { + lmac = lmac_pdata(lmac_id, rpm); + if (lmac->lmac_type == LMAC_MODE_QSGMII || + lmac->lmac_type == LMAC_MODE_SGMII) { dev_err(&rpm->pdev->dev, "loopback not supported for LPC mode\n"); return 0; } @@ -713,3 +716,24 @@ int rpm_get_fec_stats(void *rpmd, int lmac_id, struct cgx_fec_stats_rsp *rsp) return 0; } + +int rpm_lmac_reset(void *rpmd, int lmac_id, u8 pf_req_flr) +{ + u64 rx_logl_xon, cfg; + rpm_t *rpm = rpmd; + + if (!is_lmac_valid(rpm, lmac_id)) + return -ENODEV; + + /* Resetting PFC related CSRs */ + rx_logl_xon = is_dev_rpm2(rpm) ? RPM2_CMRX_RX_LOGL_XON : + RPMX_CMRX_RX_LOGL_XON; + cfg = 0xff; + + rpm_write(rpm, lmac_id, rx_logl_xon, cfg); + + if (pf_req_flr) + rpm_lmac_internal_loopback(rpm, lmac_id, false); + + return 0; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h index be294eebab26..b79cfbc6f877 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h @@ -74,6 +74,7 @@ #define RPMX_MTI_MAC100X_CL01_PAUSE_QUANTA 0x80A8 #define RPMX_MTI_MAC100X_CL89_PAUSE_QUANTA 0x8108 #define RPM_DEFAULT_PAUSE_TIME 0x7FF +#define RPMX_CMRX_RX_LOGL_XON 0x4100 #define RPMX_MTI_MAC100X_XIF_MODE 0x8100 #define RPMX_ONESTEP_ENABLE BIT_ULL(5) @@ -132,4 +133,5 @@ int rpm_lmac_get_pfc_frm_cfg(void *rpmd, int lmac_id, u8 *tx_pause, int rpm2_get_nr_lmacs(void *rpmd); bool is_dev_rpm2(void *rpmd); int rpm_get_fec_stats(void *cgxd, int lmac_id, struct cgx_fec_stats_rsp *rsp); +int rpm_lmac_reset(void *rpmd, int lmac_id, u8 pf_req_flr); #endif /* RPM_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 0069e60afa3b..8dbc35c481f6 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -2629,6 +2629,7 @@ static void __rvu_flr_handler(struct rvu *rvu, u16 pcifunc) * Since LF is detached use LF number as -1. */ rvu_npc_free_mcam_entries(rvu, pcifunc, -1); + rvu_mac_reset(rvu, pcifunc); mutex_unlock(&rvu->flr_lock); } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index d4b8d4546de2..e8e65fd7888d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -895,6 +895,7 @@ int rvu_cgx_config_tx(void *cgxd, int lmac_id, bool enable); int rvu_cgx_prio_flow_ctrl_cfg(struct rvu *rvu, u16 pcifunc, u8 tx_pause, u8 rx_pause, u16 pfc_en); int rvu_cgx_cfg_pause_frm(struct rvu *rvu, u16 pcifunc, u8 tx_pause, u8 rx_pause); +void rvu_mac_reset(struct rvu *rvu, u16 pcifunc); u32 rvu_cgx_get_lmac_fifolen(struct rvu *rvu, int cgx, int lmac); int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, u16 pcifunc, int nixlf, int type); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 48611e603228..4b8559ac0404 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -1250,3 +1250,21 @@ int rvu_mbox_handler_cgx_prio_flow_ctrl_cfg(struct rvu *rvu, mac_ops->mac_get_pfc_frm_cfg(cgxd, lmac_id, &rsp->tx_pause, &rsp->rx_pause); return err; } + +void rvu_mac_reset(struct rvu *rvu, u16 pcifunc) +{ + int pf = rvu_get_pf(pcifunc); + struct mac_ops *mac_ops; + struct cgx *cgxd; + u8 cgx, lmac; + + if (!is_pf_cgxmapped(rvu, pf)) + return; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx, &lmac); + cgxd = rvu_cgx_pdata(cgx, rvu); + mac_ops = get_mac_ops(cgxd); + + if (mac_ops->mac_reset(cgxd, lmac, !is_vf(pcifunc))) + dev_err(rvu->dev, "Failed to reset MAC\n"); +} From 6ca3c005d0604e8d2b439366e3923ea58db99641 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 30 Jun 2023 19:41:18 +0300 Subject: [PATCH 40/70] net: bridge: keep ports without IFF_UNICAST_FLT in BR_PROMISC mode According to the synchronization rules for .ndo_get_stats() as seen in Documentation/networking/netdevices.rst, acquiring a plain spin_lock() should not be illegal, but the bridge driver implementation makes it so. After running these commands, I am being faced with the following lockdep splat: $ ip link add link swp0 name macsec0 type macsec encrypt on && ip link set swp0 up $ ip link add dev br0 type bridge vlan_filtering 1 && ip link set br0 up $ ip link set macsec0 master br0 && ip link set macsec0 up ======================================================== WARNING: possible irq lock inversion dependency detected 6.4.0-04295-g31b577b4bd4a #603 Not tainted -------------------------------------------------------- swapper/1/0 just changed the state of lock: ffff6bd348724cd8 (&br->lock){+.-.}-{3:3}, at: br_forward_delay_timer_expired+0x34/0x198 but this lock took another, SOFTIRQ-unsafe lock in the past: (&ocelot->stats_lock){+.+.}-{3:3} and interrupts could create inverse lock ordering between them. other info that might help us debug this: Chain exists of: &br->lock --> &br->hash_lock --> &ocelot->stats_lock Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&ocelot->stats_lock); local_irq_disable(); lock(&br->lock); lock(&br->hash_lock); lock(&br->lock); *** DEADLOCK *** (details about the 3 locks skipped) swp0 is instantiated by drivers/net/dsa/ocelot/felix.c, and this only matters to the extent that its .ndo_get_stats64() method calls spin_lock(&ocelot->stats_lock). Documentation/locking/lockdep-design.rst says: | A lock is irq-safe means it was ever used in an irq context, while a lock | is irq-unsafe means it was ever acquired with irq enabled. (...) | Furthermore, the following usage based lock dependencies are not allowed | between any two lock-classes:: | | -> | -> Lockdep marks br->hash_lock as softirq-safe, because it is sometimes taken in softirq context (for example br_fdb_update() which runs in NET_RX softirq), and when it's not in softirq context it blocks softirqs by using spin_lock_bh(). Lockdep marks ocelot->stats_lock as softirq-unsafe, because it never blocks softirqs from running, and it is never taken from softirq context. So it can always be interrupted by softirqs. There is a call path through which a function that holds br->hash_lock: fdb_add_hw_addr() will call a function that acquires ocelot->stats_lock: ocelot_port_get_stats64(). This can be seen below: ocelot_port_get_stats64+0x3c/0x1e0 felix_get_stats64+0x20/0x38 dsa_slave_get_stats64+0x3c/0x60 dev_get_stats+0x74/0x2c8 rtnl_fill_stats+0x4c/0x150 rtnl_fill_ifinfo+0x5cc/0x7b8 rtmsg_ifinfo_build_skb+0xe4/0x150 rtmsg_ifinfo+0x5c/0xb0 __dev_notify_flags+0x58/0x200 __dev_set_promiscuity+0xa0/0x1f8 dev_set_promiscuity+0x30/0x70 macsec_dev_change_rx_flags+0x68/0x88 __dev_set_promiscuity+0x1a8/0x1f8 __dev_set_rx_mode+0x74/0xa8 dev_uc_add+0x74/0xa0 fdb_add_hw_addr+0x68/0xd8 fdb_add_local+0xc4/0x110 br_fdb_add_local+0x54/0x88 br_add_if+0x338/0x4a0 br_add_slave+0x20/0x38 do_setlink+0x3a4/0xcb8 rtnl_newlink+0x758/0x9d0 rtnetlink_rcv_msg+0x2f0/0x550 netlink_rcv_skb+0x128/0x148 rtnetlink_rcv+0x24/0x38 the plain English explanation for it is: The macsec0 bridge port is created without p->flags & BR_PROMISC, because it is what br_manage_promisc() decides for a VLAN filtering bridge with a single auto port. As part of the br_add_if() procedure, br_fdb_add_local() is called for the MAC address of the device, and this results in a call to dev_uc_add() for macsec0 while the softirq-safe br->hash_lock is taken. Because macsec0 does not have IFF_UNICAST_FLT, dev_uc_add() ends up calling __dev_set_promiscuity() for macsec0, which is propagated by its implementation, macsec_dev_change_rx_flags(), to the lower device: swp0. This triggers the call path: dev_set_promiscuity(swp0) -> rtmsg_ifinfo() -> dev_get_stats() -> ocelot_port_get_stats64() with a calling context that lockdep doesn't like (br->hash_lock held). Normally we don't see this, because even though many drivers that can be bridge ports don't support IFF_UNICAST_FLT, we need a driver that (a) doesn't support IFF_UNICAST_FLT, *and* (b) it forwards the IFF_PROMISC flag to another driver, and (c) *that* driver implements ndo_get_stats64() using a softirq-unsafe spinlock. Condition (b) is necessary because the first __dev_set_rx_mode() calls __dev_set_promiscuity() with "bool notify=false", and thus, the rtmsg_ifinfo() code path won't be entered. The same criteria also hold true for DSA switches which don't report IFF_UNICAST_FLT. When the DSA master uses a spin_lock() in its ndo_get_stats64() method, the same lockdep splat can be seen. I think the deadlock possibility is real, even though I didn't reproduce it, and I'm thinking of the following situation to support that claim: fdb_add_hw_addr() runs on a CPU A, in a context with softirqs locally disabled and br->hash_lock held, and may end up attempting to acquire ocelot->stats_lock. In parallel, ocelot->stats_lock is currently held by a thread B (say, ocelot_check_stats_work()), which is interrupted while holding it by a softirq which attempts to lock br->hash_lock. Thread B cannot make progress because br->hash_lock is held by A. Whereas thread A cannot make progress because ocelot->stats_lock is held by B. When taking the issue at face value, the bridge can avoid that problem by simply making the ports promiscuous from a code path with a saner calling context (br->hash_lock not held). A bridge port without IFF_UNICAST_FLT is going to become promiscuous as soon as we call dev_uc_add() on it (which we do unconditionally), so why not be preemptive and make it promiscuous right from the beginning, so as to not be taken by surprise. With this, we've broken the links between code that holds br->hash_lock or br->lock and code that calls into the ndo_change_rx_flags() or ndo_get_stats64() ops of the bridge port. Fixes: 2796d0c648c9 ("bridge: Automatically manage port promiscuous mode.") Signed-off-by: Vladimir Oltean Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- net/bridge/br_if.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 3f04b40f6056..2450690f98cf 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -166,8 +166,9 @@ void br_manage_promisc(struct net_bridge *br) * This lets us disable promiscuous mode and write * this config to hw. */ - if (br->auto_cnt == 0 || - (br->auto_cnt == 1 && br_auto_port(p))) + if ((p->dev->priv_flags & IFF_UNICAST_FLT) && + (br->auto_cnt == 0 || + (br->auto_cnt == 1 && br_auto_port(p)))) br_port_clear_promisc(p); else br_port_set_promisc(p); From a398b9ea0c3b791b7a0f4c6029a62cf628f97f22 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 1 Jul 2023 01:20:10 +0300 Subject: [PATCH 41/70] net: dsa: tag_sja1105: fix source port decoding in vlan_filtering=0 bridge mode There was a regression introduced by the blamed commit, where pinging to a VLAN-unaware bridge would fail with the repeated message "Couldn't decode source port" coming from the tagging protocol driver. When receiving packets with a bridge_vid as determined by dsa_tag_8021q_bridge_join(), dsa_8021q_rcv() will decode: - source_port = 0 (which isn't really valid, more like "don't know") - switch_id = 0 (which isn't really valid, more like "don't know") - vbid = value in range 1-7 Since the blamed patch has reversed the order of the checks, we are now going to believe that source_port != -1 and switch_id != -1, so they're valid, but they aren't. The minimal solution to the problem is to only populate source_port and switch_id with what dsa_8021q_rcv() came up with, if the vbid is zero, i.e. the source port information is trustworthy. Fixes: c1ae02d87689 ("net: dsa: tag_sja1105: always prefer source port information from INCL_SRCPT") Signed-off-by: Vladimir Oltean Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/dsa/tag_sja1105.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 92a626a05e82..db0a6ac67470 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -573,11 +573,14 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, * if available. This allows us to not overwrite a valid source * port and switch ID with zeroes when receiving link-local * frames from a VLAN-unaware bridged port (non-zero vbid) or a - * VLAN-aware bridged port (non-zero vid). + * VLAN-aware bridged port (non-zero vid). Furthermore, the + * tag_8021q source port information is only of trust when the + * vbid is 0 (precise port). Otherwise, tmp_source_port and + * tmp_switch_id will be zeroes. */ - if (source_port == -1) + if (vbid == 0 && source_port == -1) source_port = tmp_source_port; - if (switch_id == -1) + if (vbid == 0 && switch_id == -1) switch_id = tmp_switch_id; } else if (source_port == -1 && switch_id == -1) { /* Packets with no source information have no chance of From f88fcb1d7d961b4b402d675109726f94db87571c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 1 Jul 2023 02:48:24 +0000 Subject: [PATCH 42/70] net: fix net_dev_start_xmit trace event vs skb_transport_offset() After blamed commit, we must be more careful about using skb_transport_offset(), as reminded us by syzbot: WARNING: CPU: 0 PID: 10 at include/linux/skbuff.h:2868 skb_transport_offset include/linux/skbuff.h:2977 [inline] WARNING: CPU: 0 PID: 10 at include/linux/skbuff.h:2868 perf_trace_net_dev_start_xmit+0x89a/0xce0 include/trace/events/net.h:14 Modules linked in: CPU: 0 PID: 10 Comm: kworker/u4:1 Not tainted 6.1.30-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/27/2023 Workqueue: bat_events batadv_iv_send_outstanding_bat_ogm_packet RIP: 0010:skb_transport_header include/linux/skbuff.h:2868 [inline] RIP: 0010:skb_transport_offset include/linux/skbuff.h:2977 [inline] RIP: 0010:perf_trace_net_dev_start_xmit+0x89a/0xce0 include/trace/events/net.h:14 Code: 8b 04 25 28 00 00 00 48 3b 84 24 c0 00 00 00 0f 85 4e 04 00 00 48 8d 65 d8 5b 41 5c 41 5d 41 5e 41 5f 5d c3 cc e8 56 22 01 fd <0f> 0b e9 f6 fc ff ff 89 f9 80 e1 07 80 c1 03 38 c1 0f 8c 86 f9 ff RSP: 0018:ffffc900002bf700 EFLAGS: 00010293 RAX: ffffffff8485d8ca RBX: 000000000000ffff RCX: ffff888100914280 RDX: 0000000000000000 RSI: 000000000000ffff RDI: 000000000000ffff RBP: ffffc900002bf818 R08: ffffffff8485d5b6 R09: fffffbfff0f8fb5e R10: 0000000000000000 R11: dffffc0000000001 R12: 1ffff110217d8f67 R13: ffff88810bec7b3a R14: dffffc0000000000 R15: dffffc0000000000 FS: 0000000000000000(0000) GS:ffff8881f6a00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f96cf6d52f0 CR3: 000000012224c000 CR4: 0000000000350ef0 Call Trace: [] trace_net_dev_start_xmit include/trace/events/net.h:14 [inline] [] xmit_one net/core/dev.c:3643 [inline] [] dev_hard_start_xmit+0x705/0x980 net/core/dev.c:3660 [] __dev_queue_xmit+0x16b2/0x3370 net/core/dev.c:4324 [] dev_queue_xmit include/linux/netdevice.h:3030 [inline] [] batadv_send_skb_packet+0x3f3/0x680 net/batman-adv/send.c:108 [] batadv_send_broadcast_skb+0x24/0x30 net/batman-adv/send.c:127 [] batadv_iv_ogm_send_to_if net/batman-adv/bat_iv_ogm.c:393 [inline] [] batadv_iv_ogm_emit net/batman-adv/bat_iv_ogm.c:421 [inline] [] batadv_iv_send_outstanding_bat_ogm_packet+0x69a/0x840 net/batman-adv/bat_iv_ogm.c:1701 [] process_one_work+0x8ac/0x1170 kernel/workqueue.c:2289 [] worker_thread+0xaa8/0x12d0 kernel/workqueue.c:2436 Fixes: 66e4c8d95008 ("net: warn if transport header was not set") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- include/trace/events/net.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/trace/events/net.h b/include/trace/events/net.h index da611a7aaf97..f667c76a3b02 100644 --- a/include/trace/events/net.h +++ b/include/trace/events/net.h @@ -51,7 +51,8 @@ TRACE_EVENT(net_dev_start_xmit, __entry->network_offset = skb_network_offset(skb); __entry->transport_offset_valid = skb_transport_header_was_set(skb); - __entry->transport_offset = skb_transport_offset(skb); + __entry->transport_offset = skb_transport_header_was_set(skb) ? + skb_transport_offset(skb) : 0; __entry->tx_flags = skb_shinfo(skb)->tx_flags; __entry->gso_size = skb_shinfo(skb)->gso_size; __entry->gso_segs = skb_shinfo(skb)->gso_segs; From f56d1eeaeabf3aa613157b26aa07215fcd2c5719 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20D=C3=ADaz?= Date: Fri, 30 Jun 2023 22:41:03 -0600 Subject: [PATCH 43/70] selftests/net: Add xt_policy config for xfrm_policy test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When running Kselftests with the current selftests/net/config the following problem can be seen with the net:xfrm_policy.sh selftest: # selftests: net: xfrm_policy.sh [ 41.076721] IPv6: ADDRCONF(NETDEV_CHANGE): veth0: link becomes ready [ 41.094787] IPv6: ADDRCONF(NETDEV_CHANGE): veth0: link becomes ready [ 41.107635] IPv6: ADDRCONF(NETDEV_CHANGE): veth0: link becomes ready # modprobe: FATAL: Module ip_tables not found in directory /lib/modules/6.1.36 # iptables v1.8.7 (legacy): can't initialize iptables table `filter': Table does not exist (do you need to insmod?) # Perhaps iptables or your kernel needs to be upgraded. # modprobe: FATAL: Module ip_tables not found in directory /lib/modules/6.1.36 # iptables v1.8.7 (legacy): can't initialize iptables table `filter': Table does not exist (do you need to insmod?) # Perhaps iptables or your kernel needs to be upgraded. # SKIP: Could not insert iptables rule ok 1 selftests: net: xfrm_policy.sh # SKIP This is because IPsec "policy" match support is not available to the kernel. This patch adds CONFIG_NETFILTER_XT_MATCH_POLICY as a module to the selftests/net/config file, so that `make kselftest-merge` can take this into consideration. Signed-off-by: Daniel Díaz Signed-off-by: David S. Miller --- tools/testing/selftests/net/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index d1d421ec10a3..cd3cc52c59b4 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -50,3 +50,4 @@ CONFIG_CRYPTO_SM4_GENERIC=y CONFIG_AMT=m CONFIG_VXLAN=m CONFIG_IP_SCTP=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m From a27ac5390922059867f645eefd978e533d7af902 Mon Sep 17 00:00:00 2001 From: "J.J. Martzki" Date: Sat, 1 Jul 2023 22:37:37 +0800 Subject: [PATCH 44/70] samples: pktgen: fix append mode failed issue Each sample script sources functions.sh before parameters.sh which makes $APPEND undefined when trapping EXIT no matter in append mode or not. Due to this when sample scripts finished they always do "pgctrl reset" which resets pktgen config. So move trap to each script after sourcing parameters.sh and trap EXIT explicitly. Signed-off-by: J.J. Martzki Signed-off-by: David S. Miller --- samples/pktgen/functions.sh | 13 +++++++------ .../pktgen/pktgen_bench_xmit_mode_netif_receive.sh | 4 ++++ samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh | 4 ++++ samples/pktgen/pktgen_sample01_simple.sh | 4 ++++ samples/pktgen/pktgen_sample02_multiqueue.sh | 3 +++ samples/pktgen/pktgen_sample03_burst_single_flow.sh | 4 ++++ samples/pktgen/pktgen_sample04_many_flows.sh | 4 ++++ samples/pktgen/pktgen_sample05_flow_per_thread.sh | 4 ++++ ...ktgen_sample06_numa_awared_queue_irq_affinity.sh | 3 +++ 9 files changed, 37 insertions(+), 6 deletions(-) diff --git a/samples/pktgen/functions.sh b/samples/pktgen/functions.sh index dd4e53ae9b73..c08cefb8eb1f 100644 --- a/samples/pktgen/functions.sh +++ b/samples/pktgen/functions.sh @@ -108,12 +108,13 @@ function pgset() { fi } -if [[ -z "$APPEND" ]]; then - if [[ $EUID -eq 0 ]]; then - # Cleanup pktgen setup on exit if thats not "append mode" - trap 'pg_ctrl "reset"' EXIT - fi -fi +function trap_exit() +{ + # Cleanup pktgen setup on exit if thats not "append mode" + if [[ -z "$APPEND" ]] && [[ $EUID -eq 0 ]]; then + trap 'pg_ctrl "reset"' EXIT + fi +} ## -- General shell tricks -- diff --git a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh index 99ec0688b044..b4328db4a164 100755 --- a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh +++ b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh @@ -33,6 +33,10 @@ root_check_run_with_sudo "$@" # Parameter parsing via include source ${basedir}/parameters.sh + +# Trap EXIT first +trap_exit + # Using invalid DST_MAC will cause the packets to get dropped in # ip_rcv() which is part of the test if [ -z "$DEST_IP" ]; then diff --git a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh index 04b0dd0c36d6..f2beb512c5cd 100755 --- a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh +++ b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh @@ -14,6 +14,10 @@ root_check_run_with_sudo "$@" # Parameter parsing via include source ${basedir}/parameters.sh + +# Trap EXIT first +trap_exit + if [ -z "$DEST_IP" ]; then [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1" fi diff --git a/samples/pktgen/pktgen_sample01_simple.sh b/samples/pktgen/pktgen_sample01_simple.sh index 09a92ea963f9..cdb9f497f87d 100755 --- a/samples/pktgen/pktgen_sample01_simple.sh +++ b/samples/pktgen/pktgen_sample01_simple.sh @@ -13,6 +13,10 @@ root_check_run_with_sudo "$@" # - go look in parameters.sh to see which setting are avail # - required param is the interface "-i" stored in $DEV source ${basedir}/parameters.sh + +# Trap EXIT first +trap_exit + # # Set some default params, if they didn't get set if [ -z "$DEST_IP" ]; then diff --git a/samples/pktgen/pktgen_sample02_multiqueue.sh b/samples/pktgen/pktgen_sample02_multiqueue.sh index 7fa41c84c32f..93f33d7d0a81 100755 --- a/samples/pktgen/pktgen_sample02_multiqueue.sh +++ b/samples/pktgen/pktgen_sample02_multiqueue.sh @@ -14,6 +14,9 @@ root_check_run_with_sudo "$@" # Required param: -i dev in $DEV source ${basedir}/parameters.sh +# Trap EXIT first +trap_exit + [ -z "$COUNT" ] && COUNT="100000" # Zero means indefinitely # Base Config diff --git a/samples/pktgen/pktgen_sample03_burst_single_flow.sh b/samples/pktgen/pktgen_sample03_burst_single_flow.sh index 8bf2fdffba16..8f8ed1ac46a0 100755 --- a/samples/pktgen/pktgen_sample03_burst_single_flow.sh +++ b/samples/pktgen/pktgen_sample03_burst_single_flow.sh @@ -25,6 +25,10 @@ root_check_run_with_sudo "$@" # Parameter parsing via include source ${basedir}/parameters.sh + +# Trap EXIT first +trap_exit + # Set some default params, if they didn't get set if [ -z "$DEST_IP" ]; then [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1" diff --git a/samples/pktgen/pktgen_sample04_many_flows.sh b/samples/pktgen/pktgen_sample04_many_flows.sh index cff51f861506..65ed486ce4f1 100755 --- a/samples/pktgen/pktgen_sample04_many_flows.sh +++ b/samples/pktgen/pktgen_sample04_many_flows.sh @@ -12,6 +12,10 @@ root_check_run_with_sudo "$@" # Parameter parsing via include source ${basedir}/parameters.sh + +# Trap EXIT first +trap_exit + # Set some default params, if they didn't get set if [ -z "$DEST_IP" ]; then [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1" diff --git a/samples/pktgen/pktgen_sample05_flow_per_thread.sh b/samples/pktgen/pktgen_sample05_flow_per_thread.sh index 3578d0aa4ac5..bcbc386b2284 100755 --- a/samples/pktgen/pktgen_sample05_flow_per_thread.sh +++ b/samples/pktgen/pktgen_sample05_flow_per_thread.sh @@ -16,6 +16,10 @@ root_check_run_with_sudo "$@" # Parameter parsing via include source ${basedir}/parameters.sh + +# Trap EXIT first +trap_exit + # Set some default params, if they didn't get set if [ -z "$DEST_IP" ]; then [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1" diff --git a/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh index 264cc5db9c49..0c5409cb5bab 100755 --- a/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh +++ b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh @@ -14,6 +14,9 @@ root_check_run_with_sudo "$@" # Required param: -i dev in $DEV source ${basedir}/parameters.sh +# Trap EXIT first +trap_exit + # Base Config [ -z "$COUNT" ] && COUNT="20000000" # Zero means indefinitely [ -z "$CLONE_SKB" ] && CLONE_SKB="0" From 7387943fa35516f6f8017a3b0e9ce48a3bef9faa Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 3 Jul 2023 03:27:04 +0200 Subject: [PATCH 45/70] wireguard: queueing: use saner cpu selection wrapping Using `% nr_cpumask_bits` is slow and complicated, and not totally robust toward dynamic changes to CPU topologies. Rather than storing the next CPU in the round-robin, just store the last one, and also return that value. This simplifies the loop drastically into a much more common pattern. Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") Cc: stable@vger.kernel.org Reported-by: Linus Torvalds Tested-by: Manuel Leiner Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- drivers/net/wireguard/queueing.c | 1 + drivers/net/wireguard/queueing.h | 25 +++++++++++-------------- drivers/net/wireguard/receive.c | 2 +- drivers/net/wireguard/send.c | 2 +- 4 files changed, 14 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireguard/queueing.c b/drivers/net/wireguard/queueing.c index 8084e7408c0a..26d235d15235 100644 --- a/drivers/net/wireguard/queueing.c +++ b/drivers/net/wireguard/queueing.c @@ -28,6 +28,7 @@ int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, int ret; memset(queue, 0, sizeof(*queue)); + queue->last_cpu = -1; ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL); if (ret) return ret; diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h index 125284b346a7..1ea4f874e367 100644 --- a/drivers/net/wireguard/queueing.h +++ b/drivers/net/wireguard/queueing.h @@ -117,20 +117,17 @@ static inline int wg_cpumask_choose_online(int *stored_cpu, unsigned int id) return cpu; } -/* This function is racy, in the sense that next is unlocked, so it could return - * the same CPU twice. A race-free version of this would be to instead store an - * atomic sequence number, do an increment-and-return, and then iterate through - * every possible CPU until we get to that index -- choose_cpu. However that's - * a bit slower, and it doesn't seem like this potential race actually - * introduces any performance loss, so we live with it. +/* This function is racy, in the sense that it's called while last_cpu is + * unlocked, so it could return the same CPU twice. Adding locking or using + * atomic sequence numbers is slower though, and the consequences of racing are + * harmless, so live with it. */ -static inline int wg_cpumask_next_online(int *next) +static inline int wg_cpumask_next_online(int *last_cpu) { - int cpu = *next; - - while (unlikely(!cpumask_test_cpu(cpu, cpu_online_mask))) - cpu = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits; - *next = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits; + int cpu = cpumask_next(*last_cpu, cpu_online_mask); + if (cpu >= nr_cpu_ids) + cpu = cpumask_first(cpu_online_mask); + *last_cpu = cpu; return cpu; } @@ -159,7 +156,7 @@ static inline void wg_prev_queue_drop_peeked(struct prev_queue *queue) static inline int wg_queue_enqueue_per_device_and_peer( struct crypt_queue *device_queue, struct prev_queue *peer_queue, - struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu) + struct sk_buff *skb, struct workqueue_struct *wq) { int cpu; @@ -173,7 +170,7 @@ static inline int wg_queue_enqueue_per_device_and_peer( /* Then we queue it up in the device queue, which consumes the * packet as soon as it can. */ - cpu = wg_cpumask_next_online(next_cpu); + cpu = wg_cpumask_next_online(&device_queue->last_cpu); if (unlikely(ptr_ring_produce_bh(&device_queue->ring, skb))) return -EPIPE; queue_work_on(cpu, wq, &per_cpu_ptr(device_queue->worker, cpu)->work); diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c index 7135d51d2d87..0b3f0c843550 100644 --- a/drivers/net/wireguard/receive.c +++ b/drivers/net/wireguard/receive.c @@ -524,7 +524,7 @@ static void wg_packet_consume_data(struct wg_device *wg, struct sk_buff *skb) goto err; ret = wg_queue_enqueue_per_device_and_peer(&wg->decrypt_queue, &peer->rx_queue, skb, - wg->packet_crypt_wq, &wg->decrypt_queue.last_cpu); + wg->packet_crypt_wq); if (unlikely(ret == -EPIPE)) wg_queue_enqueue_per_peer_rx(skb, PACKET_STATE_DEAD); if (likely(!ret || ret == -EPIPE)) { diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c index 5368f7c35b4b..95c853b59e1d 100644 --- a/drivers/net/wireguard/send.c +++ b/drivers/net/wireguard/send.c @@ -318,7 +318,7 @@ static void wg_packet_create_data(struct wg_peer *peer, struct sk_buff *first) goto err; ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue, &peer->tx_queue, first, - wg->packet_crypt_wq, &wg->encrypt_queue.last_cpu); + wg->packet_crypt_wq); if (unlikely(ret == -EPIPE)) wg_queue_enqueue_per_peer_tx(first, PACKET_STATE_DEAD); err: From f58d0a9b4c6a7a5199c3af967e43cc8b654604d4 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 3 Jul 2023 03:27:05 +0200 Subject: [PATCH 46/70] wireguard: netlink: send staged packets when setting initial private key Packets bound for peers can queue up prior to the device private key being set. For example, if persistent keepalive is set, a packet is queued up to be sent as soon as the device comes up. However, if the private key hasn't been set yet, the handshake message never sends, and no timer is armed to retry, since that would be pointless. But, if a user later sets a private key, the expectation is that those queued packets, such as a persistent keepalive, are actually sent. So adjust the configuration logic to account for this edge case, and add a test case to make sure this works. Maxim noticed this with a wg-quick(8) config to the tune of: [Interface] PostUp = wg set %i private-key somefile [Peer] PublicKey = ... Endpoint = ... PersistentKeepalive = 25 Here, the private key gets set after the device comes up using a PostUp script, triggering the bug. Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") Cc: stable@vger.kernel.org Reported-by: Maxim Cournoyer Tested-by: Maxim Cournoyer Link: https://lore.kernel.org/wireguard/87fs7xtqrv.fsf@gmail.com/ Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- drivers/net/wireguard/netlink.c | 14 ++++++---- tools/testing/selftests/wireguard/netns.sh | 30 +++++++++++++++++++--- 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c index 43c8c84e7ea8..6d1bd9f52d02 100644 --- a/drivers/net/wireguard/netlink.c +++ b/drivers/net/wireguard/netlink.c @@ -546,6 +546,7 @@ static int wg_set_device(struct sk_buff *skb, struct genl_info *info) u8 *private_key = nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]); u8 public_key[NOISE_PUBLIC_KEY_LEN]; struct wg_peer *peer, *temp; + bool send_staged_packets; if (!crypto_memneq(wg->static_identity.static_private, private_key, NOISE_PUBLIC_KEY_LEN)) @@ -564,14 +565,17 @@ static int wg_set_device(struct sk_buff *skb, struct genl_info *info) } down_write(&wg->static_identity.lock); - wg_noise_set_static_identity_private_key(&wg->static_identity, - private_key); - list_for_each_entry_safe(peer, temp, &wg->peer_list, - peer_list) { + send_staged_packets = !wg->static_identity.has_identity && netif_running(wg->dev); + wg_noise_set_static_identity_private_key(&wg->static_identity, private_key); + send_staged_packets = send_staged_packets && wg->static_identity.has_identity; + + wg_cookie_checker_precompute_device_keys(&wg->cookie_checker); + list_for_each_entry_safe(peer, temp, &wg->peer_list, peer_list) { wg_noise_precompute_static_static(peer); wg_noise_expire_current_peer_keypairs(peer); + if (send_staged_packets) + wg_packet_send_staged_packets(peer); } - wg_cookie_checker_precompute_device_keys(&wg->cookie_checker); up_write(&wg->static_identity.lock); } skip_set_private_key: diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index 69c7796c7ca9..405ff262ca93 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -514,10 +514,32 @@ n2 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/all/rp_filter' n1 ping -W 1 -c 1 192.168.241.2 [[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.3:1" ]] -ip1 link del veth1 -ip1 link del veth3 -ip1 link del wg0 -ip2 link del wg0 +ip1 link del dev veth3 +ip1 link del dev wg0 +ip2 link del dev wg0 + +# Make sure persistent keep alives are sent when an adapter comes up +ip1 link add dev wg0 type wireguard +n1 wg set wg0 private-key <(echo "$key1") peer "$pub2" endpoint 10.0.0.1:1 persistent-keepalive 1 +read _ _ tx_bytes < <(n1 wg show wg0 transfer) +[[ $tx_bytes -eq 0 ]] +ip1 link set dev wg0 up +read _ _ tx_bytes < <(n1 wg show wg0 transfer) +[[ $tx_bytes -gt 0 ]] +ip1 link del dev wg0 +# This should also happen even if the private key is set later +ip1 link add dev wg0 type wireguard +n1 wg set wg0 peer "$pub2" endpoint 10.0.0.1:1 persistent-keepalive 1 +read _ _ tx_bytes < <(n1 wg show wg0 transfer) +[[ $tx_bytes -eq 0 ]] +ip1 link set dev wg0 up +read _ _ tx_bytes < <(n1 wg show wg0 transfer) +[[ $tx_bytes -eq 0 ]] +n1 wg set wg0 private-key <(echo "$key1") +read _ _ tx_bytes < <(n1 wg show wg0 transfer) +[[ $tx_bytes -gt 0 ]] +ip1 link del dev veth1 +ip1 link del dev wg0 # We test that Netlink/IPC is working properly by doing things that usually cause split responses ip0 link add dev wg0 type wireguard From 326534e837c731496bdf0e02a8a61e987eb3bed0 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 3 Jul 2023 03:27:06 +0200 Subject: [PATCH 47/70] wireguard: timers: move to using timer_delete_sync The documentation says that del_timer_sync is obsolete, and code should use the equivalent timer_delete_sync instead, so switch to it. Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- drivers/net/wireguard/timers.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireguard/timers.c b/drivers/net/wireguard/timers.c index 53d8a57a0dfa..968bdb4df0b3 100644 --- a/drivers/net/wireguard/timers.c +++ b/drivers/net/wireguard/timers.c @@ -234,10 +234,10 @@ void wg_timers_init(struct wg_peer *peer) void wg_timers_stop(struct wg_peer *peer) { - del_timer_sync(&peer->timer_retransmit_handshake); - del_timer_sync(&peer->timer_send_keepalive); - del_timer_sync(&peer->timer_new_handshake); - del_timer_sync(&peer->timer_zero_key_material); - del_timer_sync(&peer->timer_persistent_keepalive); + timer_delete_sync(&peer->timer_retransmit_handshake); + timer_delete_sync(&peer->timer_send_keepalive); + timer_delete_sync(&peer->timer_new_handshake); + timer_delete_sync(&peer->timer_zero_key_material); + timer_delete_sync(&peer->timer_persistent_keepalive); flush_work(&peer->clear_peer_work); } From 998127cdb4699b9d470a9348ffe9f1154346be5f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Jun 2023 16:41:50 +0000 Subject: [PATCH 48/70] tcp: annotate data races in __tcp_oow_rate_limited() request sockets are lockless, __tcp_oow_rate_limited() could be called on the same object from different cpus. This is harmless. Add READ_ONCE()/WRITE_ONCE() annotations to avoid a KCSAN report. Fixes: 4ce7e93cb3fe ("tcp: rate limit ACK sent by SYN_RECV request sockets") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6f072095211e..57c8af1859c1 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3590,8 +3590,11 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, u32 *last_oow_ack_time) { - if (*last_oow_ack_time) { - s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time); + /* Paired with the WRITE_ONCE() in this function. */ + u32 val = READ_ONCE(*last_oow_ack_time); + + if (val) { + s32 elapsed = (s32)(tcp_jiffies32 - val); if (0 <= elapsed && elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) { @@ -3600,7 +3603,10 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, } } - *last_oow_ack_time = tcp_jiffies32; + /* Paired with the prior READ_ONCE() and with itself, + * as we might be lockless. + */ + WRITE_ONCE(*last_oow_ack_time, tcp_jiffies32); return false; /* not rate-limited: go ahead, send dupack now! */ } From acd9755894c96c27078b52e0bfd894e48b0b1508 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Fri, 30 Jun 2023 01:20:20 -0600 Subject: [PATCH 49/70] Documentation: ABI: sysfs-class-net-qmi: pass_through contact update Switch to the quicinc.com id. Fixes: bd1af6b5fffd ("Documentation: ABI: sysfs-class-net-qmi: document pass-through file") Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- Documentation/ABI/testing/sysfs-class-net-qmi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-class-net-qmi b/Documentation/ABI/testing/sysfs-class-net-qmi index 47e6b9732337..b028f5bc86db 100644 --- a/Documentation/ABI/testing/sysfs-class-net-qmi +++ b/Documentation/ABI/testing/sysfs-class-net-qmi @@ -62,7 +62,7 @@ Description: What: /sys/class/net//qmi/pass_through Date: January 2021 KernelVersion: 5.12 -Contact: Subash Abhinov Kasiviswanathan +Contact: Subash Abhinov Kasiviswanathan Description: Boolean. Default: 'N' From 3de4d22cc9ac7c9f38e10edcf54f9a8891a9c2aa Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sat, 1 Jul 2023 17:14:47 +0000 Subject: [PATCH 50/70] bpf, btf: Warn but return no error for NULL btf from __register_btf_kfunc_id_set() __register_btf_kfunc_id_set() assumes .BTF to be part of the module's .ko file if CONFIG_DEBUG_INFO_BTF is enabled. If that's not the case, the function prints an error message and return an error. As a result, such modules cannot be loaded. However, the section could be stripped out during a build process. It would be better to let the modules loaded, because their basic functionalities have no problem [0], though the BTF functionalities will not be supported. Make the function to lower the level of the message from error to warn, and return no error. [0] https://lore.kernel.org/bpf/20220219082037.ow2kbq5brktf4f2u@apollo.legion Fixes: c446fdacb10d ("bpf: fix register_btf_kfunc_id_set for !CONFIG_DEBUG_INFO_BTF") Reported-by: Alexander Egorenkov Suggested-by: Kumar Kartikeya Dwivedi Signed-off-by: SeongJae Park Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/87y228q66f.fsf@oc8242746057.ibm.com Link: https://lore.kernel.org/bpf/20220219082037.ow2kbq5brktf4f2u@apollo.legion Link: https://lore.kernel.org/bpf/20230701171447.56464-1-sj@kernel.org --- kernel/bpf/btf.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 29fe21099298..817204d53372 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -7891,10 +7891,8 @@ static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook, pr_err("missing vmlinux BTF, cannot register kfuncs\n"); return -ENOENT; } - if (kset->owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) { - pr_err("missing module BTF, cannot register kfuncs\n"); - return -ENOENT; - } + if (kset->owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) + pr_warn("missing module BTF, cannot register kfuncs\n"); return 0; } if (IS_ERR(btf)) From 2c5d234d7f55e4ba7f3ee00fb9452ac7c97b4a46 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 27 Jun 2023 16:21:39 -0700 Subject: [PATCH 51/70] ptp: Make max_phase_adjustment sysfs device attribute invisible when not supported The .adjphase operation is an operation that is implemented only by certain PHCs. The sysfs device attribute node for querying the maximum phase adjustment supported should not be exposed on devices that do not support .adjphase. Fixes: c3b60ab7a4df ("ptp: Add .getmaxphase callback to ptp_clock_info") Signed-off-by: Rahul Rameshbabu Reported-by: Nathan Chancellor Reported-by: Naresh Kamboju Reported-by: Linux Kernel Functional Testing Link: https://lore.kernel.org/netdev/20230627162146.GA114473@dev-arch.thelio-3990X/ Link: https://lore.kernel.org/all/CA+G9fYtKCZeAUTtwe69iK8Xcz1mOKQzwcy49wd+imZrfj6ifXA@mail.gmail.com/ Tested-by: Nathan Chancellor Reviewed-by: Andrew Lunn Acked-by: Richard Cochran Reviewed-by: Petr Vorel Message-ID: <20230627232139.213130-1-rrameshbabu@nvidia.com> Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_sysfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c index 77219cdcd683..6e4d5456a885 100644 --- a/drivers/ptp/ptp_sysfs.c +++ b/drivers/ptp/ptp_sysfs.c @@ -358,6 +358,9 @@ static umode_t ptp_is_attribute_visible(struct kobject *kobj, attr == &dev_attr_max_vclocks.attr) { if (ptp->is_virtual_clock) mode = 0; + } else if (attr == &dev_attr_max_phase_adjustment.attr) { + if (!info->adjphase || !info->getmaxphase) + mode = 0; } return mode; From f7306acec9aae9893d15e745c8791124d42ab10a Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 3 Jul 2023 19:53:29 +0200 Subject: [PATCH 52/70] xsk: Honor SO_BINDTODEVICE on bind Initial creation of an AF_XDP socket requires CAP_NET_RAW capability. A privileged process might create the socket and pass it to a non-privileged process for later use. However, that process will be able to bind the socket to any network interface. Even though it will not be able to receive any traffic without modification of the BPF map, the situation is not ideal. Sockets already have a mechanism that can be used to restrict what interface they can be attached to. That is SO_BINDTODEVICE. To change the SO_BINDTODEVICE binding the process will need CAP_NET_RAW. Make xsk_bind() honor the SO_BINDTODEVICE in order to allow safer workflow when non-privileged process is using AF_XDP. The intended workflow is following: 1. First process creates a bare socket with socket(AF_XDP, ...). 2. First process loads the XSK program to the interface. 3. First process adds the socket fd to a BPF map. 4. First process ties socket fd to a particular interface using SO_BINDTODEVICE. 5. First process sends socket fd to a second process. 6. Second process allocates UMEM. 7. Second process binds socket to the interface with bind(...). 8. Second process sends/receives the traffic. All the steps above are possible today if the first process is privileged and the second one has sufficient RLIMIT_MEMLOCK and no capabilities. However, the second process will be able to bind the socket to any interface it wants on step 7 and send traffic from it. With the proposed change, the second process will be able to bind the socket only to a specific interface chosen by the first process at step 4. Fixes: 965a99098443 ("xsk: add support for bind for Rx") Signed-off-by: Ilya Maximets Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Acked-by: John Fastabend Acked-by: Jason Wang Link: https://lore.kernel.org/bpf/20230703175329.3259672-1-i.maximets@ovn.org --- Documentation/networking/af_xdp.rst | 9 +++++++++ net/xdp/xsk.c | 5 +++++ 2 files changed, 14 insertions(+) diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst index 247c6c4127e9..1cc35de336a4 100644 --- a/Documentation/networking/af_xdp.rst +++ b/Documentation/networking/af_xdp.rst @@ -433,6 +433,15 @@ start N bytes into the buffer leaving the first N bytes for the application to use. The final option is the flags field, but it will be dealt with in separate sections for each UMEM flag. +SO_BINDTODEVICE setsockopt +-------------------------- + +This is a generic SOL_SOCKET option that can be used to tie AF_XDP +socket to a particular network interface. It is useful when a socket +is created by a privileged process and passed to a non-privileged one. +Once the option is set, kernel will refuse attempts to bind that socket +to a different interface. Updating the value requires CAP_NET_RAW. + XDP_STATISTICS getsockopt ------------------------- diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 5a8c0dd250af..31dca4ecb2c5 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -886,6 +886,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) struct sock *sk = sock->sk; struct xdp_sock *xs = xdp_sk(sk); struct net_device *dev; + int bound_dev_if; u32 flags, qid; int err = 0; @@ -899,6 +900,10 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) XDP_USE_NEED_WAKEUP)) return -EINVAL; + bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); + if (bound_dev_if && bound_dev_if != sxdp->sxdp_ifindex) + return -EINVAL; + rtnl_lock(); mutex_lock(&xs->mutex); if (xs->state != XSK_READY) { From 30c45b5361d39b4b793780ffac5538090b9e2eb1 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Mon, 3 Jul 2023 19:08:42 +0800 Subject: [PATCH 53/70] net/sched: act_pedit: Add size check for TCA_PEDIT_PARMS_EX The attribute TCA_PEDIT_PARMS_EX is not be included in pedit_policy and one malicious user could fake a TCA_PEDIT_PARMS_EX whose length is smaller than the intended sizeof(struct tc_pedit). Hence, the dereference in tcf_pedit_init() could access dirty heap data. static int tcf_pedit_init(...) { // ... pattr = tb[TCA_PEDIT_PARMS]; // TCA_PEDIT_PARMS is included if (!pattr) pattr = tb[TCA_PEDIT_PARMS_EX]; // but this is not // ... parm = nla_data(pattr); index = parm->index; // parm is able to be smaller than 4 bytes // and this dereference gets dirty skb_buff // data created in netlink_sendmsg } This commit adds TCA_PEDIT_PARMS_EX length in pedit_policy which avoid the above case, just like the TCA_PEDIT_PARMS. Fixes: 71d0ed7079df ("net/act_pedit: Support using offset relative to the conventional network headers") Signed-off-by: Lin Ma Reviewed-by: Pedro Tammela Link: https://lore.kernel.org/r/20230703110842.590282-1-linma@zju.edu.cn Signed-off-by: Paolo Abeni --- net/sched/act_pedit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index b562fc2bb5b1..1ef8fcfa9997 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -29,6 +29,7 @@ static struct tc_action_ops act_pedit_ops; static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = { [TCA_PEDIT_PARMS] = { .len = sizeof(struct tc_pedit) }, + [TCA_PEDIT_PARMS_EX] = { .len = sizeof(struct tc_pedit) }, [TCA_PEDIT_KEYS_EX] = { .type = NLA_NESTED }, }; From 90a8007bbeb616e3ea57e2696190e57aa0329531 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 3 Jul 2023 18:24:52 +0300 Subject: [PATCH 54/70] mlxsw: spectrum_router: Fix an IS_ERR() vs NULL check The mlxsw_sp_crif_alloc() function returns NULL on error. It doesn't return error pointers. Fix the check. Fixes: 78126cfd5dc9 ("mlxsw: spectrum_router: Maintain CRIF for fallback loopback RIF") Signed-off-by: Dan Carpenter Reviewed-by: Alexander Lobakin Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 445ba7fe3c40..b32adf277a22 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -10794,8 +10794,8 @@ static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp, int err; router->lb_crif = mlxsw_sp_crif_alloc(NULL); - if (IS_ERR(router->lb_crif)) - return PTR_ERR(router->lb_crif); + if (!router->lb_crif) + return -ENOMEM; /* Create a generic loopback RIF associated with the main table * (default VRF). Any table can be used, but the main table exists From 84bef5b6037c15180ef88ac4216dc621d16df1a6 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 3 Jul 2023 19:14:46 +0200 Subject: [PATCH 55/70] pptp: Fix fib lookup calls. PPTP uses pppox sockets (struct pppox_sock). These sockets don't embed an inet_sock structure, so it's invalid to call inet_sk() on them. Therefore, the ip_route_output_ports() call in pptp_connect() has two problems: * The tos variable is set with RT_CONN_FLAGS(sk), which calls inet_sk() on the pppox socket. * ip_route_output_ports() tries to retrieve routing flags using inet_sk_flowi_flags(), which is also going to call inet_sk() on the pppox socket. While PPTP doesn't use inet sockets, it's actually really layered on top of IP and therefore needs a proper way to do fib lookups. So let's define pptp_route_output() to get a struct rtable from a pptp socket. Let's also replace the ip_route_output_ports() call of pptp_xmit() for consistency. In practice, this means that: * pptp_connect() sets ->flowi4_tos and ->flowi4_flags to zero instead of using bits of unrelated struct pppox_sock fields. * pptp_xmit() now respects ->sk_mark and ->sk_uid. * pptp_xmit() now calls the security_sk_classify_flow() security hook, thus allowing to set ->flowic_secid. * pptp_xmit() now passes the pppox socket to xfrm_lookup_route(). Found by code inspection. Fixes: 00959ade36ac ("PPTP: PPP over IPv4 (Point-to-Point Tunneling Protocol)") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- drivers/net/ppp/pptp.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index 0fe78826c8fa..32183f24e63f 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -128,6 +129,23 @@ static void del_chan(struct pppox_sock *sock) spin_unlock(&chan_lock); } +static struct rtable *pptp_route_output(struct pppox_sock *po, + struct flowi4 *fl4) +{ + struct sock *sk = &po->sk; + struct net *net; + + net = sock_net(sk); + flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, 0, + RT_SCOPE_UNIVERSE, IPPROTO_GRE, 0, + po->proto.pptp.dst_addr.sin_addr.s_addr, + po->proto.pptp.src_addr.sin_addr.s_addr, + 0, 0, sock_net_uid(net, sk)); + security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); + + return ip_route_output_flow(net, fl4, sk); +} + static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) { struct sock *sk = (struct sock *) chan->private; @@ -151,11 +169,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) if (sk_pppox(po)->sk_state & PPPOX_DEAD) goto tx_error; - rt = ip_route_output_ports(net, &fl4, NULL, - opt->dst_addr.sin_addr.s_addr, - opt->src_addr.sin_addr.s_addr, - 0, 0, IPPROTO_GRE, - RT_TOS(0), sk->sk_bound_dev_if); + rt = pptp_route_output(po, &fl4); if (IS_ERR(rt)) goto tx_error; @@ -438,12 +452,7 @@ static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr, po->chan.private = sk; po->chan.ops = &pptp_chan_ops; - rt = ip_route_output_ports(sock_net(sk), &fl4, sk, - opt->dst_addr.sin_addr.s_addr, - opt->src_addr.sin_addr.s_addr, - 0, 0, - IPPROTO_GRE, RT_CONN_FLAGS(sk), - sk->sk_bound_dev_if); + rt = pptp_route_output(po, &fl4); if (IS_ERR(rt)) { error = -EHOSTUNREACH; goto end; From ba7bdec3cbec7b0135f7ec0458073cbe9ae74de5 Mon Sep 17 00:00:00 2001 From: Azeem Shaikh Date: Mon, 3 Jul 2023 17:58:40 +0000 Subject: [PATCH 56/70] net: Replace strlcpy with strscpy strlcpy() reads the entire source buffer first. This read may exceed the destination size limit. This is both inefficient and can lead to linear read overflows if a source string is not NUL-terminated [1]. In an effort to remove strlcpy() completely [2], replace strlcpy() here with strscpy(). No return values were used, so direct replacement is safe. [1] https://www.kernel.org/doc/html/latest/process/deprecated.html#strlcpy [2] https://github.com/KSPP/linux/issues/89 Signed-off-by: Azeem Shaikh Reviewed-by: Simon Horman Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/trace/events/fib.h | 2 +- include/trace/events/fib6.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/trace/events/fib.h b/include/trace/events/fib.h index 76297ecd4935..20b914250ce9 100644 --- a/include/trace/events/fib.h +++ b/include/trace/events/fib.h @@ -65,7 +65,7 @@ TRACE_EVENT(fib_table_lookup, } dev = nhc ? nhc->nhc_dev : NULL; - strlcpy(__entry->name, dev ? dev->name : "-", IFNAMSIZ); + strscpy(__entry->name, dev ? dev->name : "-", IFNAMSIZ); if (nhc) { if (nhc->nhc_gw_family == AF_INET) { diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h index 4d3e607b3cde..5d7ee2610728 100644 --- a/include/trace/events/fib6.h +++ b/include/trace/events/fib6.h @@ -63,7 +63,7 @@ TRACE_EVENT(fib6_table_lookup, } if (res->nh && res->nh->fib_nh_dev) { - strlcpy(__entry->name, res->nh->fib_nh_dev->name, IFNAMSIZ); + strscpy(__entry->name, res->nh->fib_nh_dev->name, IFNAMSIZ); } else { strcpy(__entry->name, "-"); } From 1dcf6efd5f0c1f4496b3ef7ec5a7db104a53b38c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 4 Jul 2023 01:05:44 +0300 Subject: [PATCH 57/70] net: dsa: tag_sja1105: fix MAC DA patching from meta frames The SJA1105 manual says that at offset 4 into the meta frame payload we have "MAC destination byte 2" and at offset 5 we have "MAC destination byte 1". These are counted from the LSB, so byte 1 is h_dest[ETH_HLEN-2] aka h_dest[4] and byte 2 is h_dest[ETH_HLEN-3] aka h_dest[3]. The sja1105_meta_unpack() function decodes these the other way around, so a frame with MAC DA 01:80:c2:11:22:33 is received by the network stack as having 01:80:c2:22:11:33. Fixes: e53e18a6fe4d ("net: dsa: sja1105: Receive and decode meta frames") Signed-off-by: Vladimir Oltean Reviewed-by: Simon Horman Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/tag_sja1105.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index db0a6ac67470..ec48165673ed 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -118,8 +118,8 @@ static void sja1105_meta_unpack(const struct sk_buff *skb, * a unified unpacking command for both device series. */ packing(buf, &meta->tstamp, 31, 0, 4, UNPACK, 0); - packing(buf + 4, &meta->dmac_byte_4, 7, 0, 1, UNPACK, 0); - packing(buf + 5, &meta->dmac_byte_3, 7, 0, 1, UNPACK, 0); + packing(buf + 4, &meta->dmac_byte_3, 7, 0, 1, UNPACK, 0); + packing(buf + 5, &meta->dmac_byte_4, 7, 0, 1, UNPACK, 0); packing(buf + 6, &meta->source_port, 7, 0, 1, UNPACK, 0); packing(buf + 7, &meta->switch_id, 7, 0, 1, UNPACK, 0); } From a372d66af48506d9f7aaae2a474cd18f14d98cb8 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 4 Jul 2023 01:05:45 +0300 Subject: [PATCH 58/70] net: dsa: sja1105: always enable the send_meta options incl_srcpt has the limitation, mentioned in commit b4638af8885a ("net: dsa: sja1105: always enable the INCL_SRCPT option"), that frames with a MAC DA of 01:80:c2:xx:yy:zz will be received as 01:80:c2:00:00:zz unless PTP RX timestamping is enabled. The incl_srcpt option was initially unconditionally enabled, then that changed with commit 42824463d38d ("net: dsa: sja1105: Limit use of incl_srcpt to bridge+vlan mode"), then again with b4638af8885a ("net: dsa: sja1105: always enable the INCL_SRCPT option"). Bottom line is that it now needs to be always enabled, otherwise the driver does not have a reliable source of information regarding source_port and switch_id for link-local traffic (tag_8021q VLANs may be imprecise since now they identify an entire bridging domain when ports are not standalone). If we accept that PTP RX timestamping (and therefore, meta frame generation) is always enabled in hardware, then that limitation could be avoided and packets with any MAC DA can be properly received, because meta frames do contain the original bytes from the MAC DA of their associated link-local packet. This change enables meta frame generation unconditionally, which also has the nice side effects of simplifying the switch control path (a switch reset is no longer required on hwtstamping settings change) and the tagger data path (it no longer needs to be informed whether to expect meta frames or not - it always does). Fixes: 227d07a07ef1 ("net: dsa: sja1105: Add support for traffic through standalone ports") Signed-off-by: Vladimir Oltean Reviewed-by: Simon Horman Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105.h | 2 +- drivers/net/dsa/sja1105/sja1105_main.c | 5 ++- drivers/net/dsa/sja1105/sja1105_ptp.c | 48 +++----------------------- include/linux/dsa/sja1105.h | 4 --- net/dsa/tag_sja1105.c | 45 ------------------------ 5 files changed, 7 insertions(+), 97 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h index fb1549a5fe32..dee35ba924ad 100644 --- a/drivers/net/dsa/sja1105/sja1105.h +++ b/drivers/net/dsa/sja1105/sja1105.h @@ -252,6 +252,7 @@ struct sja1105_private { unsigned long ucast_egress_floods; unsigned long bcast_egress_floods; unsigned long hwts_tx_en; + unsigned long hwts_rx_en; const struct sja1105_info *info; size_t max_xfer_len; struct spi_device *spidev; @@ -289,7 +290,6 @@ struct sja1105_spi_message { /* From sja1105_main.c */ enum sja1105_reset_reason { SJA1105_VLAN_FILTERING = 0, - SJA1105_RX_HWTSTAMPING, SJA1105_AGEING_TIME, SJA1105_SCHEDULING, SJA1105_BEST_EFFORT_POLICING, diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index dd154b2b9680..3529a565b4aa 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -867,11 +867,11 @@ static int sja1105_init_general_params(struct sja1105_private *priv) .mac_fltres1 = SJA1105_LINKLOCAL_FILTER_A, .mac_flt1 = SJA1105_LINKLOCAL_FILTER_A_MASK, .incl_srcpt1 = true, - .send_meta1 = false, + .send_meta1 = true, .mac_fltres0 = SJA1105_LINKLOCAL_FILTER_B, .mac_flt0 = SJA1105_LINKLOCAL_FILTER_B_MASK, .incl_srcpt0 = true, - .send_meta0 = false, + .send_meta0 = true, /* Default to an invalid value */ .mirr_port = priv->ds->num_ports, /* No TTEthernet */ @@ -2215,7 +2215,6 @@ static int sja1105_reload_cbs(struct sja1105_private *priv) static const char * const sja1105_reset_reasons[] = { [SJA1105_VLAN_FILTERING] = "VLAN filtering", - [SJA1105_RX_HWTSTAMPING] = "RX timestamping", [SJA1105_AGEING_TIME] = "Ageing time", [SJA1105_SCHEDULING] = "Time-aware scheduling", [SJA1105_BEST_EFFORT_POLICING] = "Best-effort policing", diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c index 30fb2cc40164..a7d41e781398 100644 --- a/drivers/net/dsa/sja1105/sja1105_ptp.c +++ b/drivers/net/dsa/sja1105/sja1105_ptp.c @@ -58,35 +58,10 @@ enum sja1105_ptp_clk_mode { #define ptp_data_to_sja1105(d) \ container_of((d), struct sja1105_private, ptp_data) -/* Must be called only while the RX timestamping state of the tagger - * is turned off - */ -static int sja1105_change_rxtstamping(struct sja1105_private *priv, - bool on) -{ - struct sja1105_ptp_data *ptp_data = &priv->ptp_data; - struct sja1105_general_params_entry *general_params; - struct sja1105_table *table; - - table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS]; - general_params = table->entries; - general_params->send_meta1 = on; - general_params->send_meta0 = on; - - ptp_cancel_worker_sync(ptp_data->clock); - skb_queue_purge(&ptp_data->skb_txtstamp_queue); - skb_queue_purge(&ptp_data->skb_rxtstamp_queue); - - return sja1105_static_config_reload(priv, SJA1105_RX_HWTSTAMPING); -} - int sja1105_hwtstamp_set(struct dsa_switch *ds, int port, struct ifreq *ifr) { - struct sja1105_tagger_data *tagger_data = sja1105_tagger_data(ds); struct sja1105_private *priv = ds->priv; struct hwtstamp_config config; - bool rx_on; - int rc; if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) return -EFAULT; @@ -104,26 +79,13 @@ int sja1105_hwtstamp_set(struct dsa_switch *ds, int port, struct ifreq *ifr) switch (config.rx_filter) { case HWTSTAMP_FILTER_NONE: - rx_on = false; + priv->hwts_rx_en &= ~BIT(port); break; default: - rx_on = true; + priv->hwts_rx_en |= BIT(port); break; } - if (rx_on != tagger_data->rxtstamp_get_state(ds)) { - tagger_data->rxtstamp_set_state(ds, false); - - rc = sja1105_change_rxtstamping(priv, rx_on); - if (rc < 0) { - dev_err(ds->dev, - "Failed to change RX timestamping: %d\n", rc); - return rc; - } - if (rx_on) - tagger_data->rxtstamp_set_state(ds, true); - } - if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) return -EFAULT; return 0; @@ -131,7 +93,6 @@ int sja1105_hwtstamp_set(struct dsa_switch *ds, int port, struct ifreq *ifr) int sja1105_hwtstamp_get(struct dsa_switch *ds, int port, struct ifreq *ifr) { - struct sja1105_tagger_data *tagger_data = sja1105_tagger_data(ds); struct sja1105_private *priv = ds->priv; struct hwtstamp_config config; @@ -140,7 +101,7 @@ int sja1105_hwtstamp_get(struct dsa_switch *ds, int port, struct ifreq *ifr) config.tx_type = HWTSTAMP_TX_ON; else config.tx_type = HWTSTAMP_TX_OFF; - if (tagger_data->rxtstamp_get_state(ds)) + if (priv->hwts_rx_en & BIT(port)) config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; else config.rx_filter = HWTSTAMP_FILTER_NONE; @@ -413,11 +374,10 @@ static long sja1105_rxtstamp_work(struct ptp_clock_info *ptp) bool sja1105_rxtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb) { - struct sja1105_tagger_data *tagger_data = sja1105_tagger_data(ds); struct sja1105_private *priv = ds->priv; struct sja1105_ptp_data *ptp_data = &priv->ptp_data; - if (!tagger_data->rxtstamp_get_state(ds)) + if (!(priv->hwts_rx_en & BIT(port))) return false; /* We need to read the full PTP clock to reconstruct the Rx diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index 159e43171ccc..c177322f793d 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -48,13 +48,9 @@ struct sja1105_deferred_xmit_work { /* Global tagger data */ struct sja1105_tagger_data { - /* Tagger to switch */ void (*xmit_work_fn)(struct kthread_work *work); void (*meta_tstamp_handler)(struct dsa_switch *ds, int port, u8 ts_id, enum sja1110_meta_tstamp dir, u64 tstamp); - /* Switch to tagger */ - bool (*rxtstamp_get_state)(struct dsa_switch *ds); - void (*rxtstamp_set_state)(struct dsa_switch *ds, bool on); }; struct sja1105_skb_cb { diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index ec48165673ed..ade3eeb2f3e6 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -58,11 +58,8 @@ #define SJA1110_TX_TRAILER_LEN 4 #define SJA1110_MAX_PADDING_LEN 15 -#define SJA1105_HWTS_RX_EN 0 - struct sja1105_tagger_private { struct sja1105_tagger_data data; /* Must be first */ - unsigned long state; /* Protects concurrent access to the meta state machine * from taggers running on multiple ports on SMP systems */ @@ -392,10 +389,6 @@ static struct sk_buff priv = sja1105_tagger_private(ds); - if (!test_bit(SJA1105_HWTS_RX_EN, &priv->state)) - /* Do normal processing. */ - return skb; - spin_lock(&priv->meta_lock); /* Was this a link-local frame instead of the meta * that we were expecting? @@ -431,12 +424,6 @@ static struct sk_buff priv = sja1105_tagger_private(ds); - /* Drop the meta frame if we're not in the right state - * to process it. - */ - if (!test_bit(SJA1105_HWTS_RX_EN, &priv->state)) - return NULL; - spin_lock(&priv->meta_lock); stampable_skb = priv->stampable_skb; @@ -472,30 +459,6 @@ static struct sk_buff return skb; } -static bool sja1105_rxtstamp_get_state(struct dsa_switch *ds) -{ - struct sja1105_tagger_private *priv = sja1105_tagger_private(ds); - - return test_bit(SJA1105_HWTS_RX_EN, &priv->state); -} - -static void sja1105_rxtstamp_set_state(struct dsa_switch *ds, bool on) -{ - struct sja1105_tagger_private *priv = sja1105_tagger_private(ds); - - if (on) - set_bit(SJA1105_HWTS_RX_EN, &priv->state); - else - clear_bit(SJA1105_HWTS_RX_EN, &priv->state); - - /* Initialize the meta state machine to a known state */ - if (!priv->stampable_skb) - return; - - kfree_skb(priv->stampable_skb); - priv->stampable_skb = NULL; -} - static bool sja1105_skb_has_tag_8021q(const struct sk_buff *skb) { u16 tpid = ntohs(eth_hdr(skb)->h_proto); @@ -552,9 +515,6 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, */ source_port = hdr->h_dest[3]; switch_id = hdr->h_dest[4]; - /* Clear the DMAC bytes that were mangled by the switch */ - hdr->h_dest[3] = 0; - hdr->h_dest[4] = 0; } else if (is_meta) { sja1105_meta_unpack(skb, &meta); source_port = meta.source_port; @@ -785,7 +745,6 @@ static void sja1105_disconnect(struct dsa_switch *ds) static int sja1105_connect(struct dsa_switch *ds) { - struct sja1105_tagger_data *tagger_data; struct sja1105_tagger_private *priv; struct kthread_worker *xmit_worker; int err; @@ -805,10 +764,6 @@ static int sja1105_connect(struct dsa_switch *ds) } priv->xmit_worker = xmit_worker; - /* Export functions for switch driver use */ - tagger_data = &priv->data; - tagger_data->rxtstamp_get_state = sja1105_rxtstamp_get_state; - tagger_data->rxtstamp_set_state = sja1105_rxtstamp_set_state; ds->tagger_data = priv; return 0; From 14bb236b29922c4f57d8c05bfdbcb82677f917c9 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Tue, 4 Jul 2023 09:56:53 +0530 Subject: [PATCH 59/70] octeontx-af: fix hardware timestamp configuration MAC block on CN10K (RPM) supports hardware timestamp configuration. The previous patch which added timestamp configuration support has a bug. Though the netdev driver requests to disable timestamp configuration, the driver is always enabling it. This patch fixes the same. Fixes: d1489208681d ("octeontx2-af: cn10k: RPM hardware timestamp configuration") Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Goutham Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 4b8559ac0404..095b2cc4a699 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -763,7 +763,7 @@ static int rvu_cgx_ptp_rx_cfg(struct rvu *rvu, u16 pcifunc, bool enable) cgxd = rvu_cgx_pdata(cgx_id, rvu); mac_ops = get_mac_ops(cgxd); - mac_ops->mac_enadis_ptp_config(cgxd, lmac_id, true); + mac_ops->mac_enadis_ptp_config(cgxd, lmac_id, enable); /* If PTP is enabled then inform NPC that packets to be * parsed by this PF will have their data shifted by 8 bytes * and if PTP is disabled then no shift is required From 80de809bd35e2a8999edf9f5aaa2d8de18921f11 Mon Sep 17 00:00:00 2001 From: Thorsten Winkler Date: Tue, 4 Jul 2023 16:41:21 +0200 Subject: [PATCH 60/70] s390/qeth: Fix vipa deletion Change boolean parameter of function "qeth_l3_vipa_store" inside the "qeth_l3_dev_vipa_del4_store" function from "true" to "false" because "true" is used for adding a virtual ip address and "false" for deleting. Fixes: 2390166a6b45 ("s390/qeth: clean up L3 sysfs code") Reviewed-by: Alexandra Winter Reviewed-by: Wenjia Zhang Signed-off-by: Thorsten Winkler Signed-off-by: Alexandra Winter Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l3_sys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c index 9f90a860ca2c..a6b64228ead2 100644 --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -625,7 +625,7 @@ static QETH_DEVICE_ATTR(vipa_add4, add4, 0644, static ssize_t qeth_l3_dev_vipa_del4_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - return qeth_l3_vipa_store(dev, buf, true, count, QETH_PROT_IPV4); + return qeth_l3_vipa_store(dev, buf, false, count, QETH_PROT_IPV4); } static QETH_DEVICE_ATTR(vipa_del4, del4, 0200, NULL, From 3fffa15bfef48b0ad6424779c03e68ae8ace5acb Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 4 Jul 2023 22:44:33 +0200 Subject: [PATCH 61/70] mptcp: ensure subflow is unhashed before cleaning the backlog While tacking care of the mptcp-level listener I unintentionally moved the subflow level unhash after the subflow listener backlog cleanup. That could cause some nasty race and makes the code harder to read. Address the issue restoring the proper order of operations. Fixes: 57fc0f1ceaa4 ("mptcp: ensure listener is unhashed before updating the sk status") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index e892673deb73..489a3defdde5 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2909,10 +2909,10 @@ static void mptcp_check_listen_stop(struct sock *sk) return; lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); + tcp_set_state(ssk, TCP_CLOSE); mptcp_subflow_queue_clean(sk, ssk); inet_csk_listen_stop(ssk); mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED); - tcp_set_state(ssk, TCP_CLOSE); release_sock(ssk); } From 0226436acf2495cde4b93e7400e5a87305c26054 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 4 Jul 2023 22:44:34 +0200 Subject: [PATCH 62/70] mptcp: do not rely on implicit state check in mptcp_listen() Since the blamed commit, closing the first subflow resets the first subflow socket state to SS_UNCONNECTED. The current mptcp listen implementation relies only on such state to prevent touching not-fully-disconnected sockets. Incoming mptcp fastclose (or paired endpoint removal) unconditionally closes the first subflow. All the above allows an incoming fastclose followed by a listen() call to successfully race with a blocking recvmsg(), potentially causing the latter to hit a divide by zero bug in cleanup_rbuf/__tcp_select_window(). Address the issue explicitly checking the msk socket state in mptcp_listen(). An alternative solution would be moving the first subflow socket state update into mptcp_disconnect(), but in the long term the first subflow socket should be removed: better avoid relaying on it for internal consistency check. Fixes: b29fcfb54cd7 ("mptcp: full disconnect implementation") Cc: stable@vger.kernel.org Reported-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/414 Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 489a3defdde5..3613489eb6e3 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3703,6 +3703,11 @@ static int mptcp_listen(struct socket *sock, int backlog) pr_debug("msk=%p", msk); lock_sock(sk); + + err = -EINVAL; + if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM) + goto unlock; + ssock = __mptcp_nmpc_socket(msk); if (IS_ERR(ssock)) { err = PTR_ERR(ssock); From 221e4550454a822f9a11834e30694c7d1d65747c Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Tue, 4 Jul 2023 22:44:35 +0200 Subject: [PATCH 63/70] selftests: mptcp: connect: fail if nft supposed to work In case of "external" errors when preparing the environment for the TProxy tests, the subtests were marked as skipped. This is fine but it means these errors are ignored. On MPTCP Public CI, we do want to catch such issues and mark the selftest as failed if there are such issues. We can then use mptcp_lib_fail_if_expected_feature() helper that has been recently added to fail if needed. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: 5fb62e9cd3ad ("selftests: mptcp: add tproxy test case") Cc: stable@vger.kernel.org Acked-by: Paolo Abeni Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 13561e5bc0cd..bbae40882bfa 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -718,6 +718,7 @@ table inet mangle { EOF if [ $? -ne 0 ]; then echo "SKIP: $msg, could not load nft ruleset" + mptcp_lib_fail_if_expected_feature "nft rules" return fi @@ -733,6 +734,7 @@ EOF if [ $? -ne 0 ]; then ip netns exec "$listener_ns" nft flush ruleset echo "SKIP: $msg, ip $r6flag rule failed" + mptcp_lib_fail_if_expected_feature "ip rule" return fi @@ -741,6 +743,7 @@ EOF ip netns exec "$listener_ns" nft flush ruleset ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100 echo "SKIP: $msg, ip route add local $local_addr failed" + mptcp_lib_fail_if_expected_feature "ip route" return fi From a5a5990c099dd354e05e89ee77cd2dbf6655d4a1 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Tue, 4 Jul 2023 22:44:36 +0200 Subject: [PATCH 64/70] selftests: mptcp: sockopt: use 'iptables-legacy' if available IPTables commands using 'iptables-nft' fail on old kernels, at least on v5.15 because it doesn't see the default IPTables chains: $ iptables -L iptables/1.8.2 Failed to initialize nft: Protocol not supported As a first step before switching to NFTables, we can use iptables-legacy if available. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: dc65fe82fb07 ("selftests: mptcp: add packet mark test case") Cc: stable@vger.kernel.org Acked-by: Paolo Abeni Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- .../selftests/net/mptcp/mptcp_sockopt.sh | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index f295a371ff14..c21bfd7f0c01 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -12,6 +12,8 @@ ksft_skip=4 timeout_poll=30 timeout_test=$((timeout_poll * 2 + 1)) mptcp_connect="" +iptables="iptables" +ip6tables="ip6tables" sec=$(date +%s) rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) @@ -25,7 +27,7 @@ add_mark_rules() local m=$2 local t - for t in iptables ip6tables; do + for t in ${iptables} ${ip6tables}; do # just to debug: check we have multiple subflows connection requests ip netns exec $ns $t -A OUTPUT -p tcp --syn -m mark --mark $m -j ACCEPT @@ -95,14 +97,14 @@ if [ $? -ne 0 ];then exit $ksft_skip fi -iptables -V > /dev/null 2>&1 -if [ $? -ne 0 ];then +# Use the legacy version if available to support old kernel versions +if iptables-legacy -V &> /dev/null; then + iptables="iptables-legacy" + ip6tables="ip6tables-legacy" +elif ! iptables -V &> /dev/null; then echo "SKIP: Could not run all tests without iptables tool" exit $ksft_skip -fi - -ip6tables -V > /dev/null 2>&1 -if [ $? -ne 0 ];then +elif ! ip6tables -V &> /dev/null; then echo "SKIP: Could not run all tests without ip6tables tool" exit $ksft_skip fi @@ -112,10 +114,10 @@ check_mark() local ns=$1 local af=$2 - local tables=iptables + local tables=${iptables} if [ $af -eq 6 ];then - tables=ip6tables + tables=${ip6tables} fi local counters values From 9ac4c28eb70cd5ea5472a5e1c495dcdd597d4597 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Tue, 4 Jul 2023 22:44:37 +0200 Subject: [PATCH 65/70] selftests: mptcp: sockopt: return error if wrong mark When an error was detected when checking the marks, a message was correctly printed mentioning the error but followed by another one saying everything was OK and the selftest was not marked as failed as expected. Now the 'ret' variable is directly set to 1 in order to make sure the exit is done with an error, similar to what is done in other functions. While at it, the error is correctly propagated to the caller. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: dc65fe82fb07 ("selftests: mptcp: add packet mark test case") Cc: stable@vger.kernel.org Acked-by: Paolo Abeni Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index c21bfd7f0c01..dc8d473fc82c 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -128,6 +128,7 @@ check_mark() for v in $values; do if [ $v -ne 0 ]; then echo "FAIL: got $tables $values in ns $ns , not 0 - not all expected packets marked" 1>&2 + ret=1 return 1 fi done @@ -227,11 +228,11 @@ do_transfer() fi if [ $local_addr = "::" ];then - check_mark $listener_ns 6 - check_mark $connector_ns 6 + check_mark $listener_ns 6 || retc=1 + check_mark $connector_ns 6 || retc=1 else - check_mark $listener_ns 4 - check_mark $connector_ns 4 + check_mark $listener_ns 4 || retc=1 + check_mark $connector_ns 4 || retc=1 fi check_transfer $cin $sout "file received by server" From d8566d0e03922217f70d9be2d401fcb860986374 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Tue, 4 Jul 2023 22:44:38 +0200 Subject: [PATCH 66/70] selftests: mptcp: userspace_pm: use correct server port "server4_port" variable is not set but "app4_port" is the server port in v4 and the correct variable name to use. The port is optional so there was no visible impact. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: ca188a25d43f ("selftests: mptcp: userspace PM support for MP_PRIO signals") Cc: stable@vger.kernel.org Acked-by: Paolo Abeni Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/userspace_pm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 98d9e4d2d3fc..841a67a7d524 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -848,7 +848,7 @@ test_prio() local count # Send MP_PRIO signal from client to server machine - ip netns exec "$ns2" ./pm_nl_ctl set 10.0.1.2 port "$client4_port" flags backup token "$client4_token" rip 10.0.1.1 rport "$server4_port" + ip netns exec "$ns2" ./pm_nl_ctl set 10.0.1.2 port "$client4_port" flags backup token "$client4_token" rip 10.0.1.1 rport "$app4_port" sleep 0.5 # Check TX From 966c6c3adfb1257ea8a839cdfad2b74092cc5532 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Tue, 4 Jul 2023 22:44:39 +0200 Subject: [PATCH 67/70] selftests: mptcp: userspace_pm: report errors with 'remove' tests A message was mentioning an issue with the "remove" tests but the selftest was not marked as failed. Directly exit with an error like it is done everywhere else in this selftest. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: 259a834fadda ("selftests: mptcp: functional tests for the userspace PM type") Cc: stable@vger.kernel.org Acked-by: Paolo Abeni Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/userspace_pm.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 841a67a7d524..b180133a30af 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -423,6 +423,7 @@ test_remove() stdbuf -o0 -e0 printf "[OK]\n" else stdbuf -o0 -e0 printf "[FAIL]\n" + exit 1 fi # RM_ADDR using an invalid addr id should result in no action @@ -437,6 +438,7 @@ test_remove() stdbuf -o0 -e0 printf "[OK]\n" else stdbuf -o0 -e0 printf "[FAIL]\n" + exit 1 fi # RM_ADDR from the client to server machine From 6c8880fcaa5c45355179b759c1d11737775e31fc Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Tue, 4 Jul 2023 22:44:40 +0200 Subject: [PATCH 68/70] selftests: mptcp: depend on SYN_COOKIES MPTCP selftests are using TCP SYN Cookies for quite a while now, since v5.9. Some CIs don't have this config option enabled and this is causing issues in the tests: # ns1 MPTCP -> ns1 (10.0.1.1:10000 ) MPTCP (duration 167ms) sysctl: cannot stat /proc/sys/net/ipv4/tcp_syncookies: No such file or directory # [ OK ]./mptcp_connect.sh: line 554: [: -eq: unary operator expected There is no impact in the results but the test is not doing what it is supposed to do. Fixes: fed61c4b584c ("selftests: mptcp: make 2nd net namespace use tcp syn cookies unconditionally") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 6032f9b23c4c..e317c2e44dae 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -6,6 +6,7 @@ CONFIG_INET_DIAG=m CONFIG_INET_MPTCP_DIAG=m CONFIG_VETH=y CONFIG_NET_SCH_NETEM=m +CONFIG_SYN_COOKIES=y CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_NETFILTER_NETLINK=m From 61d9658050260dbcbf9055479b7ac5bbbe1e8831 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Tue, 4 Jul 2023 22:44:41 +0200 Subject: [PATCH 69/70] selftests: mptcp: pm_nl_ctl: fix 32-bit support When using pm_nl_ctl to validate userspace path-manager's behaviours, it was failing on 32-bit architectures ~half of the time. pm_nl_ctl was not reporting any error but the command was not doing what it was expected to do. As a result, the expected linked event was not triggered after and the test failed. This is due to the fact the token given in argument to the application was parsed as an integer with atoi(): in a 32-bit arch, if the number was bigger than INT_MAX, 2147483647 was used instead. This can simply be fixed by using strtoul() instead of atoi(). The errors have been seen "by chance" when manually looking at the results from LKFT. Fixes: 9a0b36509df0 ("selftests: mptcp: support MPTCP_PM_CMD_ANNOUNCE") Cc: stable@vger.kernel.org Fixes: ecd2a77d672f ("selftests: mptcp: support MPTCP_PM_CMD_REMOVE") Fixes: cf8d0a6dfd64 ("selftests: mptcp: support MPTCP_PM_CMD_SUBFLOW_CREATE") Fixes: 57cc361b8d38 ("selftests: mptcp: support MPTCP_PM_CMD_SUBFLOW_DESTROY") Fixes: ca188a25d43f ("selftests: mptcp: userspace PM support for MP_PRIO signals") Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/pm_nl_ctl.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index abddf4c63e79..1887bd61bd9a 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -425,7 +425,7 @@ int dsf(int fd, int pm_family, int argc, char *argv[]) } /* token */ - token = atoi(params[4]); + token = strtoul(params[4], NULL, 10); rta = (void *)(data + off); rta->rta_type = MPTCP_PM_ATTR_TOKEN; rta->rta_len = RTA_LENGTH(4); @@ -551,7 +551,7 @@ int csf(int fd, int pm_family, int argc, char *argv[]) } /* token */ - token = atoi(params[4]); + token = strtoul(params[4], NULL, 10); rta = (void *)(data + off); rta->rta_type = MPTCP_PM_ATTR_TOKEN; rta->rta_len = RTA_LENGTH(4); @@ -598,7 +598,7 @@ int remove_addr(int fd, int pm_family, int argc, char *argv[]) if (++arg >= argc) error(1, 0, " missing token value"); - token = atoi(argv[arg]); + token = strtoul(argv[arg], NULL, 10); rta = (void *)(data + off); rta->rta_type = MPTCP_PM_ATTR_TOKEN; rta->rta_len = RTA_LENGTH(4); @@ -710,7 +710,7 @@ int announce_addr(int fd, int pm_family, int argc, char *argv[]) if (++arg >= argc) error(1, 0, " missing token value"); - token = atoi(argv[arg]); + token = strtoul(argv[arg], NULL, 10); } else error(1, 0, "unknown keyword %s", argv[arg]); } @@ -1347,7 +1347,7 @@ int set_flags(int fd, int pm_family, int argc, char *argv[]) error(1, 0, " missing token value"); /* token */ - token = atoi(argv[arg]); + token = strtoul(argv[arg], NULL, 10); } else if (!strcmp(argv[arg], "flags")) { char *tok, *str; From cc7eab25b1cf3f9594fe61142d3523ce4d14a788 Mon Sep 17 00:00:00 2001 From: Yinjun Zhang Date: Wed, 5 Jul 2023 07:28:18 +0200 Subject: [PATCH 70/70] nfp: clean mc addresses in application firmware when closing port When moving devices from one namespace to another, mc addresses are cleaned in software while not removed from application firmware. Thus the mc addresses are remained and will cause resource leak. Now use `__dev_mc_unsync` to clean mc addresses when closing port. Fixes: e20aa071cd95 ("nfp: fix schedule in atomic context when sync mc address") Cc: stable@vger.kernel.org Signed-off-by: Yinjun Zhang Acked-by: Simon Horman Signed-off-by: Louis Peens Reviewed-by: Jacob Keller Message-ID: <20230705052818.7122-1-louis.peens@corigine.com> Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 49f2f081ebb5..6b1fb5708434 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -53,6 +53,8 @@ #include "crypto/crypto.h" #include "crypto/fw.h" +static int nfp_net_mc_unsync(struct net_device *netdev, const unsigned char *addr); + /** * nfp_net_get_fw_version() - Read and parse the FW version * @fw_ver: Output fw_version structure to read to @@ -1084,6 +1086,9 @@ static int nfp_net_netdev_close(struct net_device *netdev) /* Step 2: Tell NFP */ + if (nn->cap_w1 & NFP_NET_CFG_CTRL_MCAST_FILTER) + __dev_mc_unsync(netdev, nfp_net_mc_unsync); + nfp_net_clear_config_and_disable(nn); nfp_port_configure(netdev, false);