Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

Minor conflicts in net/mptcp/protocol.h and
tools/testing/selftests/net/Makefile.

In both cases code was added on both sides in the same place
so just keep both.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Jakub Kicinski 2020-10-15 12:43:21 -07:00
Parents: 346e320cb2 2ecbc1f684
Commit: 2295cddf99
45 changed files: 1210 additions and 141 deletions


@@ -465,9 +465,9 @@ XPS Configuration
 -----------------
 
 XPS is only available if the kconfig symbol CONFIG_XPS is enabled (on by
-default for SMP). The functionality remains disabled until explicitly
-configured. To enable XPS, the bitmap of CPUs/receive-queues that may
-use a transmit queue is configured using the sysfs file entry:
+default for SMP). If compiled in, it is driver dependent whether, and
+how, XPS is configured at device init. The mapping of CPUs/receive-queues
+to transmit queue can be inspected and configured using sysfs:
 
 For selection based on CPUs map::
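As a quick usage sketch of the sysfs interface this paragraph refers to (the interface and queue names below are assumptions, not part of the patch):

    # show which CPUs may currently use TX queue 0 of eth0
    cat /sys/class/net/eth0/queues/tx-0/xps_cpus
    # allow CPUs 0-3 (hex bitmap 0xf) to use TX queue 0; needs root
    echo f > /sys/class/net/eth0/queues/tx-0/xps_cpus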


@@ -144,8 +144,6 @@ static int __maybe_unused m_can_runtime_suspend(struct device *dev)
 	struct net_device *ndev = dev_get_drvdata(dev);
 	struct m_can_classdev *mcan_class = netdev_priv(ndev);
 
-	m_can_class_suspend(dev);
-
 	clk_disable_unprepare(mcan_class->cclk);
 	clk_disable_unprepare(mcan_class->hclk);


@@ -103,14 +103,8 @@ void ksz_init_mib_timer(struct ksz_device *dev)
 
 	INIT_DELAYED_WORK(&dev->mib_read, ksz_mib_read_work);
 
-	/* Read MIB counters every 30 seconds to avoid overflow. */
-	dev->mib_read_interval = msecs_to_jiffies(30000);
-
 	for (i = 0; i < dev->mib_port_cnt; i++)
 		dev->dev_ops->port_init_cnt(dev, i);
-
-	/* Start the timer 2 seconds later. */
-	schedule_delayed_work(&dev->mib_read, msecs_to_jiffies(2000));
 }
 EXPORT_SYMBOL_GPL(ksz_init_mib_timer);
@@ -143,7 +137,9 @@ void ksz_mac_link_down(struct dsa_switch *ds, int port, unsigned int mode,
 
 	/* Read all MIB counters when the link is going down. */
 	p->read = true;
-	schedule_delayed_work(&dev->mib_read, 0);
+	/* timer started */
+	if (dev->mib_read_interval)
+		schedule_delayed_work(&dev->mib_read, 0);
 }
 EXPORT_SYMBOL_GPL(ksz_mac_link_down);
@@ -451,6 +447,12 @@ int ksz_switch_register(struct ksz_device *dev,
 			return ret;
 	}
 
+	/* Read MIB counters every 30 seconds to avoid overflow. */
+	dev->mib_read_interval = msecs_to_jiffies(30000);
+
+	/* Start the MIB timer. */
+	schedule_delayed_work(&dev->mib_read, 0);
+
 	return 0;
 }
 EXPORT_SYMBOL(ksz_switch_register);
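For context, the MIB counters that this delayed work keeps from overflowing are the ones exported via ethtool; a quick way to watch them on a switch port (the port name is an assumption) is:

    ethtool -S lan1 | head -n 20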


@@ -60,6 +60,89 @@ static struct ch_tc_pedit_fields pedits[] = {
 	PEDIT_FIELDS(IP6_, DST_127_96, 4, nat_lip, 12),
 };
static const struct cxgb4_natmode_config cxgb4_natmode_config_array[] = {
/* Default supported NAT modes */
{
.chip = CHELSIO_T5,
.flags = CXGB4_ACTION_NATMODE_NONE,
.natmode = NAT_MODE_NONE,
},
{
.chip = CHELSIO_T5,
.flags = CXGB4_ACTION_NATMODE_DIP,
.natmode = NAT_MODE_DIP,
},
{
.chip = CHELSIO_T5,
.flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_DPORT,
.natmode = NAT_MODE_DIP_DP,
},
{
.chip = CHELSIO_T5,
.flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_DPORT |
CXGB4_ACTION_NATMODE_SIP,
.natmode = NAT_MODE_DIP_DP_SIP,
},
{
.chip = CHELSIO_T5,
.flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_DPORT |
CXGB4_ACTION_NATMODE_SPORT,
.natmode = NAT_MODE_DIP_DP_SP,
},
{
.chip = CHELSIO_T5,
.flags = CXGB4_ACTION_NATMODE_SIP | CXGB4_ACTION_NATMODE_SPORT,
.natmode = NAT_MODE_SIP_SP,
},
{
.chip = CHELSIO_T5,
.flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_SIP |
CXGB4_ACTION_NATMODE_SPORT,
.natmode = NAT_MODE_DIP_SIP_SP,
},
{
.chip = CHELSIO_T5,
.flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_SIP |
CXGB4_ACTION_NATMODE_DPORT |
CXGB4_ACTION_NATMODE_SPORT,
.natmode = NAT_MODE_ALL,
},
/* T6+ can ignore L4 ports when they're disabled. */
{
.chip = CHELSIO_T6,
.flags = CXGB4_ACTION_NATMODE_SIP,
.natmode = NAT_MODE_SIP_SP,
},
{
.chip = CHELSIO_T6,
.flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_SPORT,
.natmode = NAT_MODE_DIP_DP_SP,
},
{
.chip = CHELSIO_T6,
.flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_SIP,
.natmode = NAT_MODE_ALL,
},
};
static void cxgb4_action_natmode_tweak(struct ch_filter_specification *fs,
u8 natmode_flags)
{
u8 i = 0;
/* Translate the enabled NAT 4-tuple fields to one of the
* hardware supported NAT mode configurations. This ensures
* that we pick a valid combination, where the disabled fields
* do not get overwritten to 0.
*/
for (i = 0; i < ARRAY_SIZE(cxgb4_natmode_config_array); i++) {
if (cxgb4_natmode_config_array[i].flags == natmode_flags) {
fs->nat_mode = cxgb4_natmode_config_array[i].natmode;
return;
}
}
}
static struct ch_tc_flower_entry *allocate_flower_entry(void) static struct ch_tc_flower_entry *allocate_flower_entry(void)
{ {
struct ch_tc_flower_entry *new = kzalloc(sizeof(*new), GFP_KERNEL); struct ch_tc_flower_entry *new = kzalloc(sizeof(*new), GFP_KERNEL);
@ -289,7 +372,8 @@ static void offload_pedit(struct ch_filter_specification *fs, u32 val, u32 mask,
} }
static void process_pedit_field(struct ch_filter_specification *fs, u32 val, static void process_pedit_field(struct ch_filter_specification *fs, u32 val,
u32 mask, u32 offset, u8 htype) u32 mask, u32 offset, u8 htype,
u8 *natmode_flags)
{ {
switch (htype) { switch (htype) {
case FLOW_ACT_MANGLE_HDR_TYPE_ETH: case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
@ -314,67 +398,102 @@ static void process_pedit_field(struct ch_filter_specification *fs, u32 val,
switch (offset) { switch (offset) {
case PEDIT_IP4_SRC: case PEDIT_IP4_SRC:
offload_pedit(fs, val, mask, IP4_SRC); offload_pedit(fs, val, mask, IP4_SRC);
*natmode_flags |= CXGB4_ACTION_NATMODE_SIP;
break; break;
case PEDIT_IP4_DST: case PEDIT_IP4_DST:
offload_pedit(fs, val, mask, IP4_DST); offload_pedit(fs, val, mask, IP4_DST);
*natmode_flags |= CXGB4_ACTION_NATMODE_DIP;
} }
fs->nat_mode = NAT_MODE_ALL;
break; break;
case FLOW_ACT_MANGLE_HDR_TYPE_IP6: case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
switch (offset) { switch (offset) {
case PEDIT_IP6_SRC_31_0: case PEDIT_IP6_SRC_31_0:
offload_pedit(fs, val, mask, IP6_SRC_31_0); offload_pedit(fs, val, mask, IP6_SRC_31_0);
*natmode_flags |= CXGB4_ACTION_NATMODE_SIP;
break; break;
case PEDIT_IP6_SRC_63_32: case PEDIT_IP6_SRC_63_32:
offload_pedit(fs, val, mask, IP6_SRC_63_32); offload_pedit(fs, val, mask, IP6_SRC_63_32);
*natmode_flags |= CXGB4_ACTION_NATMODE_SIP;
break; break;
case PEDIT_IP6_SRC_95_64: case PEDIT_IP6_SRC_95_64:
offload_pedit(fs, val, mask, IP6_SRC_95_64); offload_pedit(fs, val, mask, IP6_SRC_95_64);
*natmode_flags |= CXGB4_ACTION_NATMODE_SIP;
break; break;
case PEDIT_IP6_SRC_127_96: case PEDIT_IP6_SRC_127_96:
offload_pedit(fs, val, mask, IP6_SRC_127_96); offload_pedit(fs, val, mask, IP6_SRC_127_96);
*natmode_flags |= CXGB4_ACTION_NATMODE_SIP;
break; break;
case PEDIT_IP6_DST_31_0: case PEDIT_IP6_DST_31_0:
offload_pedit(fs, val, mask, IP6_DST_31_0); offload_pedit(fs, val, mask, IP6_DST_31_0);
*natmode_flags |= CXGB4_ACTION_NATMODE_DIP;
break; break;
case PEDIT_IP6_DST_63_32: case PEDIT_IP6_DST_63_32:
offload_pedit(fs, val, mask, IP6_DST_63_32); offload_pedit(fs, val, mask, IP6_DST_63_32);
*natmode_flags |= CXGB4_ACTION_NATMODE_DIP;
break; break;
case PEDIT_IP6_DST_95_64: case PEDIT_IP6_DST_95_64:
offload_pedit(fs, val, mask, IP6_DST_95_64); offload_pedit(fs, val, mask, IP6_DST_95_64);
*natmode_flags |= CXGB4_ACTION_NATMODE_DIP;
break; break;
case PEDIT_IP6_DST_127_96: case PEDIT_IP6_DST_127_96:
offload_pedit(fs, val, mask, IP6_DST_127_96); offload_pedit(fs, val, mask, IP6_DST_127_96);
*natmode_flags |= CXGB4_ACTION_NATMODE_DIP;
} }
fs->nat_mode = NAT_MODE_ALL;
break; break;
case FLOW_ACT_MANGLE_HDR_TYPE_TCP: case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
switch (offset) { switch (offset) {
case PEDIT_TCP_SPORT_DPORT: case PEDIT_TCP_SPORT_DPORT:
if (~mask & PEDIT_TCP_UDP_SPORT_MASK) if (~mask & PEDIT_TCP_UDP_SPORT_MASK) {
fs->nat_fport = val; fs->nat_fport = val;
else *natmode_flags |= CXGB4_ACTION_NATMODE_SPORT;
} else {
fs->nat_lport = val >> 16; fs->nat_lport = val >> 16;
*natmode_flags |= CXGB4_ACTION_NATMODE_DPORT;
}
} }
fs->nat_mode = NAT_MODE_ALL;
break; break;
case FLOW_ACT_MANGLE_HDR_TYPE_UDP: case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
switch (offset) { switch (offset) {
case PEDIT_UDP_SPORT_DPORT: case PEDIT_UDP_SPORT_DPORT:
if (~mask & PEDIT_TCP_UDP_SPORT_MASK) if (~mask & PEDIT_TCP_UDP_SPORT_MASK) {
fs->nat_fport = val; fs->nat_fport = val;
else *natmode_flags |= CXGB4_ACTION_NATMODE_SPORT;
} else {
fs->nat_lport = val >> 16; fs->nat_lport = val >> 16;
*natmode_flags |= CXGB4_ACTION_NATMODE_DPORT;
}
} }
fs->nat_mode = NAT_MODE_ALL; break;
} }
} }
static int cxgb4_action_natmode_validate(struct adapter *adap, u8 natmode_flags,
struct netlink_ext_ack *extack)
{
u8 i = 0;
/* Extract the NAT mode to enable based on what 4-tuple fields
* are enabled to be overwritten. This ensures that the
* disabled fields don't get overwritten to 0.
*/
for (i = 0; i < ARRAY_SIZE(cxgb4_natmode_config_array); i++) {
const struct cxgb4_natmode_config *c;
c = &cxgb4_natmode_config_array[i];
if (CHELSIO_CHIP_VERSION(adap->params.chip) >= c->chip &&
natmode_flags == c->flags)
return 0;
}
NL_SET_ERR_MSG_MOD(extack, "Unsupported NAT mode 4-tuple combination");
return -EOPNOTSUPP;
}
void cxgb4_process_flow_actions(struct net_device *in, void cxgb4_process_flow_actions(struct net_device *in,
struct flow_action *actions, struct flow_action *actions,
struct ch_filter_specification *fs) struct ch_filter_specification *fs)
{ {
struct flow_action_entry *act; struct flow_action_entry *act;
u8 natmode_flags = 0;
int i; int i;
flow_action_for_each(i, act, actions) { flow_action_for_each(i, act, actions) {
@ -426,7 +545,8 @@ void cxgb4_process_flow_actions(struct net_device *in,
val = act->mangle.val; val = act->mangle.val;
offset = act->mangle.offset; offset = act->mangle.offset;
process_pedit_field(fs, val, mask, offset, htype); process_pedit_field(fs, val, mask, offset, htype,
&natmode_flags);
} }
break; break;
case FLOW_ACTION_QUEUE: case FLOW_ACTION_QUEUE:
@ -438,6 +558,9 @@ void cxgb4_process_flow_actions(struct net_device *in,
break; break;
} }
} }
if (natmode_flags)
cxgb4_action_natmode_tweak(fs, natmode_flags);
} }
static bool valid_l4_mask(u32 mask) static bool valid_l4_mask(u32 mask)
@ -454,7 +577,8 @@ static bool valid_l4_mask(u32 mask)
} }
static bool valid_pedit_action(struct net_device *dev, static bool valid_pedit_action(struct net_device *dev,
const struct flow_action_entry *act) const struct flow_action_entry *act,
u8 *natmode_flags)
{ {
u32 mask, offset; u32 mask, offset;
u8 htype; u8 htype;
@ -479,7 +603,10 @@ static bool valid_pedit_action(struct net_device *dev,
case FLOW_ACT_MANGLE_HDR_TYPE_IP4: case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
switch (offset) { switch (offset) {
case PEDIT_IP4_SRC: case PEDIT_IP4_SRC:
*natmode_flags |= CXGB4_ACTION_NATMODE_SIP;
break;
case PEDIT_IP4_DST: case PEDIT_IP4_DST:
*natmode_flags |= CXGB4_ACTION_NATMODE_DIP;
break; break;
default: default:
netdev_err(dev, "%s: Unsupported pedit field\n", netdev_err(dev, "%s: Unsupported pedit field\n",
@ -493,10 +620,13 @@ static bool valid_pedit_action(struct net_device *dev,
case PEDIT_IP6_SRC_63_32: case PEDIT_IP6_SRC_63_32:
case PEDIT_IP6_SRC_95_64: case PEDIT_IP6_SRC_95_64:
case PEDIT_IP6_SRC_127_96: case PEDIT_IP6_SRC_127_96:
*natmode_flags |= CXGB4_ACTION_NATMODE_SIP;
break;
case PEDIT_IP6_DST_31_0: case PEDIT_IP6_DST_31_0:
case PEDIT_IP6_DST_63_32: case PEDIT_IP6_DST_63_32:
case PEDIT_IP6_DST_95_64: case PEDIT_IP6_DST_95_64:
case PEDIT_IP6_DST_127_96: case PEDIT_IP6_DST_127_96:
*natmode_flags |= CXGB4_ACTION_NATMODE_DIP;
break; break;
default: default:
netdev_err(dev, "%s: Unsupported pedit field\n", netdev_err(dev, "%s: Unsupported pedit field\n",
@ -512,6 +642,10 @@ static bool valid_pedit_action(struct net_device *dev,
__func__); __func__);
return false; return false;
} }
if (~mask & PEDIT_TCP_UDP_SPORT_MASK)
*natmode_flags |= CXGB4_ACTION_NATMODE_SPORT;
else
*natmode_flags |= CXGB4_ACTION_NATMODE_DPORT;
break; break;
default: default:
netdev_err(dev, "%s: Unsupported pedit field\n", netdev_err(dev, "%s: Unsupported pedit field\n",
@ -527,6 +661,10 @@ static bool valid_pedit_action(struct net_device *dev,
__func__); __func__);
return false; return false;
} }
if (~mask & PEDIT_TCP_UDP_SPORT_MASK)
*natmode_flags |= CXGB4_ACTION_NATMODE_SPORT;
else
*natmode_flags |= CXGB4_ACTION_NATMODE_DPORT;
break; break;
default: default:
netdev_err(dev, "%s: Unsupported pedit field\n", netdev_err(dev, "%s: Unsupported pedit field\n",
@ -546,10 +684,12 @@ int cxgb4_validate_flow_actions(struct net_device *dev,
struct netlink_ext_ack *extack, struct netlink_ext_ack *extack,
u8 matchall_filter) u8 matchall_filter)
{ {
struct adapter *adap = netdev2adap(dev);
struct flow_action_entry *act; struct flow_action_entry *act;
bool act_redir = false; bool act_redir = false;
bool act_pedit = false; bool act_pedit = false;
bool act_vlan = false; bool act_vlan = false;
u8 natmode_flags = 0;
int i; int i;
if (!flow_action_basic_hw_stats_check(actions, extack)) if (!flow_action_basic_hw_stats_check(actions, extack))
@ -563,7 +703,6 @@ int cxgb4_validate_flow_actions(struct net_device *dev,
break; break;
case FLOW_ACTION_MIRRED: case FLOW_ACTION_MIRRED:
case FLOW_ACTION_REDIRECT: { case FLOW_ACTION_REDIRECT: {
struct adapter *adap = netdev2adap(dev);
struct net_device *n_dev, *target_dev; struct net_device *n_dev, *target_dev;
bool found = false; bool found = false;
unsigned int i; unsigned int i;
@ -620,7 +759,8 @@ int cxgb4_validate_flow_actions(struct net_device *dev,
} }
break; break;
case FLOW_ACTION_MANGLE: { case FLOW_ACTION_MANGLE: {
bool pedit_valid = valid_pedit_action(dev, act); bool pedit_valid = valid_pedit_action(dev, act,
&natmode_flags);
if (!pedit_valid) if (!pedit_valid)
return -EOPNOTSUPP; return -EOPNOTSUPP;
@ -642,6 +782,15 @@ int cxgb4_validate_flow_actions(struct net_device *dev,
return -EINVAL; return -EINVAL;
} }
if (act_pedit) {
int ret;
ret = cxgb4_action_natmode_validate(adap, natmode_flags,
extack);
if (ret)
return ret;
}
return 0; return 0;
} }
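As a hedged illustration of the kind of offloaded flow this validation covers, a flower rule whose pedit action rewrites the destination IP and TCP port before redirecting maps to one of the DIP+DPORT entries in cxgb4_natmode_config_array above (device names, addresses and the exact iproute2 syntax are assumptions and may need adjusting):

    tc qdisc add dev enp1s0f4 ingress
    tc filter add dev enp1s0f4 ingress protocol ip flower ip_proto tcp dst_ip 192.0.2.10 skip_sw \
        action pedit ex munge ip dst set 198.51.100.10 munge tcp dport set 8080 pipe \
        action mirred egress redirect dev enp1s0f4d1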


@@ -108,6 +108,21 @@ struct ch_tc_pedit_fields {
 #define PEDIT_TCP_SPORT_DPORT	0x0
 #define PEDIT_UDP_SPORT_DPORT	0x0
enum cxgb4_action_natmode_flags {
CXGB4_ACTION_NATMODE_NONE = 0,
CXGB4_ACTION_NATMODE_DIP = (1 << 0),
CXGB4_ACTION_NATMODE_SIP = (1 << 1),
CXGB4_ACTION_NATMODE_DPORT = (1 << 2),
CXGB4_ACTION_NATMODE_SPORT = (1 << 3),
};
/* TC PEDIT action to NATMODE translation entry */
struct cxgb4_natmode_config {
enum chip_type chip;
u8 flags;
u8 natmode;
};
 void cxgb4_process_flow_actions(struct net_device *in,
 				struct flow_action *actions,
 				struct ch_filter_specification *fs);


@@ -1912,6 +1912,27 @@ out:
 	return ret;
 }
 
+static void fec_enet_phy_reset_after_clk_enable(struct net_device *ndev)
+{
+	struct fec_enet_private *fep = netdev_priv(ndev);
+	struct phy_device *phy_dev = ndev->phydev;
+
+	if (phy_dev) {
+		phy_reset_after_clk_enable(phy_dev);
+	} else if (fep->phy_node) {
+		/*
+		 * If the PHY still is not bound to the MAC, but there is
+		 * OF PHY node and a matching PHY device instance already,
+		 * use the OF PHY node to obtain the PHY device instance,
+		 * and then use that PHY device instance when triggering
+		 * the PHY reset.
+		 */
+		phy_dev = of_phy_find_device(fep->phy_node);
+		phy_reset_after_clk_enable(phy_dev);
+		put_device(&phy_dev->mdio.dev);
+	}
+}
+
 static int fec_enet_clk_enable(struct net_device *ndev, bool enable)
 {
 	struct fec_enet_private *fep = netdev_priv(ndev);
@@ -1938,7 +1959,7 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable)
 		if (ret)
 			goto failed_clk_ref;
 
-		phy_reset_after_clk_enable(ndev->phydev);
+		fec_enet_phy_reset_after_clk_enable(ndev);
 	} else {
 		clk_disable_unprepare(fep->clk_enet_out);
 		if (fep->clk_ptp) {
@@ -2983,16 +3004,16 @@ fec_enet_open(struct net_device *ndev)
 	/* Init MAC prior to mii bus probe */
 	fec_restart(ndev);
 
-	/* Probe and connect to PHY when open the interface */
-	ret = fec_enet_mii_probe(ndev);
-	if (ret)
-		goto err_enet_mii_probe;
-
 	/* Call phy_reset_after_clk_enable() again if it failed during
 	 * phy_reset_after_clk_enable() before because the PHY wasn't probed.
 	 */
 	if (reset_again)
-		phy_reset_after_clk_enable(ndev->phydev);
+		fec_enet_phy_reset_after_clk_enable(ndev);
+
+	/* Probe and connect to PHY when open the interface */
+	ret = fec_enet_mii_probe(ndev);
+	if (ret)
+		goto err_enet_mii_probe;
 
 	if (fep->quirks & FEC_QUIRK_ERR006687)
 		imx6q_cpuidle_fec_irqs_used();


@@ -1349,6 +1349,7 @@ static int ibmveth_poll(struct napi_struct *napi, int budget)
 			int offset = ibmveth_rxq_frame_offset(adapter);
 			int csum_good = ibmveth_rxq_csum_good(adapter);
 			int lrg_pkt = ibmveth_rxq_large_packet(adapter);
+			__sum16 iph_check = 0;
 
 			skb = ibmveth_rxq_get_buffer(adapter);
 
@@ -1385,16 +1386,26 @@ static int ibmveth_poll(struct napi_struct *napi, int budget)
 			skb_put(skb, length);
 			skb->protocol = eth_type_trans(skb, netdev);
 
+			/* PHYP without PLSO support places a -1 in the ip
+			 * checksum for large send frames.
+			 */
+			if (skb->protocol == cpu_to_be16(ETH_P_IP)) {
+				struct iphdr *iph = (struct iphdr *)skb->data;
+
+				iph_check = iph->check;
+			}
+
+			if ((length > netdev->mtu + ETH_HLEN) ||
+			    lrg_pkt || iph_check == 0xffff) {
+				ibmveth_rx_mss_helper(skb, mss, lrg_pkt);
+				adapter->rx_large_packets++;
+			}
+
 			if (csum_good) {
 				skb->ip_summed = CHECKSUM_UNNECESSARY;
 				ibmveth_rx_csum_helper(skb, adapter);
 			}
 
-			if (length > netdev->mtu + ETH_HLEN) {
-				ibmveth_rx_mss_helper(skb, mss, lrg_pkt);
-				adapter->rx_large_packets++;
-			}
-
 			napi_gro_receive(napi, skb);	/* send it up */
 
 			netdev->stats.rx_packets++;


@@ -1113,7 +1113,7 @@ out:
 	return rc;
 
 probe_err_register:
-	kfree(lp->td_ring);
+	kfree(KSEG0ADDR(lp->td_ring));
 probe_err_td_ring:
 	iounmap(lp->tx_dma_regs);
 probe_err_dma_tx:
@@ -1133,6 +1133,7 @@ static int korina_remove(struct platform_device *pdev)
 	iounmap(lp->eth_regs);
 	iounmap(lp->rx_dma_regs);
 	iounmap(lp->tx_dma_regs);
+	kfree(KSEG0ADDR(lp->td_ring));
 
 	unregister_netdev(bif->dev);
 	free_netdev(bif->dev);


@@ -943,6 +943,9 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
 	bool clean_complete = true;
 	int done;
 
+	if (!budget)
+		return 0;
+
 	if (priv->tx_ring_num[TX_XDP]) {
 		xdp_tx_cq = priv->tx_cq[TX_XDP][cq->ring];
 		if (xdp_tx_cq->xdp_busy) {


@@ -350,7 +350,7 @@ u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv,
 		.dma = tx_info->map0_dma,
 	};
 
-	if (!mlx4_en_rx_recycle(ring->recycle_ring, &frame)) {
+	if (!napi_mode || !mlx4_en_rx_recycle(ring->recycle_ring, &frame)) {
 		dma_unmap_page(priv->ddev, tx_info->map0_dma,
 			       PAGE_SIZE, priv->dma_dir);
 		put_page(tx_info->page);


@@ -2504,7 +2504,7 @@ static void tlan_phy_power_down(struct net_device *dev)
 	}
 
 	/* Wait for 50 ms and powerup
-	 * This is abitrary.  It is intended to make sure the
+	 * This is arbitrary.  It is intended to make sure the
 	 * transceiver settles.
 	 */
 	tlan_set_timer(dev, msecs_to_jiffies(50), TLAN_TIMER_PHY_PUP);


@@ -1432,6 +1432,9 @@ void ipa_endpoint_resume_one(struct ipa_endpoint *endpoint)
 
 void ipa_endpoint_suspend(struct ipa *ipa)
 {
+	if (!ipa->setup_complete)
+		return;
+
 	if (ipa->modem_netdev)
 		ipa_modem_suspend(ipa->modem_netdev);
 
@@ -1443,6 +1446,9 @@ void ipa_endpoint_suspend(struct ipa *ipa)
 
 void ipa_endpoint_resume(struct ipa *ipa)
 {
+	if (!ipa->setup_complete)
+		return;
+
 	ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_COMMAND_TX]);
 	ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_LAN_RX]);


@@ -108,6 +108,7 @@ int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb,
 			   unsigned int logflags);
 void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m,
 			    struct sock *sk);
+void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb);
 void nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
 			       unsigned int hooknum, const struct sk_buff *skb,
 			       const struct net_device *in,


@@ -56,7 +56,10 @@ static inline struct ip_tunnel_info *tcf_tunnel_info(const struct tc_action *a)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	struct tcf_tunnel_key *t = to_tunnel_key(a);
-	struct tcf_tunnel_key_params *params = rtnl_dereference(t->params);
+	struct tcf_tunnel_key_params *params;
+
+	params = rcu_dereference_protected(t->params,
+					   lockdep_is_held(&a->tcfa_lock));
 
 	return &params->tcft_enc_metadata->u.tun_info;
 #else


@@ -679,10 +679,6 @@ int decrypt_skb(struct sock *sk, struct sk_buff *skb,
 		struct scatterlist *sgout);
 struct sk_buff *tls_encrypt_skb(struct sk_buff *skb);
 
-struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
-				      struct net_device *dev,
-				      struct sk_buff *skb);
-
 int tls_sw_fallback_init(struct sock *sk,
 			 struct tls_offload_context_tx *offload_ctx,
 			 struct tls_crypto_info *crypto_info);


@@ -580,6 +580,7 @@ sk_buff *j1939_tp_tx_dat_new(struct j1939_priv *priv,
 	skb->dev = priv->ndev;
 	can_skb_reserve(skb);
 	can_skb_prv(skb)->ifindex = priv->ndev->ifindex;
+	can_skb_prv(skb)->skbcnt = 0;
 	/* reserve CAN header */
 	skb_reserve(skb, offsetof(struct can_frame, data));
 
@@ -1487,6 +1488,7 @@ j1939_session *j1939_session_fresh_new(struct j1939_priv *priv,
 	skb->dev = priv->ndev;
 	can_skb_reserve(skb);
 	can_skb_prv(skb)->ifindex = priv->ndev->ifindex;
+	can_skb_prv(skb)->skbcnt = 0;
 
 	skcb = j1939_skb_to_cb(skb);
 	memcpy(skcb, rel_skcb, sizeof(*skcb));


@@ -757,7 +757,6 @@ static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
 	} else {
 		sock_reset_flag(sk, SOCK_RCVTSTAMP);
 		sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
-		sock_reset_flag(sk, SOCK_TSTAMP_NEW);
 	}
 }
@@ -994,8 +993,6 @@ set_sndbuf:
 		__sock_set_timestamps(sk, valbool, true, true);
 		break;
 	case SO_TIMESTAMPING_NEW:
-		sock_set_flag(sk, SOCK_TSTAMP_NEW);
-		fallthrough;
 	case SO_TIMESTAMPING_OLD:
 		if (val & ~SOF_TIMESTAMPING_MASK) {
 			ret = -EINVAL;
@@ -1024,16 +1021,14 @@ set_sndbuf:
 		}
 
 		sk->sk_tsflags = val;
+		sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
+
 		if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
 			sock_enable_timestamp(sk,
 					      SOCK_TIMESTAMPING_RX_SOFTWARE);
-		else {
-			if (optname == SO_TIMESTAMPING_NEW)
-				sock_reset_flag(sk, SOCK_TSTAMP_NEW);
-
+		else
 			sock_disable_timestamp(sk,
 					       (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
-		}
 		break;
 
 	case SO_RCVLOWAT:


@@ -457,6 +457,23 @@ out_bh_enable:
 	local_bh_enable();
 }
 
+/*
+ * The device used for looking up which routing table to use for sending an ICMP
+ * error is preferably the source whenever it is set, which should ensure the
+ * icmp error can be sent to the source host, else lookup using the routing
+ * table of the destination device, else use the main routing table (index 0).
+ */
+static struct net_device *icmp_get_route_lookup_dev(struct sk_buff *skb)
+{
+	struct net_device *route_lookup_dev = NULL;
+
+	if (skb->dev)
+		route_lookup_dev = skb->dev;
+	else if (skb_dst(skb))
+		route_lookup_dev = skb_dst(skb)->dev;
+	return route_lookup_dev;
+}
+
 static struct rtable *icmp_route_lookup(struct net *net,
 					struct flowi4 *fl4,
 					struct sk_buff *skb_in,
@@ -465,6 +482,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
 					int type, int code,
 					struct icmp_bxm *param)
 {
+	struct net_device *route_lookup_dev;
 	struct rtable *rt, *rt2;
 	struct flowi4 fl4_dec;
 	int err;
@@ -479,7 +497,8 @@ static struct rtable *icmp_route_lookup(struct net *net,
 	fl4->flowi4_proto = IPPROTO_ICMP;
 	fl4->fl4_icmp_type = type;
 	fl4->fl4_icmp_code = code;
-	fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev);
+	route_lookup_dev = icmp_get_route_lookup_dev(skb_in);
+	fl4->flowi4_oif = l3mdev_master_ifindex(route_lookup_dev);
 
 	security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
 	rt = ip_route_output_key_hash(net, fl4, skb_in);
@@ -503,7 +522,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
 		if (err)
 			goto relookup_failed;
 
-		if (inet_addr_type_dev_table(net, skb_dst(skb_in)->dev,
+		if (inet_addr_type_dev_table(net, route_lookup_dev,
 					     fl4_dec.saddr) == RTN_LOCAL) {
 			rt2 = __ip_route_output_key(net, &fl4_dec);
 			if (IS_ERR(rt2))


@@ -625,9 +625,7 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
 	}
 
 	if (dev->header_ops) {
-		/* Need space for new headers */
-		if (skb_cow_head(skb, dev->needed_headroom -
-				      (tunnel->hlen + sizeof(struct iphdr))))
+		if (skb_cow_head(skb, 0))
 			goto free_skb;
 
 		tnl_params = (const struct iphdr *)skb->data;
@@ -748,7 +746,11 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu)
 		len = tunnel->tun_hlen - len;
 		tunnel->hlen = tunnel->hlen + len;
 
-		dev->needed_headroom = dev->needed_headroom + len;
+		if (dev->header_ops)
+			dev->hard_header_len += len;
+		else
+			dev->needed_headroom += len;
+
 		if (set_mtu)
 			dev->mtu = max_t(int, dev->mtu - len, 68);
@@ -944,6 +946,7 @@ static void __gre_tunnel_init(struct net_device *dev)
 	tunnel->parms.iph.protocol = IPPROTO_GRE;
 
 	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
+	dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);
 
 	dev->features |= GRE_FEATURES;
 	dev->hw_features |= GRE_FEATURES;
@@ -987,10 +990,14 @@ static int ipgre_tunnel_init(struct net_device *dev)
 				return -EINVAL;
 			dev->flags = IFF_BROADCAST;
 			dev->header_ops = &ipgre_header_ops;
+			dev->hard_header_len = tunnel->hlen + sizeof(*iph);
+			dev->needed_headroom = 0;
 		}
 #endif
 	} else if (!tunnel->collect_md) {
 		dev->header_ops = &ipgre_header_ops;
+		dev->hard_header_len = tunnel->hlen + sizeof(*iph);
+		dev->needed_headroom = 0;
 	}
 
 	return ip_tunnel_init(dev);
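For reference, the classic GRE device whose hard_header_len/needed_headroom these hunks adjust can be set up with iproute2 roughly as follows (names and addresses are assumptions):

    ip tunnel add gre1 mode gre local 192.0.2.1 remote 198.51.100.1 ttl 64
    ip link set gre1 up
    ip -d link show gre1   # inspect the resulting tunnel parameters and MTU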


@@ -43,16 +43,31 @@ static void dump_arp_packet(struct nf_log_buf *m,
 			    const struct nf_loginfo *info,
 			    const struct sk_buff *skb, unsigned int nhoff)
 {
-	const struct arphdr *ah;
-	struct arphdr _arph;
 	const struct arppayload *ap;
 	struct arppayload _arpp;
+	const struct arphdr *ah;
+	unsigned int logflags;
+	struct arphdr _arph;
 
 	ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
 	if (ah == NULL) {
 		nf_log_buf_add(m, "TRUNCATED");
 		return;
 	}
 
+	if (info->type == NF_LOG_TYPE_LOG)
+		logflags = info->u.log.logflags;
+	else
+		logflags = NF_LOG_DEFAULT_MASK;
+
+	if (logflags & NF_LOG_MACDECODE) {
+		nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
+			       eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
+		nf_log_dump_vlan(m, skb);
+		nf_log_buf_add(m, "MACPROTO=%04x ",
+			       ntohs(eth_hdr(skb)->h_proto));
+	}
+
 	nf_log_buf_add(m, "ARP HTYPE=%d PTYPE=0x%04x OPCODE=%d",
 		       ntohs(ah->ar_hrd), ntohs(ah->ar_pro), ntohs(ah->ar_op));


@@ -284,8 +284,10 @@ static void dump_ipv4_mac_header(struct nf_log_buf *m,
 
 	switch (dev->type) {
 	case ARPHRD_ETHER:
-		nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
-			       eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
+		nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
+			       eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
+		nf_log_dump_vlan(m, skb);
+		nf_log_buf_add(m, "MACPROTO=%04x ",
 			       ntohs(eth_hdr(skb)->h_proto));
 		return;
 	default:
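A hedged way to exercise the new VLAN fields from userspace is an iptables LOG rule with MAC decoding enabled; whether VPROTO=/VID= actually appear depends on the hook point still seeing an offloaded VLAN tag in the skb (the rule below is an assumption, not part of the patch):

    iptables -A INPUT -i eth0 -j LOG --log-prefix "vlan-log: " --log-macdecode
    dmesg | tail   # matching entries now carry VPROTO= and VID= next to MACSRC=/MACDST=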


@@ -2770,10 +2770,12 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
 	if (IS_ERR(rt))
 		return rt;
 
-	if (flp4->flowi4_proto)
+	if (flp4->flowi4_proto) {
+		flp4->flowi4_oif = rt->dst.dev->ifindex;
 		rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
 							flowi4_to_flowi(flp4),
 							sk, 0);
+	}
 
 	return rt;
 }


@@ -501,8 +501,11 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 	if (__ipv6_addr_needs_scope_id(addr_type)) {
 		iif = icmp6_iif(skb);
 	} else {
-		dst = skb_dst(skb);
-		iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
+		/*
+		 * The source device is used for looking up which routing table
+		 * to use for sending an ICMP error.
+		 */
+		iif = l3mdev_master_ifindex(skb->dev);
 	}
 
 	/*


@@ -2622,8 +2622,10 @@ static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos)
 	iter->skip = *pos;
 
 	if (iter->tbl) {
+		loff_t p = 0;
+
 		ipv6_route_seq_setup_walk(iter, net);
-		return ipv6_route_seq_next(seq, NULL, pos);
+		return ipv6_route_seq_next(seq, NULL, &p);
 	} else {
 		return NULL;
 	}


@@ -468,8 +468,6 @@ int ip6_forward(struct sk_buff *skb)
 	 * check and decrement ttl
 	 */
 	if (hdr->hop_limit <= 1) {
-		/* Force OUTPUT device used as source address */
-		skb->dev = dst->dev;
 		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
 
 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);


@@ -297,9 +297,11 @@ static void dump_ipv6_mac_header(struct nf_log_buf *m,
 
 	switch (dev->type) {
 	case ARPHRD_ETHER:
-		nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
-			       eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
-			       ntohs(eth_hdr(skb)->h_proto));
+		nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
+			       eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
+		nf_log_dump_vlan(m, skb);
+		nf_log_buf_add(m, "MACPROTO=%04x ",
+			       ntohs(eth_hdr(skb)->h_proto));
 		return;
 	default:
 		break;


@@ -2745,7 +2745,8 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
 	if (confirm_neigh)
 		dst_confirm_neigh(dst, daddr);
 
-	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
+	if (mtu < IPV6_MIN_MTU)
+		return;
 	if (mtu >= dst_mtu(dst))
 		return;


@ -517,7 +517,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
return ret; return ret;
} }
if (subflow->use_64bit_ack) { if (READ_ONCE(msk->use_64bit_ack)) {
ack_size = TCPOLEN_MPTCP_DSS_ACK64; ack_size = TCPOLEN_MPTCP_DSS_ACK64;
opts->ext_copy.data_ack = READ_ONCE(msk->ack_seq); opts->ext_copy.data_ack = READ_ONCE(msk->ack_seq);
opts->ext_copy.ack64 = 1; opts->ext_copy.ack64 = 1;
@ -657,6 +657,12 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
if (unlikely(mptcp_check_fallback(sk))) if (unlikely(mptcp_check_fallback(sk)))
return false; return false;
/* prevent adding of any MPTCP related options on reset packet
* until we support MP_TCPRST/MP_FASTCLOSE
*/
if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST))
return false;
if (mptcp_established_options_mp(sk, skb, &opt_size, remaining, opts)) if (mptcp_established_options_mp(sk, skb, &opt_size, remaining, opts))
ret = true; ret = true;
else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining, else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining,
@ -711,7 +717,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
return false; return false;
} }
static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk, static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
struct mptcp_subflow_context *subflow, struct mptcp_subflow_context *subflow,
struct sk_buff *skb, struct sk_buff *skb,
struct mptcp_options_received *mp_opt) struct mptcp_options_received *mp_opt)
@ -728,15 +734,20 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq && TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq &&
subflow->mp_join && mp_opt->mp_join && subflow->mp_join && mp_opt->mp_join &&
READ_ONCE(msk->pm.server_side)) READ_ONCE(msk->pm.server_side))
tcp_send_ack(sk); tcp_send_ack(ssk);
goto fully_established; goto fully_established;
} }
/* we should process OoO packets before the first subflow is fully /* we must process OoO packets before the first subflow is fully
* established, but not expected for MP_JOIN subflows * established. OoO packets are instead a protocol violation
* for MP_JOIN subflows as the peer must not send any data
* before receiving the forth ack - cfr. RFC 8684 section 3.2.
*/ */
if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1) if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1) {
if (subflow->mp_join)
goto reset;
return subflow->mp_capable; return subflow->mp_capable;
}
if (mp_opt->dss && mp_opt->use_ack) { if (mp_opt->dss && mp_opt->use_ack) {
/* subflows are fully established as soon as we get any /* subflows are fully established as soon as we get any
@ -748,9 +759,12 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
} }
/* If the first established packet does not contain MP_CAPABLE + data /* If the first established packet does not contain MP_CAPABLE + data
* then fallback to TCP * then fallback to TCP. Fallback scenarios requires a reset for
* MP_JOIN subflows.
*/ */
if (!mp_opt->mp_capable) { if (!mp_opt->mp_capable) {
if (subflow->mp_join)
goto reset;
subflow->mp_capable = 0; subflow->mp_capable = 0;
pr_fallback(msk); pr_fallback(msk);
__mptcp_do_fallback(msk); __mptcp_do_fallback(msk);
@ -767,12 +781,16 @@ fully_established:
subflow->pm_notified = 1; subflow->pm_notified = 1;
if (subflow->mp_join) { if (subflow->mp_join) {
clear_3rdack_retransmission(sk); clear_3rdack_retransmission(ssk);
mptcp_pm_subflow_established(msk, subflow); mptcp_pm_subflow_established(msk, subflow);
} else { } else {
mptcp_pm_fully_established(msk); mptcp_pm_fully_established(msk);
} }
return true; return true;
reset:
mptcp_subflow_reset(ssk);
return false;
} }
static u64 expand_ack(u64 old_ack, u64 cur_ack, bool use_64bit) static u64 expand_ack(u64 old_ack, u64 cur_ack, bool use_64bit)
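The MP_JOIN handling changed here only comes into play when MPTCP is enabled and extra subflows are allowed; a minimal setup sketch for observing it (the values below are assumptions) is:

    sysctl -w net.mptcp.enabled=1
    ip mptcp limits set subflow 2 add_addr_accepted 2
    nstat -az | grep -i MPTcp   # MPTCP MIB counters while traffic runs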


@@ -1710,6 +1710,20 @@ static void pm_work(struct mptcp_sock *msk)
 	spin_unlock_bh(&msk->pm.lock);
 }
 
+static void __mptcp_close_subflow(struct mptcp_sock *msk)
+{
+	struct mptcp_subflow_context *subflow, *tmp;
+
+	list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
+		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+		if (inet_sk_state_load(ssk) != TCP_CLOSE)
+			continue;
+
+		__mptcp_close_ssk((struct sock *)msk, ssk, subflow, 0);
+	}
+}
+
 static void mptcp_worker(struct work_struct *work)
 {
 	struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
@@ -1727,6 +1741,9 @@ static void mptcp_worker(struct work_struct *work)
 	mptcp_clean_una(sk);
 	mptcp_check_data_fin_ack(sk);
 	__mptcp_flush_join_list(msk);
+	if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
+		__mptcp_close_subflow(msk);
+
 	__mptcp_move_skbs(msk);
 
 	if (msk->pm.status)


@@ -90,6 +90,7 @@
 #define MPTCP_WORK_RTX		2
 #define MPTCP_WORK_EOF		3
 #define MPTCP_FALLBACK_DONE	4
+#define MPTCP_WORK_CLOSE_SUBFLOW 5
 
 struct mptcp_options_received {
 	u64	sndr_key;
@@ -211,6 +212,7 @@ struct mptcp_sock {
 	bool		fully_established;
 	bool		rcv_data_fin;
 	bool		snd_data_fin_enable;
+	bool		use_64bit_ack; /* Set when we received a 64-bit DSN */
 	spinlock_t	join_list_lock;
 	struct work_struct work;
 	struct sk_buff  *ooo_last_skb;
@@ -310,7 +312,6 @@ struct mptcp_subflow_context {
 		mpc_map : 1,
 		backup : 1,
 		rx_eof : 1,
-		use_64bit_ack : 1,	/* Set when we received a 64-bit DSN */
 		can_ack : 1;	    /* only after processing the remote a key */
 	enum mptcp_data_avail data_avail;
 	u32	remote_nonce;
@@ -369,6 +370,7 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how);
 void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 		       struct mptcp_subflow_context *subflow,
 		       long timeout);
+void mptcp_subflow_reset(struct sock *ssk);
 
 /* called with sk socket lock held */
 int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,


@@ -271,6 +271,19 @@ static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
 	return thmac == subflow->thmac;
 }
 
+void mptcp_subflow_reset(struct sock *ssk)
+{
+	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+	struct sock *sk = subflow->conn;
+
+	tcp_set_state(ssk, TCP_CLOSE);
+	tcp_send_active_reset(ssk, GFP_ATOMIC);
+	tcp_done(ssk);
+	if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags) &&
+	    schedule_work(&mptcp_sk(sk)->work))
+		sock_hold(sk);
+}
+
 static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
 {
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
@@ -343,8 +356,7 @@ fallback:
 	return;
 
 do_reset:
-	tcp_send_active_reset(sk, GFP_ATOMIC);
-	tcp_done(sk);
+	mptcp_subflow_reset(sk);
 }
 
 struct request_sock_ops mptcp_subflow_request_sock_ops;
@@ -770,12 +782,11 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
 		if (!mpext->dsn64) {
 			map_seq = expand_seq(subflow->map_seq, subflow->map_data_len,
 					     mpext->data_seq);
-			subflow->use_64bit_ack = 0;
 			pr_debug("expanded seq=%llu", subflow->map_seq);
 		} else {
 			map_seq = mpext->data_seq;
-			subflow->use_64bit_ack = 1;
 		}
+		WRITE_ONCE(mptcp_sk(subflow->conn)->use_64bit_ack, !!mpext->dsn64);
 
 		if (subflow->map_valid) {
 			/* Allow replacing only with an identical map */


@@ -609,6 +609,8 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb,
 	if (ret == NF_ACCEPT) {
 		nf_reset_ct(skb);
 		skb_forward_csum(skb);
+		if (skb->dev)
+			skb->tstamp = 0;
 	}
 	return ret;
 }
@@ -649,6 +651,8 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
 
 	if (!local) {
 		skb_forward_csum(skb);
+		if (skb->dev)
+			skb->tstamp = 0;
 		NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
 			NULL, skb_dst(skb)->dev, dst_output);
 	} else
@@ -669,6 +673,8 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
 
 	if (!local) {
 		ip_vs_drop_early_demux_sk(skb);
 		skb_forward_csum(skb);
+		if (skb->dev)
+			skb->tstamp = 0;
 		NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
 			NULL, skb_dst(skb)->dev, dst_output);
 	} else


@@ -171,6 +171,18 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
 }
 EXPORT_SYMBOL_GPL(nf_log_dump_packet_common);
 
+void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb)
+{
+	u16 vid;
+
+	if (!skb_vlan_tag_present(skb))
+		return;
+
+	vid = skb_vlan_tag_get(skb);
+	nf_log_buf_add(m, "VPROTO=%04x VID=%u ", ntohs(skb->vlan_proto), vid);
+}
+EXPORT_SYMBOL_GPL(nf_log_dump_vlan);
+
 /* bridge and netdev logging families share this code. */
 void nf_log_l2packet(struct net *net, u_int8_t pf,
 		     __be16 protocol,


@ -2138,7 +2138,8 @@ static bool nft_hook_list_equal(struct list_head *hook_list1,
} }
static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
u32 flags) u32 flags, const struct nlattr *attr,
struct netlink_ext_ack *extack)
{ {
const struct nlattr * const *nla = ctx->nla; const struct nlattr * const *nla = ctx->nla;
struct nft_table *table = ctx->table; struct nft_table *table = ctx->table;
@ -2154,9 +2155,10 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
return -EOPNOTSUPP; return -EOPNOTSUPP;
if (nla[NFTA_CHAIN_HOOK]) { if (nla[NFTA_CHAIN_HOOK]) {
if (!nft_is_base_chain(chain)) if (!nft_is_base_chain(chain)) {
NL_SET_BAD_ATTR(extack, attr);
return -EEXIST; return -EEXIST;
}
err = nft_chain_parse_hook(ctx->net, nla, &hook, ctx->family, err = nft_chain_parse_hook(ctx->net, nla, &hook, ctx->family,
false); false);
if (err < 0) if (err < 0)
@ -2165,6 +2167,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
basechain = nft_base_chain(chain); basechain = nft_base_chain(chain);
if (basechain->type != hook.type) { if (basechain->type != hook.type) {
nft_chain_release_hook(&hook); nft_chain_release_hook(&hook);
NL_SET_BAD_ATTR(extack, attr);
return -EEXIST; return -EEXIST;
} }
@ -2172,6 +2175,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
if (!nft_hook_list_equal(&basechain->hook_list, if (!nft_hook_list_equal(&basechain->hook_list,
&hook.list)) { &hook.list)) {
nft_chain_release_hook(&hook); nft_chain_release_hook(&hook);
NL_SET_BAD_ATTR(extack, attr);
return -EEXIST; return -EEXIST;
} }
} else { } else {
@ -2179,6 +2183,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
if (ops->hooknum != hook.num || if (ops->hooknum != hook.num ||
ops->priority != hook.priority) { ops->priority != hook.priority) {
nft_chain_release_hook(&hook); nft_chain_release_hook(&hook);
NL_SET_BAD_ATTR(extack, attr);
return -EEXIST; return -EEXIST;
} }
} }
@ -2191,8 +2196,10 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
chain2 = nft_chain_lookup(ctx->net, table, chain2 = nft_chain_lookup(ctx->net, table,
nla[NFTA_CHAIN_NAME], genmask); nla[NFTA_CHAIN_NAME], genmask);
if (!IS_ERR(chain2)) if (!IS_ERR(chain2)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]);
return -EEXIST; return -EEXIST;
}
} }
if (nla[NFTA_CHAIN_COUNTERS]) { if (nla[NFTA_CHAIN_COUNTERS]) {
@ -2235,6 +2242,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
nft_trans_chain_update(tmp) && nft_trans_chain_update(tmp) &&
nft_trans_chain_name(tmp) && nft_trans_chain_name(tmp) &&
strcmp(name, nft_trans_chain_name(tmp)) == 0) { strcmp(name, nft_trans_chain_name(tmp)) == 0) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]);
kfree(name); kfree(name);
goto err; goto err;
} }
@ -2357,7 +2365,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
return -EOPNOTSUPP; return -EOPNOTSUPP;
flags |= chain->flags & NFT_CHAIN_BASE; flags |= chain->flags & NFT_CHAIN_BASE;
return nf_tables_updchain(&ctx, genmask, policy, flags); return nf_tables_updchain(&ctx, genmask, policy, flags, attr,
extack);
} }
return nf_tables_addchain(&ctx, family, genmask, policy, flags); return nf_tables_addchain(&ctx, family, genmask, policy, flags);


@@ -1596,7 +1596,7 @@ out:
 	return rc;
 }
 
-#define SMCD_DMBE_SIZES		7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
+#define SMCD_DMBE_SIZES		6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
 
 static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
 						bool is_dmb, int bufsize)
@@ -1615,7 +1615,8 @@ static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
 		rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
 		if (rc) {
 			kfree(buf_desc);
-			return (rc == -ENOMEM) ? ERR_PTR(-EAGAIN) : ERR_PTR(rc);
+			return (rc == -ENOMEM) ? ERR_PTR(-EAGAIN) :
+						 ERR_PTR(-EIO);
 		}
 		buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
 		/* CDC header stored in buf. So, pretend it was smaller */


@ -233,8 +233,6 @@ static bool smc_llc_flow_start(struct smc_llc_flow *flow,
default: default:
flow->type = SMC_LLC_FLOW_NONE; flow->type = SMC_LLC_FLOW_NONE;
} }
if (qentry == lgr->delayed_event)
lgr->delayed_event = NULL;
smc_llc_flow_qentry_set(flow, qentry); smc_llc_flow_qentry_set(flow, qentry);
spin_unlock_bh(&lgr->llc_flow_lock); spin_unlock_bh(&lgr->llc_flow_lock);
return true; return true;
@ -1209,7 +1207,7 @@ static void smc_llc_process_srv_add_link(struct smc_link_group *lgr)
/* enqueue a local add_link req to trigger a new add_link flow */ /* enqueue a local add_link req to trigger a new add_link flow */
void smc_llc_add_link_local(struct smc_link *link) void smc_llc_add_link_local(struct smc_link *link)
{ {
struct smc_llc_msg_add_link add_llc = {0}; struct smc_llc_msg_add_link add_llc = {};
add_llc.hd.length = sizeof(add_llc); add_llc.hd.length = sizeof(add_llc);
add_llc.hd.common.type = SMC_LLC_ADD_LINK; add_llc.hd.common.type = SMC_LLC_ADD_LINK;
@ -1242,7 +1240,7 @@ out:
*/ */
void smc_llc_srv_delete_link_local(struct smc_link *link, u8 del_link_id) void smc_llc_srv_delete_link_local(struct smc_link *link, u8 del_link_id)
{ {
struct smc_llc_msg_del_link del_llc = {0}; struct smc_llc_msg_del_link del_llc = {};
del_llc.hd.length = sizeof(del_llc); del_llc.hd.length = sizeof(del_llc);
del_llc.hd.common.type = SMC_LLC_DELETE_LINK; del_llc.hd.common.type = SMC_LLC_DELETE_LINK;
@ -1314,7 +1312,7 @@ out:
*/ */
void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn) void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn)
{ {
struct smc_llc_msg_del_link delllc = {0}; struct smc_llc_msg_del_link delllc = {};
int i; int i;
delllc.hd.common.type = SMC_LLC_DELETE_LINK; delllc.hd.common.type = SMC_LLC_DELETE_LINK;
@ -1603,13 +1601,12 @@ static void smc_llc_event_work(struct work_struct *work)
struct smc_llc_qentry *qentry; struct smc_llc_qentry *qentry;
if (!lgr->llc_flow_lcl.type && lgr->delayed_event) { if (!lgr->llc_flow_lcl.type && lgr->delayed_event) {
if (smc_link_usable(lgr->delayed_event->link)) { qentry = lgr->delayed_event;
smc_llc_event_handler(lgr->delayed_event); lgr->delayed_event = NULL;
} else { if (smc_link_usable(qentry->link))
qentry = lgr->delayed_event; smc_llc_event_handler(qentry);
lgr->delayed_event = NULL; else
kfree(qentry); kfree(qentry);
}
} }
again: again:


@@ -150,7 +150,8 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
 	if (fragid == FIRST_FRAGMENT) {
 		if (unlikely(head))
 			goto err;
-		frag = skb_unshare(frag, GFP_ATOMIC);
+		if (skb_cloned(frag))
+			frag = skb_copy(frag, GFP_ATOMIC);
 		if (unlikely(!frag))
 			goto err;
 		head = *headbuf = frag;


@@ -327,8 +327,13 @@ static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq,
 	struct tipc_msg *hdr;
 	u16 seqno;
 
+	spin_lock_bh(&namedq->lock);
 	skb_queue_walk_safe(namedq, skb, tmp) {
-		skb_linearize(skb);
+		if (unlikely(skb_linearize(skb))) {
+			__skb_unlink(skb, namedq);
+			kfree_skb(skb);
+			continue;
+		}
 		hdr = buf_msg(skb);
 		seqno = msg_named_seqno(hdr);
 		if (msg_is_last_bulk(hdr)) {
@@ -338,12 +343,14 @@ static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq,
 
 		if (msg_is_bulk(hdr) || msg_is_legacy(hdr)) {
 			__skb_unlink(skb, namedq);
+			spin_unlock_bh(&namedq->lock);
 			return skb;
 		}
 
 		if (*open && (*rcv_nxt == seqno)) {
 			(*rcv_nxt)++;
 			__skb_unlink(skb, namedq);
+			spin_unlock_bh(&namedq->lock);
 			return skb;
 		}
 
@@ -353,6 +360,7 @@ static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq,
 			continue;
 		}
 	}
+	spin_unlock_bh(&namedq->lock);
 	return NULL;
 }


@@ -1496,7 +1496,7 @@ static void node_lost_contact(struct tipc_node *n,
 
 	/* Clean up broadcast state */
 	tipc_bcast_remove_peer(n->net, n->bc_entry.link);
-	__skb_queue_purge(&n->bc_entry.namedq);
+	skb_queue_purge(&n->bc_entry.namedq);
 
 	/* Abort any ongoing link failover */
 	for (i = 0; i < MAX_BEARERS; i++) {


@@ -418,14 +418,14 @@ static int tls_push_data(struct sock *sk,
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_prot_info *prot = &tls_ctx->prot_info;
 	struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
-	int more = flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE);
 	struct tls_record_info *record = ctx->open_record;
 	int tls_push_record_flags;
 	struct page_frag *pfrag;
 	size_t orig_size = size;
 	u32 max_open_record_len;
-	int copy, rc = 0;
+	bool more = false;
 	bool done = false;
+	int copy, rc = 0;
 	long timeo;
 
 	if (flags &
@@ -492,9 +492,8 @@ handle_error:
 		if (!size) {
 last_record:
 			tls_push_record_flags = flags;
-			if (more) {
-				tls_ctx->pending_open_record_frags =
-						!!record->num_frags;
+			if (flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE)) {
+				more = true;
 				break;
 			}
@@ -526,6 +525,8 @@ last_record:
 		}
 	} while (!done);
 
+	tls_ctx->pending_open_record_frags = more;
+
 	if (orig_size - size > 0)
 		rc = orig_size - size;


@@ -20,6 +20,7 @@ TEST_PROGS += vrf-xfrm-tests.sh
 TEST_PROGS += rxtimestamp.sh
 TEST_PROGS += devlink_port_split.py
 TEST_PROGS += drop_monitor_tests.sh
+TEST_PROGS += vrf_route_leaking.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket nettest
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
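The new script can then be run through the kselftest harness or directly from the source tree, e.g.:

    make -C tools/testing/selftests TARGETS=net run_tests
    # or, from tools/testing/selftests/net:
    ./vrf_route_leaking.sh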


@ -0,0 +1,626 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (c) 2019 David Ahern <dsahern@gmail.com>. All rights reserved.
# Copyright (c) 2020 Michael Jeanson <mjeanson@efficios.com>. All rights reserved.
#
# Requires CONFIG_NET_VRF, CONFIG_VETH, CONFIG_BRIDGE and CONFIG_NET_NS.
#
#
# Symmetric routing topology
#
# blue red
# +----+ .253 +----+ .253 +----+
# | h1 |-------------------| r1 |-------------------| h2 |
# +----+ .1 +----+ .2 +----+
# 172.16.1/24 172.16.2/24
# 2001:db8:16:1/64 2001:db8:16:2/64
#
#
# Route from h1 to h2 and back goes through r1, incoming vrf blue has a route
# to the outgoing vrf red for the n2 network and red has a route back to n1.
# The red VRF interface has an MTU of 1400.
#
# The first test sends a ping with a ttl of 1 from h1 to h2 and parses the
# output of the command to check that a ttl expired error is received.
#
# The second test runs traceroute from h1 to h2 and parses the output to check
# for a hop on r1.
#
# The third test sends a ping with a packet size of 1450 from h1 to h2 and
# parses the output of the command to check that a fragmentation error is
# received.
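# Note: the 1400 byte MTU is only set on r1's red-side interface (eth1), so
# the 1450 byte ping from h1 has to trigger an ICMP "Frag needed" (IPv4) or
# "Packet too big" (IPv6) error while being forwarded from blue to red.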
#
#
# Asymmetric routing topology
#
# This topology represents a customer setup where the issue with ICMP errors
# and VRF route leaking was initially reported. The MTU test isn't done here
# because of the lack of a return route in the red VRF.
#
# blue red
# .253 +----+ .253
# +----| r1 |----+
# | +----+ |
# +----+ | | +----+
# | h1 |--------------+ +--------------| h2 |
# +----+ .1 | | .2 +----+
# 172.16.1/24 | +----+ | 172.16.2/24
# 2001:db8:16:1/64 +----| r2 |----+ 2001:db8:16:2/64
# .254 +----+ .254
#
#
# Route from h1 to h2 goes through r1, incoming vrf blue has a route to the
# outgoing vrf red for the n2 network but red doesn't have a route back to n1.
# Route from h2 to h1 goes through r2.
#
# The objective is to check that the incoming vrf routing table is selected
# to send an ICMP error back to the source when the ttl of a packet reaches 1
# while it is forwarded between different vrfs.
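#
# For reference, the route leak on r1 boils down to the commands used in the
# setup functions below (the symmetric setup leaks both ways, the asymmetric
# one only from blue to red; IPv6 gets the equivalent routes):
#
#   ip -netns r1 route add vrf blue 172.16.2.0/24 dev red
#   ip -netns r1 route add vrf red  172.16.1.0/24 dev blue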
VERBOSE=0
PAUSE_ON_FAIL=no
DEFAULT_TTYPE=sym
H1_N1=172.16.1.0/24
H1_N1_6=2001:db8:16:1::/64
H1_N1_IP=172.16.1.1
R1_N1_IP=172.16.1.253
R2_N1_IP=172.16.1.254
H1_N1_IP6=2001:db8:16:1::1
R1_N1_IP6=2001:db8:16:1::253
R2_N1_IP6=2001:db8:16:1::254
H2_N2=172.16.2.0/24
H2_N2_6=2001:db8:16:2::/64
H2_N2_IP=172.16.2.2
R1_N2_IP=172.16.2.253
R2_N2_IP=172.16.2.254
H2_N2_IP6=2001:db8:16:2::2
R1_N2_IP6=2001:db8:16:2::253
R2_N2_IP6=2001:db8:16:2::254
################################################################################
# helpers
log_section()
{
echo
echo "###########################################################################"
echo "$*"
echo "###########################################################################"
echo
}
log_test()
{
local rc=$1
local expected=$2
local msg="$3"
if [ "${rc}" -eq "${expected}" ]; then
printf "TEST: %-60s [ OK ]\n" "${msg}"
nsuccess=$((nsuccess+1))
else
ret=1
nfail=$((nfail+1))
printf "TEST: %-60s [FAIL]\n" "${msg}"
if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
echo
echo "hit enter to continue, 'q' to quit"
read -r a
[ "$a" = "q" ] && exit 1
fi
fi
}
run_cmd()
{
local cmd="$*"
local out
local rc
if [ "$VERBOSE" = "1" ]; then
echo "COMMAND: $cmd"
fi
# shellcheck disable=SC2086
out=$(eval $cmd 2>&1)
rc=$?
if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then
echo "$out"
fi
[ "$VERBOSE" = "1" ] && echo
return $rc
}
run_cmd_grep()
{
local grep_pattern="$1"
shift
local cmd="$*"
local out
local rc
if [ "$VERBOSE" = "1" ]; then
echo "COMMAND: $cmd"
fi
# shellcheck disable=SC2086
out=$(eval $cmd 2>&1)
if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then
echo "$out"
fi
echo "$out" | grep -q "$grep_pattern"
rc=$?
[ "$VERBOSE" = "1" ] && echo
return $rc
}
################################################################################
# setup and teardown
cleanup()
{
local ns
for ns in h1 h2 r1 r2; do
ip netns del $ns 2>/dev/null
done
}
setup_vrf()
{
local ns=$1
ip -netns "${ns}" rule del pref 0
ip -netns "${ns}" rule add pref 32765 from all lookup local
ip -netns "${ns}" -6 rule del pref 0
ip -netns "${ns}" -6 rule add pref 32765 from all lookup local
}
create_vrf()
{
local ns=$1
local vrf=$2
local table=$3
ip -netns "${ns}" link add "${vrf}" type vrf table "${table}"
ip -netns "${ns}" link set "${vrf}" up
ip -netns "${ns}" route add vrf "${vrf}" unreachable default metric 8192
ip -netns "${ns}" -6 route add vrf "${vrf}" unreachable default metric 8192
ip -netns "${ns}" addr add 127.0.0.1/8 dev "${vrf}"
ip -netns "${ns}" -6 addr add ::1 dev "${vrf}" nodad
}
setup_sym()
{
local ns
# make sure we are starting with a clean slate
cleanup
#
# create nodes as namespaces
#
for ns in h1 h2 r1; do
ip netns add $ns
ip -netns $ns link set lo up
case "${ns}" in
h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
;;
r1) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
esac
done
#
# create interconnects
#
ip -netns h1 link add eth0 type veth peer name r1h1
ip -netns h1 link set r1h1 netns r1 name eth0 up
ip -netns h2 link add eth0 type veth peer name r1h2
ip -netns h2 link set r1h2 netns r1 name eth1 up
#
# h1
#
ip -netns h1 addr add dev eth0 ${H1_N1_IP}/24
ip -netns h1 -6 addr add dev eth0 ${H1_N1_IP6}/64 nodad
ip -netns h1 link set eth0 up
# h1 to h2 via r1
ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev eth0
ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev eth0
#
# h2
#
ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24
ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad
ip -netns h2 link set eth0 up
# h2 to h1 via r1
ip -netns h2 route add default via ${R1_N2_IP} dev eth0
ip -netns h2 -6 route add default via ${R1_N2_IP6} dev eth0
#
# r1
#
setup_vrf r1
create_vrf r1 blue 1101
create_vrf r1 red 1102
ip -netns r1 link set mtu 1400 dev eth1
ip -netns r1 link set eth0 vrf blue up
ip -netns r1 link set eth1 vrf red up
ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24
ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24
ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad
# Route leak from blue to red
ip -netns r1 route add vrf blue ${H2_N2} dev red
ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red
# Route leak from red to blue
ip -netns r1 route add vrf red ${H1_N1} dev blue
ip -netns r1 -6 route add vrf red ${H1_N1_6} dev blue
# Wait for ip config to settle
sleep 2
}
setup_asym()
{
local ns
# make sure we are starting with a clean slate
cleanup
#
# create nodes as namespaces
#
for ns in h1 h2 r1 r2; do
ip netns add $ns
ip -netns $ns link set lo up
case "${ns}" in
h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
;;
r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
esac
done
#
# create interconnects
#
ip -netns h1 link add eth0 type veth peer name r1h1
ip -netns h1 link set r1h1 netns r1 name eth0 up
ip -netns h1 link add eth1 type veth peer name r2h1
ip -netns h1 link set r2h1 netns r2 name eth0 up
ip -netns h2 link add eth0 type veth peer name r1h2
ip -netns h2 link set r1h2 netns r1 name eth1 up
ip -netns h2 link add eth1 type veth peer name r2h2
ip -netns h2 link set r2h2 netns r2 name eth1 up
#
# h1
#
ip -netns h1 link add br0 type bridge
ip -netns h1 link set br0 up
ip -netns h1 addr add dev br0 ${H1_N1_IP}/24
ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad
ip -netns h1 link set eth0 master br0 up
ip -netns h1 link set eth1 master br0 up
# h1 to h2 via r1
ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev br0
ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev br0
#
# h2
#
ip -netns h2 link add br0 type bridge
ip -netns h2 link set br0 up
ip -netns h2 addr add dev br0 ${H2_N2_IP}/24
ip -netns h2 -6 addr add dev br0 ${H2_N2_IP6}/64 nodad
ip -netns h2 link set eth0 master br0 up
ip -netns h2 link set eth1 master br0 up
# h2 to h1 via r2
ip -netns h2 route add default via ${R2_N2_IP} dev br0
ip -netns h2 -6 route add default via ${R2_N2_IP6} dev br0
#
# r1
#
setup_vrf r1
create_vrf r1 blue 1101
create_vrf r1 red 1102
ip -netns r1 link set mtu 1400 dev eth1
ip -netns r1 link set eth0 vrf blue up
ip -netns r1 link set eth1 vrf red up
ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24
ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24
ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad
# Route leak from blue to red
ip -netns r1 route add vrf blue ${H2_N2} dev red
ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red
# No route leak from red to blue
#
# r2
#
ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24
ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad
ip -netns r2 addr add dev eth1 ${R2_N2_IP}/24
ip -netns r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad
# Wait for ip config to settle
sleep 2
}
check_connectivity()
{
ip netns exec h1 ping -c1 -w1 ${H2_N2_IP} >/dev/null 2>&1
log_test $? 0 "Basic IPv4 connectivity"
return $?
}
check_connectivity6()
{
ip netns exec h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1
log_test $? 0 "Basic IPv6 connectivity"
return $?
}
check_traceroute()
{
if [ ! -x "$(command -v traceroute)" ]; then
echo "SKIP: Could not run IPV4 test without traceroute"
return 1
fi
}
check_traceroute6()
{
if [ ! -x "$(command -v traceroute6)" ]; then
echo "SKIP: Could not run IPV6 test without traceroute6"
return 1
fi
}
ipv4_traceroute()
{
local ttype="$1"
[ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
log_section "IPv4 ($ttype route): VRF ICMP error route lookup traceroute"
check_traceroute || return
setup_"$ttype"
check_connectivity || return
run_cmd_grep "${R1_N1_IP}" ip netns exec h1 traceroute ${H2_N2_IP}
log_test $? 0 "Traceroute reports a hop on r1"
}
ipv4_traceroute_asym()
{
ipv4_traceroute asym
}
ipv6_traceroute()
{
local ttype="$1"
[ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
log_section "IPv6 ($ttype route): VRF ICMP error route lookup traceroute"
check_traceroute6 || return
setup_"$ttype"
check_connectivity6 || return
run_cmd_grep "${R1_N1_IP6}" ip netns exec h1 traceroute6 ${H2_N2_IP6}
log_test $? 0 "Traceroute6 reports a hop on r1"
}
ipv6_traceroute_asym()
{
ipv6_traceroute asym
}
ipv4_ping_ttl()
{
local ttype="$1"
[ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
log_section "IPv4 ($ttype route): VRF ICMP ttl error route lookup ping"
setup_"$ttype"
check_connectivity || return
run_cmd_grep "Time to live exceeded" ip netns exec h1 ping -t1 -c1 -W2 ${H2_N2_IP}
log_test $? 0 "Ping received ICMP ttl exceeded"
}
ipv4_ping_ttl_asym()
{
ipv4_ping_ttl asym
}
ipv4_ping_frag()
{
local ttype="$1"
[ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
log_section "IPv4 ($ttype route): VRF ICMP fragmentation error route lookup ping"
setup_"$ttype"
check_connectivity || return
run_cmd_grep "Frag needed" ip netns exec h1 ping -s 1450 -Mdo -c1 -W2 ${H2_N2_IP}
log_test $? 0 "Ping received ICMP Frag needed"
}
ipv4_ping_frag_asym()
{
ipv4_ping_frag asym
}
ipv6_ping_ttl()
{
local ttype="$1"
[ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
log_section "IPv6 ($ttype route): VRF ICMP ttl error route lookup ping"
setup_"$ttype"
check_connectivity6 || return
run_cmd_grep "Time exceeded: Hop limit" ip netns exec h1 "${ping6}" -t1 -c1 -W2 ${H2_N2_IP6}
log_test $? 0 "Ping received ICMP Hop limit"
}
ipv6_ping_ttl_asym()
{
ipv6_ping_ttl asym
}
ipv6_ping_frag()
{
local ttype="$1"
[ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
log_section "IPv6 ($ttype route): VRF ICMP fragmentation error route lookup ping"
setup_"$ttype"
check_connectivity6 || return
run_cmd_grep "Packet too big" ip netns exec h1 "${ping6}" -s 1450 -Mdo -c1 -W2 ${H2_N2_IP6}
log_test $? 0 "Ping received ICMP Packet too big"
}
ipv6_ping_frag_asym()
{
ipv6_ping_frag asym
}
################################################################################
# usage
usage()
{
cat <<EOF
usage: ${0##*/} OPTS
-4 Run IPv4 tests only
-6 Run IPv6 tests only
-t TEST Run only TEST
-p Pause on fail
-v Verbose mode (show commands and output)
EOF
}
################################################################################
# main
# Some systems don't have a ping6 binary anymore
command -v ping6 > /dev/null 2>&1 && ping6=$(command -v ping6) || ping6=$(command -v ping)
TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_ttl_asym ipv4_traceroute_asym"
TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_frag ipv6_ping_ttl_asym ipv6_traceroute_asym"
ret=0
nsuccess=0
nfail=0
while getopts :46t:pvh o
do
case $o in
4) TESTS=ipv4;;
6) TESTS=ipv6;;
t) TESTS=$OPTARG;;
p) PAUSE_ON_FAIL=yes;;
v) VERBOSE=1;;
h) usage; exit 0;;
*) usage; exit 1;;
esac
done
#
# show user test config
#
if [ -z "$TESTS" ]; then
TESTS="$TESTS_IPV4 $TESTS_IPV6"
elif [ "$TESTS" = "ipv4" ]; then
TESTS="$TESTS_IPV4"
elif [ "$TESTS" = "ipv6" ]; then
TESTS="$TESTS_IPV6"
fi
for t in $TESTS
do
case $t in
ipv4_ping_ttl|ping) ipv4_ping_ttl;;&
ipv4_ping_ttl_asym|ping) ipv4_ping_ttl_asym;;&
ipv4_traceroute|traceroute) ipv4_traceroute;;&
ipv4_traceroute_asym|traceroute) ipv4_traceroute_asym;;&
ipv4_ping_frag|ping) ipv4_ping_frag;;&
ipv6_ping_ttl|ping) ipv6_ping_ttl;;&
ipv6_ping_ttl_asym|ping) ipv6_ping_ttl_asym;;&
ipv6_traceroute|traceroute) ipv6_traceroute;;&
ipv6_traceroute_asym|traceroute) ipv6_traceroute_asym;;&
ipv6_ping_frag|ping) ipv6_ping_frag;;&
# setup namespaces and config, but do not run any tests
setup_sym|setup) setup_sym; exit 0;;
setup_asym) setup_asym; exit 0;;
help) echo "Test names: $TESTS"; exit 0;;
esac
done
cleanup
printf "\nTests passed: %3d\n" ${nsuccess}
printf "Tests failed: %3d\n" ${nfail}
exit $ret


@ -17,9 +17,12 @@
struct options { struct options {
bool count_packets; bool count_packets;
bool gso_enabled;
int verbose; int verbose;
unsigned int queue_num; unsigned int queue_num;
unsigned int timeout; unsigned int timeout;
uint32_t verdict;
uint32_t delay_ms;
}; };
static unsigned int queue_stats[5]; static unsigned int queue_stats[5];
@ -27,7 +30,7 @@ static struct options opts;
static void help(const char *p) static void help(const char *p)
{ {
printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num]\n", p); printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p);
} }
static int parse_attr_cb(const struct nlattr *attr, void *data) static int parse_attr_cb(const struct nlattr *attr, void *data)
@ -162,7 +165,7 @@ nfq_build_cfg_params(char *buf, uint8_t mode, int range, int queue_num)
} }
static struct nlmsghdr * static struct nlmsghdr *
nfq_build_verdict(char *buf, int id, int queue_num, int verd) nfq_build_verdict(char *buf, int id, int queue_num, uint32_t verd)
{ {
struct nfqnl_msg_verdict_hdr vh = { struct nfqnl_msg_verdict_hdr vh = {
.verdict = htonl(verd), .verdict = htonl(verd),
@ -189,9 +192,6 @@ static void print_stats(void)
unsigned int last, total; unsigned int last, total;
int i; int i;
if (!opts.count_packets)
return;
total = 0; total = 0;
last = queue_stats[0]; last = queue_stats[0];
@ -234,7 +234,8 @@ struct mnl_socket *open_queue(void)
nlh = nfq_build_cfg_params(buf, NFQNL_COPY_PACKET, 0xFFFF, queue_num); nlh = nfq_build_cfg_params(buf, NFQNL_COPY_PACKET, 0xFFFF, queue_num);
flags = NFQA_CFG_F_GSO | NFQA_CFG_F_UID_GID; flags = opts.gso_enabled ? NFQA_CFG_F_GSO : 0;
flags |= NFQA_CFG_F_UID_GID;
mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags)); mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags));
mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags)); mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags));
@ -255,6 +256,17 @@ struct mnl_socket *open_queue(void)
return nl; return nl;
} }
static void sleep_ms(uint32_t delay)
{
struct timespec ts = { .tv_sec = delay / 1000 };
delay %= 1000;
ts.tv_nsec = delay * 1000llu * 1000llu;
nanosleep(&ts, NULL);
}
static int mainloop(void) static int mainloop(void)
{ {
unsigned int buflen = 64 * 1024 + MNL_SOCKET_BUFFER_SIZE; unsigned int buflen = 64 * 1024 + MNL_SOCKET_BUFFER_SIZE;
@ -278,7 +290,7 @@ static int mainloop(void)
ret = mnl_socket_recvfrom(nl, buf, buflen); ret = mnl_socket_recvfrom(nl, buf, buflen);
if (ret == -1) { if (ret == -1) {
if (errno == ENOBUFS) if (errno == ENOBUFS || errno == EINTR)
continue; continue;
if (errno == EAGAIN) { if (errno == EAGAIN) {
@ -298,7 +310,10 @@ static int mainloop(void)
} }
id = ret - MNL_CB_OK; id = ret - MNL_CB_OK;
nlh = nfq_build_verdict(buf, id, opts.queue_num, NF_ACCEPT); if (opts.delay_ms)
sleep_ms(opts.delay_ms);
nlh = nfq_build_verdict(buf, id, opts.queue_num, opts.verdict);
if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) { if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
perror("mnl_socket_sendto"); perror("mnl_socket_sendto");
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
@ -314,7 +329,7 @@ static void parse_opts(int argc, char **argv)
{ {
int c; int c;
while ((c = getopt(argc, argv, "chvt:q:")) != -1) { while ((c = getopt(argc, argv, "chvt:q:Q:d:G")) != -1) {
switch (c) { switch (c) {
case 'c': case 'c':
opts.count_packets = true; opts.count_packets = true;
@ -328,20 +343,48 @@ static void parse_opts(int argc, char **argv)
if (opts.queue_num > 0xffff) if (opts.queue_num > 0xffff)
opts.queue_num = 0; opts.queue_num = 0;
break; break;
case 'Q':
opts.verdict = atoi(optarg);
if (opts.verdict > 0xffff) {
fprintf(stderr, "Expected destination queue number\n");
exit(1);
}
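/* an NF_QUEUE verdict carries the destination queue number in its upper 16 bits */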
opts.verdict <<= 16;
opts.verdict |= NF_QUEUE;
break;
case 'd':
opts.delay_ms = atoi(optarg);
if (opts.delay_ms == 0) {
fprintf(stderr, "Expected nonzero delay (in milliseconds)\n");
exit(1);
}
break;
case 't': case 't':
opts.timeout = atoi(optarg); opts.timeout = atoi(optarg);
break; break;
case 'G':
opts.gso_enabled = false;
break;
case 'v': case 'v':
opts.verbose++; opts.verbose++;
break; break;
} }
} }
if (opts.verdict != NF_ACCEPT && (opts.verdict >> 16 == opts.queue_num)) {
fprintf(stderr, "Cannot use same destination and source queue\n");
exit(1);
}
} }
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
int ret; int ret;
opts.verdict = NF_ACCEPT;
opts.gso_enabled = true;
parse_opts(argc, argv); parse_opts(argc, argv);
ret = mainloop(); ret = mainloop();


@ -7,8 +7,7 @@ ksft_skip=4
sfx=$(mktemp -u "XXXXXXXX") sfx=$(mktemp -u "XXXXXXXX")
ns0="ns0-$sfx" ns0="ns0-$sfx"
nft --version > /dev/null 2>&1 if ! nft --version > /dev/null 2>&1; then
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without nft tool" echo "SKIP: Could not run test without nft tool"
exit $ksft_skip exit $ksft_skip
fi fi
@ -24,6 +23,8 @@ ip -net "$ns0" addr add 127.0.0.1 dev lo
trap cleanup EXIT trap cleanup EXIT
currentyear=$(date +%G)
lastyear=$((currentyear-1))
ip netns exec "$ns0" nft -f /dev/stdin <<EOF ip netns exec "$ns0" nft -f /dev/stdin <<EOF
table inet filter { table inet filter {
counter iifcount {} counter iifcount {}
@ -33,6 +34,9 @@ table inet filter {
counter infproto4count {} counter infproto4count {}
counter il4protocounter {} counter il4protocounter {}
counter imarkcounter {} counter imarkcounter {}
counter icpu0counter {}
counter ilastyearcounter {}
counter icurrentyearcounter {}
counter oifcount {} counter oifcount {}
counter oifnamecount {} counter oifnamecount {}
@ -54,6 +58,9 @@ table inet filter {
meta nfproto ipv4 counter name "infproto4count" meta nfproto ipv4 counter name "infproto4count"
meta l4proto icmp counter name "il4protocounter" meta l4proto icmp counter name "il4protocounter"
meta mark 42 counter name "imarkcounter" meta mark 42 counter name "imarkcounter"
meta cpu 0 counter name "icpu0counter"
meta time "$lastyear-01-01" - "$lastyear-12-31" counter name ilastyearcounter
meta time "$currentyear-01-01" - "$currentyear-12-31" counter name icurrentyearcounter
} }
chain output { chain output {
@ -84,11 +91,10 @@ check_one_counter()
local want="packets $2" local want="packets $2"
local verbose="$3" local verbose="$3"
cnt=$(ip netns exec "$ns0" nft list counter inet filter $cname | grep -q "$want") if ! ip netns exec "$ns0" nft list counter inet filter $cname | grep -q "$want"; then
if [ $? -ne 0 ];then
echo "FAIL: $cname, want \"$want\", got" echo "FAIL: $cname, want \"$want\", got"
ret=1 ret=1
ip netns exec "$ns0" nft list counter inet filter $counter ip netns exec "$ns0" nft list counter inet filter $cname
fi fi
} }
@ -100,8 +106,7 @@ check_lo_counters()
for counter in iifcount iifnamecount iifgroupcount iiftypecount infproto4count \ for counter in iifcount iifnamecount iifgroupcount iiftypecount infproto4count \
oifcount oifnamecount oifgroupcount oiftypecount onfproto4count \ oifcount oifnamecount oifgroupcount oiftypecount onfproto4count \
il4protocounter \ il4protocounter icurrentyearcounter ol4protocounter \
ol4protocounter \
; do ; do
check_one_counter "$counter" "$want" "$verbose" check_one_counter "$counter" "$want" "$verbose"
done done
@ -116,9 +121,22 @@ check_one_counter oskuidcounter "1" true
check_one_counter oskgidcounter "1" true check_one_counter oskgidcounter "1" true
check_one_counter imarkcounter "1" true check_one_counter imarkcounter "1" true
check_one_counter omarkcounter "1" true check_one_counter omarkcounter "1" true
check_one_counter ilastyearcounter "0" true
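# Traffic generated now can only match the current-year interval, so the
# last-year counter is expected to stay at 0.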
if [ $ret -eq 0 ];then if [ $ret -eq 0 ];then
echo "OK: nftables meta iif/oif counters at expected values" echo "OK: nftables meta iif/oif counters at expected values"
else
exit $ret
fi
# Pin the shell to the first CPU and check the cpu-based counter
taskset -p 01 $$ > /dev/null
ip netns exec "$ns0" nft reset counters > /dev/null
ip netns exec "$ns0" ping -q -c 1 127.0.0.1 > /dev/null
check_one_counter icpu0counter "2" true
if [ $ret -eq 0 ];then
echo "OK: nftables meta cpu counter at expected values"
fi fi
exit $ret exit $ret


@ -12,6 +12,7 @@ sfx=$(mktemp -u "XXXXXXXX")
ns1="ns1-$sfx" ns1="ns1-$sfx"
ns2="ns2-$sfx" ns2="ns2-$sfx"
nsrouter="nsrouter-$sfx" nsrouter="nsrouter-$sfx"
timeout=4
cleanup() cleanup()
{ {
@ -20,6 +21,7 @@ cleanup()
ip netns del ${nsrouter} ip netns del ${nsrouter}
rm -f "$TMPFILE0" rm -f "$TMPFILE0"
rm -f "$TMPFILE1" rm -f "$TMPFILE1"
rm -f "$TMPFILE2" "$TMPFILE3"
} }
nft --version > /dev/null 2>&1 nft --version > /dev/null 2>&1
@ -42,6 +44,8 @@ fi
TMPFILE0=$(mktemp) TMPFILE0=$(mktemp)
TMPFILE1=$(mktemp) TMPFILE1=$(mktemp)
TMPFILE2=$(mktemp)
TMPFILE3=$(mktemp)
trap cleanup EXIT trap cleanup EXIT
ip netns add ${ns1} ip netns add ${ns1}
@ -83,7 +87,7 @@ load_ruleset() {
local name=$1 local name=$1
local prio=$2 local prio=$2
ip netns exec ${nsrouter} nft -f - <<EOF ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
table inet $name { table inet $name {
chain nfq { chain nfq {
ip protocol icmp queue bypass ip protocol icmp queue bypass
@ -118,7 +122,7 @@ EOF
load_counter_ruleset() { load_counter_ruleset() {
local prio=$1 local prio=$1
ip netns exec ${nsrouter} nft -f - <<EOF ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
table inet countrules { table inet countrules {
chain pre { chain pre {
type filter hook prerouting priority $prio; policy accept; type filter hook prerouting priority $prio; policy accept;
@ -175,7 +179,7 @@ test_ping_router() {
test_queue_blackhole() { test_queue_blackhole() {
local proto=$1 local proto=$1
ip netns exec ${nsrouter} nft -f - <<EOF ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
table $proto blackh { table $proto blackh {
chain forward { chain forward {
type filter hook forward priority 0; policy accept; type filter hook forward priority 0; policy accept;
@ -184,10 +188,10 @@ table $proto blackh {
} }
EOF EOF
if [ $proto = "ip" ] ;then if [ $proto = "ip" ] ;then
ip netns exec ${ns1} ping -c 1 -q 10.0.2.99 > /dev/null ip netns exec ${ns1} ping -W 2 -c 1 -q 10.0.2.99 > /dev/null
lret=$? lret=$?
elif [ $proto = "ip6" ]; then elif [ $proto = "ip6" ]; then
ip netns exec ${ns1} ping -c 1 -q dead:2::99 > /dev/null ip netns exec ${ns1} ping -W 2 -c 1 -q dead:2::99 > /dev/null
lret=$? lret=$?
else else
lret=111 lret=111
@ -214,8 +218,8 @@ test_queue()
local last="" local last=""
# spawn nf-queue listeners # spawn nf-queue listeners
ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t 3 > "$TMPFILE0" & ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t $timeout > "$TMPFILE0" &
ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t 3 > "$TMPFILE1" & ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE1" &
sleep 1 sleep 1
test_ping test_ping
ret=$? ret=$?
@ -250,11 +254,11 @@ test_queue()
test_tcp_forward() test_tcp_forward()
{ {
ip netns exec ${nsrouter} ./nf-queue -q 2 -t 10 & ip netns exec ${nsrouter} ./nf-queue -q 2 -t $timeout &
local nfqpid=$! local nfqpid=$!
tmpfile=$(mktemp) || exit 1 tmpfile=$(mktemp) || exit 1
dd conv=sparse status=none if=/dev/zero bs=1M count=100 of=$tmpfile dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
ip netns exec ${ns2} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null & ip netns exec ${ns2} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
local rpid=$! local rpid=$!
@ -270,15 +274,13 @@ test_tcp_forward()
test_tcp_localhost() test_tcp_localhost()
{ {
tc -net "${nsrouter}" qdisc add dev lo root netem loss random 1%
tmpfile=$(mktemp) || exit 1 tmpfile=$(mktemp) || exit 1
dd conv=sparse status=none if=/dev/zero bs=1M count=900 of=$tmpfile dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null & ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
local rpid=$! local rpid=$!
ip netns exec ${nsrouter} ./nf-queue -q 3 -t 30 & ip netns exec ${nsrouter} ./nf-queue -q 3 -t $timeout &
local nfqpid=$! local nfqpid=$!
sleep 1 sleep 1
@ -287,6 +289,47 @@ test_tcp_localhost()
wait $rpid wait $rpid
[ $? -eq 0 ] && echo "PASS: tcp via loopback" [ $? -eq 0 ] && echo "PASS: tcp via loopback"
wait 2>/dev/null
}
test_tcp_localhost_requeue()
{
ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
flush ruleset
table inet filter {
chain output {
type filter hook output priority 0; policy accept;
tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
}
chain post {
type filter hook postrouting priority 0; policy accept;
tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
}
}
EOF
tmpfile=$(mktemp) || exit 1
dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
local rpid=$!
ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE2" &
# Packets hit queue 0 first via the output/postrouting hooks above; that
# nf-queue instance re-queues every packet to the instance listening on
# queue 1, so both listeners should end up seeing the same packets.
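# -Q 1 sets an NF_QUEUE verdict towards queue 1, -d 150 delays each verdict
# by 150ms and -G clears the NFQA_CFG_F_GSO configuration flag.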
ip netns exec ${nsrouter} ./nf-queue -G -d 150 -c -q 0 -Q 1 -t $timeout > "$TMPFILE3" &
sleep 1
ip netns exec ${nsrouter} nc -w 5 127.0.0.1 12345 <"$tmpfile" > /dev/null
rm -f "$tmpfile"
wait
if ! diff -u "$TMPFILE2" "$TMPFILE3" ; then
echo "FAIL: lost packets during requeue?!" 1>&2
return
fi
echo "PASS: tcp via loopback and re-queueing"
} }
ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
@ -328,5 +371,6 @@ test_queue 20
test_tcp_forward test_tcp_forward
test_tcp_localhost test_tcp_localhost
test_tcp_localhost_requeue
exit $ret exit $ret