From 703fb94ec58e0e8769380c2877a8a34aeb5b6c97 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 13 Nov 2012 08:52:24 +0100 Subject: [PATCH 1/7] xfrm: Fix the gc threshold value for ipv4 The xfrm gc threshold value depends on ip_rt_max_size. This value was set to INT_MAX with the routing cache removal patch, so we start doing garbage collecting when we have INT_MAX/2 IPsec routes cached. Fix this by going back to the static threshold of 1024 routes. Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 2 +- net/ipv4/route.c | 2 +- net/ipv4/xfrm4_policy.c | 13 +------------ 3 files changed, 3 insertions(+), 14 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 6f0ba01afe73..63445ede48bb 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1351,7 +1351,7 @@ struct xfrm6_tunnel { }; extern void xfrm_init(void); -extern void xfrm4_init(int rt_hash_size); +extern void xfrm4_init(void); extern int xfrm_state_init(struct net *net); extern void xfrm_state_fini(struct net *net); extern void xfrm4_state_init(void); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a8c651216fa6..200d287e49f5 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2597,7 +2597,7 @@ int __init ip_rt_init(void) pr_err("Unable to create route proc files\n"); #ifdef CONFIG_XFRM xfrm_init(); - xfrm4_init(ip_rt_max_size); + xfrm4_init(); #endif rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 05c5ab8d983c..3be0ac2c1920 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -279,19 +279,8 @@ static void __exit xfrm4_policy_fini(void) xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo); } -void __init xfrm4_init(int rt_max_size) +void __init xfrm4_init(void) { - /* - * Select a default value for the gc_thresh based on the main route - * table hash size. It seems to me the worst case scenario is when - * we have ipsec operating in transport mode, in which we create a - * dst_entry per socket. The xfrm gc algorithm starts trying to remove - * entries at gc_thresh, and prevents new allocations as 2*gc_thresh - * so lets set an initial xfrm gc_thresh value at the rt_max_size/2. - * That will let us store an ipsec connection per route table entry, - * and start cleaning when were 1/2 full - */ - xfrm4_dst_ops.gc_thresh = rt_max_size/2; dst_entries_init(&xfrm4_dst_ops); xfrm4_state_init(); From 4fe198e6b136c516df493ec325ab8f70d470f477 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 19 Nov 2012 06:42:21 +0000 Subject: [PATCH 2/7] netfilter: ipset: Fix range bug in hash:ip,port,net Due to the missing ininitalization at adding/deleting entries, when a plain_ip,port,net element was the object, multiple elements were added/deleted instead. The bug came from the missing dangling default initialization. The error-prone default initialization is corrected in all hash:* types. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_ip.c | 4 ++-- net/netfilter/ipset/ip_set_hash_ipport.c | 7 +++---- net/netfilter/ipset/ip_set_hash_ipportip.c | 7 +++---- net/netfilter/ipset/ip_set_hash_ipportnet.c | 7 +++++-- 4 files changed, 13 insertions(+), 12 deletions(-) diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index ec3dba5dcd62..5c0b78528e55 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -173,6 +173,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], return adtfn(set, &nip, timeout, flags); } + ip_to = ip; if (tb[IPSET_ATTR_IP_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) @@ -185,8 +186,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], if (!cidr || cidr > 32) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); - } else - ip_to = ip; + } hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1); diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 0171f7502fa5..6283351f4eeb 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -162,7 +162,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport4_elem data = { }; - u32 ip, ip_to = 0, p = 0, port, port_to; + u32 ip, ip_to, p = 0, port, port_to; u32 timeout = h->timeout; bool with_ports = false; int ret; @@ -210,7 +210,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], return ip_set_eexist(ret, flags) ? 0 : ret; } - ip = ntohl(data.ip); + ip_to = ip = ntohl(data.ip); if (tb[IPSET_ATTR_IP_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) @@ -223,8 +223,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], if (!cidr || cidr > 32) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); - } else - ip_to = ip; + } port_to = port = ntohs(data.port); if (with_ports && tb[IPSET_ATTR_PORT_TO]) { diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 6344ef551ec8..6a21271c8d5a 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -166,7 +166,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem data = { }; - u32 ip, ip_to = 0, p = 0, port, port_to; + u32 ip, ip_to, p = 0, port, port_to; u32 timeout = h->timeout; bool with_ports = false; int ret; @@ -218,7 +218,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], return ip_set_eexist(ret, flags) ? 0 : ret; } - ip = ntohl(data.ip); + ip_to = ip = ntohl(data.ip); if (tb[IPSET_ATTR_IP_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) @@ -231,8 +231,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], if (!cidr || cidr > 32) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); - } else - ip_to = ip; + } port_to = port = ntohs(data.port); if (with_ports && tb[IPSET_ATTR_PORT_TO]) { diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index cb71f9a774e7..2d5cd4ee30eb 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -215,8 +215,8 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem data = { .cidr = HOST_MASK - 1 }; - u32 ip, ip_to = 0, p = 0, port, port_to; - u32 ip2_from = 0, ip2_to, ip2_last, ip2; + u32 ip, ip_to, p = 0, port, port_to; + u32 ip2_from, ip2_to, ip2_last, ip2; u32 timeout = h->timeout; bool with_ports = false; u8 cidr; @@ -286,6 +286,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], return ip_set_eexist(ret, flags) ? 0 : ret; } + ip_to = ip; if (tb[IPSET_ATTR_IP_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) @@ -306,6 +307,8 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (port > port_to) swap(port, port_to); } + + ip2_to = ip2_from; if (tb[IPSET_ATTR_IP2_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to); if (ret) From e93b5f9f320db431ec8623a4c667811007e07fd7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 21 Nov 2012 01:37:38 +0000 Subject: [PATCH 3/7] netfilter: cttimeout: fix buffer overflow Chen Gang reports: the length of nla_data(cda[CTA_TIMEOUT_NAME]) is not limited in server side. And indeed, its used to strcpy to a fixed-sized buffer. Fortunately, nfnetlink users need CAP_NET_ADMIN. Reported-by: Chen Gang Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_cttimeout.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 8847b4d8be06..701c88a20fea 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -41,7 +41,8 @@ MODULE_DESCRIPTION("cttimeout: Extended Netfilter Connection Tracking timeout tu static LIST_HEAD(cttimeout_list); static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = { - [CTA_TIMEOUT_NAME] = { .type = NLA_NUL_STRING }, + [CTA_TIMEOUT_NAME] = { .type = NLA_NUL_STRING, + .len = CTNL_TIMEOUT_NAME_MAX - 1}, [CTA_TIMEOUT_L3PROTO] = { .type = NLA_U16 }, [CTA_TIMEOUT_L4PROTO] = { .type = NLA_U8 }, [CTA_TIMEOUT_DATA] = { .type = NLA_NESTED }, From 636174219b52b5a8bc51bc23bbcba97cd30a65e3 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 22 Nov 2012 23:04:14 +0200 Subject: [PATCH 4/7] ipv4: do not cache looped multicasts Starting from 3.6 we cache output routes for multicasts only when using route to 224/4. For local receivers we can set RTCF_LOCAL flag depending on the membership but in such case we use maddr and saddr which are not caching keys as before. Additionally, we can not use same place to cache routes that differ in RTCF_LOCAL flag value. Fix it by caching only RTCF_MULTICAST entries without RTCF_LOCAL (send-only, no loopback). As a side effect, we avoid unneeded lookup for fnhe when not caching because multicasts are not redirected and they do not learn PMTU. Thanks to Maxime Bizon for showing the caching problems in __mkroute_output for 3.6 kernels: different RTCF_LOCAL flag in cache can lead to wrong ip_mc_output or ip_output call and the visible problem is that traffic can not reach local receivers via loopback. Reported-by: Maxime Bizon Tested-by: Maxime Bizon Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/route.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 200d287e49f5..df251424d816 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1785,6 +1785,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, if (dev_out->flags & IFF_LOOPBACK) flags |= RTCF_LOCAL; + do_cache = true; if (type == RTN_BROADCAST) { flags |= RTCF_BROADCAST | RTCF_LOCAL; fi = NULL; @@ -1793,6 +1794,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res, if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr, fl4->flowi4_proto)) flags &= ~RTCF_LOCAL; + else + do_cache = false; /* If multicast route do not exist use * default one, but do not gateway in this case. * Yes, it is hack. @@ -1802,8 +1805,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res, } fnhe = NULL; - do_cache = fi != NULL; - if (fi) { + do_cache &= fi != NULL; + if (do_cache) { struct rtable __rcu **prth; struct fib_nh *nh = &FIB_RES_NH(*res); From b26623dab7eeb1e9f5898c7a49458789dd492f20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?fran=C3=A7ois=20romieu?= Date: Wed, 21 Nov 2012 10:07:29 +0000 Subject: [PATCH 5/7] 8139cp: revert "set ring address before enabling receiver" This patch reverts b01af4579ec41f48e9b9c774e70bd6474ad210db. The original patch was tested with emulated hardware. Real hardware chokes. Fixes https://bugzilla.kernel.org/show_bug.cgi?id=47041 Signed-off-by: Francois Romieu Acked-by: Jeff Garzik Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/8139cp.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index 1c818254b7be..b01f83a044c4 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -979,17 +979,6 @@ static void cp_init_hw (struct cp_private *cp) cpw32_f (MAC0 + 0, le32_to_cpu (*(__le32 *) (dev->dev_addr + 0))); cpw32_f (MAC0 + 4, le32_to_cpu (*(__le32 *) (dev->dev_addr + 4))); - cpw32_f(HiTxRingAddr, 0); - cpw32_f(HiTxRingAddr + 4, 0); - - ring_dma = cp->ring_dma; - cpw32_f(RxRingAddr, ring_dma & 0xffffffff); - cpw32_f(RxRingAddr + 4, (ring_dma >> 16) >> 16); - - ring_dma += sizeof(struct cp_desc) * CP_RX_RING_SIZE; - cpw32_f(TxRingAddr, ring_dma & 0xffffffff); - cpw32_f(TxRingAddr + 4, (ring_dma >> 16) >> 16); - cp_start_hw(cp); cpw8(TxThresh, 0x06); /* XXX convert magic num to a constant */ @@ -1003,6 +992,17 @@ static void cp_init_hw (struct cp_private *cp) cpw8(Config5, cpr8(Config5) & PMEStatus); + cpw32_f(HiTxRingAddr, 0); + cpw32_f(HiTxRingAddr + 4, 0); + + ring_dma = cp->ring_dma; + cpw32_f(RxRingAddr, ring_dma & 0xffffffff); + cpw32_f(RxRingAddr + 4, (ring_dma >> 16) >> 16); + + ring_dma += sizeof(struct cp_desc) * CP_RX_RING_SIZE; + cpw32_f(TxRingAddr, ring_dma & 0xffffffff); + cpw32_f(TxRingAddr + 4, (ring_dma >> 16) >> 16); + cpw16(MultiIntr, 0); cpw8_f(Cfg9346, Cfg9346_Lock); From cc9b310165e7ea2f3dc90e1eea6ce57c9b7981d1 Mon Sep 17 00:00:00 2001 From: Zhi Yong Wu Date: Thu, 22 Nov 2012 00:10:01 +0000 Subject: [PATCH 6/7] vxlan: fix command usage in its doc Some commands don't work in its example doc. The patch will fix it. Signed-off-by: Zhi Yong Wu Signed-off-by: David S. Miller --- Documentation/networking/vxlan.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/vxlan.txt b/Documentation/networking/vxlan.txt index 5b34b762d7d5..6d993510f091 100644 --- a/Documentation/networking/vxlan.txt +++ b/Documentation/networking/vxlan.txt @@ -32,7 +32,7 @@ no entry is in the forwarding table. # ip link delete vxlan0 3. Show vxlan info - # ip -d show vxlan0 + # ip -d link show vxlan0 It is possible to create, destroy and display the vxlan forwarding table using the new bridge command. @@ -41,7 +41,7 @@ forwarding table using the new bridge command. # bridge fdb add to 00:17:42:8a:b4:05 dst 192.19.0.2 dev vxlan0 2. Delete forwarding table entry - # bridge fdb delete 00:17:42:8a:b4:05 + # bridge fdb delete 00:17:42:8a:b4:05 dev vxlan0 3. Show forwarding table # bridge fdb show dev vxlan0 From 4a25417c20fac00b3afd58ce27408f964d19e708 Mon Sep 17 00:00:00 2001 From: Ariel Elior Date: Thu, 22 Nov 2012 07:16:17 +0000 Subject: [PATCH 7/7] bnx2x: remove redundant warning log fix bug where a register which was only meant to be read in 578xx/57712 devices causes a bogus error message to be logged when read from other devices. Signed-off-by: Ariel Elior Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index bd1fd3d87c24..01611b33a93d 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -9545,10 +9545,13 @@ static int __devinit bnx2x_prev_unload_common(struct bnx2x *bp) */ static void __devinit bnx2x_prev_interrupted_dmae(struct bnx2x *bp) { - u32 val = REG_RD(bp, PGLUE_B_REG_PGLUE_B_INT_STS); - if (val & PGLUE_B_PGLUE_B_INT_STS_REG_WAS_ERROR_ATTN) { - BNX2X_ERR("was error bit was found to be set in pglueb upon startup. Clearing"); - REG_WR(bp, PGLUE_B_REG_WAS_ERROR_PF_7_0_CLR, 1 << BP_FUNC(bp)); + if (!CHIP_IS_E1x(bp)) { + u32 val = REG_RD(bp, PGLUE_B_REG_PGLUE_B_INT_STS); + if (val & PGLUE_B_PGLUE_B_INT_STS_REG_WAS_ERROR_ATTN) { + BNX2X_ERR("was error bit was found to be set in pglueb upon startup. Clearing"); + REG_WR(bp, PGLUE_B_REG_WAS_ERROR_PF_7_0_CLR, + 1 << BP_FUNC(bp)); + } } }