From eae8dee992af622fd992cb2370cd596ac80ef141 Mon Sep 17 00:00:00 2001 From: Thomas Egerer Date: Mon, 27 Jul 2015 10:50:19 +0200 Subject: [PATCH 1/4] xfrm6: Fix IPv6 ECN decapsulation Using ipv6_get_dsfield on the outer IP header implies that inner and outer header are of the the same address family. For interfamily tunnels, particularly 646, the code reading the DSCP field obtains the wrong values (IHL and the upper four bits of the DSCP field). This can cause the code to detect a congestion encoutered state in the outer header and enable the corresponding bits in the inner header, too. Since the DSCP field is stored in the xfrm mode common buffer independently from the IP version of the outer header, it's safe (and correct) to take this value from there. Signed-off-by: Thomas Egerer Signed-off-by: Steffen Klassert --- net/ipv6/xfrm6_mode_tunnel.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 901ef6f8addc..f7fbdbabe50e 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -20,10 +20,9 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) { - const struct ipv6hdr *outer_iph = ipv6_hdr(skb); struct ipv6hdr *inner_iph = ipipv6_hdr(skb); - if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph))) + if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos)) IP6_ECN_set_ce(inner_iph); } From df367561ffe5a66cd0b2970fdb8897d5487d38e6 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Fri, 7 Aug 2015 09:59:34 +0200 Subject: [PATCH 2/4] net/xfrm: use kmemdup rather than duplicating its implementation The patch was generated using fixed coccinelle semantic patch scripts/coccinelle/api/memdup.cocci [1]. [1]: http://permalink.gmane.org/gmane.linux.kernel/2014320 Signed-off-by: Andrzej Hajda Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 0cebf1fc37a2..a8de9e300200 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -925,12 +925,10 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) return err; if (attrs[XFRMA_ADDRESS_FILTER]) { - filter = kmalloc(sizeof(*filter), GFP_KERNEL); + filter = kmemdup(nla_data(attrs[XFRMA_ADDRESS_FILTER]), + sizeof(*filter), GFP_KERNEL); if (filter == NULL) return -ENOMEM; - - memcpy(filter, nla_data(attrs[XFRMA_ADDRESS_FILTER]), - sizeof(*filter)); } if (attrs[XFRMA_PROTO]) From 42a7b32b73d6bf22e4bdd7bf68746e2d71f4cd8d Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 10 Aug 2015 16:58:11 -0600 Subject: [PATCH 3/4] xfrm: Add oif to dst lookups Rules can be installed that direct route lookups to specific tables based on oif. Plumb the oif through the xfrm lookups so it gets set in the flow struct and passed to the resolver routines. Signed-off-by: David Ahern Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 7 +++++-- net/ipv4/xfrm4_policy.c | 11 ++++++----- net/ipv6/xfrm6_policy.c | 7 ++++--- net/xfrm/xfrm_policy.c | 24 ++++++++++++++---------- 4 files changed, 29 insertions(+), 20 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index f0ee97eec24d..312e3fee9ccf 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -285,10 +285,13 @@ struct xfrm_policy_afinfo { unsigned short family; struct dst_ops *dst_ops; void (*garbage_collect)(struct net *net); - struct dst_entry *(*dst_lookup)(struct net *net, int tos, + struct dst_entry *(*dst_lookup)(struct net *net, + int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr); - int (*get_saddr)(struct net *net, xfrm_address_t *saddr, xfrm_address_t *daddr); + int (*get_saddr)(struct net *net, int oif, + xfrm_address_t *saddr, + xfrm_address_t *daddr); void (*decode_session)(struct sk_buff *skb, struct flowi *fl, int reverse); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index bff69746e05f..55b3c0f4dde5 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -19,7 +19,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo; static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, - int tos, + int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr) { @@ -28,6 +28,7 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, memset(fl4, 0, sizeof(*fl4)); fl4->daddr = daddr->a4; fl4->flowi4_tos = tos; + fl4->flowi4_oif = oif; if (saddr) fl4->saddr = saddr->a4; @@ -38,22 +39,22 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, return ERR_CAST(rt); } -static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, +static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr) { struct flowi4 fl4; - return __xfrm4_dst_lookup(net, &fl4, tos, saddr, daddr); + return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr); } -static int xfrm4_get_saddr(struct net *net, +static int xfrm4_get_saddr(struct net *net, int oif, xfrm_address_t *saddr, xfrm_address_t *daddr) { struct dst_entry *dst; struct flowi4 fl4; - dst = __xfrm4_dst_lookup(net, &fl4, 0, NULL, daddr); + dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr); if (IS_ERR(dst)) return -EHOSTUNREACH; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index ed0583c1b9fc..a74013d3eceb 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -26,7 +26,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo; -static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, +static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr) { @@ -35,6 +35,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int err; memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_oif = oif; memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr)); if (saddr) memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr)); @@ -50,13 +51,13 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, return dst; } -static int xfrm6_get_saddr(struct net *net, +static int xfrm6_get_saddr(struct net *net, int oif, xfrm_address_t *saddr, xfrm_address_t *daddr) { struct dst_entry *dst; struct net_device *dev; - dst = xfrm6_dst_lookup(net, 0, NULL, daddr); + dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr); if (IS_ERR(dst)) return -EHOSTUNREACH; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 18cead7645be..94af3d065785 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -115,7 +115,8 @@ static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) rcu_read_unlock(); } -static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, +static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, + int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr, int family) @@ -127,14 +128,15 @@ static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, if (unlikely(afinfo == NULL)) return ERR_PTR(-EAFNOSUPPORT); - dst = afinfo->dst_lookup(net, tos, saddr, daddr); + dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr); xfrm_policy_put_afinfo(afinfo); return dst; } -static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos, +static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, + int tos, int oif, xfrm_address_t *prev_saddr, xfrm_address_t *prev_daddr, int family) @@ -153,7 +155,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos, daddr = x->coaddr; } - dst = __xfrm_dst_lookup(net, tos, saddr, daddr, family); + dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family); if (!IS_ERR(dst)) { if (prev_saddr != saddr) @@ -1373,15 +1375,15 @@ int __xfrm_sk_clone_policy(struct sock *sk) } static int -xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote, - unsigned short family) +xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local, + xfrm_address_t *remote, unsigned short family) { int err; struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); if (unlikely(afinfo == NULL)) return -EINVAL; - err = afinfo->get_saddr(net, local, remote); + err = afinfo->get_saddr(net, oif, local, remote); xfrm_policy_put_afinfo(afinfo); return err; } @@ -1410,7 +1412,9 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, remote = &tmpl->id.daddr; local = &tmpl->saddr; if (xfrm_addr_any(local, tmpl->encap_family)) { - error = xfrm_get_saddr(net, &tmp, remote, tmpl->encap_family); + error = xfrm_get_saddr(net, fl->flowi_oif, + &tmp, remote, + tmpl->encap_family); if (error) goto fail; local = &tmp; @@ -1690,8 +1694,8 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { family = xfrm[i]->props.family; - dst = xfrm_dst_lookup(xfrm[i], tos, &saddr, &daddr, - family); + dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif, + &saddr, &daddr, family); err = PTR_ERR(dst); if (IS_ERR(dst)) goto put_states; From e69948a0a5309f3ef5715cb4ca7a9bd77d64e2cf Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 11 Aug 2015 13:35:01 -0700 Subject: [PATCH 4/4] net: Document xfrm4_gc_thresh and xfrm6_gc_thresh This change adds documentation for xfrm4_gc_thresh and xfrm6_gc_thresh based on the comments in commit eeb1b73378b56 ("xfrm: Increase the garbage collector threshold"). Signed-off-by: Alexander Duyck Signed-off-by: Steffen Klassert --- Documentation/networking/ip-sysctl.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 56db1efd7189..46e88ed7f41d 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1181,6 +1181,11 @@ tag - INTEGER Allows you to write a number, which can be used as required. Default value is 0. +xfrm4_gc_thresh - INTEGER + The threshold at which we will start garbage collecting for IPv4 + destination cache entries. At twice this value the system will + refuse new allocations. + Alexey Kuznetsov. kuznet@ms2.inr.ac.ru @@ -1617,6 +1622,11 @@ ratelimit - INTEGER otherwise the minimal space between responses in milliseconds. Default: 1000 +xfrm6_gc_thresh - INTEGER + The threshold at which we will start garbage collecting for IPv6 + destination cache entries. At twice this value the system will + refuse new allocations. + IPv6 Update by: Pekka Savola