From 93efac3f2e03321129de67a3c0ba53048bb53e31 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 4 Sep 2015 13:21:06 +0800 Subject: [PATCH 01/79] ipv6: Fix IPsec pre-encap fragmentation check The IPv6 IPsec pre-encap path performs fragmentation for tunnel-mode packets. That is, we perform fragmentation pre-encap rather than post-encap. A check was added later to ensure that proper MTU information is passed back for locally generated traffic. Unfortunately this check was performed on all IPsec packets, including transport-mode packets. What's more, the check failed to take GSO into account. The end result is that transport-mode GSO packets get dropped at the check. This patch fixes it by moving the tunnel mode check forward as well as adding the GSO check. Fixes: dd767856a36e ("xfrm6: Don't call icmpv6_send on local error") Signed-off-by: Herbert Xu Signed-off-by: Steffen Klassert --- net/ipv6/xfrm6_output.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 09c76a7b474d..be033f22a3f3 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -136,6 +136,7 @@ static int __xfrm6_output(struct sock *sk, struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); struct xfrm_state *x = dst->xfrm; int mtu; + bool toobig; #ifdef CONFIG_NETFILTER if (!x) { @@ -144,25 +145,29 @@ static int __xfrm6_output(struct sock *sk, struct sk_buff *skb) } #endif + if (x->props.mode != XFRM_MODE_TUNNEL) + goto skip_frag; + if (skb->protocol == htons(ETH_P_IPV6)) mtu = ip6_skb_dst_mtu(skb); else mtu = dst_mtu(skb_dst(skb)); - if (skb->len > mtu && xfrm6_local_dontfrag(skb)) { + toobig = skb->len > mtu && !skb_is_gso(skb); + + if (toobig && xfrm6_local_dontfrag(skb)) { xfrm6_local_rxpmtu(skb, mtu); return -EMSGSIZE; - } else if (!skb->ignore_df && skb->len > mtu && skb->sk) { + } else if (!skb->ignore_df && toobig && skb->sk) { xfrm_local_error(skb, mtu); return -EMSGSIZE; } - if (x->props.mode == XFRM_MODE_TUNNEL && - ((skb->len > mtu && !skb_is_gso(skb)) || - dst_allfrag(skb_dst(skb)))) { + if (toobig || dst_allfrag(skb_dst(skb))) return ip6_fragment(sk, skb, x->outer_mode->afinfo->output_finish); - } + +skip_frag: return x->outer_mode->afinfo->output_finish(sk, skb); } From 04a6b8bfee06e309be7e9ae4527cdab19c081761 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Fri, 11 Sep 2015 09:57:20 +0200 Subject: [PATCH 02/79] xfrm6: Fix ICMPv6 and MH header checks in _decode_session6 Ensure there's enough data left prior to calling pskb_may_pull(). If skb->data was already advanced, we'll call pskb_may_pull() with a negative value converted to unsigned int -- leading to a huge positive value. That won't matter in practice as pskb_may_pull() will likely fail in this case, but it leads to underflow reports on kernels handling such kinds of over-/underflows, e.g. a PaX enabled kernel instrumented with the size_overflow plugin.
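A minimal stand-alone sketch of the conversion problem described above (user-space C, all names and numbers invented for illustration): a negative pointer difference passed to a function that, like pskb_may_pull(), takes an unsigned length.

	#include <stdio.h>

	/* stand-in for pskb_may_pull(), which takes an unsigned length */
	static void may_pull(unsigned int len)
	{
		printf("asked to pull %u bytes\n", len);
	}

	int main(void)
	{
		unsigned char pkt[128];
		unsigned char *nh = pkt;		/* network header */
		unsigned char *data = pkt + 64;		/* skb->data already advanced */
		int offset = 40;

		/* nh + offset + 2 lies below data, so the difference is negative */
		if (nh + offset + 2 < data)
			printf("patched code would skip the pull here\n");

		/* unpatched behaviour: -22 is converted to a huge unsigned value */
		may_pull(nh + offset + 2 - data);
		return 0;
	}

The "nh + offset + 2 < skb->data" guards added in the hunks below are exactly this check, short-circuiting the condition so that pskb_may_pull() is never called with the underflowed length.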
Reported-by: satmd Reported-and-tested-by: Marcin Jurkowski Signed-off-by: Mathias Krause Cc: PaX Team Signed-off-by: Steffen Klassert --- net/ipv6/xfrm6_policy.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 30caa289c5db..f10b9400b6d7 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -178,7 +178,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) return; case IPPROTO_ICMPV6: - if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) { + if (!onlyproto && (nh + offset + 2 < skb->data || + pskb_may_pull(skb, nh + offset + 2 - skb->data))) { u8 *icmp; nh = skb_network_header(skb); @@ -192,7 +193,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) #if IS_ENABLED(CONFIG_IPV6_MIP6) case IPPROTO_MH: offset += ipv6_optlen(exthdr); - if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) { + if (!onlyproto && (nh + offset + 3 < skb->data || + pskb_may_pull(skb, nh + offset + 3 - skb->data))) { struct ip6_mh *mh; nh = skb_network_header(skb); From 4e077237cfb6ab13701d504060d3ae248b191e6e Mon Sep 17 00:00:00 2001 From: Michael Rossberg Date: Tue, 29 Sep 2015 11:25:08 +0200 Subject: [PATCH 03/79] xfrm: Fix state threshold configuration from userspace Allow to change the replay threshold (XFRMA_REPLAY_THRESH) and expiry timer (XFRMA_ETIMER_THRESH) of a state without having to set other attributes like replay counter and byte lifetime. Changing these other values while traffic flows will break the state. Signed-off-by: Michael Rossberg Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index a8de9e300200..24e06a2377f6 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1928,8 +1928,10 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL]; struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; + struct nlattr *et = attrs[XFRMA_ETIMER_THRESH]; + struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH]; - if (!lt && !rp && !re) + if (!lt && !rp && !re && !et && !rt) return err; /* pedantic mode - thou shalt sayeth replaceth */ From 6ece90f9a13e2592cbd6634f74bcb306169b5ab6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 29 Sep 2015 21:10:05 +0200 Subject: [PATCH 04/79] netfilter: fix Kconfig dependencies for nf_dup_ipv{4,6} net/built-in.o: In function `nf_dup_ipv4': (.text+0xed24d): undefined reference to `nf_conntrack_untracked' net/built-in.o: In function `nf_dup_ipv4': (.text+0xed267): undefined reference to `nf_conntrack_untracked' net/built-in.o: In function `nf_dup_ipv6': (.text+0x158aef): undefined reference to `nf_conntrack_untracked' net/built-in.o: In function `nf_dup_ipv6': (.text+0x158b09): undefined reference to `nf_conntrack_untracked' Reported-by: Randy Dunlap Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/Kconfig | 1 + net/ipv6/netfilter/Kconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 690d27d3f2f9..a35584176535 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -75,6 +75,7 @@ endif # NF_TABLES config NF_DUP_IPV4 tristate "Netfilter IPv4 packet duplication to alternate destination" + depends on !NF_CONNTRACK || NF_CONNTRACK help This option enables the nf_dup_ipv4 core, which duplicates an IPv4 
packet to be rerouted to another destination. diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 96833e4b3193..f6a024e141e5 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -58,6 +58,7 @@ endif # NF_TABLES config NF_DUP_IPV6 tristate "Netfilter IPv6 packet duplication to alternate destination" + depends on !NF_CONNTRACK || NF_CONNTRACK help This option enables the nf_dup_ipv6 core, which duplicates an IPv6 packet to be rerouted to another destination. From cc4998febd567d1c671684abce5595344bd4e8b2 Mon Sep 17 00:00:00 2001 From: lucien Date: Tue, 6 Oct 2015 21:03:07 +0800 Subject: [PATCH 05/79] netfilter: ipt_rpfilter: remove the nh_scope test in rpfilter_lookup_reverse The --accept-local option works for res.type == RTN_LOCAL, which should come from the local table, but there the fib_info's nh->nh_scope is RT_SCOPE_NOWHERE (> RT_SCOPE_HOST), set in fib_create_info(): if (cfg->fc_scope == RT_SCOPE_HOST) { struct fib_nh *nh = fi->fib_nh; /* Local address is added. */ if (nhs != 1 || nh->nh_gw) goto err_inval; nh->nh_scope = RT_SCOPE_NOWHERE; <=== nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif); err = -ENODEV; if (!nh->nh_dev) goto failure; But in our rpfilter_lookup_reverse(): if (dev_match || flags & XT_RPFILTER_LOOSE) return FIB_RES_NH(res).nh_scope <= RT_SCOPE_HOST; If nh->nh_scope > RT_SCOPE_HOST, this check fails and the --accept-local option never matches. The test is bogus and can be removed to fix this issue: if (dev_match || flags & XT_RPFILTER_LOOSE) return FIB_RES_NH(res).nh_scope <= RT_SCOPE_HOST; IPv6 does not have this issue. Signed-off-by: Xin Long Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_rpfilter.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 8618fd150c96..c4ffc9de1654 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -61,9 +61,7 @@ static bool rpfilter_lookup_reverse(struct flowi4 *fl4, if (FIB_RES_DEV(res) == dev) dev_match = true; #endif - if (dev_match || flags & XT_RPFILTER_LOOSE) - return FIB_RES_NH(res).nh_scope <= RT_SCOPE_HOST; - return dev_match; + return dev_match || flags & XT_RPFILTER_LOOSE; } static bool rpfilter_is_local(const struct sk_buff *skb) From 514ed62ed3f6846325d9bfb15cb5c3540547f13b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 8 Oct 2015 23:38:07 +0200 Subject: [PATCH 06/79] netfilter: sync with packet rx also after removing queue entries We need to sync packet rx again after flushing the queue entries. Otherwise, the following race could happen: cpu1: nf_unregister_hook(H) called, H unlinked from lists, calls synchronize_net() to wait for packet rx completion. Problem is that while no new nf_queue_entry structs that use H can be allocated, another CPU might receive a verdict from userspace just before cpu1 calls nf_queue_nf_hook_drop to remove this entry: cpu2: receive verdict from userspace, lock queue cpu2: unlink nf_queue_entry struct E, which references H, from queue list cpu1: calls nf_queue_nf_hook_drop, blocks on queue spinlock cpu2: unlock queue cpu1: nf_queue_nf_hook_drop drops affected queue entries cpu2: call nf_reinject for E cpu1: kfree(H) cpu2: potential use-after-free for H Cc: Eric W. Biederman Fixes: 085db2c04557 ("netfilter: Per network namespace netfilter hooks.") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 8e47f8113495..21a085686dc1 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -152,6 +152,8 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) #endif synchronize_net(); nf_queue_nf_hook_drop(net, &entry->ops); + /* other cpu might still process nfqueue verdict that used reg */ + synchronize_net(); kfree(entry); } EXPORT_SYMBOL(nf_unregister_net_hook); From 00db674bedd68ff8b5afae9030ff5e04d45d1b4a Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Fri, 16 Oct 2015 09:40:28 +0300 Subject: [PATCH 07/79] netfilter: ipset: Fix sleeping memory allocation in atomic context Commit 00590fdd5be0 introduced RCU locking in the list type and in doing so introduced a memory allocation in list_set_add, which is done in an atomic context, due to the fact that ipset rcu list modifications are serialised with a spin lock. The reason why we can't use a mutex is that in addition to modifying the list with ipset commands, it's also being modified when a particular ipset rule timeout expires, aka garbage collection. This gc is triggered from set_cleanup_entries, which in turn is invoked from a timer, thus requiring the lock to be bh-safe. Concretely, the following call chain can lead to a "sleeping function called in atomic context" splat: call_ad -> list_set_uadt -> list_set_uadd -> kzalloc(, GFP_KERNEL), since GFP_KERNEL allows initiating direct reclaim and thus potentially sleeping in the allocation path. To fix the issue, change the allocation type to GFP_ATOMIC, to correctly reflect that it is occurring in an atomic context. Fixes: 00590fdd5be0 ("netfilter: ipset: Introduce RCU locking in list type") Signed-off-by: Nikolay Borisov Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_list_set.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index a1fe5377a2b3..5a30ce6e8c90 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -297,7 +297,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, ip_set_timeout_expired(ext_timeout(n, set)))) n = NULL; - e = kzalloc(set->dsize, GFP_KERNEL); + e = kzalloc(set->dsize, GFP_ATOMIC); if (!e) return -ENOMEM; e->id = d->id; From ca064bd89363a6e7e71b1c5226ff1b718957a9d4 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 19 Oct 2015 10:30:05 +0200 Subject: [PATCH 08/79] xfrm: Fix pmtu discovery for local generated packets. Commit 044a832a777 ("xfrm: Fix local error reporting crash with interfamily tunnels") moved the setting of skb->protocol behind the last access of the inner mode family to fix an interfamily crash. Unfortunately now skb->protocol might not be set at all, so we fail dispatch to the inner address family. As a result, the local error handler is not called and the mtu value is not reported back to userspace. We fix this by setting skb->protocol on message size errors before we call xfrm_local_error.
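A simplified sketch, from memory rather than the exact kernel source, of how xfrm_local_error() dispatches on skb->protocol, which is why the field must be filled in before the call:

	void xfrm_local_error(struct sk_buff *skb, int mtu)
	{
		unsigned int proto;
		struct xfrm_state_afinfo *afinfo;

		if (skb->protocol == htons(ETH_P_IP))
			proto = AF_INET;
		else if (skb->protocol == htons(ETH_P_IPV6))
			proto = AF_INET6;
		else
			return;		/* unknown/unset protocol: error is silently dropped */

		afinfo = xfrm_state_get_afinfo(proto);
		if (afinfo)
			afinfo->local_error(skb, mtu);
		xfrm_state_put_afinfo(afinfo);
	}

With skb->protocol left unset, the dispatch falls through to the early return and no error ever reaches the socket, which is the symptom the two hunks below address by setting the protocol before the call.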
Fixes: 044a832a7779c ("xfrm: Fix local error reporting crash with interfamily tunnels") Signed-off-by: Steffen Klassert --- net/ipv4/xfrm4_output.c | 2 ++ net/ipv6/xfrm6_output.c | 1 + 2 files changed, 3 insertions(+) diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 2878dbfffeb7..41a261355662 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -30,6 +30,8 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) mtu = dst_mtu(skb_dst(skb)); if (skb->len > mtu) { + skb->protocol = htons(ETH_P_IP); + if (skb->sk) xfrm_local_error(skb, mtu); else diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index be033f22a3f3..e15feb7b413d 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -79,6 +79,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) if (!skb->ignore_df && skb->len > mtu) { skb->dev = dst->dev; + skb->protocol = htons(ETH_P_IPV6); if (xfrm6_local_dontfrag(skb)) xfrm6_local_rxpmtu(skb, mtu); From aebd99477f03950fb05b29c7248656167198bf2d Mon Sep 17 00:00:00 2001 From: Simon Arlott Date: Thu, 15 Oct 2015 21:00:22 +0100 Subject: [PATCH 09/79] bcm63xx_enet: check 1000BASE-T advertisement configuration If a gigabit ethernet PHY is connected to a fast ethernet MAC, then it can detect 1000 support from the partner but not use it. This results in a forced speed of 1000 and RX/TX failure. Check for 1000BASE-T support and then check the advertisement configuration before setting the MAC speed to 1000mbit. Signed-off-by: Simon Arlott Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 31 ++++++++++++-------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index a7f2cc3e485e..4183c2abeeeb 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -2049,7 +2049,7 @@ static void swphy_poll_timer(unsigned long data) for (i = 0; i < priv->num_ports; i++) { struct bcm63xx_enetsw_port *port; - int val, j, up, advertise, lpa, lpa2, speed, duplex, media; + int val, j, up, advertise, lpa, speed, duplex, media; int external_phy = bcm_enet_port_is_rgmii(i); u8 override; @@ -2092,22 +2092,27 @@ static void swphy_poll_timer(unsigned long data) lpa = bcmenet_sw_mdio_read(priv, external_phy, port->phy_id, MII_LPA); - lpa2 = bcmenet_sw_mdio_read(priv, external_phy, port->phy_id, - MII_STAT1000); - /* figure out media and duplex from advertise and LPA values */ media = mii_nway_result(lpa & advertise); duplex = (media & ADVERTISE_FULL) ? 1 : 0; - if (lpa2 & LPA_1000FULL) - duplex = 1; - if (lpa2 & (LPA_1000FULL | LPA_1000HALF)) - speed = 1000; - else { - if (media & (ADVERTISE_100FULL | ADVERTISE_100HALF)) - speed = 100; - else - speed = 10; + if (media & (ADVERTISE_100FULL | ADVERTISE_100HALF)) + speed = 100; + else + speed = 10; + + if (val & BMSR_ESTATEN) { + advertise = bcmenet_sw_mdio_read(priv, external_phy, + port->phy_id, MII_CTRL1000); + + lpa = bcmenet_sw_mdio_read(priv, external_phy, + port->phy_id, MII_STAT1000); + + if (advertise & (ADVERTISE_1000FULL | ADVERTISE_1000HALF) + && lpa & (LPA_1000FULL | LPA_1000HALF)) { + speed = 1000; + duplex = (lpa & LPA_1000FULL); + } } dev_info(&priv->pdev->dev, From 9e42f715264ff158478fa30eaed847f6e131366b Mon Sep 17 00:00:00 2001 From: Heiko Schocher Date: Sat, 17 Oct 2015 06:04:35 +0200 Subject: [PATCH 10/79] drivers: net: cpsw: add phy-handle parsing add the ability to parse "phy-handle". 
This is needed for phys, which have a DT node, and need to parse DT properties. Signed-off-by: Heiko Schocher Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/cpsw.txt | 1 + drivers/net/ethernet/ti/cpsw.c | 15 +++++++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/net/cpsw.txt b/Documentation/devicetree/bindings/net/cpsw.txt index a9df21aaa154..a2cae4eb4a60 100644 --- a/Documentation/devicetree/bindings/net/cpsw.txt +++ b/Documentation/devicetree/bindings/net/cpsw.txt @@ -39,6 +39,7 @@ Required properties: Optional properties: - dual_emac_res_vlan : Specifies VID to be used to segregate the ports - mac-address : See ethernet.txt file in the same directory +- phy-handle : See ethernet.txt file in the same directory Note: "ti,hwmods" field is used to fetch the base address and irq resources from TI, omap hwmod data base during device registration. diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 8fc90f1c872c..874fb297e96c 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -365,6 +366,7 @@ struct cpsw_priv { spinlock_t lock; struct platform_device *pdev; struct net_device *ndev; + struct device_node *phy_node; struct napi_struct napi_rx; struct napi_struct napi_tx; struct device *dev; @@ -1145,7 +1147,11 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast, 1 << slave_port, 0, 0, ALE_MCAST_FWD_2); - slave->phy = phy_connect(priv->ndev, slave->data->phy_id, + if (priv->phy_node) + slave->phy = of_phy_connect(priv->ndev, priv->phy_node, + &cpsw_adjust_link, 0, slave->data->phy_if); + else + slave->phy = phy_connect(priv->ndev, slave->data->phy_id, &cpsw_adjust_link, slave->data->phy_if); if (IS_ERR(slave->phy)) { dev_err(priv->dev, "phy %s not found on slave %d\n", @@ -1934,11 +1940,12 @@ static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_priv *priv, slave->port_vlan = data->dual_emac_res_vlan; } -static int cpsw_probe_dt(struct cpsw_platform_data *data, +static int cpsw_probe_dt(struct cpsw_priv *priv, struct platform_device *pdev) { struct device_node *node = pdev->dev.of_node; struct device_node *slave_node; + struct cpsw_platform_data *data = &priv->data; int i = 0, ret; u32 prop; @@ -2029,6 +2036,7 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, if (strcmp(slave_node->name, "slave")) continue; + priv->phy_node = of_parse_phandle(slave_node, "phy-handle", 0); parp = of_get_property(slave_node, "phy_id", &lenp); if ((parp == NULL) || (lenp != (sizeof(void *) * 2))) { dev_err(&pdev->dev, "Missing slave[%d] phy_id property\n", i); @@ -2044,7 +2052,6 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, } snprintf(slave_data->phy_id, sizeof(slave_data->phy_id), PHY_ID_FMT, mdio->name, phyid); - slave_data->phy_if = of_get_phy_mode(slave_node); if (slave_data->phy_if < 0) { dev_err(&pdev->dev, "Missing or malformed slave[%d] phy-mode property\n", @@ -2240,7 +2247,7 @@ static int cpsw_probe(struct platform_device *pdev) /* Select default pin state */ pinctrl_pm_select_default_state(&pdev->dev); - if (cpsw_probe_dt(&priv->data, pdev)) { + if (cpsw_probe_dt(priv, pdev)) { dev_err(&pdev->dev, "cpsw: platform data missing\n"); ret = -ENODEV; goto clean_runtime_disable_ret; From d88ecb373bd1877acc43e13311a8e0e6daffc3d2 Mon Sep 17 00:00:00 2001 From: Heiko 
Schocher Date: Sat, 17 Oct 2015 06:04:36 +0200 Subject: [PATCH 11/79] net: phy: smsc: disable energy detect mode On some boards the energy enable detect mode leads in trouble with some switches, so make the enabling of this mode configurable through DT. Signed-off-by: Heiko Schocher Signed-off-by: David S. Miller --- .../devicetree/bindings/net/smsc-lan87xx.txt | 24 +++++++++++++++++++ drivers/net/phy/smsc.c | 19 +++++++++++---- 2 files changed, 38 insertions(+), 5 deletions(-) create mode 100644 Documentation/devicetree/bindings/net/smsc-lan87xx.txt diff --git a/Documentation/devicetree/bindings/net/smsc-lan87xx.txt b/Documentation/devicetree/bindings/net/smsc-lan87xx.txt new file mode 100644 index 000000000000..974edd5c85cc --- /dev/null +++ b/Documentation/devicetree/bindings/net/smsc-lan87xx.txt @@ -0,0 +1,24 @@ +SMSC LAN87xx Ethernet PHY + +Some boards require special tuning values. Configure them +through an Ethernet OF device node. + +Optional properties: + +- smsc,disable-energy-detect: + If set, do not enable energy detect mode for the SMSC phy. + default: enable energy detect mode + +Examples: +smsc phy with disabled energy detect mode on an am335x based board. +&davinci_mdio { + pinctrl-names = "default", "sleep"; + pinctrl-0 = <&davinci_mdio_default>; + pinctrl-1 = <&davinci_mdio_sleep>; + status = "okay"; + + ethernetphy0: ethernet-phy@0 { + reg = <0>; + smsc,disable-energy-detect; + }; +}; diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index 70b08958763a..dc2da8770918 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -43,16 +43,25 @@ static int smsc_phy_ack_interrupt(struct phy_device *phydev) static int smsc_phy_config_init(struct phy_device *phydev) { + int __maybe_unused len; + struct device *dev __maybe_unused = &phydev->dev; + struct device_node *of_node __maybe_unused = dev->of_node; int rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS); + int enable_energy = 1; if (rc < 0) return rc; - /* Enable energy detect mode for this SMSC Transceivers */ - rc = phy_write(phydev, MII_LAN83C185_CTRL_STATUS, - rc | MII_LAN83C185_EDPWRDOWN); - if (rc < 0) - return rc; + if (of_find_property(of_node, "smsc,disable-energy-detect", &len)) + enable_energy = 0; + + if (enable_energy) { + /* Enable energy detect mode for this SMSC Transceivers */ + rc = phy_write(phydev, MII_LAN83C185_CTRL_STATUS, + rc | MII_LAN83C185_EDPWRDOWN); + if (rc < 0) + return rc; + } return smsc_phy_ack_interrupt(phydev); } From 91986fd3d335a2ea651bc85cf5a03f2f61a2aa34 Mon Sep 17 00:00:00 2001 From: Philipp Kirchhofer Date: Sun, 18 Oct 2015 16:02:43 +0200 Subject: [PATCH 12/79] net: mv643xx_eth: Ensure proper data alignment in TSO TX path The TX DMA engine requires that buffers with a size of 8 bytes or smaller must be 64 bit aligned. This requirement may be violated when doing TSO, as in this case larger skb frags can be broken up and transmitted in small parts with then inappropriate alignment. Fix this by checking for proper alignment before handing a buffer to the DMA engine. If the data is misaligned realign it by copying it into the TSO header data area. Signed-off-by: Philipp Kirchhofer Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/mv643xx_eth.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 960169efe636..c05fb794d1ff 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -759,11 +759,23 @@ txq_put_data_tso(struct net_device *dev, struct tx_queue *txq, desc->l4i_chk = 0; desc->byte_cnt = length; - desc->buf_ptr = dma_map_single(dev->dev.parent, data, - length, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(dev->dev.parent, desc->buf_ptr))) { - WARN(1, "dma_map_single failed!\n"); - return -ENOMEM; + + if (length <= 8 && (uintptr_t)data & 0x7) { + /* Copy unaligned small data fragment to TSO header data area */ + memcpy(txq->tso_hdrs + txq->tx_curr_desc * TSO_HEADER_SIZE, + data, length); + desc->buf_ptr = txq->tso_hdrs_dma + + txq->tx_curr_desc * TSO_HEADER_SIZE; + } else { + /* Alignment is okay, map buffer and hand off to hardware */ + txq->tx_desc_mapping[tx_index] = DESC_DMA_MAP_SINGLE; + desc->buf_ptr = dma_map_single(dev->dev.parent, data, + length, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev->dev.parent, + desc->buf_ptr))) { + WARN(1, "dma_map_single failed!\n"); + return -ENOMEM; + } } cmd_sts = BUFFER_OWNED_BY_DMA; From 968200f322daccc6f08fee26fe1bb4232e460408 Mon Sep 17 00:00:00 2001 From: Philipp Kirchhofer Date: Sun, 18 Oct 2015 16:02:44 +0200 Subject: [PATCH 13/79] net: mv643xx_eth: Defer writing the first TX descriptor when using TSO To prevent a race between the TX DMA engine and the CPU the writing of the first transmit descriptor must be deferred until all following descriptors have been updated. The network card may otherwise start transmitting before all packet descriptors are set up correctly, which leads to data corruption or an aborted transmit operation. This deferral is already done in the non-TSO TX path, implement it also in the TSO TX path. Signed-off-by: Philipp Kirchhofer Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mv643xx_eth.c | 26 +++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index c05fb794d1ff..e893a35143c5 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -791,7 +791,8 @@ txq_put_data_tso(struct net_device *dev, struct tx_queue *txq, } static inline void -txq_put_hdr_tso(struct sk_buff *skb, struct tx_queue *txq, int length) +txq_put_hdr_tso(struct sk_buff *skb, struct tx_queue *txq, int length, + u32 *first_cmd_sts, bool first_desc) { struct mv643xx_eth_private *mp = txq_to_mp(txq); int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); @@ -800,6 +801,7 @@ txq_put_hdr_tso(struct sk_buff *skb, struct tx_queue *txq, int length) int ret; u32 cmd_csum = 0; u16 l4i_chk = 0; + u32 cmd_sts; tx_index = txq->tx_curr_desc; desc = &txq->tx_desc_area[tx_index]; @@ -815,9 +817,17 @@ txq_put_hdr_tso(struct sk_buff *skb, struct tx_queue *txq, int length) desc->byte_cnt = hdr_len; desc->buf_ptr = txq->tso_hdrs_dma + txq->tx_curr_desc * TSO_HEADER_SIZE; - desc->cmd_sts = cmd_csum | BUFFER_OWNED_BY_DMA | TX_FIRST_DESC | + cmd_sts = cmd_csum | BUFFER_OWNED_BY_DMA | TX_FIRST_DESC | GEN_CRC; + /* Defer updating the first command descriptor until all + * following descriptors have been written. 
+ */ + if (first_desc) + *first_cmd_sts = cmd_sts; + else + desc->cmd_sts = cmd_sts; + txq->tx_curr_desc++; if (txq->tx_curr_desc == txq->tx_ring_size) txq->tx_curr_desc = 0; @@ -831,6 +841,8 @@ static int txq_submit_tso(struct tx_queue *txq, struct sk_buff *skb, int desc_count = 0; struct tso_t tso; int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + struct tx_desc *first_tx_desc; + u32 first_cmd_sts = 0; /* Count needed descriptors */ if ((txq->tx_desc_count + tso_count_descs(skb)) >= txq->tx_ring_size) { @@ -838,11 +850,14 @@ static int txq_submit_tso(struct tx_queue *txq, struct sk_buff *skb, return -EBUSY; } + first_tx_desc = &txq->tx_desc_area[txq->tx_curr_desc]; + /* Initialize the TSO handler, and prepare the first payload */ tso_start(skb, &tso); total_len = skb->len - hdr_len; while (total_len > 0) { + bool first_desc = (desc_count == 0); char *hdr; data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len); @@ -852,7 +867,8 @@ static int txq_submit_tso(struct tx_queue *txq, struct sk_buff *skb, /* prepare packet headers: MAC + IP + TCP */ hdr = txq->tso_hdrs + txq->tx_curr_desc * TSO_HEADER_SIZE; tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0); - txq_put_hdr_tso(skb, txq, data_left); + txq_put_hdr_tso(skb, txq, data_left, &first_cmd_sts, + first_desc); while (data_left > 0) { int size; @@ -872,6 +888,10 @@ static int txq_submit_tso(struct tx_queue *txq, struct sk_buff *skb, __skb_queue_tail(&txq->tx_skb, skb); skb_tx_timestamp(skb); + /* ensure all other descriptors are written before first cmd_sts */ + wmb(); + first_tx_desc->cmd_sts = first_cmd_sts; + /* clear TX_END status */ mp->work_tx_end &= ~(1 << txq->index); From f6a835bb04ca34b6fd337eea0544a871ba2349b8 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Sun, 18 Oct 2015 23:35:56 +0800 Subject: [PATCH 14/79] vsock: fix missing cleanup when misc_register failed reset transport and unlock if misc_register failed. Signed-off-by: Gao feng Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index df5fc6b340f1..00e8a349aabc 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1948,13 +1948,13 @@ int __vsock_core_init(const struct vsock_transport *t, struct module *owner) err = misc_register(&vsock_device); if (err) { pr_err("Failed to register misc device\n"); - return -ENOENT; + goto err_reset_transport; } err = proto_register(&vsock_proto, 1); /* we want our slab */ if (err) { pr_err("Cannot register vsock protocol\n"); - goto err_misc_deregister; + goto err_deregister_misc; } err = sock_register(&vsock_family_ops); @@ -1969,8 +1969,9 @@ int __vsock_core_init(const struct vsock_transport *t, struct module *owner) err_unregister_proto: proto_unregister(&vsock_proto); -err_misc_deregister: +err_deregister_misc: misc_deregister(&vsock_device); +err_reset_transport: transport = NULL; err_busy: mutex_unlock(&vsock_register_mutex); From ca88ea1247dfee094e2467a3578eaec9bdf0833a Mon Sep 17 00:00:00 2001 From: Joe Jin Date: Mon, 19 Oct 2015 13:37:17 +0800 Subject: [PATCH 15/79] xen-netfront: update num_queues to real created Sometimes xennet_create_queues() may failed to created all requested queues, we need to update num_queues to real created to avoid NULL pointer dereference. Signed-off-by: Joe Jin Cc: Boris Ostrovsky Cc: Konrad Rzeszutek Wilk Cc: Wei Liu Cc: Ian Campbell Cc: David S. Miller Reviewed-by: Boris Ostrovsky Signed-off-by: David S. 
Miller --- drivers/net/xen-netfront.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index f821a97d7827..6febc053a37f 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1706,19 +1706,19 @@ static void xennet_destroy_queues(struct netfront_info *info) } static int xennet_create_queues(struct netfront_info *info, - unsigned int num_queues) + unsigned int *num_queues) { unsigned int i; int ret; - info->queues = kcalloc(num_queues, sizeof(struct netfront_queue), + info->queues = kcalloc(*num_queues, sizeof(struct netfront_queue), GFP_KERNEL); if (!info->queues) return -ENOMEM; rtnl_lock(); - for (i = 0; i < num_queues; i++) { + for (i = 0; i < *num_queues; i++) { struct netfront_queue *queue = &info->queues[i]; queue->id = i; @@ -1728,7 +1728,7 @@ static int xennet_create_queues(struct netfront_info *info, if (ret < 0) { dev_warn(&info->netdev->dev, "only created %d queues\n", i); - num_queues = i; + *num_queues = i; break; } @@ -1738,11 +1738,11 @@ static int xennet_create_queues(struct netfront_info *info, napi_enable(&queue->napi); } - netif_set_real_num_tx_queues(info->netdev, num_queues); + netif_set_real_num_tx_queues(info->netdev, *num_queues); rtnl_unlock(); - if (num_queues == 0) { + if (*num_queues == 0) { dev_err(&info->netdev->dev, "no queues\n"); return -EINVAL; } @@ -1788,7 +1788,7 @@ static int talk_to_netback(struct xenbus_device *dev, if (info->queues) xennet_destroy_queues(info); - err = xennet_create_queues(info, num_queues); + err = xennet_create_queues(info, &num_queues); if (err < 0) goto destroy_ring; From 50010c20597d14667eff0fdb628309986f195230 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 19 Oct 2015 13:16:49 +0300 Subject: [PATCH 16/79] irda: precedence bug in irlmp_seq_hb_idx() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is decrementing the pointer, instead of the value stored in the pointer. KASan detects it as an out of bounds reference. Reported-by: "Berry Cheng 程君(成淼)" Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/irda/irlmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index a26c401ef4a4..43964594aa12 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -1839,7 +1839,7 @@ static void *irlmp_seq_hb_idx(struct irlmp_iter_state *iter, loff_t *off) for (element = hashbin_get_first(iter->hashbin); element != NULL; element = hashbin_get_next(iter->hashbin)) { - if (!off || *off-- == 0) { + if (!off || (*off)-- == 0) { /* NB: hashbin left locked */ return element; } From 53387c4e22ac33d27a552b3d56bad932bd32531b Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Mon, 19 Oct 2015 09:21:37 -0400 Subject: [PATCH 17/79] tipc: extend broadcast link window size The default fix broadcast window size is currently set to 20 packets. This is a very low value, set at a time when we were still testing on 10 Mb/s hubs, and a change to it is long overdue. Commit 7845989cb4b3da1db ("net: tipc: fix stall during bclink wakeup procedure") revealed a problem with this low value. For messages of importance LOW, the backlog queue limit will be calculated to 30 packets, while a single, maximum sized message of 66000 bytes, carried across a 1500 MTU network consists of 46 packets. This leads to the following scenario (among others leading to the same situation): 1: Msg 1 of 46 packets is sent. 
20 packets go to the transmit queue, 26 packets to the backlog queue. 2: Msg 2 of 46 packets is attempted sent, but rejected because there is no more space in the backlog queue at this level. The sender is added to the wakeup queue with a "pending packets chain size" number of 46. 3: Some packets in the transmit queue are acked and released. We try to wake up the sender, but the pending size of 46 is bigger than the LOW wakeup limit of 30, so this doesn't happen. 5: Subsequent acks releases all the remaining buffers. Each time we test for the wakeup criteria and find that 46 still is larger than 30, even after both the transmit and the backlog queues are empty. 6: The sender is never woken up and given a chance to send its message. He is stuck. We could now loosen the wakeup criteria (used by link_prepare_wakeup()) to become equal to the send criteria (used by tipc_link_xmit()), i.e., by ignoring the "pending packets chain size" value altogether, or we can just increase the queue limits so that the criteria can be satisfied anyway. There are good reasons (potentially multiple waiting senders) to not opt for the former solution, so we choose the latter one. This commit fixes the problem by giving the broadcast link window a default value of 50 packets. We also introduce a new minimum link window size BCLINK_MIN_WIN of 32, which is enough to always avoid the described situation. Finally, in order to not break any existing users which may set the window explicitly, we enforce that the window is set to the new minimum value in case the user is trying to set it to anything lower. Fixes: 7845989cb4b3da1db ("net: tipc: fix stall during bclink wakeup procedure") Signed-off-by: Jon Maloy Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/bcast.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 41042de3ae9b..eadba62afa85 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -42,7 +42,8 @@ #include "core.h" #define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */ -#define BCLINK_WIN_DEFAULT 20 /* bcast link window size (default) */ +#define BCLINK_WIN_DEFAULT 50 /* bcast link window size (default) */ +#define BCLINK_WIN_MIN 32 /* bcast minimum link window size */ const char tipc_bclink_name[] = "broadcast-link"; @@ -908,9 +909,10 @@ int tipc_bclink_set_queue_limits(struct net *net, u32 limit) if (!bcl) return -ENOPROTOOPT; - if ((limit < TIPC_MIN_LINK_WIN) || (limit > TIPC_MAX_LINK_WIN)) + if (limit < BCLINK_WIN_MIN) + limit = BCLINK_WIN_MIN; + if (limit > TIPC_MAX_LINK_WIN) return -EINVAL; - tipc_bclink_lock(net); tipc_link_set_queue_limits(bcl, limit); tipc_bclink_unlock(net); From f1900fb5eca2cf9b96837e4931165003918d7d29 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 19 Oct 2015 08:26:05 -0700 Subject: [PATCH 18/79] net: Really fix vti6 with oif in dst lookups 6e28b000825d ("net: Fix vti use case with oif in dst lookups for IPv6") is missing the checks on FLOWI_FLAG_SKIP_NH_OIF. Add them. Fixes: 42a7b32b73d6 ("xfrm: Add oif to dst lookups") Cc: Steffen Klassert Signed-off-by: David Ahern Acked-by: Steffen Klassert Signed-off-by: David S. 
Miller --- net/ipv6/ip6_output.c | 3 ++- net/ipv6/route.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 61d403ee1031..d03d6da772f3 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -877,7 +877,8 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, #ifdef CONFIG_IPV6_SUBTREES ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || #endif - (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { + (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) && + (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) { dst_release(dst); dst = NULL; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 968f31c01f89..be5d2879b5d8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1068,6 +1068,9 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); saved_fn = fn; + if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) + oif = 0; + redo_rt6_select: rt = rt6_select(fn, oif, strict); if (rt->rt6i_nsiblings) From 1241365f1aeb24ef0ffe82970f7c558022ddc85f Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 19 Oct 2015 16:31:55 +0100 Subject: [PATCH 19/79] openvswitch: Allocate memory for ovs internal device stats. "openvswitch: Remove vport stats" removed the per-vport statistics, in order to use the netdev's statistics fields. "openvswitch: Fix ovs_vport_get_stats()" fixed the export of these stats to user-space, by using the provided netdev_ops to collate them - but ovs internal devices still use an unallocated dev->tstats field to count packets, which are no longer exported by this api. Allocate the dev->tstats field for ovs internal devices, and wire up ndo_get_stats64 with the original implementation of ovs_vport_get_stats(). On its own, "openvswitch: Fix ovs_vport_get_stats()" fixes the OOPs, unmasking a full-on panic on arm64: =============%<============== [] internal_dev_recv+0xa8/0x170 [openvswitch] [] do_output.isra.31+0x60/0x19c [openvswitch] [] do_execute_actions+0x208/0x11c0 [openvswitch] [] ovs_execute_actions+0xc8/0x238 [openvswitch] [] ovs_packet_cmd_execute+0x21c/0x288 [openvswitch] [] genl_family_rcv_msg+0x1b0/0x310 [] genl_rcv_msg+0xa4/0xe4 [] netlink_rcv_skb+0xb0/0xdc [] genl_rcv+0x38/0x50 [] netlink_unicast+0x164/0x210 [] netlink_sendmsg+0x304/0x368 [] sock_sendmsg+0x30/0x4c [SNIP] Kernel panic - not syncing: Fatal exception in interrupt =============%<============== Fixes: 8c876639c985 ("openvswitch: Remove vport stats.") Signed-off-by: James Morse Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- net/openvswitch/vport-internal_dev.c | 46 ++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 388b8a6bf112..b3934126daa8 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -106,12 +106,45 @@ static void internal_dev_destructor(struct net_device *dev) free_netdev(dev); } +static struct rtnl_link_stats64 * +internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) +{ + int i; + + memset(stats, 0, sizeof(*stats)); + stats->rx_errors = dev->stats.rx_errors; + stats->tx_errors = dev->stats.tx_errors; + stats->tx_dropped = dev->stats.tx_dropped; + stats->rx_dropped = dev->stats.rx_dropped; + + for_each_possible_cpu(i) { + const struct pcpu_sw_netstats *percpu_stats; + struct pcpu_sw_netstats local_stats; + unsigned int start; + + percpu_stats = per_cpu_ptr(dev->tstats, i); + + do { + start = u64_stats_fetch_begin_irq(&percpu_stats->syncp); + local_stats = *percpu_stats; + } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start)); + + stats->rx_bytes += local_stats.rx_bytes; + stats->rx_packets += local_stats.rx_packets; + stats->tx_bytes += local_stats.tx_bytes; + stats->tx_packets += local_stats.tx_packets; + } + + return stats; +} + static const struct net_device_ops internal_dev_netdev_ops = { .ndo_open = internal_dev_open, .ndo_stop = internal_dev_stop, .ndo_start_xmit = internal_dev_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_change_mtu = internal_dev_change_mtu, + .ndo_get_stats64 = internal_get_stats, }; static struct rtnl_link_ops internal_dev_link_ops __read_mostly = { @@ -161,6 +194,11 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) err = -ENOMEM; goto error_free_vport; } + vport->dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!vport->dev->tstats) { + err = -ENOMEM; + goto error_free_netdev; + } dev_net_set(vport->dev, ovs_dp_get_net(vport->dp)); internal_dev = internal_dev_priv(vport->dev); @@ -173,7 +211,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) rtnl_lock(); err = register_netdevice(vport->dev); if (err) - goto error_free_netdev; + goto error_unlock; dev_set_promiscuity(vport->dev, 1); rtnl_unlock(); @@ -181,8 +219,10 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) return vport; -error_free_netdev: +error_unlock: rtnl_unlock(); + free_percpu(vport->dev->tstats); +error_free_netdev: free_netdev(vport->dev); error_free_vport: ovs_vport_free(vport); @@ -198,7 +238,7 @@ static void internal_dev_destroy(struct vport *vport) /* unregister_netdevice() waits for an RCU grace period. */ unregister_netdevice(vport->dev); - + free_percpu(vport->dev->tstats); rtnl_unlock(); } From 45c8b7b175ceb2d542e0fe15247377bf3bce29ec Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Mon, 19 Oct 2015 11:33:00 -0400 Subject: [PATCH 20/79] tipc: allow non-linear first fragment buffer The current code for message reassembly erroneously assumes that the first arriving fragment buffer is always linear, and then goes ahead and resets the fragment list of that buffer in anticipation of more arriving fragments. However, if the buffer already happens to be non-linear, we will inadvertently drop the already attached fragment list, and later on trigger a BUG() in __pskb_pull_tail(). We see this happen when running fragmented TIPC multicast across UDP, something made possible since commit d0f91938bede ("tipc: add ip/udp media type"). We fix this by not resetting the fragment list when the buffer is non-linear, and by initializing our private fragment list tail pointer to the tail of the existing fragment list. Fixes: commit d0f91938bede ("tipc: add ip/udp media type") Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/msg.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/tipc/msg.c b/net/tipc/msg.c index c5ac436235e0..5f73450159df 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -121,7 +121,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) { struct sk_buff *head = *headbuf; struct sk_buff *frag = *buf; - struct sk_buff *tail; + struct sk_buff *tail = NULL; struct tipc_msg *msg; u32 fragid; int delta; @@ -141,9 +141,15 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (unlikely(skb_unclone(frag, GFP_ATOMIC))) goto err; head = *headbuf = frag; - skb_frag_list_init(head); - TIPC_SKB_CB(head)->tail = NULL; *buf = NULL; + TIPC_SKB_CB(head)->tail = NULL; + if (skb_is_nonlinear(head)) { + skb_walk_frags(head, tail) { + TIPC_SKB_CB(head)->tail = tail; + } + } else { + skb_frag_list_init(head); + } return 0; } From 7a4264a9250fc6d555ff305aa5e5168723805bb9 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Mon, 19 Oct 2015 17:37:13 +0200 Subject: [PATCH 21/79] net: cavium: change NET_VENDOR_CAVIUM to bool CONFIG_NET_VENDOR_CAVIUM is only used to hide/show config options and to include subdirectories in the build, so it doesn't make sense to make it tristate. Signed-off-by: Andreas Schwab Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig index 9b35d142f47a..8fb84e69c30e 100644 --- a/drivers/net/ethernet/cavium/Kconfig +++ b/drivers/net/ethernet/cavium/Kconfig @@ -3,7 +3,7 @@ # config NET_VENDOR_CAVIUM - tristate "Cavium ethernet drivers" + bool "Cavium ethernet drivers" depends on PCI default y ---help--- From e53567948f82368247b4b1a63fcab4c76ef7d51c Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Mon, 19 Oct 2015 11:43:11 -0400 Subject: [PATCH 22/79] tipc: conditionally expand buffer headroom over udp tunnel In commit d999297c3dbbe ("tipc: reduce locking scope during packet reception") we altered the packet retransmission function. Since then, when retransmitting packets, we create a clone of the original buffer using __pskb_copy(skb, MIN_H_SIZE), where MIN_H_SIZE is the size of the area we want to have copied, but also the smallest possible TIPC packet size. The value of MIN_H_SIZE is 24. Unfortunately, __pskb_copy() also has the effect that the headroom of the cloned buffer takes the size MIN_H_SIZE. This is too small for carrying the packet over the UDP tunnel bearer, which requires a minimum headroom of 28 bytes. A change to just use pskb_copy() lets the clone inherit the original headroom of 80 bytes, but also assumes that the copied data area is of at least that size, something that is not always the case. So that is not a viable solution. We now fix this by adding a check for sufficient headroom in the transmit function of udp_media.c, and expanding it when necessary. Fixes: commit d999297c3dbbe ("tipc: reduce locking scope during packet reception") Signed-off-by: Jon Maloy Signed-off-by: David S.
Miller --- net/tipc/udp_media.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index c170d3138953..6e648d90297a 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -52,6 +52,8 @@ /* IANA assigned UDP port */ #define UDP_PORT_DEFAULT 6118 +#define UDP_MIN_HEADROOM 28 + static const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = { [TIPC_NLA_UDP_UNSPEC] = {.type = NLA_UNSPEC}, [TIPC_NLA_UDP_LOCAL] = {.type = NLA_BINARY, @@ -156,6 +158,9 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, struct sk_buff *clone; struct rtable *rt; + if (skb_headroom(skb) < UDP_MIN_HEADROOM) + pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC); + clone = skb_clone(skb, GFP_ATOMIC); skb_set_inner_protocol(clone, htons(ETH_P_TIPC)); ub = rcu_dereference_rtnl(b->media_ptr); From e2e8009ff72ad2a795b67785f3238af152146368 Mon Sep 17 00:00:00 2001 From: Renato Westphal Date: Mon, 19 Oct 2015 18:51:34 -0200 Subject: [PATCH 23/79] tcp: remove improper preemption check in tcp_xmit_probe_skb() Commit e520af48c7e5a introduced the following bug when setting the TCP_REPAIR sockoption: [ 2860.657036] BUG: using __this_cpu_add() in preemptible [00000000] code: daemon/12164 [ 2860.657045] caller is __this_cpu_preempt_check+0x13/0x20 [ 2860.657049] CPU: 1 PID: 12164 Comm: daemon Not tainted 4.2.3 #1 [ 2860.657051] Hardware name: Dell Inc. PowerEdge R210 II/0JP7TR, BIOS 2.0.5 03/13/2012 [ 2860.657054] ffffffff81c7f071 ffff880231e9fdf8 ffffffff8185d765 0000000000000002 [ 2860.657058] 0000000000000001 ffff880231e9fe28 ffffffff8146ed91 ffff880231e9fe18 [ 2860.657062] ffffffff81cd1a5d ffff88023534f200 ffff8800b9811000 ffff880231e9fe38 [ 2860.657065] Call Trace: [ 2860.657072] [] dump_stack+0x4f/0x7b [ 2860.657075] [] check_preemption_disabled+0xe1/0xf0 [ 2860.657078] [] __this_cpu_preempt_check+0x13/0x20 [ 2860.657082] [] tcp_xmit_probe_skb+0xc7/0x100 [ 2860.657085] [] tcp_send_window_probe+0x2d/0x30 [ 2860.657089] [] do_tcp_setsockopt.isra.29+0x74c/0x830 [ 2860.657093] [] tcp_setsockopt+0x2c/0x30 [ 2860.657097] [] sock_common_setsockopt+0x14/0x20 [ 2860.657100] [] SyS_setsockopt+0x71/0xc0 [ 2860.657104] [] entry_SYSCALL_64_fastpath+0x16/0x75 Since tcp_xmit_probe_skb() can be called from process context, use NET_INC_STATS() instead of NET_INC_STATS_BH(). Fixes: e520af48c7e5 ("tcp: add TCPWinProbe and TCPKeepAlive SNMP counters") Signed-off-by: Renato Westphal Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1100ffe4a722..3dbee0d83b15 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3405,7 +3405,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib) */ tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); skb_mstamp_get(&skb->skb_mstamp); - NET_INC_STATS_BH(sock_net(sk), mib); + NET_INC_STATS(sock_net(sk), mib); return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); } From 9e384715e9e702704c6941c575f0e6b322132a3a Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Mon, 19 Oct 2015 19:18:57 -0700 Subject: [PATCH 24/79] openvswitch: Reject ct_state masks for unknown bits Currently, 0-bits are generated in ct_state where the bit position is undefined, and matches are accepted on these bit-positions. If userspace requests to match the 0-value for this bit then it may expect only a subset of traffic to match this value, whereas currently all packets will have this bit set to 0. 
Fix this by rejecting such masks. Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Acked-by: Thomas Graf Signed-off-by: David S. Miller --- net/openvswitch/conntrack.h | 16 +++++----------- net/openvswitch/flow_netlink.c | 5 ++++- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h index da8714942c95..82e0dfc66028 100644 --- a/net/openvswitch/conntrack.h +++ b/net/openvswitch/conntrack.h @@ -35,12 +35,9 @@ void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key); int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb); void ovs_ct_free_action(const struct nlattr *a); -static inline bool ovs_ct_state_supported(u32 state) -{ - return !(state & ~(OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | - OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR | - OVS_CS_F_INVALID | OVS_CS_F_TRACKED)); -} +#define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \ + OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR | \ + OVS_CS_F_INVALID | OVS_CS_F_TRACKED) #else #include @@ -53,11 +50,6 @@ static inline bool ovs_ct_verify(struct net *net, int attr) return false; } -static inline bool ovs_ct_state_supported(u32 state) -{ - return false; -} - static inline int ovs_ct_copy_action(struct net *net, const struct nlattr *nla, const struct sw_flow_key *key, struct sw_flow_actions **acts, bool log) @@ -94,5 +86,7 @@ static inline int ovs_ct_put_key(const struct sw_flow_key *key, } static inline void ovs_ct_free_action(const struct nlattr *a) { } + +#define CT_SUPPORTED_MASK 0 #endif /* CONFIG_NF_CONNTRACK */ #endif /* ovs_conntrack.h */ diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 171a691f1c32..bd710bc37469 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -816,7 +816,7 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) { u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]); - if (!is_mask && !ovs_ct_state_supported(ct_state)) { + if (ct_state & ~CT_SUPPORTED_MASK) { OVS_NLERR(log, "ct_state flags %08x unsupported", ct_state); return -EINVAL; @@ -1099,6 +1099,9 @@ static void nlattr_set(struct nlattr *attr, u8 val, } else { memset(nla_data(nla), val, nla_len(nla)); } + + if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE) + *(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK; } } From 1d008a1df927846788b9dc02e770f65951f98ddc Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Mon, 19 Oct 2015 19:18:58 -0700 Subject: [PATCH 25/79] openvswitch: Clarify conntrack COMMIT behaviour The presence of this attribute does not modify the ct_state for the current packet, only future packets. Make this more clear in the header definition. Signed-off-by: Joe Stringer Acked-by: Thomas Graf Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 036f73bc54cd..e663627a8ef3 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -620,7 +620,8 @@ struct ovs_action_hash { * enum ovs_ct_attr - Attributes for %OVS_ACTION_ATTR_CT action. * @OVS_CT_ATTR_COMMIT: If present, commits the connection to the conntrack * table. This allows future packets for the same connection to be identified - * as 'established' or 'related'. + * as 'established' or 'related'. 
The flow key for the current packet will + * retain the pre-commit connection state. * @OVS_CT_ATTR_ZONE: u16 connection tracking zone. * @OVS_CT_ATTR_MARK: u32 value followed by u32 mask. For each bit set in the * mask, the corresponding bit in the value is copied to the connection From 4f0909ee3d8e3514a274121f3bf217a4920fa12d Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Mon, 19 Oct 2015 19:18:59 -0700 Subject: [PATCH 26/79] openvswitch: Mark connections new when not confirmed. New, related connections are marked as such as part of ovs_ct_lookup(), but they are not marked as "new" if the commit flag is used. Make this consistent by setting the "new" flag whenever !nf_ct_is_confirmed(ct). Reported-by: Jarno Rajahalme Signed-off-by: Joe Stringer Acked-by: Thomas Graf Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/conntrack.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 80bf702715bb..8ad121871834 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -151,6 +151,8 @@ static void ovs_ct_update_key(const struct sk_buff *skb, ct = nf_ct_get(skb, &ctinfo); if (ct) { state = ovs_ct_get_state(ctinfo); + if (!nf_ct_is_confirmed(ct)) + state |= OVS_CS_F_NEW; if (ct->master) state |= OVS_CS_F_RELATED; zone = nf_ct_zone(ct); @@ -377,7 +379,7 @@ static bool skb_nfct_cached(const struct net *net, const struct sk_buff *skb, return true; } -static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key, +static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, const struct ovs_conntrack_info *info, struct sk_buff *skb) { @@ -408,6 +410,8 @@ static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key, } } + ovs_ct_update_key(skb, key, true); + return 0; } @@ -430,8 +434,6 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key, err = __ovs_ct_lookup(net, key, info, skb); if (err) return err; - - ovs_ct_update_key(skb, key, true); } return 0; @@ -460,8 +462,6 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, if (nf_conntrack_confirm(skb) != NF_ACCEPT) return -EINVAL; - ovs_ct_update_key(skb, key, true); - return 0; } From e754ec69ab69601420eb2ed58ece803d77c93f61 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Mon, 19 Oct 2015 19:19:00 -0700 Subject: [PATCH 27/79] openvswitch: Serialize nested ct actions if provided If userspace provides a ct action with no nested mark or label, then the storage for these fields is zeroed. Later when actions are requested, such zeroed fields are serialized even though userspace didn't originally specify them. Fix the behaviour by ensuring that no action is serialized in this case, and reject actions where userspace attempts to set these fields with mask=0. This should make netlink marshalling consistent across deserialization/reserialization. Reported-by: Jarno Rajahalme Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Acked-by: Thomas Graf Signed-off-by: David S. 
Miller --- net/openvswitch/conntrack.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 8ad121871834..a5ec34f8502f 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -224,9 +224,6 @@ static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key, struct nf_conn *ct; int err; - if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)) - return -ENOTSUPP; - /* The connection could be invalid, in which case set_label is no-op.*/ ct = nf_ct_get(skb, &ctinfo); if (!ct) @@ -587,6 +584,10 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, case OVS_CT_ATTR_MARK: { struct md_mark *mark = nla_data(a); + if (!mark->mask) { + OVS_NLERR(log, "ct_mark mask cannot be 0"); + return -EINVAL; + } info->mark = *mark; break; } @@ -595,6 +596,10 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, case OVS_CT_ATTR_LABELS: { struct md_labels *labels = nla_data(a); + if (!labels_nonzero(&labels->mask)) { + OVS_NLERR(log, "ct_labels mask cannot be 0"); + return -EINVAL; + } info->labels = *labels; break; } @@ -705,11 +710,12 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id)) return -EMSGSIZE; - if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && + if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && ct_info->mark.mask && nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark), &ct_info->mark)) return -EMSGSIZE; if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && + labels_nonzero(&ct_info->labels.mask) && nla_put(skb, OVS_CT_ATTR_LABELS, sizeof(ct_info->labels), &ct_info->labels)) return -EMSGSIZE; From 104eb270e665f4fcd8cb8c8ab4c4d4538c604e92 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 20 Oct 2015 10:42:24 +0200 Subject: [PATCH 28/79] net: sun4i-emac: Properly free resources on probe failure and remove Fix sun4i-emac not releasing the following resources: -iomapped memory not released on probe-failure nor on remove -clock not getting disabled on probe-failure nor on remove -sram not being released on remove And while at it also add error checking to the clk_prepare_enable call done on probe. Signed-off-by: Hans de Goede Acked-by: Maxime Ripard Signed-off-by: David S. 
Miller --- drivers/net/ethernet/allwinner/sun4i-emac.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c index 48ce83e443c2..8d50314ac3eb 100644 --- a/drivers/net/ethernet/allwinner/sun4i-emac.c +++ b/drivers/net/ethernet/allwinner/sun4i-emac.c @@ -847,21 +847,25 @@ static int emac_probe(struct platform_device *pdev) if (ndev->irq == -ENXIO) { netdev_err(ndev, "No irq resource\n"); ret = ndev->irq; - goto out; + goto out_iounmap; } db->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(db->clk)) { ret = PTR_ERR(db->clk); - goto out; + goto out_iounmap; } - clk_prepare_enable(db->clk); + ret = clk_prepare_enable(db->clk); + if (ret) { + dev_err(&pdev->dev, "Error couldn't enable clock (%d)\n", ret); + goto out_iounmap; + } ret = sunxi_sram_claim(&pdev->dev); if (ret) { dev_err(&pdev->dev, "Error couldn't map SRAM to device\n"); - goto out; + goto out_clk_disable_unprepare; } db->phy_node = of_parse_phandle(np, "phy", 0); @@ -910,6 +914,10 @@ static int emac_probe(struct platform_device *pdev) out_release_sram: sunxi_sram_release(&pdev->dev); +out_clk_disable_unprepare: + clk_disable_unprepare(db->clk); +out_iounmap: + iounmap(db->membase); out: dev_err(db->dev, "not found (%d).\n", ret); @@ -921,8 +929,12 @@ out: static int emac_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); + struct emac_board_info *db = netdev_priv(ndev); unregister_netdev(ndev); + sunxi_sram_release(&pdev->dev); + clk_disable_unprepare(db->clk); + iounmap(db->membase); free_netdev(ndev); dev_dbg(&pdev->dev, "released and freed device\n"); From 34e45ad9378c31ef2b59e8bd63d62f0ca8e719a3 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Tue, 20 Oct 2015 16:28:57 -0500 Subject: [PATCH 29/79] net: phy: dp83848: Add TI DP83848 Ethernet PHY Add support for the TI DP83848 Ethernet PHY device. The DP83848 is a highly reliable, feature rich, IEEE 802.3 compliant single port 10/100 Mb/s Ethernet Physical Layer Transceiver supporting the MII and RMII interfaces. Signed-off-by: Andrew F. Davis Signed-off-by: Dan Murphy Reviewed-by: Florian Fainelli Acked-by: Dan Murphy Signed-off-by: David S. Miller --- drivers/net/phy/Kconfig | 5 ++ drivers/net/phy/Makefile | 1 + drivers/net/phy/dp83848.c | 99 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+) create mode 100644 drivers/net/phy/dp83848.c diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 11e3975485c1..436972b2a746 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -122,6 +122,11 @@ config MICREL_PHY ---help--- Supports the KSZ9021, VSC8201, KS8001 PHYs. +config DP83848_PHY + tristate "Driver for Texas Instruments DP83848 PHY" + ---help--- + Supports the DP83848 PHY. 
+ config DP83867_PHY tristate "Drivers for Texas Instruments DP83867 Gigabit PHY" ---help--- diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 87f079c4b2c7..b74822463930 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_MDIO_BITBANG) += mdio-bitbang.o obj-$(CONFIG_MDIO_GPIO) += mdio-gpio.o obj-$(CONFIG_NATIONAL_PHY) += national.o obj-$(CONFIG_DP83640_PHY) += dp83640.o +obj-$(CONFIG_DP83848_PHY) += dp83848.o obj-$(CONFIG_DP83867_PHY) += dp83867.o obj-$(CONFIG_STE10XP) += ste10Xp.o obj-$(CONFIG_MICREL_PHY) += micrel.o diff --git a/drivers/net/phy/dp83848.c b/drivers/net/phy/dp83848.c new file mode 100644 index 000000000000..5ce9bef54468 --- /dev/null +++ b/drivers/net/phy/dp83848.c @@ -0,0 +1,99 @@ +/* + * Driver for the Texas Instruments DP83848 PHY + * + * Copyright (C) 2015 Texas Instruments Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include + +#define DP83848_PHY_ID 0x20005c90 + +/* Registers */ +#define DP83848_MICR 0x11 +#define DP83848_MISR 0x12 + +/* MICR Register Fields */ +#define DP83848_MICR_INT_OE BIT(0) /* Interrupt Output Enable */ +#define DP83848_MICR_INTEN BIT(1) /* Interrupt Enable */ + +/* MISR Register Fields */ +#define DP83848_MISR_RHF_INT_EN BIT(0) /* Receive Error Counter */ +#define DP83848_MISR_FHF_INT_EN BIT(1) /* False Carrier Counter */ +#define DP83848_MISR_ANC_INT_EN BIT(2) /* Auto-negotiation complete */ +#define DP83848_MISR_DUP_INT_EN BIT(3) /* Duplex Status */ +#define DP83848_MISR_SPD_INT_EN BIT(4) /* Speed status */ +#define DP83848_MISR_LINK_INT_EN BIT(5) /* Link status */ +#define DP83848_MISR_ED_INT_EN BIT(6) /* Energy detect */ +#define DP83848_MISR_LQM_INT_EN BIT(7) /* Link Quality Monitor */ + +static int dp83848_ack_interrupt(struct phy_device *phydev) +{ + int err = phy_read(phydev, DP83848_MISR); + + return err < 0 ? 
err : 0; +} + +static int dp83848_config_intr(struct phy_device *phydev) +{ + int err; + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + err = phy_write(phydev, DP83848_MICR, + DP83848_MICR_INT_OE | + DP83848_MICR_INTEN); + if (err < 0) + return err; + + return phy_write(phydev, DP83848_MISR, + DP83848_MISR_ANC_INT_EN | + DP83848_MISR_DUP_INT_EN | + DP83848_MISR_SPD_INT_EN | + DP83848_MISR_LINK_INT_EN); + } + + return phy_write(phydev, DP83848_MICR, 0x0); +} + +static struct mdio_device_id __maybe_unused dp83848_tbl[] = { + { DP83848_PHY_ID, 0xfffffff0 }, + { } +}; +MODULE_DEVICE_TABLE(mdio, dp83848_tbl); + +static struct phy_driver dp83848_driver[] = { + { + .phy_id = DP83848_PHY_ID, + .phy_id_mask = 0xfffffff0, + .name = "TI DP83848", + .features = PHY_BASIC_FEATURES, + .flags = PHY_HAS_INTERRUPT, + + .soft_reset = genphy_soft_reset, + .config_init = genphy_config_init, + .suspend = genphy_suspend, + .resume = genphy_resume, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + + /* IRQ related */ + .ack_interrupt = dp83848_ack_interrupt, + .config_intr = dp83848_config_intr, + + .driver = { .owner = THIS_MODULE, }, + }, +}; +module_phy_driver(dp83848_driver); + +MODULE_DESCRIPTION("Texas Instruments DP83848 PHY driver"); +MODULE_AUTHOR("Andrew F. Davis");
From: David Herrmann Date: Wed, 21 Oct 2015 11:47:43 +0200 Subject: [PATCH 30/79] netlink: fix locking around NETLINK_LIST_MEMBERSHIPS Currently, NETLINK_LIST_MEMBERSHIPS grabs the netlink table while copying the membership state to user-space. However, grabbing the netlink table is effectively a write_lock_irq(), and as such we should not be triggering page-faults in the critical section. This can be easily reproduced by the following snippet: int s = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); void *p = mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0); int r = getsockopt(s, 0x10e, 9, p, (void*)((char*)p + 4092)); This should work just fine, but currently triggers EFAULT and a possible WARN_ON below handle_mm_fault(). Fix this by reducing locking of NETLINK_LIST_MEMBERSHIPS to a read-side lock. The write-lock was overkill in the first place, and the read-lock allows page-faults just fine. Reported-by: Dmitry Vyukov Signed-off-by: David Herrmann Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 0a49a8c7c564..fafe33bdb619 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2371,7 +2371,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, int pos, idx, shift; err = 0; - netlink_table_grab(); + netlink_lock_table(); for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) { if (len - pos < sizeof(u32)) break; @@ -2386,7 +2386,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, } if (put_user(ALIGN(nlk->ngroups / 8, sizeof(u32)), optlen)) err = -EFAULT; - netlink_table_ungrab(); + netlink_unlock_table(); break; } case NETLINK_CAP_ACK:
From 4ef7ea9195ea73262cd9730fb54e1eb726da157b Mon Sep 17 00:00:00 2001 From: Jorgen Hansen Date: Wed, 21 Oct 2015 04:53:56 -0700 Subject: [PATCH 31/79] VSOCK: sock_put wasn't safe to call in interrupt context In the vsock vmci_transport driver, sock_put wasn't safe to call in interrupt context, since that may call the vsock destructor which in turn calls several functions that should only be called from process context.
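The way out of this constraint, and the pattern the diff below adopts, is to defer the blocking teardown to a workqueue. What follows is only an editor's simplified sketch of that deferred-cleanup pattern, using hypothetical foo_* names rather than the driver's actual code; the real change is in the diff further down.

/* Simplified, hypothetical sketch of deferring teardown to process
 * context; the foo_* names are illustrative, not the driver's code.
 */
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

struct foo_transport {
	struct list_head elem;
	/* resources that may only be released in process context */
};

static LIST_HEAD(foo_cleanup_list);
static DEFINE_SPINLOCK(foo_cleanup_lock);
static void foo_cleanup(struct work_struct *work);
static DECLARE_WORK(foo_cleanup_work, foo_cleanup);

static void foo_cleanup(struct work_struct *work)
{
	struct foo_transport *t, *tmp;
	LIST_HEAD(pending);

	/* Detach the whole list under the lock, free outside of it. */
	spin_lock_bh(&foo_cleanup_lock);
	list_replace_init(&foo_cleanup_list, &pending);
	spin_unlock_bh(&foo_cleanup_lock);

	list_for_each_entry_safe(t, tmp, &pending, elem) {
		list_del(&t->elem);
		/* Blocking teardown (event unsubscribe, queue pair detach,
		 * ...) is safe here because the work item runs in process
		 * context.
		 */
		kfree(t);
	}
}

/* Called from the socket destructor, possibly from a BH (softirq) path. */
static void foo_schedule_destruct(struct foo_transport *t)
{
	spin_lock_bh(&foo_cleanup_lock);
	list_add(&t->elem, &foo_cleanup_list);
	spin_unlock_bh(&foo_cleanup_lock);
	schedule_work(&foo_cleanup_work);
}

Splicing the list with list_replace_init() and freeing outside the lock keeps the atomic path cheap; schedule_work() itself is safe to call from atomic context.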
This change defers the callling of these functions to a worker thread. All these functions were deallocation of resources related to the transport itself. Furthermore, an unused callback was removed to simplify the cleanup. Multiple customers have been hitting this issue when using VMware tools on vSphere 2015. Also added a version to the vmci transport module (starting from 1.0.2.0-k since up until now it appears that this module was sharing version with vsock that is currently at 1.0.1.0-k). Reviewed-by: Aditya Asarwade Reviewed-by: Thomas Hellstrom Signed-off-by: Jorgen Hansen Signed-off-by: David S. Miller --- net/vmw_vsock/vmci_transport.c | 177 ++++++++++++++++----------------- net/vmw_vsock/vmci_transport.h | 4 +- 2 files changed, 88 insertions(+), 93 deletions(-) diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 1f63daff3965..5243ce2b2c18 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -40,13 +40,11 @@ static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg); static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg); -static void vmci_transport_peer_attach_cb(u32 sub_id, - const struct vmci_event_data *ed, - void *client_data); static void vmci_transport_peer_detach_cb(u32 sub_id, const struct vmci_event_data *ed, void *client_data); static void vmci_transport_recv_pkt_work(struct work_struct *work); +static void vmci_transport_cleanup(struct work_struct *work); static int vmci_transport_recv_listen(struct sock *sk, struct vmci_transport_packet *pkt); static int vmci_transport_recv_connecting_server( @@ -75,6 +73,10 @@ struct vmci_transport_recv_pkt_info { struct vmci_transport_packet pkt; }; +static LIST_HEAD(vmci_transport_cleanup_list); +static DEFINE_SPINLOCK(vmci_transport_cleanup_lock); +static DECLARE_WORK(vmci_transport_cleanup_work, vmci_transport_cleanup); + static struct vmci_handle vmci_transport_stream_handle = { VMCI_INVALID_ID, VMCI_INVALID_ID }; static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; @@ -791,44 +793,6 @@ out: return err; } -static void vmci_transport_peer_attach_cb(u32 sub_id, - const struct vmci_event_data *e_data, - void *client_data) -{ - struct sock *sk = client_data; - const struct vmci_event_payload_qp *e_payload; - struct vsock_sock *vsk; - - e_payload = vmci_event_data_const_payload(e_data); - - vsk = vsock_sk(sk); - - /* We don't ask for delayed CBs when we subscribe to this event (we - * pass 0 as flags to vmci_event_subscribe()). VMCI makes no - * guarantees in that case about what context we might be running in, - * so it could be BH or process, blockable or non-blockable. So we - * need to account for all possible contexts here. - */ - local_bh_disable(); - bh_lock_sock(sk); - - /* XXX This is lame, we should provide a way to lookup sockets by - * qp_handle. - */ - if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle, - e_payload->handle)) { - /* XXX This doesn't do anything, but in the future we may want - * to set a flag here to verify the attach really did occur and - * we weren't just sent a datagram claiming it was. 
- */ - goto out; - } - -out: - bh_unlock_sock(sk); - local_bh_enable(); -} - static void vmci_transport_handle_detach(struct sock *sk) { struct vsock_sock *vsk; @@ -871,28 +835,38 @@ static void vmci_transport_peer_detach_cb(u32 sub_id, const struct vmci_event_data *e_data, void *client_data) { - struct sock *sk = client_data; + struct vmci_transport *trans = client_data; const struct vmci_event_payload_qp *e_payload; - struct vsock_sock *vsk; e_payload = vmci_event_data_const_payload(e_data); - vsk = vsock_sk(sk); - if (vmci_handle_is_invalid(e_payload->handle)) - return; - - /* Same rules for locking as for peer_attach_cb(). */ - local_bh_disable(); - bh_lock_sock(sk); /* XXX This is lame, we should provide a way to lookup sockets by * qp_handle. */ - if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle, - e_payload->handle)) - vmci_transport_handle_detach(sk); + if (vmci_handle_is_invalid(e_payload->handle) || + vmci_handle_is_equal(trans->qp_handle, e_payload->handle)) + return; - bh_unlock_sock(sk); - local_bh_enable(); + /* We don't ask for delayed CBs when we subscribe to this event (we + * pass 0 as flags to vmci_event_subscribe()). VMCI makes no + * guarantees in that case about what context we might be running in, + * so it could be BH or process, blockable or non-blockable. So we + * need to account for all possible contexts here. + */ + spin_lock_bh(&trans->lock); + if (!trans->sk) + goto out; + + /* Apart from here, trans->lock is only grabbed as part of sk destruct, + * where trans->sk isn't locked. + */ + bh_lock_sock(trans->sk); + + vmci_transport_handle_detach(trans->sk); + + bh_unlock_sock(trans->sk); + out: + spin_unlock_bh(&trans->lock); } static void vmci_transport_qp_resumed_cb(u32 sub_id, @@ -1181,7 +1155,7 @@ vmci_transport_recv_connecting_server(struct sock *listener, */ err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH, vmci_transport_peer_detach_cb, - pending, &detach_sub_id); + vmci_trans(vpending), &detach_sub_id); if (err < VMCI_SUCCESS) { vmci_transport_send_reset(pending, pkt); err = vmci_transport_error_to_vsock_error(err); @@ -1321,7 +1295,6 @@ vmci_transport_recv_connecting_client(struct sock *sk, || vmci_trans(vsk)->qpair || vmci_trans(vsk)->produce_size != 0 || vmci_trans(vsk)->consume_size != 0 - || vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID || vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) { skerr = EPROTO; err = -EINVAL; @@ -1389,7 +1362,6 @@ static int vmci_transport_recv_connecting_client_negotiate( struct vsock_sock *vsk; struct vmci_handle handle; struct vmci_qp *qpair; - u32 attach_sub_id; u32 detach_sub_id; bool is_local; u32 flags; @@ -1399,7 +1371,6 @@ static int vmci_transport_recv_connecting_client_negotiate( vsk = vsock_sk(sk); handle = VMCI_INVALID_HANDLE; - attach_sub_id = VMCI_INVALID_ID; detach_sub_id = VMCI_INVALID_ID; /* If we have gotten here then we should be past the point where old @@ -1444,23 +1415,15 @@ static int vmci_transport_recv_connecting_client_negotiate( goto destroy; } - /* Subscribe to attach and detach events first. + /* Subscribe to detach events first. * * XXX We attach once for each queue pair created for now so it is easy * to find the socket (it's provided), but later we should only * subscribe once and add a way to lookup sockets by queue pair handle. 
*/ - err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_ATTACH, - vmci_transport_peer_attach_cb, - sk, &attach_sub_id); - if (err < VMCI_SUCCESS) { - err = vmci_transport_error_to_vsock_error(err); - goto destroy; - } - err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH, vmci_transport_peer_detach_cb, - sk, &detach_sub_id); + vmci_trans(vsk), &detach_sub_id); if (err < VMCI_SUCCESS) { err = vmci_transport_error_to_vsock_error(err); goto destroy; @@ -1496,7 +1459,6 @@ static int vmci_transport_recv_connecting_client_negotiate( vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = pkt->u.size; - vmci_trans(vsk)->attach_sub_id = attach_sub_id; vmci_trans(vsk)->detach_sub_id = detach_sub_id; vmci_trans(vsk)->notify_ops->process_negotiate(sk); @@ -1504,9 +1466,6 @@ static int vmci_transport_recv_connecting_client_negotiate( return 0; destroy: - if (attach_sub_id != VMCI_INVALID_ID) - vmci_event_unsubscribe(attach_sub_id); - if (detach_sub_id != VMCI_INVALID_ID) vmci_event_unsubscribe(detach_sub_id); @@ -1607,9 +1566,11 @@ static int vmci_transport_socket_init(struct vsock_sock *vsk, vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE; vmci_trans(vsk)->qpair = NULL; vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = 0; - vmci_trans(vsk)->attach_sub_id = vmci_trans(vsk)->detach_sub_id = - VMCI_INVALID_ID; + vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID; vmci_trans(vsk)->notify_ops = NULL; + INIT_LIST_HEAD(&vmci_trans(vsk)->elem); + vmci_trans(vsk)->sk = &vsk->sk; + vmci_trans(vsk)->lock = __SPIN_LOCK_UNLOCKED(vmci_trans(vsk)->lock); if (psk) { vmci_trans(vsk)->queue_pair_size = vmci_trans(psk)->queue_pair_size; @@ -1629,29 +1590,57 @@ static int vmci_transport_socket_init(struct vsock_sock *vsk, return 0; } +static void vmci_transport_free_resources(struct list_head *transport_list) +{ + while (!list_empty(transport_list)) { + struct vmci_transport *transport = + list_first_entry(transport_list, struct vmci_transport, + elem); + list_del(&transport->elem); + + if (transport->detach_sub_id != VMCI_INVALID_ID) { + vmci_event_unsubscribe(transport->detach_sub_id); + transport->detach_sub_id = VMCI_INVALID_ID; + } + + if (!vmci_handle_is_invalid(transport->qp_handle)) { + vmci_qpair_detach(&transport->qpair); + transport->qp_handle = VMCI_INVALID_HANDLE; + transport->produce_size = 0; + transport->consume_size = 0; + } + + kfree(transport); + } +} + +static void vmci_transport_cleanup(struct work_struct *work) +{ + LIST_HEAD(pending); + + spin_lock_bh(&vmci_transport_cleanup_lock); + list_replace_init(&vmci_transport_cleanup_list, &pending); + spin_unlock_bh(&vmci_transport_cleanup_lock); + vmci_transport_free_resources(&pending); +} + static void vmci_transport_destruct(struct vsock_sock *vsk) { - if (vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID) { - vmci_event_unsubscribe(vmci_trans(vsk)->attach_sub_id); - vmci_trans(vsk)->attach_sub_id = VMCI_INVALID_ID; - } - - if (vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) { - vmci_event_unsubscribe(vmci_trans(vsk)->detach_sub_id); - vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID; - } - - if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) { - vmci_qpair_detach(&vmci_trans(vsk)->qpair); - vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE; - vmci_trans(vsk)->produce_size = 0; - vmci_trans(vsk)->consume_size = 0; - } + /* Ensure that the detach callback doesn't use the sk/vsk + * we are about to destruct. 
+ */ + spin_lock_bh(&vmci_trans(vsk)->lock); + vmci_trans(vsk)->sk = NULL; + spin_unlock_bh(&vmci_trans(vsk)->lock); if (vmci_trans(vsk)->notify_ops) vmci_trans(vsk)->notify_ops->socket_destruct(vsk); - kfree(vsk->trans); + spin_lock_bh(&vmci_transport_cleanup_lock); + list_add(&vmci_trans(vsk)->elem, &vmci_transport_cleanup_list); + spin_unlock_bh(&vmci_transport_cleanup_lock); + schedule_work(&vmci_transport_cleanup_work); + vsk->trans = NULL; } @@ -2146,6 +2135,9 @@ module_init(vmci_transport_init); static void __exit vmci_transport_exit(void) { + cancel_work_sync(&vmci_transport_cleanup_work); + vmci_transport_free_resources(&vmci_transport_cleanup_list); + if (!vmci_handle_is_invalid(vmci_transport_stream_handle)) { if (vmci_datagram_destroy_handle( vmci_transport_stream_handle) != VMCI_SUCCESS) @@ -2164,6 +2156,7 @@ module_exit(vmci_transport_exit); MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMCI transport for Virtual Sockets"); +MODULE_VERSION("1.0.2.0-k"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("vmware_vsock"); MODULE_ALIAS_NETPROTO(PF_VSOCK); diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h index ce6c9623d5f0..2ad46f39649f 100644 --- a/net/vmw_vsock/vmci_transport.h +++ b/net/vmw_vsock/vmci_transport.h @@ -119,10 +119,12 @@ struct vmci_transport { u64 queue_pair_size; u64 queue_pair_min_size; u64 queue_pair_max_size; - u32 attach_sub_id; u32 detach_sub_id; union vmci_transport_notify notify; struct vmci_transport_notify_ops *notify_ops; + struct list_head elem; + struct sock *sk; + spinlock_t lock; /* protects sk. */ }; int vmci_transport_register(void); From c7a7c95e8e18a3598c4d0f99c35e69dce591daf1 Mon Sep 17 00:00:00 2001 From: Karsten Keil Date: Wed, 21 Oct 2015 14:18:38 +0200 Subject: [PATCH 32/79] ISDN: fix OOM condition for sending queued I-Frames The skb_clone() return value was not checked and the skb_realloc_headroom() usage was wrong, the old skb was not freed. It turned out, that the skb_clone is not needed at all, the skb_realloc_headroom() will create a private copy with enough headroom and the original SKB can be used for the ACK queue. We need to requeue the original skb if the call failed, since the upper layer cannot be informed about memory shortage. Thanks to Insu Yun to remind me on this issue. Signed-off-by: Karsten Keil Signed-off-by: David S. 
Miller --- drivers/isdn/hisax/isdnl2.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/isdn/hisax/isdnl2.c b/drivers/isdn/hisax/isdnl2.c index 18accb0a79cc..c53a53f6efb6 100644 --- a/drivers/isdn/hisax/isdnl2.c +++ b/drivers/isdn/hisax/isdnl2.c @@ -1247,7 +1247,7 @@ static void l2_pull_iqueue(struct FsmInst *fi, int event, void *arg) { struct PStack *st = fi->userdata; - struct sk_buff *skb; + struct sk_buff *skb, *nskb; struct Layer2 *l2 = &st->l2; u_char header[MAX_HEADER_LEN]; int i, hdr_space_needed; @@ -1262,14 +1262,10 @@ l2_pull_iqueue(struct FsmInst *fi, int event, void *arg) return; hdr_space_needed = l2headersize(l2, 0); - if (hdr_space_needed > skb_headroom(skb)) { - struct sk_buff *orig_skb = skb; - - skb = skb_realloc_headroom(skb, hdr_space_needed); - if (!skb) { - dev_kfree_skb(orig_skb); - return; - } + nskb = skb_realloc_headroom(skb, hdr_space_needed); + if (!nskb) { + skb_queue_head(&l2->i_queue, skb); + return; } spin_lock_irqsave(&l2->lock, flags); if (test_bit(FLG_MOD128, &l2->flag)) @@ -1282,7 +1278,7 @@ l2_pull_iqueue(struct FsmInst *fi, int event, void *arg) p1); dev_kfree_skb(l2->windowar[p1]); } - l2->windowar[p1] = skb_clone(skb, GFP_ATOMIC); + l2->windowar[p1] = skb; i = sethdraddr(&st->l2, header, CMD); @@ -1295,8 +1291,8 @@ l2_pull_iqueue(struct FsmInst *fi, int event, void *arg) l2->vs = (l2->vs + 1) % 8; } spin_unlock_irqrestore(&l2->lock, flags); - memcpy(skb_push(skb, i), header, i); - st->l2.l2l1(st, PH_PULL | INDICATION, skb); + memcpy(skb_push(nskb, i), header, i); + st->l2.l2l1(st, PH_PULL | INDICATION, nskb); test_and_clear_bit(FLG_ACK_PEND, &st->l2.flag); if (!test_and_set_bit(FLG_T200_RUN, &st->l2.flag)) { FsmDelTimer(&st->l2.t203, 13); From c96356a9baa2e3d628caf52f3b83df1968628b5f Mon Sep 17 00:00:00 2001 From: Karsten Keil Date: Wed, 21 Oct 2015 14:18:39 +0200 Subject: [PATCH 33/79] mISDN: fix OOM condition for sending queued I-Frames The old code did not check the return value of skb_clone(). The extra skb_clone() is not needed at all, if using skb_realloc_headroom() instead, which gives us a private copy with enough headroom as well. We need to requeue the original skb if the call failed, because we cannot inform upper layers about the data loss. Restructure the code to minimise rollback effort if it happens. This fix kernel bug #86091 Thanks to Insu Yun to remind me on this issue. Signed-off-by: Karsten Keil Signed-off-by: David S. 
Miller --- drivers/isdn/mISDN/layer2.c | 54 ++++++++++++++----------------------- 1 file changed, 20 insertions(+), 34 deletions(-) diff --git a/drivers/isdn/mISDN/layer2.c b/drivers/isdn/mISDN/layer2.c index 949cabb88f1c..5eb380a25903 100644 --- a/drivers/isdn/mISDN/layer2.c +++ b/drivers/isdn/mISDN/layer2.c @@ -1476,7 +1476,7 @@ static void l2_pull_iqueue(struct FsmInst *fi, int event, void *arg) { struct layer2 *l2 = fi->userdata; - struct sk_buff *skb, *nskb, *oskb; + struct sk_buff *skb, *nskb; u_char header[MAX_L2HEADER_LEN]; u_int i, p1; @@ -1486,11 +1486,26 @@ l2_pull_iqueue(struct FsmInst *fi, int event, void *arg) skb = skb_dequeue(&l2->i_queue); if (!skb) return; - - if (test_bit(FLG_MOD128, &l2->flag)) + i = sethdraddr(l2, header, CMD); + if (test_bit(FLG_MOD128, &l2->flag)) { + header[i++] = l2->vs << 1; + header[i++] = l2->vr << 1; + } else + header[i++] = (l2->vr << 5) | (l2->vs << 1); + nskb = skb_realloc_headroom(skb, i); + if (!nskb) { + printk(KERN_WARNING "%s: no headroom(%d) copy for IFrame\n", + mISDNDevName4ch(&l2->ch), i); + skb_queue_head(&l2->i_queue, skb); + return; + } + if (test_bit(FLG_MOD128, &l2->flag)) { p1 = (l2->vs - l2->va) % 128; - else + l2->vs = (l2->vs + 1) % 128; + } else { p1 = (l2->vs - l2->va) % 8; + l2->vs = (l2->vs + 1) % 8; + } p1 = (p1 + l2->sow) % l2->window; if (l2->windowar[p1]) { printk(KERN_WARNING "%s: l2 try overwrite ack queue entry %d\n", @@ -1498,36 +1513,7 @@ l2_pull_iqueue(struct FsmInst *fi, int event, void *arg) dev_kfree_skb(l2->windowar[p1]); } l2->windowar[p1] = skb; - i = sethdraddr(l2, header, CMD); - if (test_bit(FLG_MOD128, &l2->flag)) { - header[i++] = l2->vs << 1; - header[i++] = l2->vr << 1; - l2->vs = (l2->vs + 1) % 128; - } else { - header[i++] = (l2->vr << 5) | (l2->vs << 1); - l2->vs = (l2->vs + 1) % 8; - } - - nskb = skb_clone(skb, GFP_ATOMIC); - p1 = skb_headroom(nskb); - if (p1 >= i) - memcpy(skb_push(nskb, i), header, i); - else { - printk(KERN_WARNING - "%s: L2 pull_iqueue skb header(%d/%d) too short\n", - mISDNDevName4ch(&l2->ch), i, p1); - oskb = nskb; - nskb = mI_alloc_skb(oskb->len + i, GFP_ATOMIC); - if (!nskb) { - dev_kfree_skb(oskb); - printk(KERN_WARNING "%s: no skb mem in %s\n", - mISDNDevName4ch(&l2->ch), __func__); - return; - } - memcpy(skb_put(nskb, i), header, i); - memcpy(skb_put(nskb, oskb->len), oskb->data, oskb->len); - dev_kfree_skb(oskb); - } + memcpy(skb_push(nskb, i), header, i); l2down(l2, PH_DATA_REQ, l2_newid(l2), nskb); test_and_clear_bit(FLG_ACK_PEND, &l2->flag); if (!test_and_set_bit(FLG_T200_RUN, &l2->flag)) { From d46a9d678e4c9fac1e968d0593e4dba683389324 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 21 Oct 2015 08:42:22 -0700 Subject: [PATCH 34/79] net: ipv6: Dont add RT6_LOOKUP_F_IFACE flag if saddr set 741a11d9e410 ("net: ipv6: Add RT6_LOOKUP_F_IFACE flag if oif is set") adds the RT6_LOOKUP_F_IFACE flag to make device index mismatch fatal if oif is given. Hajime reported that this change breaks the Mobile IPv6 use case that wants to force the message through one interface yet use the source address from another interface. Handle this case by only adding the flag if oif is set and saddr is not set. Fixes: 741a11d9e410 ("net: ipv6: Add RT6_LOOKUP_F_IFACE flag if oif is set") Cc: Hajime Tazaki Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index be5d2879b5d8..946880ad48ac 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1193,14 +1193,16 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, struct flowi6 *fl6) { int flags = 0; + bool any_src; fl6->flowi6_iif = LOOPBACK_IFINDEX; + any_src = ipv6_addr_any(&fl6->saddr); if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) || - fl6->flowi6_oif) + (fl6->flowi6_oif && any_src)) flags |= RT6_LOOKUP_F_IFACE; - if (!ipv6_addr_any(&fl6->saddr)) + if (!any_src) flags |= RT6_LOOKUP_F_HAS_SADDR; else if (sk) flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); From 0db65fcfcded76fe4f74e3ca9f4e2baf67b683ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Thu, 22 Oct 2015 14:15:58 +0200 Subject: [PATCH 35/79] qmi_wwan: add Sierra Wireless MC74xx/EM74xx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New device IDs shamelessly lifted from the vendor driver. Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 355842b85ee9..2a7c1be23c4f 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -765,6 +765,10 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1199, 0x9056, 8)}, /* Sierra Wireless Modem */ {QMI_FIXED_INTF(0x1199, 0x9057, 8)}, {QMI_FIXED_INTF(0x1199, 0x9061, 8)}, /* Sierra Wireless Modem */ + {QMI_FIXED_INTF(0x1199, 0x9070, 8)}, /* Sierra Wireless MC74xx/EM74xx */ + {QMI_FIXED_INTF(0x1199, 0x9070, 10)}, /* Sierra Wireless MC74xx/EM74xx */ + {QMI_FIXED_INTF(0x1199, 0x9071, 8)}, /* Sierra Wireless MC74xx/EM74xx */ + {QMI_FIXED_INTF(0x1199, 0x9071, 10)}, /* Sierra Wireless MC74xx/EM74xx */ {QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */ {QMI_FIXED_INTF(0x1bbb, 0x0203, 2)}, /* Alcatel L800MA */ {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */ From 74a6c66565f5f4eda4aaae53e2e325deeedeaca2 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Fri, 2 Oct 2015 19:09:34 -0700 Subject: [PATCH 36/79] i40e: fix stats offsets The code was setting up stats that were not being initialized. This caused several counters to be displayed incorrectly, due to indexing beyond the array of strings when printing stats. Signed-off-by: Jesse Brandeburg Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index e972b5ecbf0b..13a5d4cf494b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -1344,6 +1344,12 @@ static void i40e_get_ethtool_stats(struct net_device *netdev, data[i++] = (i40e_gstrings_veb_stats[j].sizeof_stat == sizeof(u64)) ? 
*(u64 *)p : *(u32 *)p; } + for (j = 0; j < I40E_MAX_TRAFFIC_CLASS; j++) { + data[i++] = veb->tc_stats.tc_tx_packets[j]; + data[i++] = veb->tc_stats.tc_tx_bytes[j]; + data[i++] = veb->tc_stats.tc_rx_packets[j]; + data[i++] = veb->tc_stats.tc_rx_bytes[j]; + } } for (j = 0; j < I40E_GLOBAL_STATS_LEN; j++) { p = (char *)pf + i40e_gstrings_stats[j].stat_offset; From e9e53662d8130dd950885e37dc1d97008e1283f9 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Fri, 2 Oct 2015 17:57:21 -0700 Subject: [PATCH 37/79] i40e: fix annoying message The driver was printing a message about not being able to assign VMDq because of a lack of MSI-X vectors. This was because a line was missing that initialized a variable, simply a merge error. Signed-off-by: Jesse Brandeburg Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index dd44fafd8798..3dd26cdd0bf2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7911,6 +7911,7 @@ static int i40e_sw_init(struct i40e_pf *pf) if (pf->hw.func_caps.vmdq) { pf->num_vmdq_vsis = I40E_DEFAULT_NUM_VMDQ_VSI; pf->flags |= I40E_FLAG_VMDQ_ENABLED; + pf->num_vmdq_qps = i40e_default_queues_per_vmdq(pf); } #ifdef I40E_FCOE From 8566b86ab9f0f45bc6f7dd422b21de9d0cf5415a Mon Sep 17 00:00:00 2001 From: Jorgen Hansen Date: Thu, 22 Oct 2015 08:25:25 -0700 Subject: [PATCH 38/79] VSOCK: Fix lockdep issue. The recent fix for the vsock sock_put issue used the wrong initializer for the transport spin_lock causing an issue when running with lockdep checking. Testing: Verified fix on kernel with lockdep enabled. Reviewed-by: Thomas Hellstrom Signed-off-by: Jorgen Hansen Signed-off-by: David S. Miller --- net/vmw_vsock/vmci_transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 5243ce2b2c18..7555cad83a75 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1570,7 +1570,7 @@ static int vmci_transport_socket_init(struct vsock_sock *vsk, vmci_trans(vsk)->notify_ops = NULL; INIT_LIST_HEAD(&vmci_trans(vsk)->elem); vmci_trans(vsk)->sk = &vsk->sk; - vmci_trans(vsk)->lock = __SPIN_LOCK_UNLOCKED(vmci_trans(vsk)->lock); + spin_lock_init(&vmci_trans(vsk)->lock); if (psk) { vmci_trans(vsk)->queue_pair_size = vmci_trans(psk)->queue_pair_size; From fc4099f17240767554ff3a73977acb78ef615404 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 22 Oct 2015 18:17:16 -0700 Subject: [PATCH 39/79] openvswitch: Fix egress tunnel info. While transitioning to netdev based vport we broke OVS feature which allows user to retrieve tunnel packet egress information for lwtunnel devices. Following patch fixes it by introducing ndo operation to get the tunnel egress info. Same ndo operation can be used for lwtunnel devices and compat ovs-tnl-vport devices. So after adding such device operation we can remove similar operation from ovs-vport. Fixes: 614732eaa12d ("openvswitch: Use regular VXLAN net_device device"). Signed-off-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- drivers/net/geneve.c | 40 +++++++++++++++++++---- drivers/net/vxlan.c | 41 ++++++++++++++++++++++++ include/linux/netdevice.h | 7 ++++ include/net/dst_metadata.h | 32 +++++++++++++++++++ net/core/dev.c | 27 ++++++++++++++++ net/ipv4/ip_gre.c | 46 +++++++++++++++++++++------ net/openvswitch/actions.c | 9 ++---- net/openvswitch/datapath.c | 5 ++- net/openvswitch/datapath.h | 1 - net/openvswitch/flow_netlink.c | 18 ++++------- net/openvswitch/flow_netlink.h | 6 ++-- net/openvswitch/vport-geneve.c | 13 -------- net/openvswitch/vport-gre.c | 8 ----- net/openvswitch/vport-vxlan.c | 19 ----------- net/openvswitch/vport.c | 58 ---------------------------------- net/openvswitch/vport.h | 35 -------------------- 16 files changed, 192 insertions(+), 173 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index cde29f8a37bf..445071c163cb 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -594,14 +594,12 @@ static struct rtable *geneve_get_rt(struct sk_buff *skb, rt = ip_route_output_key(geneve->net, fl4); if (IS_ERR(rt)) { netdev_dbg(dev, "no route to %pI4\n", &fl4->daddr); - dev->stats.tx_carrier_errors++; - return rt; + return ERR_PTR(-ENETUNREACH); } if (rt->dst.dev == dev) { /* is this necessary? */ netdev_dbg(dev, "circular route to %pI4\n", &fl4->daddr); - dev->stats.collisions++; ip_rt_put(rt); - return ERR_PTR(-EINVAL); + return ERR_PTR(-ELOOP); } return rt; } @@ -627,12 +625,12 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) struct ip_tunnel_info *info = NULL; struct rtable *rt = NULL; const struct iphdr *iip; /* interior IP header */ + int err = -EINVAL; struct flowi4 fl4; __u8 tos, ttl; __be16 sport; bool udp_csum; __be16 df; - int err; if (geneve->collect_md) { info = skb_tunnel_info(skb); @@ -647,7 +645,7 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) rt = geneve_get_rt(skb, dev, &fl4, info); if (IS_ERR(rt)) { netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr); - dev->stats.tx_carrier_errors++; + err = PTR_ERR(rt); goto tx_error; } @@ -699,10 +697,37 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) tx_error: dev_kfree_skb(skb); err: - dev->stats.tx_errors++; + if (err == -ELOOP) + dev->stats.collisions++; + else if (err == -ENETUNREACH) + dev->stats.tx_carrier_errors++; + else + dev->stats.tx_errors++; return NETDEV_TX_OK; } +static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) +{ + struct ip_tunnel_info *info = skb_tunnel_info(skb); + struct geneve_dev *geneve = netdev_priv(dev); + struct rtable *rt; + struct flowi4 fl4; + + if (ip_tunnel_info_af(info) != AF_INET) + return -EINVAL; + + rt = geneve_get_rt(skb, dev, &fl4, info); + if (IS_ERR(rt)) + return PTR_ERR(rt); + + ip_rt_put(rt); + info->key.u.ipv4.src = fl4.saddr; + info->key.tp_src = udp_flow_src_port(geneve->net, skb, + 1, USHRT_MAX, true); + info->key.tp_dst = geneve->dst_port; + return 0; +} + static const struct net_device_ops geneve_netdev_ops = { .ndo_init = geneve_init, .ndo_uninit = geneve_uninit, @@ -713,6 +738,7 @@ static const struct net_device_ops geneve_netdev_ops = { .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, + .ndo_fill_metadata_dst = geneve_fill_metadata_dst, }; static void geneve_get_drvinfo(struct net_device *dev, diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index afdc65fd5bc5..c1587ece28cf 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2337,6 +2337,46 @@ static 
int vxlan_change_mtu(struct net_device *dev, int new_mtu) return 0; } +static int egress_ipv4_tun_info(struct net_device *dev, struct sk_buff *skb, + struct ip_tunnel_info *info, + __be16 sport, __be16 dport) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct rtable *rt; + struct flowi4 fl4; + + memset(&fl4, 0, sizeof(fl4)); + fl4.flowi4_tos = RT_TOS(info->key.tos); + fl4.flowi4_mark = skb->mark; + fl4.flowi4_proto = IPPROTO_UDP; + fl4.daddr = info->key.u.ipv4.dst; + + rt = ip_route_output_key(vxlan->net, &fl4); + if (IS_ERR(rt)) + return PTR_ERR(rt); + ip_rt_put(rt); + + info->key.u.ipv4.src = fl4.saddr; + info->key.tp_src = sport; + info->key.tp_dst = dport; + return 0; +} + +static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct ip_tunnel_info *info = skb_tunnel_info(skb); + __be16 sport, dport; + + sport = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min, + vxlan->cfg.port_max, true); + dport = info->key.tp_dst ? : vxlan->cfg.dst_port; + + if (ip_tunnel_info_af(info) == AF_INET) + return egress_ipv4_tun_info(dev, skb, info, sport, dport); + return -EINVAL; +} + static const struct net_device_ops vxlan_netdev_ops = { .ndo_init = vxlan_init, .ndo_uninit = vxlan_uninit, @@ -2351,6 +2391,7 @@ static const struct net_device_ops vxlan_netdev_ops = { .ndo_fdb_add = vxlan_fdb_add, .ndo_fdb_del = vxlan_fdb_delete, .ndo_fdb_dump = vxlan_fdb_dump, + .ndo_fill_metadata_dst = vxlan_fill_metadata_dst, }; /* Info for udev, that this is a virtual tunnel endpoint */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2d15e3831440..210d11a75e4f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1054,6 +1054,10 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * This function is used to pass protocol port error state information * to the switch driver. The switch driver can react to the proto_down * by doing a phys down on the associated switch port. + * int (*ndo_fill_metadata_dst)(struct net_device *dev, struct sk_buff *skb); + * This function is used to get egress tunnel information for given skb. + * This is useful for retrieving outer tunnel header parameters while + * sampling packet. 
* */ struct net_device_ops { @@ -1227,6 +1231,8 @@ struct net_device_ops { int (*ndo_get_iflink)(const struct net_device *dev); int (*ndo_change_proto_down)(struct net_device *dev, bool proto_down); + int (*ndo_fill_metadata_dst)(struct net_device *dev, + struct sk_buff *skb); }; /** @@ -2203,6 +2209,7 @@ void dev_add_offload(struct packet_offload *po); void dev_remove_offload(struct packet_offload *po); int dev_get_iflink(const struct net_device *dev); +int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb); struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags, unsigned short mask); struct net_device *dev_get_by_name(struct net *net, const char *name); diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index af9d5382f6cb..ce009710120c 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -60,6 +60,38 @@ static inline struct metadata_dst *tun_rx_dst(int md_size) return tun_dst; } +static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb) +{ + struct metadata_dst *md_dst = skb_metadata_dst(skb); + int md_size = md_dst->u.tun_info.options_len; + struct metadata_dst *new_md; + + if (!md_dst) + return ERR_PTR(-EINVAL); + + new_md = metadata_dst_alloc(md_size, GFP_ATOMIC); + if (!new_md) + return ERR_PTR(-ENOMEM); + + memcpy(&new_md->u.tun_info, &md_dst->u.tun_info, + sizeof(struct ip_tunnel_info) + md_size); + skb_dst_drop(skb); + dst_hold(&new_md->dst); + skb_dst_set(skb, &new_md->dst); + return new_md; +} + +static inline struct ip_tunnel_info *skb_tunnel_info_unclone(struct sk_buff *skb) +{ + struct metadata_dst *dst; + + dst = tun_dst_unclone(skb); + if (IS_ERR(dst)) + return NULL; + + return &dst->u.tun_info; +} + static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, __be16 flags, __be64 tunnel_id, diff --git a/net/core/dev.c b/net/core/dev.c index 6bb6470f5b7b..c14748d051e7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -99,6 +99,7 @@ #include #include #include +#include #include #include #include @@ -681,6 +682,32 @@ int dev_get_iflink(const struct net_device *dev) } EXPORT_SYMBOL(dev_get_iflink); +/** + * dev_fill_metadata_dst - Retrieve tunnel egress information. + * @dev: targeted interface + * @skb: The packet. + * + * For better visibility of tunnel traffic OVS needs to retrieve + * egress tunnel information for a packet. Following API allows + * user to get this info. + */ +int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) +{ + struct ip_tunnel_info *info; + + if (!dev->netdev_ops || !dev->netdev_ops->ndo_fill_metadata_dst) + return -EINVAL; + + info = skb_tunnel_info_unclone(skb); + if (!info) + return -ENOMEM; + if (unlikely(!(info->mode & IP_TUNNEL_INFO_TX))) + return -EINVAL; + + return dev->netdev_ops->ndo_fill_metadata_dst(dev, skb); +} +EXPORT_SYMBOL_GPL(dev_fill_metadata_dst); + /** * __dev_get_by_name - find a device by its name * @net: the applicable net namespace diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index bd0679d90519..614521437e30 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -498,10 +498,26 @@ static struct sk_buff *gre_handle_offloads(struct sk_buff *skb, csum ? 
SKB_GSO_GRE_CSUM : SKB_GSO_GRE); } +static struct rtable *gre_get_rt(struct sk_buff *skb, + struct net_device *dev, + struct flowi4 *fl, + const struct ip_tunnel_key *key) +{ + struct net *net = dev_net(dev); + + memset(fl, 0, sizeof(*fl)); + fl->daddr = key->u.ipv4.dst; + fl->saddr = key->u.ipv4.src; + fl->flowi4_tos = RT_TOS(key->tos); + fl->flowi4_mark = skb->mark; + fl->flowi4_proto = IPPROTO_GRE; + + return ip_route_output_key(net, fl); +} + static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel_info *tun_info; - struct net *net = dev_net(dev); const struct ip_tunnel_key *key; struct flowi4 fl; struct rtable *rt; @@ -516,14 +532,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) goto err_free_skb; key = &tun_info->key; - memset(&fl, 0, sizeof(fl)); - fl.daddr = key->u.ipv4.dst; - fl.saddr = key->u.ipv4.src; - fl.flowi4_tos = RT_TOS(key->tos); - fl.flowi4_mark = skb->mark; - fl.flowi4_proto = IPPROTO_GRE; - - rt = ip_route_output_key(net, &fl); + rt = gre_get_rt(skb, dev, &fl, key); if (IS_ERR(rt)) goto err_free_skb; @@ -566,6 +575,24 @@ err_free_skb: dev->stats.tx_dropped++; } +static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) +{ + struct ip_tunnel_info *info = skb_tunnel_info(skb); + struct rtable *rt; + struct flowi4 fl4; + + if (ip_tunnel_info_af(info) != AF_INET) + return -EINVAL; + + rt = gre_get_rt(skb, dev, &fl4, &info->key); + if (IS_ERR(rt)) + return PTR_ERR(rt); + + ip_rt_put(rt); + info->key.u.ipv4.src = fl4.saddr; + return 0; +} + static netdev_tx_t ipgre_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -1023,6 +1050,7 @@ static const struct net_device_ops gre_tap_netdev_ops = { .ndo_change_mtu = ip_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_iflink = ip_tunnel_get_iflink, + .ndo_fill_metadata_dst = gre_fill_metadata_dst, }; static void ipgre_tap_setup(struct net_device *dev) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index c6a39bf2c3b9..0bf0f406de52 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -768,7 +768,6 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, const struct nlattr *attr, const struct nlattr *actions, int actions_len) { - struct ip_tunnel_info info; struct dp_upcall_info upcall; const struct nlattr *a; int rem; @@ -796,11 +795,9 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, if (vport) { int err; - upcall.egress_tun_info = &info; - err = ovs_vport_get_egress_tun_info(vport, skb, - &upcall); - if (err) - upcall.egress_tun_info = NULL; + err = dev_fill_metadata_dst(vport->dev, skb); + if (!err) + upcall.egress_tun_info = skb_tunnel_info(skb); } break; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index b816ff871528..c5d08ee37730 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -490,9 +490,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, if (upcall_info->egress_tun_info) { nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY); - err = ovs_nla_put_egress_tunnel_key(user_skb, - upcall_info->egress_tun_info, - upcall_info->egress_tun_opts); + err = ovs_nla_put_tunnel_info(user_skb, + upcall_info->egress_tun_info); BUG_ON(err); nla_nest_end(user_skb, nla); } diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index f88038a99f44..67bdecd9fdc1 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -117,7 
+117,6 @@ struct ovs_skb_cb { */ struct dp_upcall_info { struct ip_tunnel_info *egress_tun_info; - const void *egress_tun_opts; const struct nlattr *userdata; const struct nlattr *actions; int actions_len; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index bd710bc37469..38536c137c54 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -717,7 +717,7 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb, if ((output->tun_flags & TUNNEL_OAM) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) return -EMSGSIZE; - if (tun_opts) { + if (swkey_tun_opts_len) { if (output->tun_flags & TUNNEL_GENEVE_OPT && nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, swkey_tun_opts_len, tun_opts)) @@ -749,13 +749,12 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb, return 0; } -int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb, - const struct ip_tunnel_info *egress_tun_info, - const void *egress_tun_opts) +int ovs_nla_put_tunnel_info(struct sk_buff *skb, + struct ip_tunnel_info *tun_info) { - return __ipv4_tun_to_nlattr(skb, &egress_tun_info->key, - egress_tun_opts, - egress_tun_info->options_len); + return __ipv4_tun_to_nlattr(skb, &tun_info->key, + ip_tunnel_info_opts(tun_info), + tun_info->options_len); } static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, @@ -2383,10 +2382,7 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) if (!start) return -EMSGSIZE; - err = ipv4_tun_to_nlattr(skb, &tun_info->key, - tun_info->options_len ? - ip_tunnel_info_opts(tun_info) : NULL, - tun_info->options_len); + err = ovs_nla_put_tunnel_info(skb, tun_info); if (err) return err; nla_nest_end(skb, start); diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 6ca3f0baf449..47dd142eca1c 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -55,9 +55,9 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb); int ovs_nla_get_match(struct net *, struct sw_flow_match *, const struct nlattr *key, const struct nlattr *mask, bool log); -int ovs_nla_put_egress_tunnel_key(struct sk_buff *, - const struct ip_tunnel_info *, - const void *egress_tun_opts); + +int ovs_nla_put_tunnel_info(struct sk_buff *skb, + struct ip_tunnel_info *tun_info); bool ovs_nla_get_ufid(struct sw_flow_id *, const struct nlattr *, bool log); int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 2735e9c4a3b8..5f8aaaaa0785 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -52,18 +52,6 @@ static int geneve_get_options(const struct vport *vport, return 0; } -static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct dp_upcall_info *upcall) -{ - struct geneve_port *geneve_port = geneve_vport(vport); - struct net *net = ovs_dp_get_net(vport->dp); - __be16 dport = htons(geneve_port->port_no); - __be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true); - - return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp), - skb, IPPROTO_UDP, sport, dport); -} - static struct vport *geneve_tnl_create(const struct vport_parms *parms) { struct net *net = ovs_dp_get_net(parms->dp); @@ -130,7 +118,6 @@ static struct vport_ops ovs_geneve_vport_ops = { .get_options = geneve_get_options, .send = ovs_netdev_send, .owner = THIS_MODULE, - .get_egress_tun_info = geneve_get_egress_tun_info, }; static int __init 
ovs_geneve_tnl_init(void) diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index 4d24481669c9..64225bf5eb40 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -84,18 +84,10 @@ static struct vport *gre_create(const struct vport_parms *parms) return ovs_netdev_link(vport, parms->name); } -static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct dp_upcall_info *upcall) -{ - return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp), - skb, IPPROTO_GRE, 0, 0); -} - static struct vport_ops ovs_gre_vport_ops = { .type = OVS_VPORT_TYPE_GRE, .create = gre_create, .send = ovs_netdev_send, - .get_egress_tun_info = gre_get_egress_tun_info, .destroy = ovs_netdev_tunnel_destroy, .owner = THIS_MODULE, }; diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index c11413d5075f..e1c9c0888037 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -146,31 +146,12 @@ static struct vport *vxlan_create(const struct vport_parms *parms) return ovs_netdev_link(vport, parms->name); } -static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct dp_upcall_info *upcall) -{ - struct vxlan_dev *vxlan = netdev_priv(vport->dev); - struct net *net = ovs_dp_get_net(vport->dp); - __be16 dst_port = vxlan_dev_dst_port(vxlan); - __be16 src_port; - int port_min; - int port_max; - - inet_get_local_port_range(net, &port_min, &port_max); - src_port = udp_flow_src_port(net, skb, 0, 0, true); - - return ovs_tunnel_get_egress_info(upcall, net, - skb, IPPROTO_UDP, - src_port, dst_port); -} - static struct vport_ops ovs_vxlan_netdev_vport_ops = { .type = OVS_VPORT_TYPE_VXLAN, .create = vxlan_create, .destroy = ovs_netdev_tunnel_destroy, .get_options = vxlan_get_options, .send = ovs_netdev_send, - .get_egress_tun_info = vxlan_get_egress_tun_info, }; static int __init ovs_vxlan_tnl_init(void) diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 12a36ac21eda..320c765ce44a 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -479,61 +479,3 @@ void ovs_vport_deferred_free(struct vport *vport) call_rcu(&vport->rcu, free_vport_rcu); } EXPORT_SYMBOL_GPL(ovs_vport_deferred_free); - -int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall, - struct net *net, - struct sk_buff *skb, - u8 ipproto, - __be16 tp_src, - __be16 tp_dst) -{ - struct ip_tunnel_info *egress_tun_info = upcall->egress_tun_info; - const struct ip_tunnel_info *tun_info = skb_tunnel_info(skb); - const struct ip_tunnel_key *tun_key; - u32 skb_mark = skb->mark; - struct rtable *rt; - struct flowi4 fl; - - if (unlikely(!tun_info)) - return -EINVAL; - if (ip_tunnel_info_af(tun_info) != AF_INET) - return -EINVAL; - - tun_key = &tun_info->key; - - /* Route lookup to get srouce IP address. - * The process may need to be changed if the corresponding process - * in vports ops changed. 
- */ - rt = ovs_tunnel_route_lookup(net, tun_key, skb_mark, &fl, ipproto); - if (IS_ERR(rt)) - return PTR_ERR(rt); - - ip_rt_put(rt); - - /* Generate egress_tun_info based on tun_info, - * saddr, tp_src and tp_dst - */ - ip_tunnel_key_init(&egress_tun_info->key, - fl.saddr, tun_key->u.ipv4.dst, - tun_key->tos, - tun_key->ttl, - tp_src, tp_dst, - tun_key->tun_id, - tun_key->tun_flags); - egress_tun_info->options_len = tun_info->options_len; - egress_tun_info->mode = tun_info->mode; - upcall->egress_tun_opts = ip_tunnel_info_opts(egress_tun_info); - return 0; -} -EXPORT_SYMBOL_GPL(ovs_tunnel_get_egress_info); - -int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct dp_upcall_info *upcall) -{ - /* get_egress_tun_info() is only implemented on tunnel ports. */ - if (unlikely(!vport->ops->get_egress_tun_info)) - return -EINVAL; - - return vport->ops->get_egress_tun_info(vport, skb, upcall); -}
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index a413f3ae6a7b..d341ad6f3afe 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -27,7 +27,6 @@ #include #include #include -#include #include "datapath.h" @@ -53,16 +52,6 @@ int ovs_vport_set_upcall_portids(struct vport *, const struct nlattr *pids); int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *); u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *); -int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall, - struct net *net, - struct sk_buff *, - u8 ipproto, - __be16 tp_src, - __be16 tp_dst); - -int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, - struct dp_upcall_info *upcall); - /** * struct vport_portids - array of netlink portids of a vport. * must be protected by rcu. @@ -140,8 +129,6 @@ struct vport_parms { * have any configuration. * @send: Send a packet on the device. * zero for dropped packets or negative for error. - * @get_egress_tun_info: Get the egress tunnel 5-tuple and other info for - * a packet. */ struct vport_ops { enum ovs_vport_type type; @@ -154,9 +141,6 @@ struct vport_ops { int (*get_options)(const struct vport *, struct sk_buff *); void (*send)(struct vport *, struct sk_buff *); - int (*get_egress_tun_info)(struct vport *, struct sk_buff *, - struct dp_upcall_info *upcall); - struct module *owner; struct list_head list; }; @@ -215,25 +199,6 @@ static inline const char *ovs_vport_name(struct vport *vport) int ovs_vport_ops_register(struct vport_ops *ops); void ovs_vport_ops_unregister(struct vport_ops *ops); -static inline struct rtable *ovs_tunnel_route_lookup(struct net *net, - const struct ip_tunnel_key *key, - u32 mark, - struct flowi4 *fl, - u8 protocol) -{ - struct rtable *rt; - - memset(fl, 0, sizeof(*fl)); - fl->daddr = key->u.ipv4.dst; - fl->saddr = key->u.ipv4.src; - fl->flowi4_tos = RT_TOS(key->tos); - fl->flowi4_mark = mark; - fl->flowi4_proto = protocol; - - rt = ip_route_output_key(net, fl); - return rt; -} - static inline void ovs_vport_send(struct vport *vport, struct sk_buff *skb) { vport->ops->send(vport, skb);
From f23d538bc24a83c16127c2eb82c9cf1adc2b5149 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 23 Oct 2015 00:57:05 -0400 Subject: [PATCH 40/79] macvtap: unbreak receiving of gro skb with frag list We don't have frag list support in TAP_FEATURES. This leads to software segmentation of GRO skbs that carry a frag list. Fix this by adding frag list support to TAP_FEATURES. With this patch, a single netperf receive session was restored from about 5Gb/s to about 12Gb/s on mlx4.
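The mechanism behind the slowdown is the frag-list clause in the stack's GSO feature test: before a GSO skb is handed over as-is, the consumer's feature mask is checked, and a missing NETIF_F_FRAGLIST forces a software-segmentation fallback. Below is an editor's paraphrased, simplified sketch of that check (cf. skb_gso_ok()/netif_needs_gso() in include/linux/netdevice.h; not the verbatim kernel source):

/* Editor's paraphrase of the core feature test; simplified, not the
 * exact kernel implementation.
 */
#include <linux/netdevice.h>
#include <linux/skbuff.h>

static bool example_gso_ok(struct sk_buff *skb, netdev_features_t features)
{
	/* A GSO skb built around a frag list may only be passed on as-is
	 * when the receiver advertises NETIF_F_FRAGLIST; otherwise it is
	 * segmented in software first.
	 */
	return net_gso_ok(features, skb_shinfo(skb)->gso_type) &&
	       (!skb_has_frag_list(skb) || (features & NETIF_F_FRAGLIST));
}

Since macvtap derives the feature mask it passes to this check from TAP_FEATURES, adding NETIF_F_FRAGLIST to that mask is what lets GRO skbs with frag lists through without the segmentation detour.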
Fixes a567dd6252 ("macvtap: simplify usage of tap_features") Cc: Vlad Yasevich Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 248478c6f6e4..197c93937c2d 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -137,7 +137,7 @@ static const struct proto_ops macvtap_socket_ops; #define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ NETIF_F_TSO6 | NETIF_F_UFO) #define RX_OFFLOADS (NETIF_F_GRO | NETIF_F_LRO) -#define TAP_FEATURES (NETIF_F_GSO | NETIF_F_SG) +#define TAP_FEATURES (NETIF_F_GSO | NETIF_F_SG | NETIF_F_FRAGLIST) static struct macvlan_dev *macvtap_get_vlan_rcu(const struct net_device *dev) { From ab997ad408394bcaf7f3015d4c4e38047eaf2ad6 Mon Sep 17 00:00:00 2001 From: lucien Date: Fri, 23 Oct 2015 15:36:53 +0800 Subject: [PATCH 41/79] ipv6: fix the incorrect return value of throw route The error condition -EAGAIN, which is signaled by throw routes, tells the rules framework to walk on searching for next matches. If the walk ends and we stop walking the rules with the result of a throw route we have to translate the error conditions to -ENETUNREACH. Signed-off-by: Xin Long Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/fib6_rules.c | 19 +++++++++++++++---- net/ipv6/ip6_fib.c | 12 +++++++++++- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 9f777ec59a59..ed33abf57abd 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -32,6 +32,7 @@ struct fib6_rule { struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, int flags, pol_lookup_t lookup) { + struct rt6_info *rt; struct fib_lookup_arg arg = { .lookup_ptr = lookup, .flags = FIB_LOOKUP_NOREF, @@ -40,11 +41,21 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, fib_rules_lookup(net->ipv6.fib6_rules_ops, flowi6_to_flowi(fl6), flags, &arg); - if (arg.result) - return arg.result; + rt = arg.result; - dst_hold(&net->ipv6.ip6_null_entry->dst); - return &net->ipv6.ip6_null_entry->dst; + if (!rt) { + dst_hold(&net->ipv6.ip6_null_entry->dst); + return &net->ipv6.ip6_null_entry->dst; + } + + if (rt->rt6i_flags & RTF_REJECT && + rt->dst.error == -EAGAIN) { + ip6_rt_put(rt); + rt = net->ipv6.ip6_null_entry; + dst_hold(&rt->dst); + } + + return &rt->dst; } static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 7d2e0023c72d..6cedc62b2abb 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -285,7 +285,17 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, int flags, pol_lookup_t lookup) { - return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl6, flags); + struct rt6_info *rt; + + rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags); + if (rt->rt6i_flags & RTF_REJECT && + rt->dst.error == -EAGAIN) { + ip6_rt_put(rt); + rt = net->ipv6.ip6_null_entry; + dst_hold(&rt->dst); + } + + return &rt->dst; } static void __net_init fib6_tables_init(struct net *net) From c80dbe04612986fd6104b4a1be21681b113b5ac9 Mon Sep 17 00:00:00 2001 From: Andrew Shewmaker Date: Sun, 18 Oct 2015 21:59:08 -0700 Subject: [PATCH 42/79] tcp: allow dctcp alpha to drop to zero If alpha is strictly reduced by alpha >> dctcp_shift_g and if alpha is less than 1 << 
dctcp_shift_g, then alpha may never reach zero. For example, given shift_g=4 and alpha=15, alpha >> dctcp_shift_g yields 0 and alpha remains 15. The effect isn't noticeable in this case below cwnd=137, but could gradually drive uncongested flows with leftover alpha down to cwnd=137. A larger dctcp_shift_g would have a greater effect. This change causes alpha=15 to drop to 0 instead of being decrementing by 1 as it would when alpha=16. However, it requires one less conditional to implement since it doesn't have to guard against subtracting 1 from 0U. A decay of 15 is not unreasonable since an equal or greater amount occurs at alpha >= 240. Signed-off-by: Andrew G. Shewmaker Acked-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/tcp_dctcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 7092a61c4dc8..7e538f71f5fb 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -209,7 +209,7 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags) /* alpha = (1 - g) * alpha + g * F */ - alpha -= alpha >> dctcp_shift_g; + alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g); if (bytes_ecn) { /* If dctcp_shift_g == 1, a 32bit value would overflow * after 8 Mbytes. From 79907146fb5b1778035870db895fb2bf64061284 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 16 Oct 2015 11:32:42 +0200 Subject: [PATCH 43/79] overflow-arith: begin to add support for overflow builtin functions The idea of the overflow-arith.h header is to collect overflow checking functions in one central place. If gcc compiler supports the __builtin_overflow_* builtins we use them because they might give better performance, otherwise the code falls back to normal overflow checking functions. The builtin_overflow functions are supported by gcc-5 and clang. The matter of supporting clang is to just provide a corresponding CC_HAVE_BUILTIN_OVERFLOW, because the specific overflow checking builtins don't differ between gcc and clang. I just provide overflow_usub function here as I intend this to get merged into net, more functions will definitely follow as they are needed. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/compiler-gcc.h | 4 ++++ include/linux/overflow-arith.h | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 include/linux/overflow-arith.h diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index dfaa7b3e9ae9..82c159e0532a 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -237,6 +237,10 @@ #define KASAN_ABI_VERSION 3 #endif +#if GCC_VERSION >= 50000 +#define CC_HAVE_BUILTIN_OVERFLOW +#endif + #endif /* gcc version >= 40000 specific checks */ #if !defined(__noclone) diff --git a/include/linux/overflow-arith.h b/include/linux/overflow-arith.h new file mode 100644 index 000000000000..e12ccf854a70 --- /dev/null +++ b/include/linux/overflow-arith.h @@ -0,0 +1,18 @@ +#pragma once + +#include + +#ifdef CC_HAVE_BUILTIN_OVERFLOW + +#define overflow_usub __builtin_usub_overflow + +#else + +static inline bool overflow_usub(unsigned int a, unsigned int b, + unsigned int *res) +{ + *res = a - b; + return *res > a ? 
true : false; +} + +#endif From b72a2b01b686f242028038f630555513c9e4de38 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 16 Oct 2015 11:32:43 +0200 Subject: [PATCH 44/79] ipv6: protect mtu calculation of wrap-around and infinite loop by rounding issues Raw sockets with hdrincl enabled can insert ipv6 extension headers right into the data stream. In case we need to fragment those packets, we reparse the options header to find the place where we can insert the fragment header. If the extension headers exceed the link's MTU we actually cannot make progress in such a case. Instead of ending up in broken arithmetic or rounding towards 0 and entering an endless loop in ip6_fragment, just prevent those cases by aborting early and signal -EMSGSIZE to user space. Reported-by: Dmitry Vyukov Cc: Dmitry Vyukov Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d03d6da772f3..8dddb45c433e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -28,6 +28,7 @@ #include #include +#include #include #include #include @@ -584,7 +585,10 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, if (np->frag_size) mtu = np->frag_size; } - mtu -= hlen + sizeof(struct frag_hdr); + + if (overflow_usub(mtu, hlen + sizeof(struct frag_hdr), &mtu) || + mtu <= 7) + goto fail_toobig; frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr, &ipv6_hdr(skb)->saddr); From d2fd719bcb0e83cb39cfee22ee800f98a56eceb3 Mon Sep 17 00:00:00 2001 From: Nathan Sullivan Date: Wed, 21 Oct 2015 14:17:04 -0500 Subject: [PATCH 45/79] net/phy: micrel: Add workaround for bad autoneg Very rarely, the KSZ9031 will appear to complete autonegotiation, but will drop all traffic afterwards. When this happens, the idle error count will read 0xFF after autonegotiation completes. Reset the PHY when in that state. Signed-off-by: Nathan Sullivan Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 499185eaf413..cf6312fafea5 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -514,6 +514,27 @@ static int ksz8873mll_read_status(struct phy_device *phydev) return 0; } +static int ksz9031_read_status(struct phy_device *phydev) +{ + int err; + int regval; + + err = genphy_read_status(phydev); + if (err) + return err; + + /* Make sure the PHY is not broken. Read idle error count, + * and reset the PHY if it is maxed out. + */ + regval = phy_read(phydev, MII_STAT1000); + if ((regval & 0xFF) == 0xFF) { + phy_init_hw(phydev); + phydev->link = 0; + } + + return 0; +} + static int ksz8873mll_config_aneg(struct phy_device *phydev) { return 0; @@ -772,7 +793,7 @@ static struct phy_driver ksphy_driver[] = { .driver_data = &ksz9021_type, .config_init = ksz9031_config_init, .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, + .read_status = ksz9031_read_status, .ack_interrupt = kszphy_ack_interrupt, .config_intr = kszphy_config_intr, .suspend = genphy_suspend, From 20a41fba679d665cdae2808e2b9cae97c073351f Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Wed, 21 Oct 2015 15:37:05 -0500 Subject: [PATCH 46/79] amd-xgbe: Use wmb before updating current descriptor count The code currently uses the lightweight dma_wmb barrier before updating the current descriptor count. 
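As background for the ordering issue described in this patch (and refined again by a later patch in this series), the sketch below shows the generic publish/consume pattern for a descriptor ring. It is illustrative only: C11 release/acquire operations stand in for the kernel's wmb()/smp_wmb() and smp_rmb() pairing, and the ring/desc/cur/dirty names are hypothetical rather than the driver's real structures.

#include <stdatomic.h>

#define RING_SIZE 256

struct desc { unsigned int len; unsigned int own; };

static struct desc ring[RING_SIZE];
static _Atomic unsigned int cur;  /* producer's publish index */
static unsigned int dirty;        /* consumer's cleanup index */

/* Producer: fill the descriptor, then publish the new index. */
static void produce(unsigned int len)
{
	unsigned int i = atomic_load_explicit(&cur, memory_order_relaxed);

	ring[i % RING_SIZE].len = len;  /* descriptor contents ... */
	ring[i % RING_SIZE].own = 1;    /* ... including the ownership bit */

	/* The descriptor writes must be visible before the index update --
	 * the role the write barrier plays in the Tx path.
	 */
	atomic_store_explicit(&cur, i + 1, memory_order_release);
}

/* Consumer: snapshot the index once, then clean only up to it. */
static void consume(void)
{
	/* Ordering the descriptor reads after the index read is the role a
	 * read barrier plays on the cleanup side.
	 */
	unsigned int end = atomic_load_explicit(&cur, memory_order_acquire);

	while (dirty != end) {
		struct desc *d = &ring[dirty % RING_SIZE];

		d->own = 0;  /* safe: the descriptor is fully written by now */
		dirty++;
	}
}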
Under heavy load, the Tx cleanup routine was seeing the updated current descriptor count before the updated descriptor information. As a result, the Tx descriptor was being cleaned up before it was used because it was not "owned" by the hardware yet, resulting in a Tx queue hang. Using the wmb barrier insures that the descriptor is updated before the descriptor counter preventing the Tx queue hang. For extra insurance, the Tx cleanup routine is changed to grab the current decriptor count on entry and uses that initial value in the processing loop rather than trying to chase the current value. Signed-off-by: Tom Lendacky Tested-by: Christoffer Dall Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 2 +- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index a4473d8ff4fa..e9ab8b9f3b9c 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -1595,7 +1595,7 @@ static void xgbe_dev_xmit(struct xgbe_channel *channel) packet->rdesc_count, 1); /* Make sure ownership is written to the descriptor */ - dma_wmb(); + wmb(); ring->cur = cur_index + 1; if (!packet->skb->xmit_more || diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index aae9d5ecd182..d2b77d985441 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1807,6 +1807,7 @@ static int xgbe_tx_poll(struct xgbe_channel *channel) struct netdev_queue *txq; int processed = 0; unsigned int tx_packets = 0, tx_bytes = 0; + unsigned int cur; DBGPR("-->xgbe_tx_poll\n"); @@ -1814,10 +1815,11 @@ static int xgbe_tx_poll(struct xgbe_channel *channel) if (!ring) return 0; + cur = ring->cur; txq = netdev_get_tx_queue(netdev, channel->queue_index); while ((processed < XGBE_TX_DESC_MAX_PROC) && - (ring->dirty != ring->cur)) { + (ring->dirty != cur)) { rdata = XGBE_GET_DESC_DATA(ring, ring->dirty); rdesc = rdata->rdesc; From f6b8dec99865ea906150e963eacbfd037b579ee9 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Thu, 22 Oct 2015 11:35:05 +0800 Subject: [PATCH 47/79] af_key: fix two typos Signed-off-by: Li RongQing Signed-off-by: David S. Miller --- net/key/af_key.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index 83a70688784b..f9c9ecb0cdd3 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -261,7 +261,7 @@ static int pfkey_broadcast(struct sk_buff *skb, err2 = pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk); - /* Error is cleare after succecful sending to at least one + /* Error is cleared after successful sending to at least one * registered KM */ if ((broadcast_flags & BROADCAST_REGISTERED) && err) err = err2; From 1acea4f6ce1b1c0941438aca75dd2e5c6b09db60 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Thu, 22 Oct 2015 16:57:10 +0200 Subject: [PATCH 48/79] ppp: fix pppoe_dev deletion condition in pppoe_release() We can't rely on PPPOX_ZOMBIE to decide whether to clear po->pppoe_dev. PPPOX_ZOMBIE can be set by pppoe_disc_rcv() even when po->pppoe_dev is NULL. So we have no guarantee that (sk->sk_state & PPPOX_ZOMBIE) implies (po->pppoe_dev != NULL). Since we're releasing a PPPoE socket, we want to release the pppoe_dev if it exists and reset sk_state to PPPOX_DEAD, no matter the previous value of sk_state. 
So we can just check for po->pppoe_dev and avoid any assumption on sk->sk_state. Fixes: 2b018d57ff18 ("pppoe: drop PPPOX_ZOMBIEs in pppoe_release") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- drivers/net/ppp/pppoe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 2ed75060da50..5e0b43283bce 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -589,7 +589,7 @@ static int pppoe_release(struct socket *sock) po = pppox_sk(sk); - if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND | PPPOX_ZOMBIE)) { + if (po->pppoe_dev) { dev_put(po->pppoe_dev); po->pppoe_dev = NULL; } From ce9d9b8e5c2b7486edf76958bcdb5e6534a915b0 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Fri, 23 Oct 2015 20:59:49 +0800 Subject: [PATCH 49/79] net: sysctl: fix a kmemleak warning the returned buffer of register_sysctl() is stored into net_header variable, but net_header is not used after, and compiler maybe optimise the variable out, and lead kmemleak reported the below warning comm "swapper/0", pid 1, jiffies 4294937448 (age 267.270s) hex dump (first 32 bytes): 90 38 8b 01 c0 ff ff ff 00 00 00 00 01 00 00 00 .8.............. 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] create_object+0x10c/0x2a0 [] kmemleak_alloc+0x54/0xa0 [] __kmalloc+0x1f8/0x4f8 [] __register_sysctl_table+0x64/0x5a0 [] register_sysctl+0x30/0x40 [] net_sysctl_init+0x20/0x58 [] sock_init+0x10/0xb0 [] do_one_initcall+0x90/0x1b8 [] kernel_init_freeable+0x218/0x2f0 [] kernel_init+0x1c/0xe8 [] ret_from_fork+0xc/0x50 [] 0xffffffffffffffff <> Before fix, the objdump result on ARM64: 0000000000000000 : 0: a9be7bfd stp x29, x30, [sp,#-32]! 4: 90000001 adrp x1, 0 8: 90000000 adrp x0, 0 c: 910003fd mov x29, sp 10: 91000021 add x1, x1, #0x0 14: 91000000 add x0, x0, #0x0 18: a90153f3 stp x19, x20, [sp,#16] 1c: 12800174 mov w20, #0xfffffff4 // #-12 20: 94000000 bl 0 24: b4000120 cbz x0, 48 28: 90000013 adrp x19, 0 2c: 91000273 add x19, x19, #0x0 30: 9101a260 add x0, x19, #0x68 34: 94000000 bl 0 38: 2a0003f4 mov w20, w0 3c: 35000060 cbnz w0, 48 40: aa1303e0 mov x0, x19 44: 94000000 bl 0 48: 2a1403e0 mov w0, w20 4c: a94153f3 ldp x19, x20, [sp,#16] 50: a8c27bfd ldp x29, x30, [sp],#32 54: d65f03c0 ret After: 0000000000000000 : 0: a9bd7bfd stp x29, x30, [sp,#-48]! 4: 90000000 adrp x0, 0 8: 910003fd mov x29, sp c: a90153f3 stp x19, x20, [sp,#16] 10: 90000013 adrp x19, 0 14: 91000000 add x0, x0, #0x0 18: 91000273 add x19, x19, #0x0 1c: f90013f5 str x21, [sp,#32] 20: aa1303e1 mov x1, x19 24: 12800175 mov w21, #0xfffffff4 // #-12 28: 94000000 bl 0 2c: f9002260 str x0, [x19,#64] 30: b40001a0 cbz x0, 64 34: 90000014 adrp x20, 0 38: 91000294 add x20, x20, #0x0 3c: 9101a280 add x0, x20, #0x68 40: 94000000 bl 0 44: 2a0003f5 mov w21, w0 48: 35000080 cbnz w0, 58 4c: aa1403e0 mov x0, x20 50: 94000000 bl 0 54: 14000004 b 64 58: f9402260 ldr x0, [x19,#64] 5c: 94000000 bl 0 60: f900227f str xzr, [x19,#64] 64: 2a1503e0 mov w0, w21 68: f94013f5 ldr x21, [sp,#32] 6c: a94153f3 ldp x19, x20, [sp,#16] 70: a8c37bfd ldp x29, x30, [sp],#48 74: d65f03c0 ret Add the possible error handle to free the net_header to remove the kmemleak warning Signed-off-by: Li RongQing Signed-off-by: David S. 
Miller --- net/sysctl_net.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/sysctl_net.c b/net/sysctl_net.c index e7000be321b0..ed98c1fc3de1 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -94,10 +94,14 @@ __init int net_sysctl_init(void) goto out; ret = register_pernet_subsys(&sysctl_pernet_ops); if (ret) - goto out; + goto out1; register_sysctl_root(&net_sysctl_root); out: return ret; +out1: + unregister_sysctl_table(net_header); + net_header = NULL; + goto out; } struct ctl_table_header *register_net_sysctl(struct net *net, From f9bf45e08ef36b6726a5744f0029325e81b3248a Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Fri, 23 Oct 2015 17:14:07 -0700 Subject: [PATCH 50/79] net: thunderx: Remove PF soft reset. In some silicon revisions, the soft reset clobbers PCI config space, so quit doing the reset. Signed-off-by: Sunil Goutham Signed-off-by: David Daney Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic_main.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index b3a5947a2cc0..d6e3219ddeb3 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -305,9 +305,6 @@ static void nic_init_hw(struct nicpf *nic) { int i; - /* Reset NIC, in case the driver is repeatedly inserted and removed */ - nic_reg_write(nic, NIC_PF_SOFT_RESET, 1); - /* Enable NIC HW block */ nic_reg_write(nic, NIC_PF_CFG, 0x3); From 4e85777ff071b51f500b130b6d036922af32be25 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Fri, 23 Oct 2015 17:14:08 -0700 Subject: [PATCH 51/79] net: thunderx: Fix incorrect subsystem devid of VF on pass2 silicon Signed-off-by: Sunil Goutham Signed-off-by: David Daney Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index b63e579aeb12..a9377727c11c 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -29,7 +29,7 @@ static const struct pci_device_id nicvf_id_table[] = { { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_NIC_VF, - PCI_VENDOR_ID_CAVIUM, 0xA11E) }, + PCI_VENDOR_ID_CAVIUM, 0xA134) }, { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_PASS1_NIC_VF, PCI_VENDOR_ID_CAVIUM, 0xA11E) }, From 88ed237720bd618240439714a57fb69ea96428e7 Mon Sep 17 00:00:00 2001 From: David Daney Date: Fri, 23 Oct 2015 17:14:09 -0700 Subject: [PATCH 52/79] net: thunderx: Rewrite silicon revision tests. The test for pass-1 silicon was incorrect, it should be for all revisions less than 8. Also the revision is already present in the pci_dev, so there is no need to read and keep a private copy. Remove rev_id and code to read it from struct nicpf. Create new static inline function pass1_silicon() to be used to testing the silicon version. Use pass1_silicon() for revision checks, this will be more widely used in follow on patches. Signed-off-by: David Daney Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cavium/thunder/nic_main.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index d6e3219ddeb3..52e1acb69562 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -22,7 +22,6 @@ struct nicpf { struct pci_dev *pdev; - u8 rev_id; u8 node; unsigned int flags; u8 num_vf_en; /* No of VF enabled */ @@ -54,6 +53,11 @@ struct nicpf { bool irq_allocated[NIC_PF_MSIX_VECTORS]; }; +static inline bool pass1_silicon(struct nicpf *nic) +{ + return nic->pdev->revision < 8; +} + /* Supported devices */ static const struct pci_device_id nic_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_NIC_PF) }, @@ -117,7 +121,7 @@ static void nic_send_msg_to_vf(struct nicpf *nic, int vf, union nic_mbx *mbx) * when PF writes to MBOX(1), in next revisions when * PF writes to MBOX(0) */ - if (nic->rev_id == 0) { + if (pass1_silicon(nic)) { /* see the comment for nic_reg_write()/nic_reg_read() * functions above */ @@ -998,8 +1002,6 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_release_regions; } - pci_read_config_byte(pdev, PCI_REVISION_ID, &nic->rev_id); - nic->node = nic_get_node_id(pdev); nic_set_lmac_vf_mapping(nic); From 34411b68b132e403ddf395419e986475a9993d9b Mon Sep 17 00:00:00 2001 From: Thanneeru Srinivasulu Date: Fri, 23 Oct 2015 17:14:10 -0700 Subject: [PATCH 53/79] net: thunderx: Incorporate pass2 silicon CPI index configuration changes Add support for ThunderX pass2 CPI and MPI configuration changes. MPI_ALG is not enabled i.e MCAM parsing is disabled. Signed-off-by: Thanneeru Srinivasulu Signed-off-by: Sunil Goutham Signed-off-by: David Daney Signed-off-by: David S. 
Miller --- .../net/ethernet/cavium/thunder/nic_main.c | 29 +++++++++++++++---- drivers/net/ethernet/cavium/thunder/nic_reg.h | 4 +++ 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 52e1acb69562..c561fdcb79a7 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -43,6 +43,7 @@ struct nicpf { u8 duplex[MAX_LMAC]; u32 speed[MAX_LMAC]; u16 cpi_base[MAX_NUM_VFS_SUPPORTED]; + u16 rssi_base[MAX_NUM_VFS_SUPPORTED]; u16 rss_ind_tbl_size; bool mbx_lock[MAX_NUM_VFS_SUPPORTED]; @@ -396,8 +397,18 @@ static void nic_config_cpi(struct nicpf *nic, struct cpi_cfg_msg *cfg) padd = cpi % 8; /* 3 bits CS out of 6bits DSCP */ /* Leave RSS_SIZE as '0' to disable RSS */ - nic_reg_write(nic, NIC_PF_CPI_0_2047_CFG | (cpi << 3), - (vnic << 24) | (padd << 16) | (rssi_base + rssi)); + if (pass1_silicon(nic)) { + nic_reg_write(nic, NIC_PF_CPI_0_2047_CFG | (cpi << 3), + (vnic << 24) | (padd << 16) | + (rssi_base + rssi)); + } else { + /* Set MPI_ALG to '0' to disable MCAM parsing */ + nic_reg_write(nic, NIC_PF_CPI_0_2047_CFG | (cpi << 3), + (padd << 16)); + /* MPI index is same as CPI if MPI_ALG is not enabled */ + nic_reg_write(nic, NIC_PF_MPI_0_2047_CFG | (cpi << 3), + (vnic << 24) | (rssi_base + rssi)); + } if ((rssi + 1) >= cfg->rq_cnt) continue; @@ -410,6 +421,7 @@ static void nic_config_cpi(struct nicpf *nic, struct cpi_cfg_msg *cfg) rssi = ((cpi - cpi_base) & 0x38) >> 3; } nic->cpi_base[cfg->vf_id] = cpi_base; + nic->rssi_base[cfg->vf_id] = rssi_base; } /* Responsds to VF with its RSS indirection table size */ @@ -435,10 +447,9 @@ static void nic_config_rss(struct nicpf *nic, struct rss_cfg_msg *cfg) { u8 qset, idx = 0; u64 cpi_cfg, cpi_base, rssi_base, rssi; + u64 idx_addr; - cpi_base = nic->cpi_base[cfg->vf_id]; - cpi_cfg = nic_reg_read(nic, NIC_PF_CPI_0_2047_CFG | (cpi_base << 3)); - rssi_base = (cpi_cfg & 0x0FFF) + cfg->tbl_offset; + rssi_base = nic->rssi_base[cfg->vf_id] + cfg->tbl_offset; rssi = rssi_base; qset = cfg->vf_id; @@ -455,9 +466,15 @@ static void nic_config_rss(struct nicpf *nic, struct rss_cfg_msg *cfg) idx++; } + cpi_base = nic->cpi_base[cfg->vf_id]; + if (pass1_silicon(nic)) + idx_addr = NIC_PF_CPI_0_2047_CFG; + else + idx_addr = NIC_PF_MPI_0_2047_CFG; + cpi_cfg = nic_reg_read(nic, idx_addr | (cpi_base << 3)); cpi_cfg &= ~(0xFULL << 20); cpi_cfg |= (cfg->hash_bits << 20); - nic_reg_write(nic, NIC_PF_CPI_0_2047_CFG | (cpi_base << 3), cpi_cfg); + nic_reg_write(nic, idx_addr | (cpi_base << 3), cpi_cfg); } /* 4 level transmit side scheduler configutation diff --git a/drivers/net/ethernet/cavium/thunder/nic_reg.h b/drivers/net/ethernet/cavium/thunder/nic_reg.h index 58197bb2f805..dd536be20193 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_reg.h +++ b/drivers/net/ethernet/cavium/thunder/nic_reg.h @@ -85,7 +85,11 @@ #define NIC_PF_ECC3_DBE_INT_W1S (0x2708) #define NIC_PF_ECC3_DBE_ENA_W1C (0x2710) #define NIC_PF_ECC3_DBE_ENA_W1S (0x2718) +#define NIC_PF_MCAM_0_191_ENA (0x100000) +#define NIC_PF_MCAM_0_191_M_0_5_DATA (0x110000) +#define NIC_PF_MCAM_CTRL (0x120000) #define NIC_PF_CPI_0_2047_CFG (0x200000) +#define NIC_PF_MPI_0_2047_CFG (0x210000) #define NIC_PF_RSSI_0_4097_RQ (0x220000) #define NIC_PF_LMAC_0_7_CFG (0x240000) #define NIC_PF_LMAC_0_7_SW_XOFF (0x242000) From 5188f7e5a7175975f8b943a4b25e499c98a7b9d6 Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Fri, 23 Oct 2015 11:41:58 +0300 Subject: [PATCH 54/79] 
gianfar: Remove duplicated argument to bitwise OR RQFCR_AND is duplicated. Add missing space as well. Signed-off-by: Claudiu Manoil Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/gianfar_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c index 6bdc89179b72..a33e4a829601 100644 --- a/drivers/net/ethernet/freescale/gianfar_ethtool.c +++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c @@ -676,14 +676,14 @@ static void ethflow_to_filer_rules (struct gfar_private *priv, u64 ethflow) u32 fcr = 0x0, fpr = FPR_FILER_MASK; if (ethflow & RXH_L2DA) { - fcr = RQFCR_PID_DAH |RQFCR_CMP_NOMATCH | + fcr = RQFCR_PID_DAH | RQFCR_CMP_NOMATCH | RQFCR_HASH | RQFCR_AND | RQFCR_HASHTBL_0; priv->ftp_rqfpr[priv->cur_filer_idx] = fpr; priv->ftp_rqfcr[priv->cur_filer_idx] = fcr; gfar_write_filer(priv, priv->cur_filer_idx, fcr, fpr); priv->cur_filer_idx = priv->cur_filer_idx - 1; - fcr = RQFCR_PID_DAL | RQFCR_AND | RQFCR_CMP_NOMATCH | + fcr = RQFCR_PID_DAL | RQFCR_CMP_NOMATCH | RQFCR_HASH | RQFCR_AND | RQFCR_HASHTBL_0; priv->ftp_rqfpr[priv->cur_filer_idx] = fpr; priv->ftp_rqfcr[priv->cur_filer_idx] = fcr; From 15bf176db1fb00333af7050c0c699fc7b4e4a960 Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Fri, 23 Oct 2015 11:41:59 +0300 Subject: [PATCH 55/79] gianfar: Don't enable the Filer w/o the Parser Under one unusual circumstance it's possible to wrongly set FILREN without enabling PRSDEP as well in the RCTRL register, against the hardware specifications. With the default config this does not happen because the default Rx offloads (Rx csum and Rx VLAN) properly enable PRSDEP. But if anyone disables all these offloads (via ethtool), we get a wrong configuration where the Rx flow classification and hashing, and other Filer based features (e.g. wake-on-filer interrupt), won't work. This patch fixes the issue. Also, account for Rx FCB insertion, which happens every time PRSDEP is set. Signed-off-by: Claudiu Manoil Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/gianfar.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 710715fcb23d..939ed8fe6318 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -341,7 +341,7 @@ static void gfar_rx_offload_en(struct gfar_private *priv) if (priv->ndev->features & (NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_RX)) priv->uses_rxfcb = 1; - if (priv->hwts_rx_en) + if (priv->hwts_rx_en || priv->rx_filer_enable) priv->uses_rxfcb = 1; } @@ -351,7 +351,7 @@ static void gfar_mac_rx_config(struct gfar_private *priv) u32 rctrl = 0; if (priv->rx_filer_enable) { - rctrl |= RCTRL_FILREN; + rctrl |= RCTRL_FILREN | RCTRL_PRSDEP_INIT; /* Program the RIR0 reg with the required distribution */ if (priv->poll_mode == GFAR_SQ_POLLING) gfar_write(&regs->rir0, DEFAULT_2RXQ_RIR0); From 1de65a5ea32de7b335ab505366d45cefadbbdf71 Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Fri, 23 Oct 2015 11:42:00 +0300 Subject: [PATCH 56/79] gianfar: Fix Rx BSY error handling The Rx BSY error interrupt indicates that a frame was received and discarded due to lack of buffers, so it's an rx ring overflow condition and has nothing to do with bad rx packets. Use the right counter. BSY conditions happen when the SoC is under performance stress.
Doing *more* work in stress situations by trying to schedule NAPI is not a good idea as the stressed system becomes still more stressed. The Rx interrupt is already at work making sure the NAPI is scheduled. So calling gfar_receive() here does not help. This issue was present since day 1. Signed-off-by: Claudiu Manoil Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/gianfar.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 939ed8fe6318..ce38d266f931 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -3462,11 +3462,9 @@ static irqreturn_t gfar_error(int irq, void *grp_id) netif_dbg(priv, tx_err, dev, "Transmit Error\n"); } if (events & IEVENT_BSY) { - dev->stats.rx_errors++; + dev->stats.rx_over_errors++; atomic64_inc(&priv->extra_stats.rx_bsy); - gfar_receive(irq, grp_id); - netif_dbg(priv, rx_err, dev, "busy error (rstat: %x)\n", gfar_read(&regs->rstat)); } From abb1ed7b793fcb10cadb378fe0eeee589b61a9e1 Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Fri, 23 Oct 2015 11:42:01 +0300 Subject: [PATCH 57/79] MAINTAINERS: Add entry for gianfar ethernet driver Signed-off-by: Claudiu Manoil Signed-off-by: David S. Miller --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index fb7d2e4af200..9b43ef23b985 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4427,6 +4427,14 @@ L: linuxppc-dev@lists.ozlabs.org S: Maintained F: drivers/net/ethernet/freescale/ucc_geth* +FREESCALE eTSEC ETHERNET DRIVER (GIANFAR) +M: Claudiu Manoil +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/ethernet/freescale/gianfar* +X: drivers/net/ethernet/freescale/gianfar_ptp.c +F: Documentation/devicetree/bindings/net/fsl-tsec-phy.txt + FREESCALE QUICC ENGINE UCC UART DRIVER M: Timur Tabi L: linuxppc-dev@lists.ozlabs.org From ab8579169b79c062935dade949287113c7c1ba73 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 24 Oct 2015 00:46:03 +0300 Subject: [PATCH 58/79] sh_eth: fix RX buffer size alignment Both Renesas R-Car and RZ/A1 manuals state that RX buffer length must be a multiple of 32 bytes, while the driver only uses 16 byte granularity... Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/sh_eth.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 257ea713b4c1..d8334d8a53b3 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1148,8 +1148,8 @@ static void sh_eth_ring_format(struct net_device *ndev) /* RX descriptor */ rxdesc = &mdp->rx_ring[i]; - /* The size of the buffer is a multiple of 16 bytes. */ - rxdesc->buffer_length = ALIGN(mdp->rx_buf_sz, 16); + /* The size of the buffer is a multiple of 32 bytes.
*/ + rxdesc->buffer_length = ALIGN(mdp->rx_buf_sz, 32); dma_addr = dma_map_single(&ndev->dev, skb->data, rxdesc->buffer_length, DMA_FROM_DEVICE); @@ -1506,7 +1506,7 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota) if (mdp->cd->rpadir) skb_reserve(skb, NET_IP_ALIGN); dma_unmap_single(&ndev->dev, rxdesc->addr, - ALIGN(mdp->rx_buf_sz, 16), + ALIGN(mdp->rx_buf_sz, 32), DMA_FROM_DEVICE); skb_put(skb, pkt_len); skb->protocol = eth_type_trans(skb, ndev); @@ -1524,8 +1524,8 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota) for (; mdp->cur_rx - mdp->dirty_rx > 0; mdp->dirty_rx++) { entry = mdp->dirty_rx % mdp->num_rx_ring; rxdesc = &mdp->rx_ring[entry]; - /* The size of the buffer is 16 byte boundary. */ - rxdesc->buffer_length = ALIGN(mdp->rx_buf_sz, 16); + /* The size of the buffer is 32 byte boundary. */ + rxdesc->buffer_length = ALIGN(mdp->rx_buf_sz, 32); if (mdp->rx_skbuff[entry] == NULL) { skb = netdev_alloc_skb(ndev, skbuff_size); From cb3685958dd4c46d7646d244063ea3ec8adf3618 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 24 Oct 2015 00:46:40 +0300 Subject: [PATCH 59/79] sh_eth: fix RX buffer size calculation The RX buffer size calculation failed to account for the length granularity (which is now 32 bytes)... Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/sh_eth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index d8334d8a53b3..a484d8beb855 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1127,7 +1127,7 @@ static void sh_eth_ring_format(struct net_device *ndev) struct sh_eth_txdesc *txdesc = NULL; int rx_ringsize = sizeof(*rxdesc) * mdp->num_rx_ring; int tx_ringsize = sizeof(*txdesc) * mdp->num_tx_ring; - int skbuff_size = mdp->rx_buf_sz + SH_ETH_RX_ALIGN - 1; + int skbuff_size = mdp->rx_buf_sz + SH_ETH_RX_ALIGN + 32 - 1; dma_addr_t dma_addr; mdp->cur_rx = 0; @@ -1450,7 +1450,7 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota) struct sk_buff *skb; u16 pkt_len = 0; u32 desc_status; - int skbuff_size = mdp->rx_buf_sz + SH_ETH_RX_ALIGN - 1; + int skbuff_size = mdp->rx_buf_sz + SH_ETH_RX_ALIGN + 32 - 1; dma_addr_t dma_addr; boguscnt = min(boguscnt, *quota); From 7e3b6e7423d5f994257c1de88e06b509673fdbcf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 24 Oct 2015 05:47:44 -0700 Subject: [PATCH 60/79] ipv6: gre: support SIT encapsulation gre_gso_segment() chokes if SIT frames were aggregated by GRO engine. Fixes: 61c1db7fae21e ("ipv6: sit: add GSO/TSO support") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/gre_offload.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 5aa46d4b44ef..5a8ee3282550 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -36,7 +36,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, SKB_GSO_TCP_ECN | SKB_GSO_GRE | SKB_GSO_GRE_CSUM | - SKB_GSO_IPIP))) + SKB_GSO_IPIP | + SKB_GSO_SIT))) goto out; if (!skb->encapsulation) From 8c387ebbaff8652943a1cbcab496aecadc6a8875 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 25 Oct 2015 14:57:00 +0100 Subject: [PATCH 61/79] net: thunderx: add missing of_node_put for_each_child_of_node performs an of_node_get on each iteration, so a break out of the loop requires an of_node_put; a minimal sketch of this pattern is shown below.
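For reference, the rule these of_node_put fixes follow, as a minimal kernel-only sketch (it needs <linux/of.h>; the function and its early-exit condition are made up to mirror the patches in this group):

#include <linux/of.h>

/* Illustrative only: count children, stopping early at 'max'. */
static int count_children_capped(struct device_node *parent, int max)
{
	struct device_node *child;
	int n = 0;

	for_each_child_of_node(parent, child) {
		n++;
		if (n >= max) {
			/* Breaking out early: the iterator still holds the
			 * reference it took on 'child' via of_node_get(),
			 * so drop it explicitly -- this is the line the
			 * patches above add.
			 */
			of_node_put(child);
			break;
		}
		/* No of_node_put() on the normal path: advancing the
		 * iterator releases the previous child's reference.
		 */
	}

	return n;
}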
A simplified version of the semantic patch that fixes this problem is as follows (http://coccinelle.lip6.fr): // @@ local idexpression r.n; expression r,e; @@ for_each_child_of_node(r,n) { ... ( of_node_put(n); | e = n | + of_node_put(n); ? break; ) ... } ... when != n // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 574c49278900..180aa9fabf48 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -977,8 +977,10 @@ static int bgx_init_of_phy(struct bgx *bgx) SET_NETDEV_DEV(&bgx->lmac[lmac].netdev, &bgx->pdev->dev); bgx->lmac[lmac].lmacid = lmac; lmac++; - if (lmac == MAX_LMAC_PER_BGX) + if (lmac == MAX_LMAC_PER_BGX) { + of_node_put(np_child); break; + } } return 0; } From bd252796852193277a07da505601a2f407c70e0b Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 25 Oct 2015 14:57:01 +0100 Subject: [PATCH 62/79] net: netcp: add missing of_node_put for_each_child_of_node performs an of_node_get on each iteration, so a break out of the loop requires an of_node_put. A simplified version of the semantic patch that fixes this problem is as follows (http://coccinelle.lip6.fr): // @@ local idexpression r.n; expression r,e; @@ for_each_child_of_node(r,n) { ... ( of_node_put(n); | e = n | + of_node_put(n); ? break; ) ... } ... when != n // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/netcp_ethss.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 6bff8d82ceab..4e70e7586a09 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -2637,8 +2637,10 @@ static void init_secondary_ports(struct gbe_priv *gbe_dev, mac_phy_link = true; slave->open = true; - if (gbe_dev->num_slaves >= gbe_dev->max_num_slaves) + if (gbe_dev->num_slaves >= gbe_dev->max_num_slaves) { + of_node_put(port); break; + } } /* of_phy_connect() is needed only for MAC-PHY interface */ @@ -3137,8 +3139,10 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, continue; } gbe_dev->num_slaves++; - if (gbe_dev->num_slaves >= gbe_dev->max_num_slaves) + if (gbe_dev->num_slaves >= gbe_dev->max_num_slaves) { + of_node_put(interface); break; + } } of_node_put(interfaces); From 447ed7360037b6e38c0206ddcbd04a256ec94099 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 25 Oct 2015 14:57:02 +0100 Subject: [PATCH 63/79] netdev/phy: add missing of_node_put for_each_available_child_of_node performs an of_node_get on each iteration, so a break out of the loop requires an of_node_put. A simplified version of the semantic patch that fixes this problem is as follows (http://coccinelle.lip6.fr): // @@ local idexpression r.n; expression r,e; @@ for_each_available_child_of_node(r,n) { ... ( of_node_put(n); | e = n | + of_node_put(n); ? break; ) ... } ... when != n // Signed-off-by: Julia Lawall Signed-off-by: David S. 
Miller --- drivers/net/phy/mdio-mux.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c index 280c7c311f72..908e8d486342 100644 --- a/drivers/net/phy/mdio-mux.c +++ b/drivers/net/phy/mdio-mux.c @@ -144,6 +144,7 @@ int mdio_mux_init(struct device *dev, dev_err(dev, "Error: Failed to allocate memory for child\n"); ret_val = -ENOMEM; + of_node_put(child_bus_node); break; } cb->bus_number = v; From 028623418766ea64f4256035b06ac6cbc0a67892 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 25 Oct 2015 14:57:03 +0100 Subject: [PATCH 64/79] net: phy: mdio: add missing of_node_put for_each_available_child_of_node performs an of_node_get on each iteration, so a break out of the loop requires an of_node_put. A simplified version of the semantic patch that fixes this problem is as follows (http://coccinelle.lip6.fr): // @@ expression root,e; local idexpression child; @@ for_each_available_child_of_node(root, child) { ... when != of_node_put(child) when != e = child ( return child; | + of_node_put(child); ? return ...; ) ... } // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- drivers/net/phy/mdio-mux-mmioreg.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/mdio-mux-mmioreg.c b/drivers/net/phy/mdio-mux-mmioreg.c index 2377c1341172..7fde454fbc4f 100644 --- a/drivers/net/phy/mdio-mux-mmioreg.c +++ b/drivers/net/phy/mdio-mux-mmioreg.c @@ -113,12 +113,14 @@ static int mdio_mux_mmioreg_probe(struct platform_device *pdev) if (!iprop || len != sizeof(uint32_t)) { dev_err(&pdev->dev, "mdio-mux child node %s is " "missing a 'reg' property\n", np2->full_name); + of_node_put(np2); return -ENODEV; } if (be32_to_cpup(iprop) & ~s->mask) { dev_err(&pdev->dev, "mdio-mux child node %s has " "a 'reg' value with unmasked bits\n", np2->full_name); + of_node_put(np2); return -ENODEV; } } From 81a577034b000964ca791281a975f0ba9a9d7eed Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 25 Oct 2015 14:57:06 +0100 Subject: [PATCH 65/79] ath6kl: add missing of_node_put for_each_compatible_node performs an of_node_get on each iteration, so a break out of the loop requires an of_node_put. A simplified version of the semantic patch that fixes this problem is as follows (http://coccinelle.lip6.fr): // @@ expression e; local idexpression n; @@ for_each_compatible_node(n,...) { ... when != of_node_put(n) when != e = n ( return n; | + of_node_put(n); ? return ...; ) ... } // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- drivers/net/wireless/ath/ath6kl/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/ath6kl/init.c b/drivers/net/wireless/ath/ath6kl/init.c index 6e473fa4b13c..12241b1c57cd 100644 --- a/drivers/net/wireless/ath/ath6kl/init.c +++ b/drivers/net/wireless/ath/ath6kl/init.c @@ -715,6 +715,7 @@ static bool check_device_tree(struct ath6kl *ar) board_filename, ret); continue; } + of_node_put(node); return true; } return false; From 26b7974d9ad7f93891ee8c39ee63bd2515da7744 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 25 Oct 2015 14:57:07 +0100 Subject: [PATCH 66/79] net: mv643xx_eth: add missing of_node_put for_each_available_child_of_node performs an of_node_get on each iteration, so a break out of the loop requires an of_node_put. A simplified version of the semantic patch that fixes this problem is as follows (http://coccinelle.lip6.fr): // @@ expression root,e; local idexpression child; @@ for_each_available_child_of_node(root, child) { ... 
when != of_node_put(child) when != e = child ( return child; | + of_node_put(child); ? return ...; ) ... } // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mv643xx_eth.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index e893a35143c5..dfb6d5f79a10 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2817,8 +2817,10 @@ static int mv643xx_eth_shared_of_probe(struct platform_device *pdev) for_each_available_child_of_node(np, pnp) { ret = mv643xx_eth_shared_of_add_port(pdev, pnp); - if (ret) + if (ret) { + of_node_put(pnp); return ret; + } } return 0; } From c2229fe1430d4e1c70e36520229dd64a87802b20 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 27 Oct 2015 15:06:45 -0700 Subject: [PATCH 67/79] fib_trie: leaf_walk_rcu should not compute key if key is less than pn->key We were computing the child index in cases where the key value we were looking for was actually less than the base key of the tnode. As a result we were getting incorrect index values that would cause us to skip over some children. To fix this I have added a test that will force us to use child index 0 if the key we are looking for is less than the key of the current tnode. Fixes: 8be33e955cb9 ("fib_trie: Fib walk rcu should take a tnode and key instead of a trie and a leaf") Reported-by: Brian Rak Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 6c2af797f2f9..744e5936c10d 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1569,7 +1569,7 @@ static struct key_vector *leaf_walk_rcu(struct key_vector **tn, t_key key) do { /* record parent and next child index */ pn = n; - cindex = key ? get_index(key, pn) : 0; + cindex = (key > pn->key) ? get_index(key, pn) : 0; if (cindex >> pn->bits) break; From 74c16618137f1505b0a32dea3ec73a2ef6f8f842 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Sun, 25 Oct 2015 20:21:48 -0700 Subject: [PATCH 68/79] openvswitch: Fix double-free on ip_defrag() errors If ip_defrag() returns an error other than -EINPROGRESS, then the skb is freed. When handle_fragments() passes this back up to do_execute_actions(), it will be freed again. Prevent this double free by never freeing the skb in do_execute_actions() for errors returned by ovs_ct_execute. Always free it in ovs_ct_execute() error paths instead. Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action") Reported-by: Florian Westphal Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/actions.c | 4 ++-- net/openvswitch/conntrack.c | 17 +++++++++++++---- net/openvswitch/conntrack.h | 1 + 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 0bf0f406de52..dba635d086b2 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -1109,8 +1109,8 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, nla_data(a)); /* Hide stolen IP fragments from user space. */ - if (err == -EINPROGRESS) - return 0; + if (err) + return err == -EINPROGRESS ? 
0 : err; break; } diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index a5ec34f8502f..b5dcc0abde66 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -293,6 +293,9 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 proto) return helper->help(skb, protoff, ct, ctinfo); } +/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero + * value if 'skb' is freed. + */ static int handle_fragments(struct net *net, struct sw_flow_key *key, u16 zone, struct sk_buff *skb) { @@ -308,8 +311,8 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, return err; ovs_cb.mru = IPCB(skb)->frag_max_size; - } else if (key->eth.type == htons(ETH_P_IPV6)) { #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) + } else if (key->eth.type == htons(ETH_P_IPV6)) { enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; struct sk_buff *reasm; @@ -318,17 +321,18 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, if (!reasm) return -EINPROGRESS; - if (skb == reasm) + if (skb == reasm) { + kfree_skb(skb); return -EINVAL; + } key->ip.proto = ipv6_hdr(reasm)->nexthdr; skb_morph(skb, reasm); consume_skb(reasm); ovs_cb.mru = IP6CB(skb)->frag_max_size; -#else - return -EPFNOSUPPORT; #endif } else { + kfree_skb(skb); return -EPFNOSUPPORT; } @@ -473,6 +477,9 @@ static bool labels_nonzero(const struct ovs_key_ct_labels *labels) return false; } +/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero + * value if 'skb' is freed. + */ int ovs_ct_execute(struct net *net, struct sk_buff *skb, struct sw_flow_key *key, const struct ovs_conntrack_info *info) @@ -508,6 +515,8 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, &info->labels.mask); err: skb_push(skb, nh_ofs); + if (err) + kfree_skb(skb); return err; } diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h index 82e0dfc66028..a7544f405c16 100644 --- a/net/openvswitch/conntrack.h +++ b/net/openvswitch/conntrack.h @@ -67,6 +67,7 @@ static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb, struct sw_flow_key *key, const struct ovs_conntrack_info *info) { + kfree_skb(skb); return -ENOTSUPP; } From 190b8ffbb700a9aa47acc559779bc79c0cb14766 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Sun, 25 Oct 2015 20:21:49 -0700 Subject: [PATCH 69/79] ipv6: Export nf_ct_frag6_consume_orig() This is needed in openvswitch to fix an skb leak in the next patch. Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/ipv6/netfilter/nf_conntrack_reasm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 701cd2bae0a9..c7196ad1d69f 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -646,6 +646,7 @@ void nf_ct_frag6_consume_orig(struct sk_buff *skb) s = s2; } } +EXPORT_SYMBOL_GPL(nf_ct_frag6_consume_orig); static int nf_ct_net_init(struct net *net) { From 6f5cadee44d83395dcd78d557b577e1021e192e4 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Sun, 25 Oct 2015 20:21:50 -0700 Subject: [PATCH 70/79] openvswitch: Fix skb leak using IPv6 defrag nf_ct_frag6_gather() makes a clone of each skb passed to it, and if the reassembly is successful, expects the caller to free all of the original skbs using nf_ct_frag6_consume_orig(). This call was previously missing, meaning that the original fragments were never freed (with the exception of the last fragment to arrive). 
Fix this by ensuring that all original fragments except for the last fragment are freed via nf_ct_frag6_consume_orig(). The last fragment will be morphed into the head, so it must not be freed yet. Furthermore, retain the ->next pointer for the head after skb_morph(). Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action") Reported-by: Florian Westphal Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/conntrack.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index b5dcc0abde66..50095820edb7 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -326,8 +326,15 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, return -EINVAL; } + /* Don't free 'skb' even though it is one of the original + * fragments, as we're going to morph it into the head. + */ + skb_get(skb); + nf_ct_frag6_consume_orig(reasm); + key->ip.proto = ipv6_hdr(reasm)->nexthdr; skb_morph(skb, reasm); + skb->next = reasm->next; consume_skb(reasm); ovs_cb.mru = IP6CB(skb)->frag_max_size; #endif From 0b7c874348ea14ec3c358fe95e56d6f830540248 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 26 Oct 2015 12:24:22 -0400 Subject: [PATCH 71/79] forcedeth: fix unilateral interrupt disabling in netpoll path Forcedeth currently uses disable_irq_lockdep and enable_irq_lockdep, which in some configurations simply call local_irq_disable. This causes errant warnings in the netpoll path as in netpoll_send_skb_on_dev, where we disable irqs using local_irq_save, leading to the following warning: WARNING: at net/core/netpoll.c:352 netpoll_send_skb_on_dev+0x243/0x250() (Not tainted) Hardware name: netpoll_send_skb_on_dev(): eth0 enabled interrupts in poll (nv_start_xmit_optimized+0x0/0x860 [forcedeth]) Modules linked in: netconsole(+) configfs ipv6 iptable_filter ip_tables ppdev parport_pc parport sg microcode serio_raw edac_core edac_mce_amd k8temp snd_hda_codec_realtek snd_hda_codec_generic forcedeth snd_hda_intel snd_hda_codec snd_hwdep snd_seq snd_seq_device snd_pcm snd_timer snd soundcore snd_page_alloc i2c_nforce2 i2c_core shpchp ext4 jbd2 mbcache sr_mod cdrom sd_mod crc_t10dif pata_amd ata_generic pata_acpi sata_nv dm_mirror dm_region_hash dm_log dm_mod [last unloaded: scsi_wait_scan] Pid: 1940, comm: modprobe Not tainted 2.6.32-573.7.1.el6.x86_64.debug #1 Call Trace: [] ? warn_slowpath_common+0x91/0xe0 [] ? warn_slowpath_fmt+0x46/0x60 [] ? nv_start_xmit_optimized+0x0/0x860 [forcedeth] [] ? netpoll_send_skb_on_dev+0x243/0x250 [] ? netpoll_send_udp+0x229/0x270 [] ? write_msg+0x39/0x110 [netconsole] [] ? write_msg+0xbb/0x110 [netconsole] [] ? __call_console_drivers+0x75/0x90 [] ? _call_console_drivers+0x4a/0x80 [] ? release_console_sem+0xe5/0x250 [] ? register_console+0x190/0x3e0 [] ? init_netconsole+0x1a6/0x216 [netconsole] [] ? init_netconsole+0x0/0x216 [netconsole] [] ? do_one_initcall+0xc0/0x280 [] ? sys_init_module+0xe3/0x260 [] ? system_call_fastpath+0x16/0x1b ---[ end trace f349c7af88e6a6d5 ]--- console [netcon0] enabled netconsole: network logging started Fix it by modifying the forcedeth code to use disable_irq_nosync_lockdep_irqsave instead, which saves and restores irq state properly. This also saves us a little code in the process. Tested by the reporter, with successful results. Patch applies to the head of the net tree. Signed-off-by: Neil Horman CC: "David S. Miller" Reported-by: Vasily Averin Signed-off-by: David S.
Miller --- drivers/net/ethernet/nvidia/forcedeth.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index a41bb5e6b954..75e88f4c1531 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -4076,6 +4076,8 @@ static void nv_do_nic_poll(unsigned long data) struct fe_priv *np = netdev_priv(dev); u8 __iomem *base = get_hwbase(dev); u32 mask = 0; + unsigned long flags; + unsigned int irq = 0; /* * First disable irq(s) and then @@ -4085,25 +4087,27 @@ static void nv_do_nic_poll(unsigned long data) if (!using_multi_irqs(dev)) { if (np->msi_flags & NV_MSI_X_ENABLED) - disable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector); + irq = np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector; else - disable_irq_lockdep(np->pci_dev->irq); + irq = np->pci_dev->irq; mask = np->irqmask; } else { if (np->nic_poll_irq & NVREG_IRQ_RX_ALL) { - disable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector); + irq = np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector; mask |= NVREG_IRQ_RX_ALL; } if (np->nic_poll_irq & NVREG_IRQ_TX_ALL) { - disable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector); + irq = np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector; mask |= NVREG_IRQ_TX_ALL; } if (np->nic_poll_irq & NVREG_IRQ_OTHER) { - disable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector); + irq = np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector; mask |= NVREG_IRQ_OTHER; } } - /* disable_irq() contains synchronize_irq, thus no irq handler can run now */ + + disable_irq_nosync_lockdep_irqsave(irq, &flags); + synchronize_irq(irq); if (np->recover_error) { np->recover_error = 0; @@ -4156,28 +4160,22 @@ static void nv_do_nic_poll(unsigned long data) nv_nic_irq_optimized(0, dev); else nv_nic_irq(0, dev); - if (np->msi_flags & NV_MSI_X_ENABLED) - enable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector); - else - enable_irq_lockdep(np->pci_dev->irq); } else { if (np->nic_poll_irq & NVREG_IRQ_RX_ALL) { np->nic_poll_irq &= ~NVREG_IRQ_RX_ALL; nv_nic_irq_rx(0, dev); - enable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector); } if (np->nic_poll_irq & NVREG_IRQ_TX_ALL) { np->nic_poll_irq &= ~NVREG_IRQ_TX_ALL; nv_nic_irq_tx(0, dev); - enable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector); } if (np->nic_poll_irq & NVREG_IRQ_OTHER) { np->nic_poll_irq &= ~NVREG_IRQ_OTHER; nv_nic_irq_other(0, dev); - enable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector); } } + enable_irq_lockdep_irqrestore(irq, &flags); } #ifdef CONFIG_NET_POLL_CONTROLLER From 8ce675ff39b9958d1c10f86cf58e357efaafc856 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Mon, 26 Oct 2015 12:46:37 -0400 Subject: [PATCH 72/79] RDS-TCP: Recover correctly from pskb_pull()/pksb_trim() failure in rds_tcp_data_recv Either of pskb_pull() or pskb_trim() may fail under low memory conditions. If rds_tcp_data_recv() ignores such failures, the application will receive corrupted data because the skb has not been correctly carved to the RDS datagram size. Avoid this by handling pskb_pull/pskb_trim failure in the same manner as the skb_clone failure: bail out of rds_tcp_data_recv(), and retry via the deferred call to rds_send_worker() that gets set up on ENOMEM from rds_tcp_read_sock() Signed-off-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Signed-off-by: David S. 
Miller --- net/rds/tcp_recv.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index fbc5ef88bc0e..27a992154804 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -214,8 +214,15 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, } to_copy = min(tc->t_tinc_data_rem, left); - pskb_pull(clone, offset); - pskb_trim(clone, to_copy); + if (!pskb_pull(clone, offset) || + pskb_trim(clone, to_copy)) { + pr_warn("rds_tcp_data_recv: pull/trim failed " + "left %zu data_rem %zu skb_len %d\n", + left, tc->t_tinc_data_rem, skb->len); + kfree_skb(clone); + desc->error = -ENOMEM; + goto out; + } skb_queue_tail(&tinc->ti_skb_list, clone); rdsdebug("skb %p data %p len %d off %u to_copy %zu -> " From 20986ed826cbb36bb8f2d77f872e3c52d8d30647 Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Mon, 26 Oct 2015 17:13:54 -0500 Subject: [PATCH 73/79] amd-xgbe: Fix race between access of desc and desc index During Tx cleanup it's still possible for the descriptor data to be read ahead of the descriptor index. A memory barrier is required between the read of the descriptor index and the start of the Tx cleanup loop. This allows a change to a lighter-weight barrier in the Tx transmit routine just before updating the current descriptor index. Since the memory barrier does result in extra overhead on arm64, keep the previous change to not chase the current descriptor value. This prevents the execution of the barrier for each loop performed. Suggested-by: Alexander Duyck Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 2 +- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index e9ab8b9f3b9c..f672dba345f7 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -1595,7 +1595,7 @@ static void xgbe_dev_xmit(struct xgbe_channel *channel) packet->rdesc_count, 1); /* Make sure ownership is written to the descriptor */ - wmb(); + smp_wmb(); ring->cur = cur_index + 1; if (!packet->skb->xmit_more || diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index d2b77d985441..dde0486667e0 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1816,6 +1816,10 @@ static int xgbe_tx_poll(struct xgbe_channel *channel) return 0; cur = ring->cur; + + /* Be sure we get ring->cur before accessing descriptor data */ + smp_rmb(); + txq = netdev_get_tx_queue(netdev, channel->queue_index); while ((processed < XGBE_TX_DESC_MAX_PROC) && From 85ff8a43f39fa6d2f970b5e1e5c03df87abde242 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Mon, 26 Oct 2015 17:02:19 -0700 Subject: [PATCH 74/79] bpf: sample: define aarch64 specific registers Define aarch64 specific registers for building bpf samples correctly. Signed-off-by: Yang Shi Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- samples/bpf/bpf_helpers.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index 3a44d3a272af..af44e564d6dd 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -86,5 +86,17 @@ static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flag #define PT_REGS_RC(x) ((x)->gprs[2]) #define PT_REGS_SP(x) ((x)->gprs[15]) +#elif defined(__aarch64__) + +#define PT_REGS_PARM1(x) ((x)->regs[0]) +#define PT_REGS_PARM2(x) ((x)->regs[1]) +#define PT_REGS_PARM3(x) ((x)->regs[2]) +#define PT_REGS_PARM4(x) ((x)->regs[3]) +#define PT_REGS_PARM5(x) ((x)->regs[4]) +#define PT_REGS_RET(x) ((x)->regs[30]) +#define PT_REGS_FP(x) ((x)->regs[29]) /* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_RC(x) ((x)->regs[0]) +#define PT_REGS_SP(x) ((x)->sp) + #endif #endif From e407f39afdc0741dcf20aed100b8e738ccab7cb1 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 27 Oct 2015 11:37:39 +0200 Subject: [PATCH 75/79] vhost: fix performance on LE hosts commit 2751c9882b947292fcfb084c4f604e01724af804 ("vhost: cross-endian support for legacy devices") introduced a minor regression: even with cross-endian disabled, and even on an LE host, vhost_is_little_endian() is checking the is_le flag so there's always a branch. To fix, simply check virtio_legacy_is_little_endian() first. Cc: Greg Kurz Signed-off-by: Michael S. Tsirkin Reviewed-by: Greg Kurz Signed-off-by: David S. Miller --- drivers/vhost/vhost.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 4772862b71a7..d3f767448a72 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -183,10 +183,17 @@ static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit) return vq->acked_features & (1ULL << bit); } +#ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY static inline bool vhost_is_little_endian(struct vhost_virtqueue *vq) { return vq->is_le; } +#else +static inline bool vhost_is_little_endian(struct vhost_virtqueue *vq) +{ + return virtio_legacy_is_little_endian() || vq->is_le; +} +#endif /* Memory accessors */ static inline u16 vhost16_to_cpu(struct vhost_virtqueue *vq, __virtio16 val) From 092bf0fc80f5fb7928244ad63d8a2a8df8a72a3e Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 27 Oct 2015 17:36:19 +0200 Subject: [PATCH 76/79] net/mlx4_en: Explicitly set no vlan tags in WQE ctrl segment when no vlan is present We do not set the ins_vlan field to zero when no vlan id is present in the packet. Since WQEs in the TX ring are not zeroed out between uses, this oversight could result in having vlan flags present in the WQE ctrl segment when no vlan is present. Fixes: e38af4faf01d ('net/mlx4_en: Add support for hardware accelerated 802.1ad vlan') Reported-by: Gideon Naim Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 494e7762fdb1..4421bf5463f6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -964,6 +964,8 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_SVLAN; else if (vlan_proto == ETH_P_8021Q) tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_CVLAN; + else + tx_desc->ctrl.ins_vlan = 0; tx_desc->ctrl.fence_size = real_size; From c02b05011fadf8e409e41910217ca689f2fc9d91 Mon Sep 17 00:00:00 2001 From: Carol L Soto Date: Tue, 27 Oct 2015 17:36:20 +0200 Subject: [PATCH 77/79] net/mlx4: Copy/set only sizeof struct mlx4_eqe bytes When doing memcpy/memset of EQEs, we should use sizeof struct mlx4_eqe as the base size and not caps.eqe_size, which could be bigger. If caps.eqe_size is bigger than the struct mlx4_eqe then we corrupt data in the master context. When using a 64 byte stride, the memcpy copied over 63 bytes to the slave_eq structure. This resulted in copying over the entire eqe of interest, including its ownership bit -- and also 31 bytes of garbage into the next WQE in the slave EQ -- which did NOT include the ownership bit (and therefore had no impact). However, once the stride is increased to 128, we are overwriting the ownership bits of *three* eqes in the slave_eq struct. This results in an incorrect ownership bit for those eqes, which causes the eq to seem to be full. The issue therefore surfaced only once 128-byte EQEs started being used in SRIOV (on architectures that have 128/256 byte cache-lines such as PPC) - e.g. after commit 77507aa249ae "net/mlx4_core: Enable CQE/EQE stride support". Fixes: 08ff32352d6f ('mlx4: 64-byte CQE/EQE support') Signed-off-by: Carol L Soto Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 2 +- drivers/net/ethernet/mellanox/mlx4/eq.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 0a3202047569..2177e56ed0be 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2398,7 +2398,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) } } - memset(&priv->mfunc.master.cmd_eqe, 0, dev->caps.eqe_size); + memset(&priv->mfunc.master.cmd_eqe, 0, sizeof(struct mlx4_eqe)); priv->mfunc.master.cmd_eqe.type = MLX4_EVENT_TYPE_CMD; INIT_WORK(&priv->mfunc.master.comm_work, mlx4_master_comm_channel); diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index c34488479365..603d1c3d3b2e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -196,7 +196,7 @@ static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe) return; } - memcpy(s_eqe, eqe, dev->caps.eqe_size - 1); + memcpy(s_eqe, eqe, sizeof(struct mlx4_eqe) - 1); s_eqe->slave_id = slave; /* ensure all information is written before setting the ownersip bit */ dma_wmb(); From 1e0d69a9cc9172d7896c2113f983a74f6e8ff303 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 28 Oct 2015 13:21:03 +0100 Subject: [PATCH 78/79] Revert "Merge branch 'ipv6-overflow-arith'" Linus dislikes these changes.
To not hold up the net-merge let's revert it for now and fix the bug like Linus suggested. This reverts commit ec3661b42257d9a06cf0d318175623ac7a660113, reversing changes made to c80dbe04612986fd6104b4a1be21681b113b5ac9. Cc: Linus Torvalds Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/compiler-gcc.h | 4 ---- include/linux/overflow-arith.h | 18 ------------------ net/ipv6/ip6_output.c | 6 +----- 3 files changed, 1 insertion(+), 27 deletions(-) delete mode 100644 include/linux/overflow-arith.h diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 82c159e0532a..dfaa7b3e9ae9 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -237,10 +237,6 @@ #define KASAN_ABI_VERSION 3 #endif -#if GCC_VERSION >= 50000 -#define CC_HAVE_BUILTIN_OVERFLOW -#endif - #endif /* gcc version >= 40000 specific checks */ #if !defined(__noclone) diff --git a/include/linux/overflow-arith.h b/include/linux/overflow-arith.h deleted file mode 100644 index e12ccf854a70..000000000000 --- a/include/linux/overflow-arith.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once - -#include - -#ifdef CC_HAVE_BUILTIN_OVERFLOW - -#define overflow_usub __builtin_usub_overflow - -#else - -static inline bool overflow_usub(unsigned int a, unsigned int b, - unsigned int *res) -{ - *res = a - b; - return *res > a ? true : false; -} - -#endif diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 8dddb45c433e..d03d6da772f3 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -28,7 +28,6 @@ #include #include -#include #include #include #include @@ -585,10 +584,7 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, if (np->frag_size) mtu = np->frag_size; } - - if (overflow_usub(mtu, hlen + sizeof(struct frag_hdr), &mtu) || - mtu <= 7) - goto fail_toobig; + mtu -= hlen + sizeof(struct frag_hdr); frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr, &ipv6_hdr(skb)->saddr); From 89bc7848a91bc99532f5c21b2885472ba710f249 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 28 Oct 2015 13:21:04 +0100 Subject: [PATCH 79/79] ipv6: protect mtu calculation of wrap-around and infinite loop by rounding issues Raw sockets with hdrincl enabled can insert ipv6 extension headers right into the data stream. In case we need to fragment those packets, we reparse the options header to find the place where we can insert the fragment header. If the extension headers exceed the link's MTU we actually cannot make progress in such a case. Instead of ending up in broken arithmetic or rounding towards 0 and entering an endless loop in ip6_fragment, just prevent those cases by aborting early and signal -EMSGSIZE to user space. This is the second version of the patch which doesn't use the overflow_usub function, which got reverted for now. Suggested-by: Linus Torvalds Cc: Linus Torvalds Reported-by: Dmitry Vyukov Cc: Dmitry Vyukov Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d03d6da772f3..f84ec4e9b2de 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -584,6 +584,8 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, if (np->frag_size) mtu = np->frag_size; } + if (mtu < hlen + sizeof(struct frag_hdr) + 8) + goto fail_toobig; mtu -= hlen + sizeof(struct frag_hdr); frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,