From 397f385bdba6cdf7752467a7ae81810340929e44 Mon Sep 17 00:00:00 2001 From: Bruno Randolf Date: Wed, 19 May 2010 10:30:49 +0900 Subject: [PATCH 01/30] ath5k: wake queues on reset We can wake all queues after a chip reset since everything should be set up and we are ready to transmit. If we don't do that we might end up starting up with stopped queues, not beeing able to transmit. (This started to happen after "ath5k: clean up queue manipulation" but since periodic calibration also stopped and started the queues this effect was hidden most of the time). This way we can also get rid of the superfluous ath5k_reset_wake() function. Signed-off-by: Bruno Randolf Acked-by: Nick Kossifidis Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath5k/base.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c index cc6d41dec332..2978359c4366 100644 --- a/drivers/net/wireless/ath/ath5k/base.c +++ b/drivers/net/wireless/ath/ath5k/base.c @@ -222,7 +222,6 @@ static int ath5k_tx(struct ieee80211_hw *hw, struct sk_buff *skb); static int ath5k_tx_queue(struct ieee80211_hw *hw, struct sk_buff *skb, struct ath5k_txq *txq); static int ath5k_reset(struct ath5k_softc *sc, struct ieee80211_channel *chan); -static int ath5k_reset_wake(struct ath5k_softc *sc); static int ath5k_start(struct ieee80211_hw *hw); static void ath5k_stop(struct ieee80211_hw *hw); static int ath5k_add_interface(struct ieee80211_hw *hw, @@ -2770,7 +2769,7 @@ ath5k_tasklet_reset(unsigned long data) { struct ath5k_softc *sc = (void *)data; - ath5k_reset_wake(sc); + ath5k_reset(sc, sc->curchan); } /* @@ -2941,23 +2940,13 @@ ath5k_reset(struct ath5k_softc *sc, struct ieee80211_channel *chan) ath5k_beacon_config(sc); /* intrs are enabled by ath5k_beacon_config */ + ieee80211_wake_queues(sc->hw); + return 0; err: return ret; } -static int -ath5k_reset_wake(struct ath5k_softc *sc) -{ - int ret; - - ret = ath5k_reset(sc, sc->curchan); - if (!ret) - ieee80211_wake_queues(sc->hw); - - return ret; -} - static int ath5k_start(struct ieee80211_hw *hw) { return ath5k_init(hw->priv); From 8ae5977ff95c03fe6c36a5721c57dcb4bfe4f290 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 30 May 2010 14:52:58 +0200 Subject: [PATCH 02/30] mac80211: fix blockack-req processing Daniel reported that the paged RX changes had broken blockack request frame processing due to using data that wasn't really part of the skb data. Fix this using skb_copy_bits() for the needed data. As a side effect, this adds a check on processing too short frames, which previously this code could do. Reported-by: Daniel Halperin Signed-off-by: Johannes Berg Acked-by: Daniel Halperin Signed-off-by: John W. Linville --- net/mac80211/rx.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6e2a7bcd8cb8..5e0b65406c44 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1818,17 +1818,26 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) return RX_CONTINUE; if (ieee80211_is_back_req(bar->frame_control)) { + struct { + __le16 control, start_seq_num; + } __packed bar_data; + if (!rx->sta) return RX_DROP_MONITOR; + + if (skb_copy_bits(skb, offsetof(struct ieee80211_bar, control), + &bar_data, sizeof(bar_data))) + return RX_DROP_MONITOR; + spin_lock(&rx->sta->lock); - tid = le16_to_cpu(bar->control) >> 12; + tid = le16_to_cpu(bar_data.control) >> 12; if (!rx->sta->ampdu_mlme.tid_active_rx[tid]) { spin_unlock(&rx->sta->lock); return RX_DROP_MONITOR; } tid_agg_rx = rx->sta->ampdu_mlme.tid_rx[tid]; - start_seq_num = le16_to_cpu(bar->start_seq_num) >> 4; + start_seq_num = le16_to_cpu(bar_data.start_seq_num) >> 4; /* reset session timer */ if (tid_agg_rx->timeout) From 51a0d38de26226f2779912d92f155b93d539da9a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 31 May 2010 12:00:12 +0200 Subject: [PATCH 03/30] mac80211: fix dialog token allocator The dialog token allocator has apparently been broken since b83f4e15 ("mac80211: fix deadlock in sta->lock") because it got moved out under the spinlock. Fix it. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-tx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index c163d0a149f4..98258b7341e3 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -332,14 +332,16 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) IEEE80211_QUEUE_STOP_REASON_AGGREGATION); spin_unlock(&local->ampdu_lock); - spin_unlock_bh(&sta->lock); - /* send an addBA request */ + /* prepare tid data */ sta->ampdu_mlme.dialog_token_allocator++; sta->ampdu_mlme.tid_tx[tid]->dialog_token = sta->ampdu_mlme.dialog_token_allocator; sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num; + spin_unlock_bh(&sta->lock); + + /* send AddBA request */ ieee80211_send_addba_request(sdata, pubsta->addr, tid, sta->ampdu_mlme.tid_tx[tid]->dialog_token, sta->ampdu_mlme.tid_tx[tid]->ssn, From fafeeb6c80e3842c6dc19d05de09a23f23eef0d8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 1 Jun 2010 10:04:49 +0000 Subject: [PATCH 04/30] xfrm: force a dst reference in __xfrm_route_forward() Packets going through __xfrm_route_forward() have a not refcounted dst entry, since we enabled a noref forwarding path. xfrm_lookup() might incorrectly release this dst entry. It's a bit late to make invasive changes in xfrm_lookup(), so lets force a refcount in this path. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d965a2bad8d3..4bf27d901333 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2153,6 +2153,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) return 0; } + skb_dst_force(skb); dst = skb_dst(skb); res = xfrm_lookup(net, &dst, &fl, NULL, 0) == 0; From f048fa9c8686119c3858a463cab6121dced7c0bf Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 1 Jun 2010 15:05:36 +0000 Subject: [PATCH 05/30] bnx2: Fix hang during rmmod bnx2. The regression is caused by: commit 4327ba435a56ada13eedf3eb332e583c7a0586a9 bnx2: Fix netpoll crash. If ->open() and ->close() are called multiple times, the same napi structs will be added to dev->napi_list multiple times, corrupting the dev->napi_list. This causes free_netdev() to hang during rmmod. We fix this by calling netif_napi_del() during ->close(). Also, bnx2_init_napi() must not be in the __devinit section since it is called by ->open(). Signed-off-by: Michael Chan Signed-off-by: Benjamin Li Signed-off-by: David S. Miller --- drivers/net/bnx2.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 188e356c30a3..949d7a9dcf92 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -247,6 +247,7 @@ static const struct flash_spec flash_5709 = { MODULE_DEVICE_TABLE(pci, bnx2_pci_tbl); static void bnx2_init_napi(struct bnx2 *bp); +static void bnx2_del_napi(struct bnx2 *bp); static inline u32 bnx2_tx_avail(struct bnx2 *bp, struct bnx2_tx_ring_info *txr) { @@ -6270,6 +6271,7 @@ open_err: bnx2_free_skbs(bp); bnx2_free_irq(bp); bnx2_free_mem(bp); + bnx2_del_napi(bp); return rc; } @@ -6537,6 +6539,7 @@ bnx2_close(struct net_device *dev) bnx2_free_irq(bp); bnx2_free_skbs(bp); bnx2_free_mem(bp); + bnx2_del_napi(bp); bp->link_up = 0; netif_carrier_off(bp->dev); bnx2_set_power_state(bp, PCI_D3hot); @@ -8227,7 +8230,16 @@ bnx2_bus_string(struct bnx2 *bp, char *str) return str; } -static void __devinit +static void +bnx2_del_napi(struct bnx2 *bp) +{ + int i; + + for (i = 0; i < bp->irq_nvecs; i++) + netif_napi_del(&bp->bnx2_napi[i].napi); +} + +static void bnx2_init_napi(struct bnx2 *bp) { int i; From 08f382ebb8a9efb898840aa74cf55148c7a98af6 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Tue, 1 Jun 2010 08:59:33 +0000 Subject: [PATCH 06/30] enic: bug fix: make the set/get netlink VF_PORT support symmetrical To make get/set netlink VF_PORT truly symmetrical, we need to keep track of what items are set and only return those items on get. Previously, the driver wasn't differentiating between a set of attr with a NULL string, for example, and not setting the attr at all. We only want to return the NULL string if the attr was actually set with a NULL string. Otherwise, don't return the attr. Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- drivers/net/enic/enic.h | 7 ++ drivers/net/enic/enic_main.c | 206 +++++++++++++++++------------------ 2 files changed, 107 insertions(+), 106 deletions(-) diff --git a/drivers/net/enic/enic.h b/drivers/net/enic/enic.h index 85f2a2e7030a..45e86d1e5b1b 100644 --- a/drivers/net/enic/enic.h +++ b/drivers/net/enic/enic.h @@ -74,7 +74,14 @@ struct enic_msix_entry { void *devid; }; +#define ENIC_SET_APPLIED (1 << 0) +#define ENIC_SET_REQUEST (1 << 1) +#define ENIC_SET_NAME (1 << 2) +#define ENIC_SET_INSTANCE (1 << 3) +#define ENIC_SET_HOST (1 << 4) + struct enic_port_profile { + u32 set; u8 request; char name[PORT_PROFILE_MAX]; u8 instance_uuid[PORT_UUID_MAX]; diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c index 6586b5c7e4b6..bc7d6b96de3d 100644 --- a/drivers/net/enic/enic_main.c +++ b/drivers/net/enic/enic_main.c @@ -1029,8 +1029,7 @@ static int enic_dev_init_done(struct enic *enic, int *done, int *error) return err; } -static int enic_set_port_profile(struct enic *enic, u8 request, u8 *mac, - char *name, u8 *instance_uuid, u8 *host_uuid) +static int enic_set_port_profile(struct enic *enic, u8 *mac) { struct vic_provinfo *vp; u8 oui[3] = VIC_PROVINFO_CISCO_OUI; @@ -1040,97 +1039,112 @@ static int enic_set_port_profile(struct enic *enic, u8 request, u8 *mac, "%02X%02X-%02X%02X%02X%02X%0X%02X"; int err; - if (!name) - return -EINVAL; - - if (!is_valid_ether_addr(mac)) - return -EADDRNOTAVAIL; - - vp = vic_provinfo_alloc(GFP_KERNEL, oui, VIC_PROVINFO_LINUX_TYPE); - if (!vp) - return -ENOMEM; - - vic_provinfo_add_tlv(vp, - VIC_LINUX_PROV_TLV_PORT_PROFILE_NAME_STR, - strlen(name) + 1, name); - - vic_provinfo_add_tlv(vp, - VIC_LINUX_PROV_TLV_CLIENT_MAC_ADDR, - ETH_ALEN, mac); - - if (instance_uuid) { - uuid = instance_uuid; - sprintf(uuid_str, uuid_fmt, - uuid[0], uuid[1], uuid[2], uuid[3], - uuid[4], uuid[5], uuid[6], uuid[7], - uuid[8], uuid[9], uuid[10], uuid[11], - uuid[12], uuid[13], uuid[14], uuid[15]); - vic_provinfo_add_tlv(vp, - VIC_LINUX_PROV_TLV_CLIENT_UUID_STR, - sizeof(uuid_str), uuid_str); - } - - if (host_uuid) { - uuid = host_uuid; - sprintf(uuid_str, uuid_fmt, - uuid[0], uuid[1], uuid[2], uuid[3], - uuid[4], uuid[5], uuid[6], uuid[7], - uuid[8], uuid[9], uuid[10], uuid[11], - uuid[12], uuid[13], uuid[14], uuid[15]); - vic_provinfo_add_tlv(vp, - VIC_LINUX_PROV_TLV_HOST_UUID_STR, - sizeof(uuid_str), uuid_str); - } - err = enic_vnic_dev_deinit(enic); if (err) - goto err_out; + return err; - memset(&enic->pp, 0, sizeof(enic->pp)); + switch (enic->pp.request) { - err = enic_dev_init_prov(enic, vp); - if (err) - goto err_out; + case PORT_REQUEST_ASSOCIATE: - enic->pp.request = request; - memcpy(enic->pp.name, name, PORT_PROFILE_MAX); - if (instance_uuid) - memcpy(enic->pp.instance_uuid, - instance_uuid, PORT_UUID_MAX); - if (host_uuid) - memcpy(enic->pp.host_uuid, - host_uuid, PORT_UUID_MAX); + if (!(enic->pp.set & ENIC_SET_NAME) || !strlen(enic->pp.name)) + return -EINVAL; -err_out: - vic_provinfo_free(vp); + if (!is_valid_ether_addr(mac)) + return -EADDRNOTAVAIL; - return err; -} + vp = vic_provinfo_alloc(GFP_KERNEL, oui, + VIC_PROVINFO_LINUX_TYPE); + if (!vp) + return -ENOMEM; -static int enic_unset_port_profile(struct enic *enic) -{ - memset(&enic->pp, 0, sizeof(enic->pp)); - return enic_vnic_dev_deinit(enic); + vic_provinfo_add_tlv(vp, + VIC_LINUX_PROV_TLV_PORT_PROFILE_NAME_STR, + strlen(enic->pp.name) + 1, enic->pp.name); + + vic_provinfo_add_tlv(vp, + VIC_LINUX_PROV_TLV_CLIENT_MAC_ADDR, + ETH_ALEN, mac); + + if (enic->pp.set & ENIC_SET_INSTANCE) { + uuid = enic->pp.instance_uuid; + sprintf(uuid_str, uuid_fmt, + uuid[0], uuid[1], uuid[2], uuid[3], + uuid[4], uuid[5], uuid[6], uuid[7], + uuid[8], uuid[9], uuid[10], uuid[11], + uuid[12], uuid[13], uuid[14], uuid[15]); + vic_provinfo_add_tlv(vp, + VIC_LINUX_PROV_TLV_CLIENT_UUID_STR, + sizeof(uuid_str), uuid_str); + } + + if (enic->pp.set & ENIC_SET_HOST) { + uuid = enic->pp.host_uuid; + sprintf(uuid_str, uuid_fmt, + uuid[0], uuid[1], uuid[2], uuid[3], + uuid[4], uuid[5], uuid[6], uuid[7], + uuid[8], uuid[9], uuid[10], uuid[11], + uuid[12], uuid[13], uuid[14], uuid[15]); + vic_provinfo_add_tlv(vp, + VIC_LINUX_PROV_TLV_HOST_UUID_STR, + sizeof(uuid_str), uuid_str); + } + + err = enic_dev_init_prov(enic, vp); + vic_provinfo_free(vp); + if (err) + return err; + break; + + case PORT_REQUEST_DISASSOCIATE: + break; + + default: + return -EINVAL; + } + + enic->pp.set |= ENIC_SET_APPLIED; + return 0; } static int enic_set_vf_port(struct net_device *netdev, int vf, struct nlattr *port[]) { struct enic *enic = netdev_priv(netdev); - char *name = NULL; - u8 *instance_uuid = NULL; - u8 *host_uuid = NULL; - u8 request = PORT_REQUEST_DISASSOCIATE; + + memset(&enic->pp, 0, sizeof(enic->pp)); + + if (port[IFLA_PORT_REQUEST]) { + enic->pp.set |= ENIC_SET_REQUEST; + enic->pp.request = nla_get_u8(port[IFLA_PORT_REQUEST]); + } + + if (port[IFLA_PORT_PROFILE]) { + enic->pp.set |= ENIC_SET_NAME; + memcpy(enic->pp.name, nla_data(port[IFLA_PORT_PROFILE]), + PORT_PROFILE_MAX); + } + + if (port[IFLA_PORT_INSTANCE_UUID]) { + enic->pp.set |= ENIC_SET_INSTANCE; + memcpy(enic->pp.instance_uuid, + nla_data(port[IFLA_PORT_INSTANCE_UUID]), PORT_UUID_MAX); + } + + if (port[IFLA_PORT_HOST_UUID]) { + enic->pp.set |= ENIC_SET_HOST; + memcpy(enic->pp.host_uuid, + nla_data(port[IFLA_PORT_HOST_UUID]), PORT_UUID_MAX); + } /* don't support VFs, yet */ if (vf != PORT_SELF_VF) return -EOPNOTSUPP; - if (port[IFLA_PORT_REQUEST]) - request = nla_get_u8(port[IFLA_PORT_REQUEST]); + if (!(enic->pp.set & ENIC_SET_REQUEST)) + return -EOPNOTSUPP; - switch (request) { - case PORT_REQUEST_ASSOCIATE: + if (enic->pp.request == PORT_REQUEST_ASSOCIATE) { /* If the interface mac addr hasn't been assigned, * assign a random mac addr before setting port- @@ -1139,30 +1153,9 @@ static int enic_set_vf_port(struct net_device *netdev, int vf, if (is_zero_ether_addr(netdev->dev_addr)) random_ether_addr(netdev->dev_addr); - - if (port[IFLA_PORT_PROFILE]) - name = nla_data(port[IFLA_PORT_PROFILE]); - - if (port[IFLA_PORT_INSTANCE_UUID]) - instance_uuid = - nla_data(port[IFLA_PORT_INSTANCE_UUID]); - - if (port[IFLA_PORT_HOST_UUID]) - host_uuid = nla_data(port[IFLA_PORT_HOST_UUID]); - - return enic_set_port_profile(enic, request, - netdev->dev_addr, name, - instance_uuid, host_uuid); - - case PORT_REQUEST_DISASSOCIATE: - - return enic_unset_port_profile(enic); - - default: - break; } - return -EOPNOTSUPP; + return enic_set_port_profile(enic, netdev->dev_addr); } static int enic_get_vf_port(struct net_device *netdev, int vf, @@ -1172,14 +1165,12 @@ static int enic_get_vf_port(struct net_device *netdev, int vf, int err, error, done; u16 response = PORT_PROFILE_RESPONSE_SUCCESS; - /* don't support VFs, yet */ - if (vf != PORT_SELF_VF) - return -EOPNOTSUPP; + if (!(enic->pp.set & ENIC_SET_APPLIED)) + return -ENODATA; err = enic_dev_init_done(enic, &done, &error); - if (err) - return err; + error = err; switch (error) { case ERR_SUCCESS: @@ -1202,12 +1193,15 @@ static int enic_get_vf_port(struct net_device *netdev, int vf, NLA_PUT_U16(skb, IFLA_PORT_REQUEST, enic->pp.request); NLA_PUT_U16(skb, IFLA_PORT_RESPONSE, response); - NLA_PUT(skb, IFLA_PORT_PROFILE, PORT_PROFILE_MAX, - enic->pp.name); - NLA_PUT(skb, IFLA_PORT_INSTANCE_UUID, PORT_UUID_MAX, - enic->pp.instance_uuid); - NLA_PUT(skb, IFLA_PORT_HOST_UUID, PORT_UUID_MAX, - enic->pp.host_uuid); + if (enic->pp.set & ENIC_SET_NAME) + NLA_PUT(skb, IFLA_PORT_PROFILE, PORT_PROFILE_MAX, + enic->pp.name); + if (enic->pp.set & ENIC_SET_INSTANCE) + NLA_PUT(skb, IFLA_PORT_INSTANCE_UUID, PORT_UUID_MAX, + enic->pp.instance_uuid); + if (enic->pp.set & ENIC_SET_HOST) + NLA_PUT(skb, IFLA_PORT_HOST_UUID, PORT_UUID_MAX, + enic->pp.host_uuid); return 0; From 194dbcc8a1a97cbac9a619a563e5f6b7f7d5a485 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 12 May 2010 21:31:06 +0000 Subject: [PATCH 07/30] net: init_vlan should not copy slave or master flags The vlan device should not copy the slave or master flags from the real device. It is not in the bond until added nor is it a master. Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 55be90826f5f..529842677817 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -708,7 +708,8 @@ static int vlan_dev_init(struct net_device *dev) netif_carrier_off(dev); /* IFF_BROADCAST|IFF_MULTICAST; ??? */ - dev->flags = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI); + dev->flags = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | + IFF_MASTER | IFF_SLAVE); dev->iflink = real_dev->ifindex; dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT))) | From 2df4a0fa1540c460ec69788ab2a901cc72a75644 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 12 May 2010 21:31:11 +0000 Subject: [PATCH 08/30] net: fix conflict between null_or_orig and null_or_bond If a skb is received on an inactive bond that does not meet the special cases checked for by skb_bond_should_drop it should only be delivered to exact matches as the comment in netif_receive_skb() says. However because null_or_bond could also be null this is not always true. This patch renames null_or_bond to orig_or_bond and initializes it to orig_dev. This keeps the intent of null_or_bond to pass frames received on VLAN interfaces stacked on bonding interfaces without invalidating the statement for null_or_orig. Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 1845b08c624e..d03470f5260a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2795,7 +2795,7 @@ static int __netif_receive_skb(struct sk_buff *skb) struct net_device *orig_dev; struct net_device *master; struct net_device *null_or_orig; - struct net_device *null_or_bond; + struct net_device *orig_or_bond; int ret = NET_RX_DROP; __be16 type; @@ -2868,10 +2868,10 @@ ncls: * device that may have registered for a specific ptype. The * handler may have to adjust skb->dev and orig_dev. */ - null_or_bond = NULL; + orig_or_bond = orig_dev; if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) && (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) { - null_or_bond = vlan_dev_real_dev(skb->dev); + orig_or_bond = vlan_dev_real_dev(skb->dev); } type = skb->protocol; @@ -2879,7 +2879,7 @@ ncls: &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { if (ptype->type == type && (ptype->dev == null_or_orig || ptype->dev == skb->dev || ptype->dev == orig_dev || - ptype->dev == null_or_bond)) { + ptype->dev == orig_or_bond)) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; From ceb3d2394532540a52ce34f71e67c8d008913f79 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Sat, 29 May 2010 13:23:34 +0000 Subject: [PATCH 09/30] korina: fix deadlock on RX FIFO overrun By calling korina_restart(), the IRQ handler tries to disable the interrupt it's currently serving. This leads to a deadlock since disable_irq() waits for any running IRQ handlers to finish before returning. This patch addresses the issue by turning korina_restart() into a workqueue task, which is then scheduled when needed. Reproducing the deadlock is easily done using e.g. GNU netcat to send large amounts of UDP data to the host running this driver. Note that the same problem (and fix) applies to TX FIFO underruns, but apparently these are less easy to trigger. Signed-off-by: Phil Sutter Signed-off-by: David S. Miller --- drivers/net/korina.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/net/korina.c b/drivers/net/korina.c index 26bf1b76b997..13533f937e05 100644 --- a/drivers/net/korina.c +++ b/drivers/net/korina.c @@ -135,6 +135,7 @@ struct korina_private { struct napi_struct napi; struct timer_list media_check_timer; struct mii_if_info mii_if; + struct work_struct restart_task; struct net_device *dev; int phy_addr; }; @@ -890,12 +891,12 @@ static int korina_init(struct net_device *dev) /* * Restart the RC32434 ethernet controller. - * FIXME: check the return status where we call it */ -static int korina_restart(struct net_device *dev) +static void korina_restart_task(struct work_struct *work) { - struct korina_private *lp = netdev_priv(dev); - int ret; + struct korina_private *lp = container_of(work, + struct korina_private, restart_task); + struct net_device *dev = lp->dev; /* * Disable interrupts @@ -916,10 +917,9 @@ static int korina_restart(struct net_device *dev) napi_disable(&lp->napi); - ret = korina_init(dev); - if (ret < 0) { + if (korina_init(dev) < 0) { printk(KERN_ERR "%s: cannot restart device\n", dev->name); - return ret; + return; } korina_multicast_list(dev); @@ -927,8 +927,6 @@ static int korina_restart(struct net_device *dev) enable_irq(lp->ovr_irq); enable_irq(lp->tx_irq); enable_irq(lp->rx_irq); - - return ret; } static void korina_clear_and_restart(struct net_device *dev, u32 value) @@ -937,7 +935,7 @@ static void korina_clear_and_restart(struct net_device *dev, u32 value) netif_stop_queue(dev); writel(value, &lp->eth_regs->ethintfc); - korina_restart(dev); + schedule_work(&lp->restart_task); } /* Ethernet Tx Underflow interrupt */ @@ -962,11 +960,8 @@ static irqreturn_t korina_und_interrupt(int irq, void *dev_id) static void korina_tx_timeout(struct net_device *dev) { struct korina_private *lp = netdev_priv(dev); - unsigned long flags; - spin_lock_irqsave(&lp->lock, flags); - korina_restart(dev); - spin_unlock_irqrestore(&lp->lock, flags); + schedule_work(&lp->restart_task); } /* Ethernet Rx Overflow interrupt */ @@ -1086,6 +1081,8 @@ static int korina_close(struct net_device *dev) napi_disable(&lp->napi); + cancel_work_sync(&lp->restart_task); + free_irq(lp->rx_irq, dev); free_irq(lp->tx_irq, dev); free_irq(lp->ovr_irq, dev); @@ -1198,6 +1195,8 @@ static int korina_probe(struct platform_device *pdev) } setup_timer(&lp->media_check_timer, korina_poll_media, (unsigned long) dev); + INIT_WORK(&lp->restart_task, korina_restart_task); + printk(KERN_INFO "%s: " DRV_NAME "-" DRV_VERSION " " DRV_RELDATE "\n", dev->name); out: From 53ee490ac5836d506ea5830f821045aafa3c196f Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Sat, 29 May 2010 13:23:35 +0000 Subject: [PATCH 10/30] korina: use netdev_alloc_skb_ip_align() here, too This patch completes commit 89d71a66c40d629e3b1285def543ab1425558cd5 which missed this spot, as it seems. Signed-off-by: Phil Sutter Signed-off-by: David S. Miller --- drivers/net/korina.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/korina.c b/drivers/net/korina.c index 13533f937e05..3e9b6b7be42e 100644 --- a/drivers/net/korina.c +++ b/drivers/net/korina.c @@ -765,10 +765,9 @@ static int korina_alloc_ring(struct net_device *dev) /* Initialize the receive descriptors */ for (i = 0; i < KORINA_NUM_RDS; i++) { - skb = dev_alloc_skb(KORINA_RBSIZE + 2); + skb = netdev_alloc_skb_ip_align(dev, KORINA_RBSIZE); if (!skb) return -ENOMEM; - skb_reserve(skb, 2); lp->rx_skb[i] = skb; lp->rd_ring[i].control = DMA_DESC_IOD | DMA_COUNT(KORINA_RBSIZE); From b1011b375be106e0a312baafc981a26165283efe Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Sat, 29 May 2010 13:23:36 +0000 Subject: [PATCH 11/30] korina: count RX DMA OVR as rx_fifo_error This way, RX DMA overruns (actually being caused by overrun of the 512byte input FIFO) show up in ifconfig output. The rx_fifo_errors counter is unused otherwise. Signed-off-by: Phil Sutter Signed-off-by: David S. Miller --- drivers/net/korina.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/korina.c b/drivers/net/korina.c index 3e9b6b7be42e..c7a9bef4dfb0 100644 --- a/drivers/net/korina.c +++ b/drivers/net/korina.c @@ -376,7 +376,7 @@ static int korina_rx(struct net_device *dev, int limit) if (devcs & ETH_RX_LE) dev->stats.rx_length_errors++; if (devcs & ETH_RX_OVR) - dev->stats.rx_over_errors++; + dev->stats.rx_fifo_errors++; if (devcs & ETH_RX_CV) dev->stats.rx_frame_errors++; if (devcs & ETH_RX_CES) From e3fe8558c7fc182972c3d947d88744482111f304 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eric=20B=C3=A9nard?= Date: Wed, 2 Jun 2010 06:13:34 -0700 Subject: [PATCH 12/30] net/fec: fix pm to survive to suspend/resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * in the actual driver, calling fec_stop and fec_enet_init doesn't allow to have a working network interface at resume (where a ifconfig down and up is required to recover the interface) * by using fec_enet_close and fec_enet_open, this patch solves this problem and handle the case where the link changed between suspend and resume * this patch also disable clock at suspend and reenable it at resume Signed-off-by: Eric BĂ©nard Signed-off-by: David S. Miller --- drivers/net/fec.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/fec.c b/drivers/net/fec.c index ddf7a86cd466..edfff92a6d8e 100644 --- a/drivers/net/fec.c +++ b/drivers/net/fec.c @@ -1373,10 +1373,9 @@ fec_suspend(struct platform_device *dev, pm_message_t state) if (ndev) { fep = netdev_priv(ndev); - if (netif_running(ndev)) { - netif_device_detach(ndev); - fec_stop(ndev); - } + if (netif_running(ndev)) + fec_enet_close(ndev); + clk_disable(fep->clk); } return 0; } @@ -1385,12 +1384,13 @@ static int fec_resume(struct platform_device *dev) { struct net_device *ndev = platform_get_drvdata(dev); + struct fec_enet_private *fep; if (ndev) { - if (netif_running(ndev)) { - fec_enet_init(ndev, 0); - netif_device_attach(ndev); - } + fep = netdev_priv(ndev); + clk_enable(fep->clk); + if (netif_running(ndev)) + fec_enet_open(ndev); } return 0; } From 33c29dde7d04dc0ec0edb649d20ccf1351c13a06 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sat, 29 May 2010 14:26:59 +0000 Subject: [PATCH 13/30] act_nat: fix the wrong checksum when addr isn't in old_addr/mask fix the wrong checksum when addr isn't in old_addr/mask For TCP and UDP packets, when addr isn't in old_addr/mask we don't do SNAT or DNAT, and we should not update layer 4 checksum. Signed-off-by: Changli Gao ---- net/sched/act_nat.c | 4 ++++ 1 file changed, 4 insertions(+) Signed-off-by: David S. Miller --- net/sched/act_nat.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index d885ba311564..570949417f38 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -159,6 +159,9 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, iph->daddr = new_addr; csum_replace4(&iph->check, addr, new_addr); + } else if ((iph->frag_off & htons(IP_OFFSET)) || + iph->protocol != IPPROTO_ICMP) { + goto out; } ihl = iph->ihl * 4; @@ -247,6 +250,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, break; } +out: return action; drop: From edafe502404f3669d364b6e96d79b54067b634b4 Mon Sep 17 00:00:00 2001 From: Daniele Lacamera Date: Wed, 2 Jun 2010 02:02:04 +0000 Subject: [PATCH 14/30] TCP: tcp_hybla: Fix integer overflow in slow start increment For large values of rtt, 2^rho operation may overflow u32. Clamp down the increment to 2^16. Signed-off-by: Daniele Lacamera Signed-off-by: David S. Miller --- net/ipv4/tcp_hybla.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index c209e054a634..377bc9349371 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -126,8 +126,8 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * calculate 2^fract in a <<7 value. */ is_slowstart = 1; - increment = ((1 << ca->rho) * hybla_fraction(rho_fractions)) - - 128; + increment = ((1 << min(ca->rho, 16U)) * + hybla_fraction(rho_fractions)) - 128; } else { /* * congestion avoidance From fbc2e7d9cf49e0bf89b9e91fd60a06851a855c5d Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Wed, 2 Jun 2010 07:32:42 -0700 Subject: [PATCH 15/30] cls_u32: use skb_header_pointer() to dereference data safely use skb_header_pointer() to dereference data safely the original skb->data dereference isn't safe, as there isn't any skb->len or skb_is_nonlinear() check. skb_header_pointer() is used instead in this patch. And when the skb isn't long enough, we terminate the function u32_classify() immediately with -1. Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 45 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 96275422c619..4f522143811e 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -98,11 +98,11 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re { struct { struct tc_u_knode *knode; - u8 *ptr; + unsigned int off; } stack[TC_U32_MAXDEPTH]; struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root; - u8 *ptr = skb_network_header(skb); + unsigned int off = skb_network_offset(skb); struct tc_u_knode *n; int sdepth = 0; int off2 = 0; @@ -134,8 +134,14 @@ next_knode: #endif for (i = n->sel.nkeys; i>0; i--, key++) { + unsigned int toff; + __be32 *data, _data; - if ((*(__be32*)(ptr+key->off+(off2&key->offmask))^key->val)&key->mask) { + toff = off + key->off + (off2 & key->offmask); + data = skb_header_pointer(skb, toff, 4, &_data); + if (!data) + goto out; + if ((*data ^ key->val) & key->mask) { n = n->next; goto next_knode; } @@ -174,29 +180,45 @@ check_terminal: if (sdepth >= TC_U32_MAXDEPTH) goto deadloop; stack[sdepth].knode = n; - stack[sdepth].ptr = ptr; + stack[sdepth].off = off; sdepth++; ht = n->ht_down; sel = 0; - if (ht->divisor) - sel = ht->divisor&u32_hash_fold(*(__be32*)(ptr+n->sel.hoff), &n->sel,n->fshift); + if (ht->divisor) { + __be32 *data, _data; + data = skb_header_pointer(skb, off + n->sel.hoff, 4, + &_data); + if (!data) + goto out; + sel = ht->divisor & u32_hash_fold(*data, &n->sel, + n->fshift); + } if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT))) goto next_ht; if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) { off2 = n->sel.off + 3; - if (n->sel.flags&TC_U32_VAROFFSET) - off2 += ntohs(n->sel.offmask & *(__be16*)(ptr+n->sel.offoff)) >>n->sel.offshift; + if (n->sel.flags & TC_U32_VAROFFSET) { + __be16 *data, _data; + + data = skb_header_pointer(skb, + off + n->sel.offoff, + 2, &_data); + if (!data) + goto out; + off2 += ntohs(n->sel.offmask & *data) >> + n->sel.offshift; + } off2 &= ~3; } if (n->sel.flags&TC_U32_EAT) { - ptr += off2; + off += off2; off2 = 0; } - if (ptr < skb_tail_pointer(skb)) + if (off < skb->len) goto next_ht; } @@ -204,9 +226,10 @@ check_terminal: if (sdepth--) { n = stack[sdepth].knode; ht = n->ht_up; - ptr = stack[sdepth].ptr; + off = stack[sdepth].off; goto check_terminal; } +out: return -1; deadloop: From d42a8f464ba14467e5d45dc0eb8f789c82bd0679 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 1 Jun 2010 11:32:43 +0000 Subject: [PATCH 16/30] sfc: Get port number from CS_PORT_NUM, not PCI function number A single shared memory region used to communicate with firmware is mapped into both PCI PFs of the SFC9020 and SFL9021. Drivers must be able to identify which port they are addressing in order to use the correct sub-region. Currently we use the PCI function number, but the PCI address may be virtualised. Use the CS_PORT_NUM register field defined for just this purpose. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/sfc/net_driver.h | 4 +++- drivers/net/sfc/siena.c | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/sfc/net_driver.h b/drivers/net/sfc/net_driver.h index 2e6fd89f2a72..5fffd9abffde 100644 --- a/drivers/net/sfc/net_driver.h +++ b/drivers/net/sfc/net_driver.h @@ -645,6 +645,7 @@ union efx_multicast_hash { * struct efx_nic - an Efx NIC * @name: Device name (net device name or bus id before net device registered) * @pci_dev: The PCI device + * @port_num: Index of this host port within the controller * @type: Controller type attributes * @legacy_irq: IRQ number * @workqueue: Workqueue for port reconfigures and the HW monitor. @@ -728,6 +729,7 @@ union efx_multicast_hash { struct efx_nic { char name[IFNAMSIZ]; struct pci_dev *pci_dev; + unsigned port_num; const struct efx_nic_type *type; int legacy_irq; struct workqueue_struct *workqueue; @@ -830,7 +832,7 @@ static inline const char *efx_dev_name(struct efx_nic *efx) static inline unsigned int efx_port_num(struct efx_nic *efx) { - return PCI_FUNC(efx->pci_dev->devfn); + return efx->port_num; } /** diff --git a/drivers/net/sfc/siena.c b/drivers/net/sfc/siena.c index 727b4228e081..7ecd255a7cc0 100644 --- a/drivers/net/sfc/siena.c +++ b/drivers/net/sfc/siena.c @@ -206,6 +206,7 @@ static int siena_probe_nic(struct efx_nic *efx) { struct siena_nic_data *nic_data; bool already_attached = 0; + efx_oword_t reg; int rc; /* Allocate storage for hardware specific data */ @@ -220,6 +221,9 @@ static int siena_probe_nic(struct efx_nic *efx) goto fail1; } + efx_reado(efx, ®, FR_AZ_CS_DEBUG); + efx->port_num = EFX_OWORD_FIELD(reg, FRF_CZ_CS_PORT_NUM) - 1; + efx_mcdi_init(efx); /* Recover from a failed assertion before probing */ From 3b21b508ecc9e043839a5337563cfc77f9fcedb9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 2 Jun 2010 13:43:15 +0000 Subject: [PATCH 17/30] e1000e: change logical negate to bitwise The bitwise negate is intended here. With the logical negate the condition is always false. Signed-off-by: Dan Carpenter Acked-by: Bruce Allan Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/e1000e/netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index 24507f3b8b17..57a7e41da69e 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -2554,7 +2554,7 @@ static void e1000_init_manageability_pt(struct e1000_adapter *adapter) mdef = er32(MDEF(i)); /* Ignore filters with anything other than IPMI ports */ - if (mdef & !(E1000_MDEF_PORT_623 | E1000_MDEF_PORT_664)) + if (mdef & ~(E1000_MDEF_PORT_623 | E1000_MDEF_PORT_664)) continue; /* Enable this decision filter in MANC2H */ From 60a5711db646b87b9530b16cbaf3bd53ac5594a5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 2 Jun 2010 23:56:13 +0000 Subject: [PATCH 18/30] isdn/kcapi: return -EFAULT on copy_from_user errors copy_from_user() returns the number of bytes remaining but we should return -EFAULT here. The error code gets returned to the user. Both old_capi_manufacturer() and capi20_manufacturer() had other places that already returned -EFAULT so this won't break anything. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/isdn/capi/kcapi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c index bde3c88b8b27..b054494df846 100644 --- a/drivers/isdn/capi/kcapi.c +++ b/drivers/isdn/capi/kcapi.c @@ -1020,12 +1020,12 @@ static int old_capi_manufacturer(unsigned int cmd, void __user *data) if (cmd == AVMB1_ADDCARD) { if ((retval = copy_from_user(&cdef, data, sizeof(avmb1_carddef)))) - return retval; + return -EFAULT; cdef.cardtype = AVM_CARDTYPE_B1; } else { if ((retval = copy_from_user(&cdef, data, sizeof(avmb1_extcarddef)))) - return retval; + return -EFAULT; } cparams.port = cdef.port; cparams.irq = cdef.irq; @@ -1218,7 +1218,7 @@ int capi20_manufacturer(unsigned int cmd, void __user *data) kcapi_carddef cdef; if ((retval = copy_from_user(&cdef, data, sizeof(cdef)))) - return retval; + return -EFAULT; cparams.port = cdef.port; cparams.irq = cdef.irq; From d23380701876dd93d310b2548c51d0f78f25d7aa Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 3 Jun 2010 00:05:35 +0000 Subject: [PATCH 19/30] tehuti: return -EFAULT on copy_to_user errors copy_to_user() returns the number of bytes remaining but we want to return a negative error code here. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/tehuti.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/tehuti.c b/drivers/net/tehuti.c index 20ab16192325..737df6032bbc 100644 --- a/drivers/net/tehuti.c +++ b/drivers/net/tehuti.c @@ -646,7 +646,7 @@ static int bdx_ioctl_priv(struct net_device *ndev, struct ifreq *ifr, int cmd) error = copy_from_user(data, ifr->ifr_data, sizeof(data)); if (error) { pr_err("cant copy from user\n"); - RET(error); + RET(-EFAULT); } DBG("%d 0x%x 0x%x\n", data[0], data[1], data[2]); } @@ -665,7 +665,7 @@ static int bdx_ioctl_priv(struct net_device *ndev, struct ifreq *ifr, int cmd) data[2]); error = copy_to_user(ifr->ifr_data, data, sizeof(data)); if (error) - RET(error); + RET(-EFAULT); break; case BDX_OP_WRITE: From 9e2d11b926765681f72db0373d2ecbbac28359b3 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 2 Jun 2010 10:36:53 +0000 Subject: [PATCH 20/30] epic100: Test __BIG_ENDIAN instead of (non-existent) CONFIG_BIG_ENDIAN Probably no one has used this driver on big-endian systems, since it was setting up descriptor swapping if CONFIG_BIG_ENDIAN is set, which it never is, since that symbol is not mentioned anywhere else in the kernel source. Switch this test to a check for __BIG_ENDIAN so it has a chance at working. Signed-off-by: Roland Dreier Acked-by: Jeff Garzik Signed-off-by: David S. Miller --- drivers/net/epic100.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/epic100.c b/drivers/net/epic100.c index 6838dfc9ef23..4c274657283c 100644 --- a/drivers/net/epic100.c +++ b/drivers/net/epic100.c @@ -87,6 +87,7 @@ static int rx_copybreak; #include #include #include +#include /* These identify the driver base version and may not be removed. */ static char version[] __devinitdata = @@ -230,7 +231,7 @@ static const u16 media2miictl[16] = { * The EPIC100 Rx and Tx buffer descriptors. Note that these * really ARE host-endian; it's not a misannotation. We tell * the card to byteswap them internally on big-endian hosts - - * look for #ifdef CONFIG_BIG_ENDIAN in epic_open(). + * look for #ifdef __BIG_ENDIAN in epic_open(). */ struct epic_tx_desc { @@ -690,7 +691,7 @@ static int epic_open(struct net_device *dev) outl((inl(ioaddr + NVCTL) & ~0x003C) | 0x4800, ioaddr + NVCTL); /* Tell the chip to byteswap descriptors on big-endian hosts */ -#ifdef CONFIG_BIG_ENDIAN +#ifdef __BIG_ENDIAN outl(0x4432 | (RX_FIFO_THRESH<<8), ioaddr + GENCTL); inl(ioaddr + GENCTL); outl(0x0432 | (RX_FIFO_THRESH<<8), ioaddr + GENCTL); @@ -806,7 +807,7 @@ static void epic_restart(struct net_device *dev) for (i = 16; i > 0; i--) outl(0x0008, ioaddr + TEST1); -#ifdef CONFIG_BIG_ENDIAN +#ifdef __BIG_ENDIAN outl(0x0432 | (RX_FIFO_THRESH<<8), ioaddr + GENCTL); #else outl(0x0412 | (RX_FIFO_THRESH<<8), ioaddr + GENCTL); From 3df95ce948dc8ceef07b49003ab944aa047f2a79 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 2 Jun 2010 10:39:56 +0000 Subject: [PATCH 21/30] sfc: Store port number in net_device::dev_id This exposes the port number to userland through sysfs. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/sfc/net_driver.h | 4 +--- drivers/net/sfc/siena.c | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/sfc/net_driver.h b/drivers/net/sfc/net_driver.h index 5fffd9abffde..4762c91cb587 100644 --- a/drivers/net/sfc/net_driver.h +++ b/drivers/net/sfc/net_driver.h @@ -645,7 +645,6 @@ union efx_multicast_hash { * struct efx_nic - an Efx NIC * @name: Device name (net device name or bus id before net device registered) * @pci_dev: The PCI device - * @port_num: Index of this host port within the controller * @type: Controller type attributes * @legacy_irq: IRQ number * @workqueue: Workqueue for port reconfigures and the HW monitor. @@ -729,7 +728,6 @@ union efx_multicast_hash { struct efx_nic { char name[IFNAMSIZ]; struct pci_dev *pci_dev; - unsigned port_num; const struct efx_nic_type *type; int legacy_irq; struct workqueue_struct *workqueue; @@ -832,7 +830,7 @@ static inline const char *efx_dev_name(struct efx_nic *efx) static inline unsigned int efx_port_num(struct efx_nic *efx) { - return efx->port_num; + return efx->net_dev->dev_id; } /** diff --git a/drivers/net/sfc/siena.c b/drivers/net/sfc/siena.c index 7ecd255a7cc0..f2b1e6180753 100644 --- a/drivers/net/sfc/siena.c +++ b/drivers/net/sfc/siena.c @@ -222,7 +222,7 @@ static int siena_probe_nic(struct efx_nic *efx) } efx_reado(efx, ®, FR_AZ_CS_DEBUG); - efx->port_num = EFX_OWORD_FIELD(reg, FRF_CZ_CS_PORT_NUM) - 1; + efx->net_dev->dev_id = EFX_OWORD_FIELD(reg, FRF_CZ_CS_PORT_NUM) - 1; efx_mcdi_init(efx); From db2c24175d149b55784f7cb2c303622ce962c1ae Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Wed, 2 Jun 2010 04:55:02 +0000 Subject: [PATCH 22/30] act_pedit: access skb->data safely access skb->data safely we should use skb_header_pointer() and skb_store_bits() to access skb->data to handle small or non-linear skbs. Signed-off-by: Changli Gao ---- net/sched/act_pedit.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) Signed-off-by: David S. Miller --- net/sched/act_pedit.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index fdbd0b7bd840..50e3d945e1f4 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -125,7 +125,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, { struct tcf_pedit *p = a->priv; int i, munged = 0; - u8 *pptr; + unsigned int off; if (!(skb->tc_verd & TC_OK2MUNGE)) { /* should we set skb->cloned? */ @@ -134,7 +134,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, } } - pptr = skb_network_header(skb); + off = skb_network_offset(skb); spin_lock(&p->tcf_lock); @@ -144,17 +144,17 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tc_pedit_key *tkey = p->tcfp_keys; for (i = p->tcfp_nkeys; i > 0; i--, tkey++) { - u32 *ptr; + u32 *ptr, _data; int offset = tkey->off; if (tkey->offmask) { - if (skb->len > tkey->at) { - char *j = pptr + tkey->at; - offset += ((*j & tkey->offmask) >> - tkey->shift); - } else { + char *d, _d; + + d = skb_header_pointer(skb, off + tkey->at, 1, + &_d); + if (!d) goto bad; - } + offset += (*d & tkey->offmask) >> tkey->shift; } if (offset % 4) { @@ -169,9 +169,13 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, goto bad; } - ptr = (u32 *)(pptr+offset); + ptr = skb_header_pointer(skb, off + offset, 4, &_data); + if (!ptr) + goto bad; /* just do it, baby */ *ptr = ((*ptr & tkey->mask) ^ tkey->val); + if (ptr == &_data) + skb_store_bits(skb, off + offset, ptr, 4); munged++; } From a1868dc2878e61778b9d6d8c61d5368e51d68a29 Mon Sep 17 00:00:00 2001 From: Jeff Kirsher Date: Wed, 2 Jun 2010 12:44:05 +0000 Subject: [PATCH 23/30] ixgbe: return IXGBE_ERR_RAR_INDEX when out of range Based on original patch from Shirley Ma Return IXGBE_ERR_RAR_INDEX when RAR index is out of range, instead of returning IXGBE_SUCCESS. CC: Shirley Ma Signed-off-by: Jeff Kirsher Acked-by: Don Skidmore Signed-off-by: David S. Miller --- drivers/net/ixgbe/ixgbe_common.c | 2 ++ drivers/net/ixgbe/ixgbe_type.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/net/ixgbe/ixgbe_common.c b/drivers/net/ixgbe/ixgbe_common.c index 1159d9138f05..9595b1bfb8dd 100644 --- a/drivers/net/ixgbe/ixgbe_common.c +++ b/drivers/net/ixgbe/ixgbe_common.c @@ -1188,6 +1188,7 @@ s32 ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq, IXGBE_WRITE_REG(hw, IXGBE_RAH(index), rar_high); } else { hw_dbg(hw, "RAR index %d is out of range.\n", index); + return IXGBE_ERR_RAR_INDEX; } return 0; @@ -1219,6 +1220,7 @@ s32 ixgbe_clear_rar_generic(struct ixgbe_hw *hw, u32 index) IXGBE_WRITE_REG(hw, IXGBE_RAH(index), rar_high); } else { hw_dbg(hw, "RAR index %d is out of range.\n", index); + return IXGBE_ERR_RAR_INDEX; } /* clear VMDq pool/queue selection for this RAR */ diff --git a/drivers/net/ixgbe/ixgbe_type.h b/drivers/net/ixgbe/ixgbe_type.h index 2eb6e151016c..cdd1998f18c7 100644 --- a/drivers/net/ixgbe/ixgbe_type.h +++ b/drivers/net/ixgbe/ixgbe_type.h @@ -2609,6 +2609,7 @@ struct ixgbe_info { #define IXGBE_ERR_EEPROM_VERSION -24 #define IXGBE_ERR_NO_SPACE -25 #define IXGBE_ERR_OVERTEMP -26 +#define IXGBE_ERR_RAR_INDEX -27 #define IXGBE_NOT_IMPLEMENTED 0x7FFFFFFF #endif /* _IXGBE_TYPE_H_ */ From 8764ab2ca7ab5055e1ca80f9cfa4970c34acb804 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 4 Jun 2010 01:57:38 +0000 Subject: [PATCH 24/30] net: check for refcount if pop a stacked dst_entry xfrm triggers a warning if dst_pop() drops a refcount on a noref dst. This patch changes dst_pop() to skb_dst_pop(). skb_dst_pop() drops the refcnt only on a refcounted dst. Also we don't clone the child dst_entry, so it is not refcounted and we can use skb_dst_set_noref() in xfrm_output_one(). Signed-off-by: Steffen Klassert Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/dst.h | 6 +++--- net/xfrm/xfrm_output.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/net/dst.h b/include/net/dst.h index 612069beda73..81d1413a8701 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -250,11 +250,11 @@ static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev) * Linux networking. Thus, destinations are stackable. */ -static inline struct dst_entry *dst_pop(struct dst_entry *dst) +static inline struct dst_entry *skb_dst_pop(struct sk_buff *skb) { - struct dst_entry *child = dst_clone(dst->child); + struct dst_entry *child = skb_dst(skb)->child; - dst_release(dst); + skb_dst_drop(skb); return child; } diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 6a329158bdfa..a3cca0a94346 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -95,13 +95,13 @@ resume: goto error_nolock; } - dst = dst_pop(dst); + dst = skb_dst_pop(skb); if (!dst) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); err = -EHOSTUNREACH; goto error_nolock; } - skb_dst_set(skb, dst); + skb_dst_set_noref(skb, dst); x = dst->xfrm; } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); From ca739481662137b8f717bc21f16719cda3c33d6b Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 3 Jun 2010 17:03:45 +0000 Subject: [PATCH 25/30] ixgbe: only check pfc bits in hang logic if pfc is enabled Only check pfc bits in hang logic if PFC is enabled. Previously, if DCB was enabled but PFC was disabled the incorrect pause bits would be checked. Signed-off-by: John Fastabend Acked-by: Don Skidmore Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ixgbe/ixgbe_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index d571d101de08..b2af2f67f604 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -642,7 +642,7 @@ static inline bool ixgbe_tx_xon_state(struct ixgbe_adapter *adapter, u32 txoff = IXGBE_TFCS_TXOFF; #ifdef CONFIG_IXGBE_DCB - if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { + if (adapter->dcb_cfg.pfc_mode_enable) { int tc; int reg_idx = tx_ring->reg_idx; int dcb_i = adapter->ring_feature[RING_F_DCB].indices; From 57f1553ee5d9f093660cc49098f494e17ed11668 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 3 Jun 2010 00:42:30 +0000 Subject: [PATCH 26/30] syncookies: remove Kconfig text line about disabled-by-default syncookies default to on since e994b7c901ded7200b525a707c6da71f2cf6d4bb (tcp: Don't make syn cookies initial setting depend on CONFIG_SYSCTL). Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/Kconfig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 8e3a1fd938ab..7c3a7d191249 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -303,7 +303,7 @@ config ARPD If unsure, say N. config SYN_COOKIES - bool "IP: TCP syncookie support (disabled per default)" + bool "IP: TCP syncookie support" ---help--- Normal TCP/IP networking is open to an attack known as "SYN flooding". This denial-of-service attack prevents legitimate remote @@ -328,13 +328,13 @@ config SYN_COOKIES server is really overloaded. If this happens frequently better turn them off. - If you say Y here, note that SYN cookies aren't enabled by default; - you can enable them by saying Y to "/proc file system support" and + If you say Y here, you can disable SYN cookies at run time by + saying Y to "/proc file system support" and "Sysctl support" below and executing the command - echo 1 >/proc/sys/net/ipv4/tcp_syncookies + echo 0 > /proc/sys/net/ipv4/tcp_syncookies - at boot time after the /proc file system has been mounted. + after the /proc file system has been mounted. If unsure, say N. From 536e00e570c87f258554e919c444b81a7002e46d Mon Sep 17 00:00:00 2001 From: Ben McKeegan Date: Wed, 2 Jun 2010 23:14:33 +0000 Subject: [PATCH 27/30] ppp_generic: fix multilink fragment sizes Fix bug in multilink fragment size calculation introduced by commit 9c705260feea6ae329bc6b6d5f6d2ef0227eda0a "ppp: ppp_mp_explode() redesign" Signed-off-by: Ben McKeegan Signed-off-by: David S. Miller --- drivers/net/ppp_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index c5f8eb102bf7..1b2c29150202 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c @@ -1422,7 +1422,7 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb) flen = len; if (nfree > 0) { if (pch->speed == 0) { - flen = totlen/nfree; + flen = len/nfree; if (nbigger > 0) { flen++; nbigger--; From ca55158c6ecb7832a6ad80ac44a14d23bab8cdfc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Jun 2010 09:03:58 +0000 Subject: [PATCH 28/30] rps: tcp: fix rps_sock_flow_table table updates I believe a moderate SYN flood attack can corrupt RFS flow table (rps_sock_flow_table), making RPS/RFS much less effective. Even in a normal situation, server handling short lived sessions suffer from bad steering for the first data packet of a session, if another SYN packet is received for another session. We do following action in tcp_v4_rcv() : sock_rps_save_rxhash(sk, skb->rxhash); We should _not_ do this if sk is a LISTEN socket, as about each packet received on a LISTEN socket has a different rxhash than previous one. -> RPS_NO_CPU markers are spread all over rps_sock_flow_table. Also, it makes sense to protect sk->rxhash field changes with socket lock (We currently can change it even if user thread owns the lock and might use rxhash) This patch moves sock_rps_save_rxhash() to a sock locked section, and only for non LISTEN sockets. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 202cf09c4cd4..fe193e53af44 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1555,6 +1555,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) #endif if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ + sock_rps_save_rxhash(sk, skb->rxhash); TCP_CHECK_TIMER(sk); if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; @@ -1579,7 +1580,9 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) } return 0; } - } + } else + sock_rps_save_rxhash(sk, skb->rxhash); + TCP_CHECK_TIMER(sk); if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { @@ -1672,8 +1675,6 @@ process: skb->dev = NULL; - sock_rps_save_rxhash(sk, skb->rxhash); - bh_lock_sock_nested(sk); ret = 0; if (!sock_owned_by_user(sk)) { From c44649216522cd607a4027d2ebf4a8147d3fa94c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Jun 2010 05:45:47 +0000 Subject: [PATCH 29/30] tcp: use correct net ns in cookie_v4_check() Its better to make a route lookup in appropriate namespace. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/syncookies.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 5c24db4a3c91..9f6b22206c52 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -347,7 +347,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, { .sport = th->dest, .dport = th->source } } }; security_req_classify_flow(req, &fl); - if (ip_route_output_key(&init_net, &rt, &fl)) { + if (ip_route_output_key(sock_net(sk), &rt, &fl)) { reqsk_free(req); goto out; } From ca7335948e294faf8adf65f2c95ca18ea78540db Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Fri, 4 Jun 2010 16:14:15 -0700 Subject: [PATCH 30/30] X25: remove duplicated #include Remove duplicated #include('s) in drivers/net/wan/x25_asy.c Signed-off-by: Huang Weiyi Signed-off-by: David S. Miller --- drivers/net/wan/x25_asy.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 166e77dfffda..e47f5a986b1c 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -37,8 +37,6 @@ #include #include "x25_asy.h" -#include - static struct net_device **x25_asy_devs; static int x25_asy_maxdev = SL_NRUNIT;