From 55a8d02bbb25edf9589b2f11afec410d053da497 Mon Sep 17 00:00:00 2001 From: Nan Jia Date: Sun, 31 May 2015 19:53:28 +1000 Subject: [PATCH 001/170] jfs: removed a prohibited space after opening parenthesis Fixed a coding style issue. Signed-off-by: Nan Jia Signed-off-by: Dave Kleikamp --- fs/jfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/jfs/file.c b/fs/jfs/file.c index e98d39d75cf4..b9dc23cd04f2 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -76,7 +76,7 @@ static int jfs_open(struct inode *inode, struct file *file) if (ji->active_ag == -1) { struct jfs_sb_info *jfs_sb = JFS_SBI(inode->i_sb); ji->active_ag = BLKTOAG(addressPXD(&ji->ixpxd), jfs_sb); - atomic_inc( &jfs_sb->bmap->db_active[ji->active_ag]); + atomic_inc(&jfs_sb->bmap->db_active[ji->active_ag]); } spin_unlock_irq(&ji->ag_lock); } From f7f31adf07d1c9e66c160e9bdd77e12531cd6996 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 4 Jun 2015 17:57:03 +0100 Subject: [PATCH 002/170] jfs: fix indentation on if statement The if statement and closing brace are indented by 1 extra space, so remove this extra spacing. Cosmetic change only. Signed-off-by: Colin Ian King Signed-off-by: Dave Kleikamp --- fs/jfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 070dc4b33544..28d69fade18a 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -133,11 +133,11 @@ int jfs_write_inode(struct inode *inode, struct writeback_control *wbc) * It has been committed since the last change, but was still * on the dirty inode list. */ - if (!test_cflag(COMMIT_Dirty, inode)) { + if (!test_cflag(COMMIT_Dirty, inode)) { /* Make sure committed changes hit the disk */ jfs_flush_journal(JFS_SBI(inode->i_sb)->log, wait); return 0; - } + } if (jfs_commit_inode(inode, wait)) { jfs_err("jfs_write_inode: jfs_commit_inode failed!"); From 7c258670ce655659a4c8d1013676c55e74d09ee7 Mon Sep 17 00:00:00 2001 From: Tedd Ho-Jeong An Date: Thu, 25 Jun 2015 23:27:55 -0700 Subject: [PATCH 003/170] Bluetooth: hidp: Initialize list header of hidp session user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When new hidp session is created, list header in l2cap_user is not initialized and this causes list_empty() to fail in l2cap_register_user() even if l2cap_user list is empty. Signed-off-by: Tedd Ho-Jeong An Tested-by: Jörg Otte Signed-off-by: Marcel Holtmann --- net/bluetooth/hidp/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 9070dfd6b4ad..f1a117f8cad2 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -915,6 +915,7 @@ static int hidp_session_new(struct hidp_session **out, const bdaddr_t *bdaddr, session->conn = l2cap_conn_get(conn); session->user.probe = hidp_session_probe; session->user.remove = hidp_session_remove; + INIT_LIST_HEAD(&session->user.list); session->ctrl_sock = ctrl_sock; session->intr_sock = intr_sock; skb_queue_head_init(&session->ctrl_transmit); From ab944c83f6690df0c7f67e6bcc29fc0c82ef6021 Mon Sep 17 00:00:00 2001 From: Tedd Ho-Jeong An Date: Tue, 30 Jun 2015 11:43:40 -0700 Subject: [PATCH 004/170] Bluetooth: Reinitialize the list after deletion for session user list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the user->list is deleted with list_del(), it doesn't initialize the entry which can cause the issue with list_empty(). According to the comment from the list.h, list_empty() returns false even if the list is empty and put the entry in an undefined state. /** * list_del - deletes entry from list. * @entry: the element to delete from the list. * Note: list_empty() on entry does not return true after this, the entry is * in an undefined state. */ Because of this behavior, list_empty() returns false even if list is empty when the device is reconnected. So, user->list needs to be re-initialized after list_del(). list.h already have a macro list_del_init() which deletes the entry and initailze it again. Signed-off-by: Tedd Ho-Jeong An Tested-by: Jörg Otte Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 51594fb7b9e7..45fffa413642 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1634,7 +1634,7 @@ void l2cap_unregister_user(struct l2cap_conn *conn, struct l2cap_user *user) if (list_empty(&user->list)) goto out_unlock; - list_del(&user->list); + list_del_init(&user->list); user->remove(conn, user); out_unlock: @@ -1648,7 +1648,7 @@ static void l2cap_unregister_all_users(struct l2cap_conn *conn) while (!list_empty(&conn->users)) { user = list_first_entry(&conn->users, struct l2cap_user, list); - list_del(&user->list); + list_del_init(&user->list); user->remove(conn, user); } } From db490cb9aeb77b392f4bdaad3ebe96ea1d5c7bfe Mon Sep 17 00:00:00 2001 From: Ingo Tuchscherer Date: Tue, 17 Mar 2015 16:02:20 +0100 Subject: [PATCH 005/170] s390/zcrypt: enable s390 hwrng to seed kernel entropy Set the 'quality' property in the zcrypt rng device structure to enable the zcrypt hwrng device to take part in the kernel entropy seeding process. A module parameter named hwrng_seed will be introduced to disable the participation. By default this parameter is set to 1 (enabled). Signed-off-by: Ingo Tuchscherer Signed-off-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/zcrypt_api.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 08f1830cbfc4..01bf1f5cf2e9 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -54,6 +54,10 @@ MODULE_DESCRIPTION("Cryptographic Coprocessor interface, " \ "Copyright IBM Corp. 2001, 2012"); MODULE_LICENSE("GPL"); +static int zcrypt_hwrng_seed = 1; +module_param_named(hwrng_seed, zcrypt_hwrng_seed, int, S_IRUSR|S_IRGRP); +MODULE_PARM_DESC(hwrng_seed, "Turn on/off hwrng auto seed, default is 1 (on)."); + static DEFINE_SPINLOCK(zcrypt_device_lock); static LIST_HEAD(zcrypt_device_list); static int zcrypt_device_count = 0; @@ -1373,6 +1377,7 @@ static int zcrypt_rng_data_read(struct hwrng *rng, u32 *data) static struct hwrng zcrypt_rng_dev = { .name = "zcrypt", .data_read = zcrypt_rng_data_read, + .quality = 990, }; static int zcrypt_rng_device_add(void) @@ -1387,6 +1392,8 @@ static int zcrypt_rng_device_add(void) goto out; } zcrypt_rng_buffer_index = 0; + if (!zcrypt_hwrng_seed) + zcrypt_rng_dev.quality = 0; rc = hwrng_register(&zcrypt_rng_dev); if (rc) goto out_free; From a313bdc5310dd807655d3ca3eb2219cd65dfe45a Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 25 Jun 2015 09:32:22 +0200 Subject: [PATCH 006/170] s390/sclp: fix compile error Fix this error when compiling with CONFIG_SMP=n and CONFIG_DYNAMIC_DEBUG=y: drivers/s390/char/sclp_early.c: In function 'sclp_read_info_early': drivers/s390/char/sclp_early.c:87:19: error: 'EBUSY' undeclared (first use in this function) } while (rc == -EBUSY); ^ Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/char/sclp_early.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index aeed7969fd79..7bc6df3100ef 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -7,6 +7,7 @@ #define KMSG_COMPONENT "sclp_early" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include #include #include #include From a215c8fbde8600d133f122691d0f8c30de6dda02 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 25 Jun 2015 14:52:13 +0200 Subject: [PATCH 007/170] s390/oprofile: fix compile error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix these errors when compiling with CONFIG_OPROFILE=y and CONFIG_PERF_EVENTS=n: arch/s390/oprofile/init.c: In function ‘oprofile_hwsampler_start’: arch/s390/oprofile/init.c:93:2: error: implicit declaration of function 'perf_reserve_sampling' [-Werror=implicit-function-declaration] retval = perf_reserve_sampling(); ^ arch/s390/oprofile/init.c:99:3: error: implicit declaration of function 'perf_release_sampling' [-Werror=implicit-function-declaration] perf_release_sampling(); ^ Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/perf_event.h | 8 ++++++++ arch/s390/oprofile/init.c | 1 + 2 files changed, 9 insertions(+) diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 4cb19fe76dd9..f897ec73dc8c 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -87,7 +87,15 @@ struct sf_raw_sample { } __packed; /* Perf hardware reserve and release functions */ +#ifdef CONFIG_PERF_EVENTS int perf_reserve_sampling(void); void perf_release_sampling(void); +#else /* CONFIG_PERF_EVENTS */ +static inline int perf_reserve_sampling(void) +{ + return 0; +} +static inline void perf_release_sampling(void) {} +#endif /* CONFIG_PERF_EVENTS */ #endif /* _ASM_S390_PERF_EVENT_H */ diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index bc927a09a172..9cfa2ffaa9d6 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "../../../drivers/oprofile/oprof.h" From f307170d6e591a48529425b1ed6ca835790995a9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 19 Jun 2015 17:23:37 -0500 Subject: [PATCH 008/170] netfilter: nf_queue: Don't recompute the hook_list head If someone sends packets from one of the netdevice ingress hooks to the a userspace queue, and then userspace later accepts the packet, the netfilter code can enter an infinite loop as the list head will never be found. Pass in the saved list_head to avoid this. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index cd60d397fe05..8a8b2abc35ff 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -213,7 +213,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) if (verdict == NF_ACCEPT) { next_hook: - verdict = nf_iterate(&nf_hooks[entry->state.pf][entry->state.hook], + verdict = nf_iterate(entry->state.hook_list, skb, &entry->state, &elem); } From a1bc1b356a9d21bf29bc7c873718b5cacdf119b4 Mon Sep 17 00:00:00 2001 From: Bernhard Thaler Date: Sat, 20 Jun 2015 00:17:50 +0200 Subject: [PATCH 009/170] netfilter: bridge: fix CONFIG_NF_DEFRAG_IPV4/6 related warnings/errors br_nf_ip_fragment() is not needed when neither CONFIG_NF_DEFRAG_IPV4 nor CONFIG_NF_DEFRAG_IPV6 is set. struct brnf_frag_data must be available if either CONFIG_NF_DEFRAG_IPV4 or CONFIG_NF_DEFRAG_IPV6 is set. Fixes: efb6de9b4ba0 ("netfilter: bridge: forward IPv6 fragmented packets") Reported-by: kbuild test robot Signed-off-by: Bernhard Thaler Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter_hooks.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index d89f4fac0bc5..8a394bd3af83 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -111,7 +111,7 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb) /* largest possible L2 header, see br_nf_dev_queue_xmit() */ #define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN) -#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) struct brnf_frag_data { char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH]; u8 encap_size; @@ -694,6 +694,7 @@ static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb) } #endif +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, int (*output)(struct sock *, struct sk_buff *)) { @@ -712,6 +713,7 @@ static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, return ip_do_fragment(sk, skb, output); } +#endif static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) { From 3bd229976f64bea64c60803f9fc8d9f0059ba2f2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 30 Jun 2015 22:21:00 +0200 Subject: [PATCH 010/170] netfilter: arptables: use percpu jumpstack commit 482cfc318559 ("netfilter: xtables: avoid percpu ruleset duplication") Unlike ip and ip6tables, arp tables were never converted to use the percpu jump stack. It still uses the rule blob to store return address, which isn't safe anymore since we now share this blob among all processors. Because there is no TEE support for arptables, we don't need to cope with reentrancy, so we can use loocal variable to hold stack offset. Fixes: 482cfc318559 ("netfilter: xtables: avoid percpu ruleset duplication") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/arp_tables.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 95c9b6eece25..92305a1a021a 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -254,9 +254,10 @@ unsigned int arpt_do_table(struct sk_buff *skb, static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); unsigned int verdict = NF_DROP; const struct arphdr *arp; - struct arpt_entry *e, *back; + struct arpt_entry *e, **jumpstack; const char *indev, *outdev; const void *table_base; + unsigned int cpu, stackidx = 0; const struct xt_table_info *private; struct xt_action_param acpar; unsigned int addend; @@ -270,15 +271,16 @@ unsigned int arpt_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); private = table->private; + cpu = smp_processor_id(); /* * Ensure we load private-> members after we've fetched the base * pointer. */ smp_read_barrier_depends(); table_base = private->entries; + jumpstack = (struct arpt_entry **)private->jumpstack[cpu]; e = get_entry(table_base, private->hook_entry[hook]); - back = get_entry(table_base, private->underflow[hook]); acpar.in = state->in; acpar.out = state->out; @@ -312,18 +314,23 @@ unsigned int arpt_do_table(struct sk_buff *skb, verdict = (unsigned int)(-v) - 1; break; } - e = back; - back = get_entry(table_base, back->comefrom); + if (stackidx == 0) { + e = get_entry(table_base, + private->underflow[hook]); + } else { + e = jumpstack[--stackidx]; + e = arpt_next_entry(e); + } continue; } if (table_base + v != arpt_next_entry(e)) { - /* Save old back ptr in next entry */ - struct arpt_entry *next = arpt_next_entry(e); - next->comefrom = (void *)back - table_base; - /* set back pointer to next entry */ - back = next; + if (stackidx >= private->stacksize) { + verdict = NF_DROP; + break; + } + jumpstack[stackidx++] = e; } e = get_entry(table_base, v); From dd302b59bde0149c20df7278c0d36c765e66afbd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 30 Jun 2015 22:27:51 +0200 Subject: [PATCH 011/170] netfilter: bridge: don't leak skb in error paths br_nf_dev_queue_xmit must free skb in its error path. NF_DROP is misleading -- its an okfn, not a netfilter hook. Fixes: 462fb2af9788a ("bridge : Sanitize skb before it enters the IP stack") Fixes: efb6de9b4ba00 ("netfilter: bridge: forward IPv6 fragmented packets") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter_hooks.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 8a394bd3af83..c8b9bcfe997e 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -744,7 +744,7 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) struct brnf_frag_data *data; if (br_validate_ipv4(skb)) - return NF_DROP; + goto drop; IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; @@ -769,7 +769,7 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) struct brnf_frag_data *data; if (br_validate_ipv6(skb)) - return NF_DROP; + goto drop; IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; @@ -784,12 +784,16 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) if (v6ops) return v6ops->fragment(sk, skb, br_nf_push_frag_xmit); - else - return -EMSGSIZE; + + kfree_skb(skb); + return -EMSGSIZE; } #endif nf_bridge_info_free(skb); return br_dev_queue_push_xmit(sk, skb); + drop: + kfree_skb(skb); + return 0; } /* PF_BRIDGE/POST_ROUTING ********************************************/ From 6742b9e310bcf511b876532846e5302b07b7fedc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 1 Jul 2015 16:14:25 +0200 Subject: [PATCH 012/170] netfilter: nfnetlink: keep going batch handling on missing modules After a fresh boot with no modules in place at all and a large rulesets, the existing nfnetlink_rcv_batch() funcion can take long time to commit the ruleset due to the many abort path. This is specifically a problem for the existing client of this code, ie. nf_tables, since it results in several synchronize_rcu() call in a row. This patch changes the policy to keep full batch processing on missing modules errors so we abort only once. Reported-by: Eric Leblond Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink.c | 40 +++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 8b117c90ecd7..0c0e8ecf02ab 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -269,6 +269,12 @@ static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb) } } +enum { + NFNL_BATCH_FAILURE = (1 << 0), + NFNL_BATCH_DONE = (1 << 1), + NFNL_BATCH_REPLAY = (1 << 2), +}; + static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, u_int16_t subsys_id) { @@ -276,13 +282,15 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); const struct nfnetlink_subsystem *ss; const struct nfnl_callback *nc; - bool success = true, done = false; static LIST_HEAD(err_list); + u32 status; int err; if (subsys_id >= NFNL_SUBSYS_COUNT) return netlink_ack(skb, nlh, -EINVAL); replay: + status = 0; + skb = netlink_skb_clone(oskb, GFP_KERNEL); if (!skb) return netlink_ack(oskb, nlh, -ENOMEM); @@ -336,10 +344,10 @@ replay: if (type == NFNL_MSG_BATCH_BEGIN) { /* Malformed: Batch begin twice */ nfnl_err_reset(&err_list); - success = false; + status |= NFNL_BATCH_FAILURE; goto done; } else if (type == NFNL_MSG_BATCH_END) { - done = true; + status |= NFNL_BATCH_DONE; goto done; } else if (type < NLMSG_MIN_TYPE) { err = -EINVAL; @@ -382,11 +390,8 @@ replay: * original skb. */ if (err == -EAGAIN) { - nfnl_err_reset(&err_list); - ss->abort(oskb); - nfnl_unlock(subsys_id); - kfree_skb(skb); - goto replay; + status |= NFNL_BATCH_REPLAY; + goto next; } } ack: @@ -402,7 +407,7 @@ ack: */ nfnl_err_reset(&err_list); netlink_ack(skb, nlmsg_hdr(oskb), -ENOMEM); - success = false; + status |= NFNL_BATCH_FAILURE; goto done; } /* We don't stop processing the batch on errors, thus, @@ -410,19 +415,26 @@ ack: * triggers. */ if (err) - success = false; + status |= NFNL_BATCH_FAILURE; } - +next: msglen = NLMSG_ALIGN(nlh->nlmsg_len); if (msglen > skb->len) msglen = skb->len; skb_pull(skb, msglen); } done: - if (success && done) - ss->commit(oskb); - else + if (status & NFNL_BATCH_REPLAY) { ss->abort(oskb); + nfnl_err_reset(&err_list); + nfnl_unlock(subsys_id); + kfree_skb(skb); + goto replay; + } else if (status == NFNL_BATCH_DONE) { + ss->commit(oskb); + } else { + ss->abort(oskb); + } nfnl_err_deliver(&err_list, oskb); nfnl_unlock(subsys_id); From 25c14ef86a0d5ec9320e833685c15bc96f504864 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan <_govind@gmx.com> Date: Wed, 1 Jul 2015 14:21:57 +0530 Subject: [PATCH 013/170] enic: fix issues in enic_poll In enic_poll, we clean tx and rx queues, when low latency busy socket polling is happening, enic_poll will only clean tx queue. After cleaning tx, it should return total budget for re-poll. There is a small window between vnic_intr_unmask() and enic_poll_unlock_napi(). In this window if an irq occurs and napi is scheduled on different cpu, it tries to acquire enic_poll_lock_napi() and fails. Unlock napi_poll before unmasking the interrupt. v2: Do not change tx wonk done behaviour. Consider only rx work done for completing napi. Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. Miller --- drivers/net/ethernet/cisco/enic/enic_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index da2004e2a741..918a8e42139b 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1170,7 +1170,7 @@ static int enic_poll(struct napi_struct *napi, int budget) wq_work_done, 0 /* dont unmask intr */, 0 /* dont reset intr timer */); - return rq_work_done; + return budget; } if (budget > 0) @@ -1191,6 +1191,7 @@ static int enic_poll(struct napi_struct *napi, int budget) 0 /* don't reset intr timer */); err = vnic_rq_fill(&enic->rq[0], enic_rq_alloc_buf); + enic_poll_unlock_napi(&enic->rq[cq_rq], napi); /* Buffer allocation failed. Stay in polling * mode so we can try to fill the ring again. @@ -1208,7 +1209,6 @@ static int enic_poll(struct napi_struct *napi, int budget) napi_complete(napi); vnic_intr_unmask(&enic->intr[intr]); } - enic_poll_unlock_napi(&enic->rq[cq_rq], napi); return rq_work_done; } From 462e1ead9296a8452499fb10cf3b51903ffe24ac Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 2 Jul 2015 05:48:17 -0700 Subject: [PATCH 014/170] bridge: vlan: fix usage of vlan 0 and 4095 again Vlan ids 0 and 4095 were disallowed by commit: 8adff41c3d25 ("bridge: Don't use VID 0 and 4095 in vlan filtering") but then the check was removed when vlan ranges were introduced by: bdced7ef7838 ("bridge: support for multiple vlans and vlan ranges in setlink and dellink requests") So reintroduce the vlan range check. Before patch: [root@testvm ~]# bridge vlan add vid 0 dev eth0 master (succeeds) After Patch: [root@testvm ~]# bridge vlan add vid 0 dev eth0 master RTNETLINK answers: Invalid argument Signed-off-by: Nikolay Aleksandrov Fixes: bdced7ef7838 ("bridge: support for multiple vlans and vlan ranges in setlink and dellink requests") Acked-by: Toshiaki Makita Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 6b67ed3831de..364bdc98bd9b 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -457,6 +457,8 @@ static int br_afspec(struct net_bridge *br, if (nla_len(attr) != sizeof(struct bridge_vlan_info)) return -EINVAL; vinfo = nla_data(attr); + if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK) + return -EINVAL; if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { if (vinfo_start) return -EINVAL; From 87775312a86bcf213e3b21f6f7c79e2e00d96f7b Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 2 Jul 2015 16:30:24 +0200 Subject: [PATCH 015/170] net-ipv6: Delete an unnecessary check before the function call "free_percpu" The free_percpu() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: David S. Miller --- net/ipv6/route.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1a1122a6bbf5..6090969937f8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -369,10 +369,7 @@ static void ip6_dst_destroy(struct dst_entry *dst) struct inet6_dev *idev; dst_destroy_metrics_generic(dst); - - if (rt->rt6i_pcpu) - free_percpu(rt->rt6i_pcpu); - + free_percpu(rt->rt6i_pcpu); rt6_uncached_list_del(rt); idev = rt->rt6i_idev; From f6e1c91666990d06ba37c76129495c724202144d Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 2 Jul 2015 17:58:21 +0200 Subject: [PATCH 016/170] net-RDS: Delete an unnecessary check before the function call "module_put" The module_put() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: David S. Miller --- net/rds/transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rds/transport.c b/net/rds/transport.c index 8b4a6cd2c3a7..83498e1c75b8 100644 --- a/net/rds/transport.c +++ b/net/rds/transport.c @@ -73,7 +73,7 @@ EXPORT_SYMBOL_GPL(rds_trans_unregister); void rds_trans_put(struct rds_transport *trans) { - if (trans && trans->t_owner) + if (trans) module_put(trans->t_owner); } From 92b80eb33c52583b48ed289bd993579b87b52d9a Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 2 Jul 2015 18:38:12 +0200 Subject: [PATCH 017/170] netlink: Delete an unnecessary check before the function call "module_put" The module_put() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index dea925388a5b..9a0ae7172f92 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -158,7 +158,7 @@ static int __netlink_remove_tap(struct netlink_tap *nt) out: spin_unlock(&netlink_tap_lock); - if (found && nt->module) + if (found) module_put(nt->module); return found ? 0 : -ENODEV; From 4c938d22c88a9ddccc8c55a85e0430e9c62b1ac5 Mon Sep 17 00:00:00 2001 From: Angga Date: Fri, 3 Jul 2015 14:40:52 +1200 Subject: [PATCH 018/170] ipv6: Make MLD packets to only be processed locally Before commit daad151263cf ("ipv6: Make ipv6_is_mld() inline and use it from ip6_mc_input().") MLD packets were only processed locally. After the change, a copy of MLD packet goes through ip6_mr_input, causing MRT6MSG_NOCACHE message to be generated to user space. Make MLD packet only processed locally. Fixes: daad151263cf ("ipv6: Make ipv6_is_mld() inline and use it from ip6_mc_input().") Signed-off-by: Hermin Anggawijaya Signed-off-by: David S. Miller --- net/ipv6/ip6_input.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index f2e464eba5ef..57990c929cd8 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -331,10 +331,10 @@ int ip6_mc_input(struct sk_buff *skb) if (offset < 0) goto out; - if (!ipv6_is_mld(skb, nexthdr, offset)) - goto out; + if (ipv6_is_mld(skb, nexthdr, offset)) + deliver = true; - deliver = true; + goto out; } /* unknown RA - process it normally */ } From fda8b18c515a5e2caf821887ceafb42c35094eaf Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Fri, 3 Jul 2015 16:10:51 +0530 Subject: [PATCH 019/170] cxgb4: Fix incorrect sequence numbers shown in devlog Part of commit 49aa284fe64c4c1 ("cxgb4: Add support for devlog") change introduced a real bug where the Device Log Sequence Numbers are no longer being converted from firmware Big-Endian to local CPU-Endian format. This patch moves all of the translation into the devlog_show() routine. The only endianness code now in devlog_open() is the small loop to find the earliest (lowest Sequence Number) Device Log entry in the circular buffer. Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- .../ethernet/chelsio/cxgb4/cxgb4_debugfs.c | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 484eb8c37489..a11485fbb33f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -952,16 +952,23 @@ static int devlog_show(struct seq_file *seq, void *v) * eventually have to put a format interpreter in here ... */ seq_printf(seq, "%10d %15llu %8s %8s ", - e->seqno, e->timestamp, + be32_to_cpu(e->seqno), + be64_to_cpu(e->timestamp), (e->level < ARRAY_SIZE(devlog_level_strings) ? devlog_level_strings[e->level] : "UNKNOWN"), (e->facility < ARRAY_SIZE(devlog_facility_strings) ? devlog_facility_strings[e->facility] : "UNKNOWN")); - seq_printf(seq, e->fmt, e->params[0], e->params[1], - e->params[2], e->params[3], e->params[4], - e->params[5], e->params[6], e->params[7]); + seq_printf(seq, e->fmt, + be32_to_cpu(e->params[0]), + be32_to_cpu(e->params[1]), + be32_to_cpu(e->params[2]), + be32_to_cpu(e->params[3]), + be32_to_cpu(e->params[4]), + be32_to_cpu(e->params[5]), + be32_to_cpu(e->params[6]), + be32_to_cpu(e->params[7])); } return 0; } @@ -1043,23 +1050,17 @@ static int devlog_open(struct inode *inode, struct file *file) return ret; } - /* Translate log multi-byte integral elements into host native format - * and determine where the first entry in the log is. + /* Find the earliest (lowest Sequence Number) log entry in the + * circular Device Log. */ for (fseqno = ~((u32)0), index = 0; index < dinfo->nentries; index++) { struct fw_devlog_e *e = &dinfo->log[index]; - int i; __u32 seqno; if (e->timestamp == 0) continue; - e->timestamp = (__force __be64)be64_to_cpu(e->timestamp); seqno = be32_to_cpu(e->seqno); - for (i = 0; i < 8; i++) - e->params[i] = - (__force __be32)be32_to_cpu(e->params[i]); - if (seqno < fseqno) { fseqno = seqno; dinfo->first = index; From 0fd972a7d91d6e15393c449492a04d94c0b89351 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Fri, 1 May 2015 20:13:42 -0400 Subject: [PATCH 020/170] module: relocate module_init from init.h to module.h Modular users will always be users of init functionality, but users of init functionality are not necessarily always modules. Hence any functionality like module_init and module_exit would be more at home in the module.h file. And module.h should explicitly include init.h to make the dependency clear. We've already done all the legwork needed to ensure that this move does not cause any build regressions due to implicit header file include assumptions about where module_init lives. Cc: Rusty Russell Acked-by: Rusty Russell Signed-off-by: Paul Gortmaker --- include/linux/init.h | 78 --------------------------------------- include/linux/module.h | 84 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 78 deletions(-) diff --git a/include/linux/init.h b/include/linux/init.h index 7c68c36d3fd8..b449f378f995 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -282,68 +282,8 @@ void __init parse_early_param(void); void __init parse_early_options(char *cmdline); #endif /* __ASSEMBLY__ */ -/** - * module_init() - driver initialization entry point - * @x: function to be run at kernel boot time or module insertion - * - * module_init() will either be called during do_initcalls() (if - * builtin) or at module insertion time (if a module). There can only - * be one per module. - */ -#define module_init(x) __initcall(x); - -/** - * module_exit() - driver exit entry point - * @x: function to be run when driver is removed - * - * module_exit() will wrap the driver clean-up code - * with cleanup_module() when used with rmmod when - * the driver is a module. If the driver is statically - * compiled into the kernel, module_exit() has no effect. - * There can only be one per module. - */ -#define module_exit(x) __exitcall(x); - #else /* MODULE */ -/* - * In most cases loadable modules do not need custom - * initcall levels. There are still some valid cases where - * a driver may be needed early if built in, and does not - * matter when built as a loadable module. Like bus - * snooping debug drivers. - */ -#define early_initcall(fn) module_init(fn) -#define core_initcall(fn) module_init(fn) -#define core_initcall_sync(fn) module_init(fn) -#define postcore_initcall(fn) module_init(fn) -#define postcore_initcall_sync(fn) module_init(fn) -#define arch_initcall(fn) module_init(fn) -#define subsys_initcall(fn) module_init(fn) -#define subsys_initcall_sync(fn) module_init(fn) -#define fs_initcall(fn) module_init(fn) -#define fs_initcall_sync(fn) module_init(fn) -#define rootfs_initcall(fn) module_init(fn) -#define device_initcall(fn) module_init(fn) -#define device_initcall_sync(fn) module_init(fn) -#define late_initcall(fn) module_init(fn) -#define late_initcall_sync(fn) module_init(fn) - -#define console_initcall(fn) module_init(fn) -#define security_initcall(fn) module_init(fn) - -/* Each module must use one module_init(). */ -#define module_init(initfn) \ - static inline initcall_t __inittest(void) \ - { return initfn; } \ - int init_module(void) __attribute__((alias(#initfn))); - -/* This is only required if you want to be unloadable. */ -#define module_exit(exitfn) \ - static inline exitcall_t __exittest(void) \ - { return exitfn; } \ - void cleanup_module(void) __attribute__((alias(#exitfn))); - #define __setup_param(str, unique_id, fn) /* nothing */ #define __setup(str, func) /* nothing */ #endif @@ -351,24 +291,6 @@ void __init parse_early_options(char *cmdline); /* Data marked not to be saved by software suspend */ #define __nosavedata __section(.data..nosave) -/* This means "can be init if no module support, otherwise module load - may call it." */ -#ifdef CONFIG_MODULES -#define __init_or_module -#define __initdata_or_module -#define __initconst_or_module -#define __INIT_OR_MODULE .text -#define __INITDATA_OR_MODULE .data -#define __INITRODATA_OR_MODULE .section ".rodata","a",%progbits -#else -#define __init_or_module __init -#define __initdata_or_module __initdata -#define __initconst_or_module __initconst -#define __INIT_OR_MODULE __INIT -#define __INITDATA_OR_MODULE __INITDATA -#define __INITRODATA_OR_MODULE __INITRODATA -#endif /*CONFIG_MODULES*/ - #ifdef MODULE #define __exit_p(x) x #else diff --git a/include/linux/module.h b/include/linux/module.h index d67b1932cc59..3a19c79918e0 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -71,6 +72,89 @@ extern struct module_attribute module_uevent; extern int init_module(void); extern void cleanup_module(void); +#ifndef MODULE +/** + * module_init() - driver initialization entry point + * @x: function to be run at kernel boot time or module insertion + * + * module_init() will either be called during do_initcalls() (if + * builtin) or at module insertion time (if a module). There can only + * be one per module. + */ +#define module_init(x) __initcall(x); + +/** + * module_exit() - driver exit entry point + * @x: function to be run when driver is removed + * + * module_exit() will wrap the driver clean-up code + * with cleanup_module() when used with rmmod when + * the driver is a module. If the driver is statically + * compiled into the kernel, module_exit() has no effect. + * There can only be one per module. + */ +#define module_exit(x) __exitcall(x); + +#else /* MODULE */ + +/* + * In most cases loadable modules do not need custom + * initcall levels. There are still some valid cases where + * a driver may be needed early if built in, and does not + * matter when built as a loadable module. Like bus + * snooping debug drivers. + */ +#define early_initcall(fn) module_init(fn) +#define core_initcall(fn) module_init(fn) +#define core_initcall_sync(fn) module_init(fn) +#define postcore_initcall(fn) module_init(fn) +#define postcore_initcall_sync(fn) module_init(fn) +#define arch_initcall(fn) module_init(fn) +#define subsys_initcall(fn) module_init(fn) +#define subsys_initcall_sync(fn) module_init(fn) +#define fs_initcall(fn) module_init(fn) +#define fs_initcall_sync(fn) module_init(fn) +#define rootfs_initcall(fn) module_init(fn) +#define device_initcall(fn) module_init(fn) +#define device_initcall_sync(fn) module_init(fn) +#define late_initcall(fn) module_init(fn) +#define late_initcall_sync(fn) module_init(fn) + +#define console_initcall(fn) module_init(fn) +#define security_initcall(fn) module_init(fn) + +/* Each module must use one module_init(). */ +#define module_init(initfn) \ + static inline initcall_t __inittest(void) \ + { return initfn; } \ + int init_module(void) __attribute__((alias(#initfn))); + +/* This is only required if you want to be unloadable. */ +#define module_exit(exitfn) \ + static inline exitcall_t __exittest(void) \ + { return exitfn; } \ + void cleanup_module(void) __attribute__((alias(#exitfn))); + +#endif + +/* This means "can be init if no module support, otherwise module load + may call it." */ +#ifdef CONFIG_MODULES +#define __init_or_module +#define __initdata_or_module +#define __initconst_or_module +#define __INIT_OR_MODULE .text +#define __INITDATA_OR_MODULE .data +#define __INITRODATA_OR_MODULE .section ".rodata","a",%progbits +#else +#define __init_or_module __init +#define __initdata_or_module __initdata +#define __initconst_or_module __initconst +#define __INIT_OR_MODULE __INIT +#define __INITDATA_OR_MODULE __INITDATA +#define __INITRODATA_OR_MODULE __INITRODATA +#endif /*CONFIG_MODULES*/ + /* Archs provide a method of finding the correct exception table. */ struct exception_table_entry; From 14a0abfc4ab0b49c7e48eca5c6b31288ea457dee Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 26 Jun 2015 12:42:53 +0530 Subject: [PATCH 021/170] ARC: Kconfig: better way to disable ARC_HAS_LLSC for ARC_CPU_750D Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index e7cee0a5c56d..91cf4055acab 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -115,6 +115,7 @@ if ISA_ARCOMPACT config ARC_CPU_750D bool "ARC750D" + select ARC_CANT_LLSC help Support for ARC750 core @@ -362,7 +363,7 @@ config ARC_CANT_LLSC config ARC_HAS_LLSC bool "Insn: LLOCK/SCOND (efficient atomic ops)" default y - depends on !ARC_CPU_750D && !ARC_CANT_LLSC + depends on !ARC_CANT_LLSC config ARC_HAS_SWAPE bool "Insn: SWAPE (endian-swap)" From b607eddd7122595bbcf49c00192faf3e49b142d3 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Mon, 29 Jun 2015 15:24:37 +0300 Subject: [PATCH 022/170] ARCv2: guard SLC DMA ops with spinlock SLC maintenance ops need to be serialized by software as there is no inherent buffering / quequing of aux commands. It can silently ignore a new aux operation if previous one is still ongoing (SLC_CTRL_BUSY) So gaurd the SLC op using a spin lock The spin lock doesn't seem to be contended even in heavy workloads such as iperf. On FPGA @ 75 MHz. [1] Before this change: ============================================================ # iperf -c 10.42.0.1 ------------------------------------------------------------ Client connecting to 10.42.0.1, TCP port 5001 TCP window size: 43.8 KByte (default) ------------------------------------------------------------ [ 3] local 10.42.0.110 port 38935 connected with 10.42.0.1 port 5001 [ ID] Interval Transfer Bandwidth [ 3] 0.0-10.0 sec 48.4 MBytes 40.6 Mbits/sec ============================================================ [2] After this change: ============================================================ # iperf -c 10.42.0.1 ------------------------------------------------------------ Client connecting to 10.42.0.1, TCP port 5001 TCP window size: 43.8 KByte (default) ------------------------------------------------------------ [ 3] local 10.42.0.243 port 60248 connected with 10.42.0.1 port 5001 [ ID] Interval Transfer Bandwidth [ 3] 0.0-10.0 sec 47.5 MBytes 39.8 Mbits/sec # iperf -c 10.42.0.1 ------------------------------------------------------------ Client connecting to 10.42.0.1, TCP port 5001 TCP window size: 43.8 KByte (default) ------------------------------------------------------------ [ 3] local 10.42.0.243 port 60249 connected with 10.42.0.1 port 5001 [ ID] Interval Transfer Bandwidth [ 3] 0.0-10.0 sec 54.9 MBytes 46.0 Mbits/sec ============================================================ Signed-off-by: Alexey Brodkin Cc: arc-linux-dev@synopsys.com Signed-off-by: Vineet Gupta --- arch/arc/mm/cache.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index b29d62ed4f7e..1cd6695b6ab5 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -468,10 +468,18 @@ static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr, noinline void slc_op(unsigned long paddr, unsigned long sz, const int op) { #ifdef CONFIG_ISA_ARCV2 + /* + * SLC is shared between all cores and concurrent aux operations from + * multiple cores need to be serialized using a spinlock + * A concurrent operation can be silently ignored and/or the old/new + * operation can remain incomplete forever (lockup in SLC_CTRL_BUSY loop + * below) + */ + static DEFINE_SPINLOCK(lock); unsigned long flags; unsigned int ctrl; - local_irq_save(flags); + spin_lock_irqsave(&lock, flags); /* * The Region Flush operation is specified by CTRL.RGN_OP[11..9] @@ -504,7 +512,7 @@ noinline void slc_op(unsigned long paddr, unsigned long sz, const int op) while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY); - local_irq_restore(flags); + spin_unlock_irqrestore(&lock, flags); #endif } From 61754c18752ffb78145671e94f053fb202fff041 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Wed, 1 Jul 2015 17:19:30 +0200 Subject: [PATCH 023/170] kbuild: Allow arch Makefiles to override {cpp,ld,c}flags Since commit a1c48bb1 (Makefile: Fix unrecognized cross-compiler command line options), the arch Makefile is included earlier by the main Makefile, preventing the arc architecture to set its -O3 compiler option. Since there might be more use cases for an arch Makefile to fine-tune the options, add support for ARCH_CPPFLAGS, ARCH_AFLAGS and ARCH_CFLAGS variables that are appended to the respective kbuild variables. The user still has the final say via the KCPPFLAGS, KAFLAGS and KCFLAGS variables. Reported-by: Vineet Gupta Cc: stable@vger.kernel.org # 3.16+ Signed-off-by: Michal Marek --- Documentation/kbuild/makefiles.txt | 8 ++++++++ Makefile | 9 +++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt index e63b446d973c..13f888a02a3d 100644 --- a/Documentation/kbuild/makefiles.txt +++ b/Documentation/kbuild/makefiles.txt @@ -952,6 +952,14 @@ When kbuild executes, the following steps are followed (roughly): $(KBUILD_ARFLAGS) set by the top level Makefile to "D" (deterministic mode) if this option is supported by $(AR). + ARCH_CPPFLAGS, ARCH_AFLAGS, ARCH_CFLAGS Overrides the kbuild defaults + + These variables are appended to the KBUILD_CPPFLAGS, + KBUILD_AFLAGS, and KBUILD_CFLAGS, respectively, after the + top-level Makefile has set any other flags. This provides a + means for an architecture to override the defaults. + + --- 6.2 Add prerequisites to archheaders: The archheaders: rule is used to generate header files that diff --git a/Makefile b/Makefile index 13270c0a9336..8c31fbc90589 100644 --- a/Makefile +++ b/Makefile @@ -780,10 +780,11 @@ endif include scripts/Makefile.kasan include scripts/Makefile.extrawarn -# Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments -KBUILD_CPPFLAGS += $(KCPPFLAGS) -KBUILD_AFLAGS += $(KAFLAGS) -KBUILD_CFLAGS += $(KCFLAGS) +# Add any arch overrides and user supplied CPPFLAGS, AFLAGS and CFLAGS as the +# last assignments +KBUILD_CPPFLAGS += $(ARCH_CPPFLAGS) $(KCPPFLAGS) +KBUILD_AFLAGS += $(ARCH_AFLAGS) $(KAFLAGS) +KBUILD_CFLAGS += $(ARCH_CFLAGS) $(KCFLAGS) # Use --build-id when available. LDFLAGS_BUILD_ID = $(patsubst -Wl$(comma)%,%,\ From 97709069214eb75312c14946803b9da4d3814203 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 18 Jun 2015 13:54:01 +0530 Subject: [PATCH 024/170] ARC: Override toplevel default -O2 with -O3 ARC kernels have historically been built with -O3, despite top level Makefile defaulting to -O2. This was facilitated by implicitly ordering of arch makefile include AFTER top level assigned -O2. An upstream fix to top level a1c48bb160f ("Makefile: Fix unrecognized cross-compiler command line options") changed the ordering, making ARC -O3 defunct. Fix that by NOT relying on any ordering whatsoever and use the proper arch override facility now present in kbuild (ARCH_*FLAGS) Depends-on: ("kbuild: Allow arch Makefiles to override {cpp,ld,c}flags") Suggested-by: Michal Marek Cc: Geert Uytterhoeven Cc: stable@vger.kernel.org # 3.16+ Signed-off-by: Vineet Gupta --- arch/arc/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 6107062c0111..46d87310220d 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -49,7 +49,8 @@ endif ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE # Generic build system uses -O2, we want -O3 -cflags-y += -O3 +# Note: No need to add to cflags-y as that happens anyways +ARCH_CFLAGS += -O3 endif # small data is default for elf32 tool-chain. If not usable, disable it From f718c2efff0b0460e5335607a1c6caf620847680 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 3 Jul 2015 10:40:43 +0530 Subject: [PATCH 025/170] ARC: Don't memzero twice in dma_alloc_coherent for __GFP_ZERO alloc_pages_exact() get gfp flags and handle zero'ing already And while it, fix the case where ioremap fails: return rightaway. Signed-off-by: Vineet Gupta --- arch/arc/mm/dma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 74a637a1cfc4..57706a9c6948 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -60,8 +60,8 @@ void *dma_alloc_coherent(struct device *dev, size_t size, /* This is kernel Virtual address (0x7000_0000 based) */ kvaddr = ioremap_nocache((unsigned long)paddr, size); - if (kvaddr != NULL) - memset(kvaddr, 0, size); + if (kvaddr == NULL) + return NULL; /* This is bus address, platform dependent */ *dma_handle = (dma_addr_t)paddr; From bccea41ec02301a439a26e43fa49521ae8da4352 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 6 Jul 2015 10:25:01 +0530 Subject: [PATCH 026/170] ARC: fix unused var wanring Fixes: 9bf39ab2adaf ("vfs: add file_path() helper") Signed-off-by: Vineet Gupta --- arch/arc/kernel/troubleshoot.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index 807f7d61d7a7..a6f91e88ce36 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -58,7 +58,6 @@ static void show_callee_regs(struct callee_regs *cregs) static void print_task_path_n_nm(struct task_struct *tsk, char *buf) { - struct path path; char *path_nm = NULL; struct mm_struct *mm; struct file *exe_file; From 83ce3e6fcc16b4d36d40765618777b5a6a30d75b Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 30 Jun 2015 13:37:28 +0530 Subject: [PATCH 027/170] ARCv2: intc: IDU: support irq affinity With this nsim standlone / OSCI have working irq affinity - AXS103 still needs some work as IDU is not visible in intc hierarchy yet ! Signed-off-by: Vineet Gupta --- arch/arc/kernel/mcip.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index 30284e8de6ff..dfeea22048af 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -218,11 +218,28 @@ static void idu_irq_unmask(struct irq_data *data) raw_spin_unlock_irqrestore(&mcip_lock, flags); } +#ifdef CONFIG_SMP static int -idu_irq_set_affinity(struct irq_data *d, const struct cpumask *cpumask, bool f) +idu_irq_set_affinity(struct irq_data *data, const struct cpumask *cpumask, + bool force) { + unsigned long flags; + cpumask_t online; + + /* errout if no online cpu per @cpumask */ + if (!cpumask_and(&online, cpumask, cpu_online_mask)) + return -EINVAL; + + raw_spin_lock_irqsave(&mcip_lock, flags); + + idu_set_dest(data->hwirq, cpumask_bits(&online)[0]); + idu_set_mode(data->hwirq, IDU_M_TRIG_LEVEL, IDU_M_DISTRI_RR); + + raw_spin_unlock_irqrestore(&mcip_lock, flags); + return IRQ_SET_MASK_OK; } +#endif static struct irq_chip idu_irq_chip = { .name = "MCIP IDU Intc", From 6b12ec177c410ef984d2b97717df77c9269eaeac Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 2 Jul 2015 14:02:54 +0530 Subject: [PATCH 028/170] ARCv2: intc: IDU: Fix potential race in installing a chained IRQ handler Signed-off-by: Vineet Gupta --- arch/arc/kernel/mcip.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index dfeea22048af..6fb0a2fd1d28 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -347,8 +347,7 @@ idu_of_init(struct device_node *intc, struct device_node *parent) if (!i) idu_first_irq = irq; - irq_set_handler_data(irq, domain); - irq_set_chained_handler(irq, idu_cascade_isr); + irq_set_chained_handler_and_data(irq, idu_cascade_isr, domain); } __mcip_cmd(CMD_IDU_ENABLE, 0); From acb33cc541d7a5495b16a133702d4c401ea4e294 Mon Sep 17 00:00:00 2001 From: "Vutla, Lokesh" Date: Thu, 2 Jul 2015 18:33:28 +0530 Subject: [PATCH 029/170] crypto: omap-des - Fix unmapping of dma channels dma_unmap_sg() is being called twice after completing the task. Looks like this is a copy paste error when creating des driver. With this the following warn appears during boot: [ 4.210457] ------------[ cut here ]------------ [ 4.215114] WARNING: CPU: 0 PID: 0 at lib/dma-debug.c:1080 check_unmap+0x710/0x9a0() [ 4.222899] omap-des 480a5000.des: DMA-API: device driver tries to free DMA memory it has not allocated [device address=0x00000000ab2ce000] [size=8 bytes] [ 4.236785] Modules linked in: [ 4.239860] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.14.39-02999-g1bc045a-dirty #182 [ 4.247918] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 4.255710] [] (show_stack) from [] (dump_stack+0x84/0xb8) [ 4.262977] [] (dump_stack) from [] (warn_slowpath_common+0x68/0x8c) [ 4.271107] [] (warn_slowpath_common) from [] (warn_slowpath_fmt+0x30/0x40) [ 4.279854] [] (warn_slowpath_fmt) from [] (check_unmap+0x710/0x9a0) [ 4.287991] [] (check_unmap) from [] (debug_dma_unmap_sg+0x90/0x19c) [ 4.296128] [] (debug_dma_unmap_sg) from [] (omap_des_done_task+0x1cc/0x3e4) [ 4.304963] [] (omap_des_done_task) from [] (tasklet_action+0x84/0x124) [ 4.313370] [] (tasklet_action) from [] (__do_softirq+0xf0/0x20c) [ 4.321235] [] (__do_softirq) from [] (irq_exit+0x98/0xec) [ 4.328500] [] (irq_exit) from [] (handle_IRQ+0x50/0xb0) [ 4.335589] [] (handle_IRQ) from [] (gic_handle_irq+0x28/0x5c) Removing the duplicate call to dma_unmap_sg(). Cc: stable@vger.kernel.org Reported-by: Tomi Valkeinen Signed-off-by: Lokesh Vutla Signed-off-by: Herbert Xu --- drivers/crypto/omap-des.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c index 46307098f8ba..0a70e46d5416 100644 --- a/drivers/crypto/omap-des.c +++ b/drivers/crypto/omap-des.c @@ -536,9 +536,6 @@ static int omap_des_crypt_dma_stop(struct omap_des_dev *dd) dmaengine_terminate_all(dd->dma_lch_in); dmaengine_terminate_all(dd->dma_lch_out); - dma_unmap_sg(dd->dev, dd->in_sg, dd->in_sg_len, DMA_TO_DEVICE); - dma_unmap_sg(dd->dev, dd->out_sg, dd->out_sg_len, DMA_FROM_DEVICE); - return err; } From b78fb51b68884ba9b8541a2f1914e8b6d054474c Mon Sep 17 00:00:00 2001 From: "qipeng.zha" Date: Tue, 7 Jul 2015 00:04:45 +0800 Subject: [PATCH 030/170] intel_pmc_ipc: Fix compiler casting warnings Avoid casting variables to different sizes due to different compilers and settings. Reported-by: Fengguang Wu Signed-off-by: qipeng.zha Signed-off-by: Darren Hart --- drivers/platform/x86/intel_pmc_ipc.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/platform/x86/intel_pmc_ipc.c b/drivers/platform/x86/intel_pmc_ipc.c index d734763dab69..69d43b8fe9ba 100644 --- a/drivers/platform/x86/intel_pmc_ipc.c +++ b/drivers/platform/x86/intel_pmc_ipc.c @@ -96,18 +96,18 @@ static struct intel_pmc_ipc_dev { struct completion cmd_complete; /* The following PMC BARs share the same ACPI device with the IPC */ - void *acpi_io_base; + resource_size_t acpi_io_base; int acpi_io_size; struct platform_device *tco_dev; /* gcr */ - void *gcr_base; + resource_size_t gcr_base; int gcr_size; /* punit */ - void *punit_base; + resource_size_t punit_base; int punit_size; - void *punit_base2; + resource_size_t punit_base2; int punit_size2; struct platform_device *punit_dev; } ipcdev; @@ -480,11 +480,11 @@ static int ipc_create_punit_device(void) pdev->dev.parent = ipcdev.dev; res = punit_res; - res->start = (resource_size_t)ipcdev.punit_base; + res->start = ipcdev.punit_base; res->end = res->start + ipcdev.punit_size - 1; res = punit_res + PUNIT_RESOURCE_INTER; - res->start = (resource_size_t)ipcdev.punit_base2; + res->start = ipcdev.punit_base2; res->end = res->start + ipcdev.punit_size2 - 1; ret = platform_device_add_resources(pdev, punit_res, @@ -522,15 +522,15 @@ static int ipc_create_tco_device(void) pdev->dev.parent = ipcdev.dev; res = tco_res + TCO_RESOURCE_ACPI_IO; - res->start = (resource_size_t)ipcdev.acpi_io_base + TCO_BASE_OFFSET; + res->start = ipcdev.acpi_io_base + TCO_BASE_OFFSET; res->end = res->start + TCO_REGS_SIZE - 1; res = tco_res + TCO_RESOURCE_SMI_EN_IO; - res->start = (resource_size_t)ipcdev.acpi_io_base + SMI_EN_OFFSET; + res->start = ipcdev.acpi_io_base + SMI_EN_OFFSET; res->end = res->start + SMI_EN_SIZE - 1; res = tco_res + TCO_RESOURCE_GCR_MEM; - res->start = (resource_size_t)ipcdev.gcr_base; + res->start = ipcdev.gcr_base; res->end = res->start + ipcdev.gcr_size - 1; ret = platform_device_add_resources(pdev, tco_res, ARRAY_SIZE(tco_res)); @@ -589,7 +589,7 @@ static int ipc_plat_get_res(struct platform_device *pdev) return -ENXIO; } size = resource_size(res); - ipcdev.acpi_io_base = (void *)res->start; + ipcdev.acpi_io_base = res->start; ipcdev.acpi_io_size = size; dev_info(&pdev->dev, "io res: %llx %x\n", (long long)res->start, (int)resource_size(res)); @@ -601,7 +601,7 @@ static int ipc_plat_get_res(struct platform_device *pdev) return -ENXIO; } size = resource_size(res); - ipcdev.punit_base = (void *)res->start; + ipcdev.punit_base = res->start; ipcdev.punit_size = size; dev_info(&pdev->dev, "punit data res: %llx %x\n", (long long)res->start, (int)resource_size(res)); @@ -613,7 +613,7 @@ static int ipc_plat_get_res(struct platform_device *pdev) return -ENXIO; } size = resource_size(res); - ipcdev.punit_base2 = (void *)res->start; + ipcdev.punit_base2 = res->start; ipcdev.punit_size2 = size; dev_info(&pdev->dev, "punit interface res: %llx %x\n", (long long)res->start, (int)resource_size(res)); @@ -637,7 +637,7 @@ static int ipc_plat_get_res(struct platform_device *pdev) } ipcdev.ipc_base = addr; - ipcdev.gcr_base = (void *)(res->start + size); + ipcdev.gcr_base = res->start + size; ipcdev.gcr_size = PLAT_RESOURCE_GCR_SIZE; dev_info(&pdev->dev, "ipc res: %llx %x\n", (long long)res->start, (int)resource_size(res)); From ced53f6d12e497ba210a518e7e76178da987f932 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 6 Jul 2015 12:08:55 +0200 Subject: [PATCH 031/170] dell-laptop: Clear buffer before each SMBIOS call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure that before initializing SMBIOS call, the input buffer does not contain any garbage (e.g. values from previous SMBIOS call). This fixes problems with passing undefined/random parameters to SMBIOS functions. Signed-off-by: Pali Rohár Signed-off-by: Darren Hart --- drivers/platform/x86/dell-laptop.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index ed317ccac4a2..85fbe7c247a4 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -311,10 +311,15 @@ static DEFINE_MUTEX(buffer_mutex); static int hwswitch_state; +static void clear_buffer(void) +{ + memset(buffer, 0, sizeof(struct calling_interface_buffer)); +} + static void get_buffer(void) { mutex_lock(&buffer_mutex); - memset(buffer, 0, sizeof(struct calling_interface_buffer)); + clear_buffer(); } static void release_buffer(void) @@ -558,6 +563,8 @@ static int dell_rfkill_set(void *data, bool blocked) !(buffer->output[1] & BIT(16))) disable = 1; + clear_buffer(); + buffer->input[0] = (1 | (radio<<8) | (disable << 16)); dell_send_request(buffer, 17, 11); @@ -572,6 +579,7 @@ static void dell_rfkill_update_sw_state(struct rfkill *rfkill, int radio, if (status & BIT(0)) { /* Has hw-switch, sync sw_state to BIOS */ int block = rfkill_blocked(rfkill); + clear_buffer(); buffer->input[0] = (1 | (radio << 8) | (block << 16)); dell_send_request(buffer, 17, 11); } else { @@ -774,6 +782,7 @@ static int __init dell_setup_rfkill(void) get_buffer(); dell_send_request(buffer, 17, 11); status = buffer->output[1]; + clear_buffer(); buffer->input[0] = 0x2; dell_send_request(buffer, 17, 11); hwswitch_state = buffer->output[1]; From 715d0cdd5a459908c2b2eb1cfa30d6f6d1d7d710 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 6 Jul 2015 12:08:56 +0200 Subject: [PATCH 032/170] dell-laptop: Check return value of each SMBIOS call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure that return value of each SMBIOS call is properly checked and do not continue processing output if the call failed. Signed-off-by: Pali Rohár Signed-off-by: Darren Hart --- drivers/platform/x86/dell-laptop.c | 83 +++++++++++++++++++++--------- 1 file changed, 58 insertions(+), 25 deletions(-) diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index 85fbe7c247a4..efcfc4916af2 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -553,9 +553,15 @@ static int dell_rfkill_set(void *data, bool blocked) int disable = blocked ? 1 : 0; unsigned long radio = (unsigned long)data; int hwswitch_bit = (unsigned long)data - 1; + int ret; get_buffer(); + dell_send_request(buffer, 17, 11); + ret = buffer->output[0]; + + if (ret != 0) + goto out; /* If the hardware switch controls this radio, and the hardware switch is disabled, always disable the radio */ @@ -567,9 +573,11 @@ static int dell_rfkill_set(void *data, bool blocked) buffer->input[0] = (1 | (radio<<8) | (disable << 16)); dell_send_request(buffer, 17, 11); + ret = buffer->output[0]; + out: release_buffer(); - return 0; + return dell_smi_error(ret); } /* Must be called with the buffer held */ @@ -598,14 +606,18 @@ static void dell_rfkill_update_hw_state(struct rfkill *rfkill, int radio, static void dell_rfkill_query(struct rfkill *rfkill, void *data) { int status; + int ret; get_buffer(); dell_send_request(buffer, 17, 11); + ret = buffer->output[0]; status = buffer->output[1]; + release_buffer(); + + if (ret != 0) + return; dell_rfkill_update_hw_state(rfkill, (unsigned long)data, status); - - release_buffer(); } static const struct rfkill_ops dell_rfkill_ops = { @@ -618,12 +630,15 @@ static struct dentry *dell_laptop_dir; static int dell_debugfs_show(struct seq_file *s, void *data) { int status; + int ret; get_buffer(); dell_send_request(buffer, 17, 11); + ret = buffer->output[0]; status = buffer->output[1]; release_buffer(); + seq_printf(s, "return:\t%d\n", ret); seq_printf(s, "status:\t0x%X\n", status); seq_printf(s, "Bit 0 : Hardware switch supported: %lu\n", status & BIT(0)); @@ -702,11 +717,17 @@ static const struct file_operations dell_debugfs_fops = { static void dell_update_rfkill(struct work_struct *ignored) { int status; + int ret; get_buffer(); + dell_send_request(buffer, 17, 11); + ret = buffer->output[0]; status = buffer->output[1]; + if (ret != 0) + goto out; + if (wifi_rfkill) { dell_rfkill_update_hw_state(wifi_rfkill, 1, status); dell_rfkill_update_sw_state(wifi_rfkill, 1, status); @@ -720,6 +741,7 @@ static void dell_update_rfkill(struct work_struct *ignored) dell_rfkill_update_sw_state(wwan_rfkill, 3, status); } + out: release_buffer(); } static DECLARE_DELAYED_WORK(dell_rfkill_work, dell_update_rfkill); @@ -781,6 +803,7 @@ static int __init dell_setup_rfkill(void) get_buffer(); dell_send_request(buffer, 17, 11); + ret = buffer->output[0]; status = buffer->output[1]; clear_buffer(); buffer->input[0] = 0x2; @@ -788,6 +811,10 @@ static int __init dell_setup_rfkill(void) hwswitch_state = buffer->output[1]; release_buffer(); + /* dell wireless info smbios call is not supported */ + if (ret != 0) + return 0; + if (!(status & BIT(0))) { if (force_rfkill) { /* No hwsitch, clear all hw-controlled bits */ @@ -941,47 +968,50 @@ static void dell_cleanup_rfkill(void) static int dell_send_intensity(struct backlight_device *bd) { - int ret = 0; + int token; + int ret; + + token = find_token_location(BRIGHTNESS_TOKEN); + if (token == -1) + return -ENODEV; get_buffer(); - buffer->input[0] = find_token_location(BRIGHTNESS_TOKEN); + buffer->input[0] = token; buffer->input[1] = bd->props.brightness; - if (buffer->input[0] == -1) { - ret = -ENODEV; - goto out; - } - if (power_supply_is_system_supplied() > 0) dell_send_request(buffer, 1, 2); else dell_send_request(buffer, 1, 1); - out: + ret = dell_smi_error(buffer->output[0]); + release_buffer(); return ret; } static int dell_get_intensity(struct backlight_device *bd) { - int ret = 0; + int token; + int ret; + + token = find_token_location(BRIGHTNESS_TOKEN); + if (token == -1) + return -ENODEV; get_buffer(); - buffer->input[0] = find_token_location(BRIGHTNESS_TOKEN); - - if (buffer->input[0] == -1) { - ret = -ENODEV; - goto out; - } + buffer->input[0] = token; if (power_supply_is_system_supplied() > 0) dell_send_request(buffer, 0, 2); else dell_send_request(buffer, 0, 1); - ret = buffer->output[1]; + if (buffer->output[0]) + ret = dell_smi_error(buffer->output[0]); + else + ret = buffer->output[1]; - out: release_buffer(); return ret; } @@ -2045,6 +2075,7 @@ static void kbd_led_exit(void) static int __init dell_init(void) { int max_intensity = 0; + int token; int ret; if (!dmi_check_system(dell_device_table)) @@ -2103,13 +2134,15 @@ static int __init dell_init(void) if (acpi_video_get_backlight_type() != acpi_backlight_vendor) return 0; - get_buffer(); - buffer->input[0] = find_token_location(BRIGHTNESS_TOKEN); - if (buffer->input[0] != -1) { + token = find_token_location(BRIGHTNESS_TOKEN); + if (token != -1) { + get_buffer(); + buffer->input[0] = token; dell_send_request(buffer, 0, 2); - max_intensity = buffer->output[3]; + if (buffer->output[0] == 0) + max_intensity = buffer->output[3]; + release_buffer(); } - release_buffer(); if (max_intensity) { struct backlight_properties props; From 22565ba0bf231eb4267b1d2ebad300d55b28a427 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 6 Jul 2015 12:08:57 +0200 Subject: [PATCH 033/170] dell-laptop: Do not cache hwswitch state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hwswitch state can be changed at runtime, so make sure dell-laptop always knows the current state. It can be modified by the userspace utility smbios-wireless-ctl. Signed-off-by: Pali Rohár Signed-off-by: Darren Hart --- drivers/platform/x86/dell-laptop.c | 85 +++++++++++++++++++++--------- 1 file changed, 61 insertions(+), 24 deletions(-) diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index efcfc4916af2..aaeeae81e3a9 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -309,8 +309,6 @@ static const struct dmi_system_id dell_quirks[] __initconst = { static struct calling_interface_buffer *buffer; static DEFINE_MUTEX(buffer_mutex); -static int hwswitch_state; - static void clear_buffer(void) { memset(buffer, 0, sizeof(struct calling_interface_buffer)); @@ -553,20 +551,30 @@ static int dell_rfkill_set(void *data, bool blocked) int disable = blocked ? 1 : 0; unsigned long radio = (unsigned long)data; int hwswitch_bit = (unsigned long)data - 1; + int hwswitch; + int status; int ret; get_buffer(); dell_send_request(buffer, 17, 11); ret = buffer->output[0]; + status = buffer->output[1]; if (ret != 0) goto out; + clear_buffer(); + + buffer->input[0] = 0x2; + dell_send_request(buffer, 17, 11); + ret = buffer->output[0]; + hwswitch = buffer->output[1]; + /* If the hardware switch controls this radio, and the hardware switch is disabled, always disable the radio */ - if ((hwswitch_state & BIT(hwswitch_bit)) && - !(buffer->output[1] & BIT(16))) + if (ret == 0 && (hwswitch & BIT(hwswitch_bit)) && + (status & BIT(0)) && !(status & BIT(16))) disable = 1; clear_buffer(); @@ -597,27 +605,43 @@ static void dell_rfkill_update_sw_state(struct rfkill *rfkill, int radio, } static void dell_rfkill_update_hw_state(struct rfkill *rfkill, int radio, - int status) + int status, int hwswitch) { - if (hwswitch_state & (BIT(radio - 1))) + if (hwswitch & (BIT(radio - 1))) rfkill_set_hw_state(rfkill, !(status & BIT(16))); } static void dell_rfkill_query(struct rfkill *rfkill, void *data) { + int radio = ((unsigned long)data & 0xF); + int hwswitch; int status; int ret; get_buffer(); + dell_send_request(buffer, 17, 11); ret = buffer->output[0]; status = buffer->output[1]; + + if (ret != 0 || !(status & BIT(0))) { + release_buffer(); + return; + } + + clear_buffer(); + + buffer->input[0] = 0x2; + dell_send_request(buffer, 17, 11); + ret = buffer->output[0]; + hwswitch = buffer->output[1]; + release_buffer(); if (ret != 0) return; - dell_rfkill_update_hw_state(rfkill, (unsigned long)data, status); + dell_rfkill_update_hw_state(rfkill, radio, status, hwswitch); } static const struct rfkill_ops dell_rfkill_ops = { @@ -629,13 +653,24 @@ static struct dentry *dell_laptop_dir; static int dell_debugfs_show(struct seq_file *s, void *data) { + int hwswitch_state; + int hwswitch_ret; int status; int ret; get_buffer(); + dell_send_request(buffer, 17, 11); ret = buffer->output[0]; status = buffer->output[1]; + + clear_buffer(); + + buffer->input[0] = 0x2; + dell_send_request(buffer, 17, 11); + hwswitch_ret = buffer->output[0]; + hwswitch_state = buffer->output[1]; + release_buffer(); seq_printf(s, "return:\t%d\n", ret); @@ -680,7 +715,8 @@ static int dell_debugfs_show(struct seq_file *s, void *data) seq_printf(s, "Bit 21: WiGig is blocked: %lu\n", (status & BIT(21)) >> 21); - seq_printf(s, "\nhwswitch_state:\t0x%X\n", hwswitch_state); + seq_printf(s, "\nhwswitch_return:\t%d\n", hwswitch_ret); + seq_printf(s, "hwswitch_state:\t0x%X\n", hwswitch_state); seq_printf(s, "Bit 0 : Wifi controlled by switch: %lu\n", hwswitch_state & BIT(0)); seq_printf(s, "Bit 1 : Bluetooth controlled by switch: %lu\n", @@ -716,6 +752,7 @@ static const struct file_operations dell_debugfs_fops = { static void dell_update_rfkill(struct work_struct *ignored) { + int hwswitch = 0; int status; int ret; @@ -728,16 +765,26 @@ static void dell_update_rfkill(struct work_struct *ignored) if (ret != 0) goto out; + clear_buffer(); + + buffer->input[0] = 0x2; + dell_send_request(buffer, 17, 11); + ret = buffer->output[0]; + + if (ret == 0 && (status & BIT(0))) + hwswitch = buffer->output[1]; + if (wifi_rfkill) { - dell_rfkill_update_hw_state(wifi_rfkill, 1, status); + dell_rfkill_update_hw_state(wifi_rfkill, 1, status, hwswitch); dell_rfkill_update_sw_state(wifi_rfkill, 1, status); } if (bluetooth_rfkill) { - dell_rfkill_update_hw_state(bluetooth_rfkill, 2, status); + dell_rfkill_update_hw_state(bluetooth_rfkill, 2, status, + hwswitch); dell_rfkill_update_sw_state(bluetooth_rfkill, 2, status); } if (wwan_rfkill) { - dell_rfkill_update_hw_state(wwan_rfkill, 3, status); + dell_rfkill_update_hw_state(wwan_rfkill, 3, status, hwswitch); dell_rfkill_update_sw_state(wwan_rfkill, 3, status); } @@ -805,25 +852,15 @@ static int __init dell_setup_rfkill(void) dell_send_request(buffer, 17, 11); ret = buffer->output[0]; status = buffer->output[1]; - clear_buffer(); - buffer->input[0] = 0x2; - dell_send_request(buffer, 17, 11); - hwswitch_state = buffer->output[1]; release_buffer(); /* dell wireless info smbios call is not supported */ if (ret != 0) return 0; - if (!(status & BIT(0))) { - if (force_rfkill) { - /* No hwsitch, clear all hw-controlled bits */ - hwswitch_state &= ~7; - } else { - /* rfkill is only tested on laptops with a hwswitch */ - return 0; - } - } + /* rfkill is only tested on laptops with a hwswitch */ + if (!(status & BIT(0)) && !force_rfkill) + return 0; if ((status & (1<<2|1<<8)) == (1<<2|1<<8)) { wifi_rfkill = rfkill_alloc("dell-wifi", &platform_device->dev, From f9c87a6f46d508eae0d9ae640be98d50f237f827 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 6 Jul 2015 17:58:19 +0200 Subject: [PATCH 034/170] s390/sclp: clear upper register halves in _sclp_print_early If the kernel is compiled with gcc 5.1 and the XZ compression option the decompress_kernel function calls _sclp_print_early in 64-bit mode while the content of the upper register half of %r6 is non-zero. This causes a specification exception on the servc instruction in _sclp_servc. The _sclp_print_early function saves and restores the upper registers halves but it fails to clear them for the 31-bit code of the mini sclp driver. Cc: Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/sclp.S | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S index 43c3169ea49c..ada0c07fe1a8 100644 --- a/arch/s390/kernel/sclp.S +++ b/arch/s390/kernel/sclp.S @@ -270,6 +270,8 @@ ENTRY(_sclp_print_early) jno .Lesa2 ahi %r15,-80 stmh %r6,%r15,96(%r15) # store upper register halves + basr %r13,0 + lmh %r0,%r15,.Lzeroes-.(%r13) # clear upper register halves .Lesa2: lr %r10,%r2 # save string pointer lhi %r2,0 @@ -291,6 +293,8 @@ ENTRY(_sclp_print_early) .Lesa3: lm %r6,%r15,120(%r15) # restore registers br %r14 +.Lzeroes: + .fill 64,4,0 .LwritedataS4: .long 0x00760005 # SCLP command for write data From bb8bd38b9a1685334b73e8c62e128cbedb875867 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 1 Jul 2015 12:57:40 -0400 Subject: [PATCH 035/170] bio integrity: do not assume bio_integrity_pool exists if bioset exists bio_integrity_alloc() and bio_integrity_free() assume that if a bio was allocated from a bioset that that bioset also had its bio_integrity_pool allocated using bioset_integrity_create(). This is a very bad assumption given that bioset_create() and bioset_integrity_create() are completely disjoint. Not all callers of bioset_create() have been trained to also call bioset_integrity_create() -- and they may not care to be. Fix this by falling back to kmalloc'ing 'struct bio_integrity_payload' rather than force all bioset consumers to (wastefully) preallocate a bio_integrity_pool that they very likely won't actually need (given the niche nature of the current block integrity support). Otherwise, a NULL pointer "Kernel BUG" with a trace like the following will be observed (as seen on s390x using zfcp storage) because dm-io doesn't use bioset_integrity_create() when creating its bioset: [ 791.643338] Call Trace: [ 791.643339] ([<00000003df98b848>] 0x3df98b848) [ 791.643341] [<00000000002c5de8>] bio_integrity_alloc+0x48/0xf8 [ 791.643348] [<00000000002c6486>] bio_integrity_prep+0xae/0x2f0 [ 791.643349] [<0000000000371e38>] blk_queue_bio+0x1c8/0x3d8 [ 791.643355] [<000000000036f8d0>] generic_make_request+0xc0/0x100 [ 791.643357] [<000000000036f9b2>] submit_bio+0xa2/0x198 [ 791.643406] [<000003ff801f9774>] dispatch_io+0x15c/0x3b0 [dm_mod] [ 791.643419] [<000003ff801f9b3e>] dm_io+0x176/0x2f0 [dm_mod] [ 791.643423] [<000003ff8074b28a>] do_reads+0x13a/0x1a8 [dm_mirror] [ 791.643425] [<000003ff8074b43a>] do_mirror+0x142/0x298 [dm_mirror] [ 791.643428] [<0000000000154fca>] process_one_work+0x18a/0x3f8 [ 791.643432] [<000000000015598a>] worker_thread+0x132/0x3b0 [ 791.643435] [<000000000015d49a>] kthread+0xd2/0xd8 [ 791.643438] [<00000000005bc0ca>] kernel_thread_starter+0x6/0xc [ 791.643446] [<00000000005bc0c4>] kernel_thread_starter+0x0/0xc Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- block/bio-integrity.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 0436c21db7f2..719b7152aed1 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -51,7 +51,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, unsigned long idx = BIO_POOL_NONE; unsigned inline_vecs; - if (!bs) { + if (!bs || !bs->bio_integrity_pool) { bip = kmalloc(sizeof(struct bio_integrity_payload) + sizeof(struct bio_vec) * nr_vecs, gfp_mask); inline_vecs = nr_vecs; @@ -104,7 +104,7 @@ void bio_integrity_free(struct bio *bio) kfree(page_address(bip->bip_vec->bv_page) + bip->bip_vec->bv_offset); - if (bs) { + if (bs && bs->bio_integrity_pool) { if (bip->bip_slab != BIO_POOL_NONE) bvec_free(bs->bvec_integrity_pool, bip->bip_vec, bip->bip_slab); From 0762b23d23c1f23beab91a3af0fa89749b75f03c Mon Sep 17 00:00:00 2001 From: Maninder Singh Date: Tue, 7 Jul 2015 12:41:07 +0530 Subject: [PATCH 036/170] block: use FIELD_SIZEOF to calculate size of a field use FIELD_SIZEOF instead of open coding Signed-off-by: Maninder Singh Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-core.c b/block/blk-core.c index 82819e68f58b..627ed0c593fb 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -3370,7 +3370,7 @@ EXPORT_SYMBOL(blk_post_runtime_resume); int __init blk_dev_init(void) { BUILD_BUG_ON(__REQ_NR_BITS > 8 * - sizeof(((struct request *)0)->cmd_flags)); + FIELD_SIZEOF(struct request, cmd_flags)); /* used for unplugging and affects IO latency/throughput - HIGHPRI */ kblockd_workqueue = alloc_workqueue("kblockd", From a322baad1003798312741b0cb97bd2c7511ccf61 Mon Sep 17 00:00:00 2001 From: Arianna Avanzini Date: Tue, 7 Jul 2015 03:08:15 +0200 Subject: [PATCH 037/170] block/blk-cgroup.c: free per-blkcg data when freeing the blkcg Currently, per-blkcg data is freed each time a policy is deactivated, that is also upon scheduler switch. However, when switching from a scheduler implementing a policy which requires per-blkcg data to another one, that same policy might be active on other devices, and therefore those same per-blkcg data could be still in use. This commit lets per-blkcg data be freed when the blkcg is freed instead of on policy deactivation. Signed-off-by: Arianna Avanzini Reported-and-tested-by: Michael Kaminsky Fixes: e48453c3 ("block, cgroup: implement policy-specific per-blkcg data") Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 9f97da52d006..5e2723f2c6a3 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -822,8 +822,13 @@ static void blkcg_css_free(struct cgroup_subsys_state *css) { struct blkcg *blkcg = css_to_blkcg(css); - if (blkcg != &blkcg_root) + if (blkcg != &blkcg_root) { + int i; + + for (i = 0; i < BLKCG_MAX_POLS; i++) + kfree(blkcg->pd[i]); kfree(blkcg); + } } static struct cgroup_subsys_state * @@ -1162,8 +1167,6 @@ void blkcg_deactivate_policy(struct request_queue *q, kfree(blkg->pd[pol->plid]); blkg->pd[pol->plid] = NULL; - kfree(blkg->blkcg->pd[pol->plid]); - blkg->blkcg->pd[pol->plid] = NULL; spin_unlock(&blkg->blkcg->lock); } From 030f4e968741d65aea9cd5f7814d1164967801ef Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 7 Jul 2015 17:30:25 +0800 Subject: [PATCH 038/170] crypto: nx - Fix reentrancy bugs This patch fixes a host of reentrancy bugs in the nx driver. The following algorithms are affected: * CCM * GCM * CTR * XCBC * SHA256 * SHA512 The crypto API allows a single transform to be used by multiple threads simultaneously. For example, IPsec will use a single tfm to process packets for a given SA. As packets may arrive on multiple CPUs that tfm must be reentrant. The nx driver does try to deal with this by using a spin lock. Unfortunately only the basic AES/CBC/ECB algorithms do this in the correct way. The symptom of these bugs may range from the generation of incorrect output to memory corruption. Cc: stable@vger.kernel.org Signed-off-by: Herbert Xu --- drivers/crypto/nx/nx-aes-ccm.c | 6 ++- drivers/crypto/nx/nx-aes-ctr.c | 7 ++-- drivers/crypto/nx/nx-aes-gcm.c | 17 ++++---- drivers/crypto/nx/nx-aes-xcbc.c | 70 +++++++++++++++++++++------------ drivers/crypto/nx/nx-sha256.c | 43 +++++++++++--------- drivers/crypto/nx/nx-sha512.c | 44 ++++++++++++--------- drivers/crypto/nx/nx.c | 3 ++ drivers/crypto/nx/nx.h | 14 +++++-- 8 files changed, 125 insertions(+), 79 deletions(-) diff --git a/drivers/crypto/nx/nx-aes-ccm.c b/drivers/crypto/nx/nx-aes-ccm.c index 67f80813a06f..e4311ce0cd78 100644 --- a/drivers/crypto/nx/nx-aes-ccm.c +++ b/drivers/crypto/nx/nx-aes-ccm.c @@ -494,8 +494,9 @@ out: static int ccm4309_aes_nx_encrypt(struct aead_request *req) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); + struct nx_gcm_rctx *rctx = aead_request_ctx(req); struct blkcipher_desc desc; - u8 *iv = nx_ctx->priv.ccm.iv; + u8 *iv = rctx->iv; iv[0] = 3; memcpy(iv + 1, nx_ctx->priv.ccm.nonce, 3); @@ -525,8 +526,9 @@ static int ccm_aes_nx_encrypt(struct aead_request *req) static int ccm4309_aes_nx_decrypt(struct aead_request *req) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); + struct nx_gcm_rctx *rctx = aead_request_ctx(req); struct blkcipher_desc desc; - u8 *iv = nx_ctx->priv.ccm.iv; + u8 *iv = rctx->iv; iv[0] = 3; memcpy(iv + 1, nx_ctx->priv.ccm.nonce, 3); diff --git a/drivers/crypto/nx/nx-aes-ctr.c b/drivers/crypto/nx/nx-aes-ctr.c index 2617cd4d54dd..dd7e9f3f5b6b 100644 --- a/drivers/crypto/nx/nx-aes-ctr.c +++ b/drivers/crypto/nx/nx-aes-ctr.c @@ -72,7 +72,7 @@ static int ctr3686_aes_nx_set_key(struct crypto_tfm *tfm, if (key_len < CTR_RFC3686_NONCE_SIZE) return -EINVAL; - memcpy(nx_ctx->priv.ctr.iv, + memcpy(nx_ctx->priv.ctr.nonce, in_key + key_len - CTR_RFC3686_NONCE_SIZE, CTR_RFC3686_NONCE_SIZE); @@ -131,14 +131,15 @@ static int ctr3686_aes_nx_crypt(struct blkcipher_desc *desc, unsigned int nbytes) { struct nx_crypto_ctx *nx_ctx = crypto_blkcipher_ctx(desc->tfm); - u8 *iv = nx_ctx->priv.ctr.iv; + u8 iv[16]; + memcpy(iv, nx_ctx->priv.ctr.nonce, CTR_RFC3686_IV_SIZE); memcpy(iv + CTR_RFC3686_NONCE_SIZE, desc->info, CTR_RFC3686_IV_SIZE); iv[12] = iv[13] = iv[14] = 0; iv[15] = 1; - desc->info = nx_ctx->priv.ctr.iv; + desc->info = iv; return ctr_aes_nx_crypt(desc, dst, src, nbytes); } diff --git a/drivers/crypto/nx/nx-aes-gcm.c b/drivers/crypto/nx/nx-aes-gcm.c index 08ac6d48688c..92c993f08213 100644 --- a/drivers/crypto/nx/nx-aes-gcm.c +++ b/drivers/crypto/nx/nx-aes-gcm.c @@ -317,6 +317,7 @@ out: static int gcm_aes_nx_crypt(struct aead_request *req, int enc) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); + struct nx_gcm_rctx *rctx = aead_request_ctx(req); struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; struct blkcipher_desc desc; unsigned int nbytes = req->cryptlen; @@ -326,7 +327,7 @@ static int gcm_aes_nx_crypt(struct aead_request *req, int enc) spin_lock_irqsave(&nx_ctx->lock, irq_flags); - desc.info = nx_ctx->priv.gcm.iv; + desc.info = rctx->iv; /* initialize the counter */ *(u32 *)(desc.info + NX_GCM_CTR_OFFSET) = 1; @@ -424,8 +425,8 @@ out: static int gcm_aes_nx_encrypt(struct aead_request *req) { - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); - char *iv = nx_ctx->priv.gcm.iv; + struct nx_gcm_rctx *rctx = aead_request_ctx(req); + char *iv = rctx->iv; memcpy(iv, req->iv, 12); @@ -434,8 +435,8 @@ static int gcm_aes_nx_encrypt(struct aead_request *req) static int gcm_aes_nx_decrypt(struct aead_request *req) { - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); - char *iv = nx_ctx->priv.gcm.iv; + struct nx_gcm_rctx *rctx = aead_request_ctx(req); + char *iv = rctx->iv; memcpy(iv, req->iv, 12); @@ -445,7 +446,8 @@ static int gcm_aes_nx_decrypt(struct aead_request *req) static int gcm4106_aes_nx_encrypt(struct aead_request *req) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); - char *iv = nx_ctx->priv.gcm.iv; + struct nx_gcm_rctx *rctx = aead_request_ctx(req); + char *iv = rctx->iv; char *nonce = nx_ctx->priv.gcm.nonce; memcpy(iv, nonce, NX_GCM4106_NONCE_LEN); @@ -457,7 +459,8 @@ static int gcm4106_aes_nx_encrypt(struct aead_request *req) static int gcm4106_aes_nx_decrypt(struct aead_request *req) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); - char *iv = nx_ctx->priv.gcm.iv; + struct nx_gcm_rctx *rctx = aead_request_ctx(req); + char *iv = rctx->iv; char *nonce = nx_ctx->priv.gcm.nonce; memcpy(iv, nonce, NX_GCM4106_NONCE_LEN); diff --git a/drivers/crypto/nx/nx-aes-xcbc.c b/drivers/crypto/nx/nx-aes-xcbc.c index 8c2faffab4a3..c2f7d4befb55 100644 --- a/drivers/crypto/nx/nx-aes-xcbc.c +++ b/drivers/crypto/nx/nx-aes-xcbc.c @@ -42,6 +42,7 @@ static int nx_xcbc_set_key(struct crypto_shash *desc, unsigned int key_len) { struct nx_crypto_ctx *nx_ctx = crypto_shash_ctx(desc); + struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; switch (key_len) { case AES_KEYSIZE_128: @@ -51,7 +52,7 @@ static int nx_xcbc_set_key(struct crypto_shash *desc, return -EINVAL; } - memcpy(nx_ctx->priv.xcbc.key, in_key, key_len); + memcpy(csbcpb->cpb.aes_xcbc.key, in_key, key_len); return 0; } @@ -148,32 +149,29 @@ out: return rc; } -static int nx_xcbc_init(struct shash_desc *desc) +static int nx_crypto_ctx_aes_xcbc_init2(struct crypto_tfm *tfm) { - struct xcbc_state *sctx = shash_desc_ctx(desc); - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); + struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(tfm); struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; - struct nx_sg *out_sg; - int len; + int err; + + err = nx_crypto_ctx_aes_xcbc_init(tfm); + if (err) + return err; nx_ctx_init(nx_ctx, HCOP_FC_AES); - memset(sctx, 0, sizeof *sctx); - NX_CPB_SET_KEY_SIZE(csbcpb, NX_KS_AES_128); csbcpb->cpb.hdr.mode = NX_MODE_AES_XCBC_MAC; - memcpy(csbcpb->cpb.aes_xcbc.key, nx_ctx->priv.xcbc.key, AES_BLOCK_SIZE); - memset(nx_ctx->priv.xcbc.key, 0, sizeof *nx_ctx->priv.xcbc.key); + return 0; +} - len = AES_BLOCK_SIZE; - out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, - &len, nx_ctx->ap->sglen); +static int nx_xcbc_init(struct shash_desc *desc) +{ + struct xcbc_state *sctx = shash_desc_ctx(desc); - if (len != AES_BLOCK_SIZE) - return -EINVAL; - - nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); + memset(sctx, 0, sizeof *sctx); return 0; } @@ -186,6 +184,7 @@ static int nx_xcbc_update(struct shash_desc *desc, struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; struct nx_sg *in_sg; + struct nx_sg *out_sg; u32 to_process = 0, leftover, total; unsigned int max_sg_len; unsigned long irq_flags; @@ -213,6 +212,17 @@ static int nx_xcbc_update(struct shash_desc *desc, max_sg_len = min_t(u64, max_sg_len, nx_ctx->ap->databytelen/NX_PAGE_SIZE); + data_len = AES_BLOCK_SIZE; + out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, + &len, nx_ctx->ap->sglen); + + if (data_len != AES_BLOCK_SIZE) { + rc = -EINVAL; + goto out; + } + + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); + do { to_process = total - to_process; to_process = to_process & ~(AES_BLOCK_SIZE - 1); @@ -235,8 +245,10 @@ static int nx_xcbc_update(struct shash_desc *desc, (u8 *) sctx->buffer, &data_len, max_sg_len); - if (data_len != sctx->count) - return -EINVAL; + if (data_len != sctx->count) { + rc = -EINVAL; + goto out; + } } data_len = to_process - sctx->count; @@ -245,8 +257,10 @@ static int nx_xcbc_update(struct shash_desc *desc, &data_len, max_sg_len); - if (data_len != to_process - sctx->count) - return -EINVAL; + if (data_len != to_process - sctx->count) { + rc = -EINVAL; + goto out; + } nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); @@ -325,15 +339,19 @@ static int nx_xcbc_final(struct shash_desc *desc, u8 *out) in_sg = nx_build_sg_list(nx_ctx->in_sg, (u8 *)sctx->buffer, &len, nx_ctx->ap->sglen); - if (len != sctx->count) - return -EINVAL; + if (len != sctx->count) { + rc = -EINVAL; + goto out; + } len = AES_BLOCK_SIZE; out_sg = nx_build_sg_list(nx_ctx->out_sg, out, &len, nx_ctx->ap->sglen); - if (len != AES_BLOCK_SIZE) - return -EINVAL; + if (len != AES_BLOCK_SIZE) { + rc = -EINVAL; + goto out; + } nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); @@ -372,7 +390,7 @@ struct shash_alg nx_shash_aes_xcbc_alg = { .cra_blocksize = AES_BLOCK_SIZE, .cra_module = THIS_MODULE, .cra_ctxsize = sizeof(struct nx_crypto_ctx), - .cra_init = nx_crypto_ctx_aes_xcbc_init, + .cra_init = nx_crypto_ctx_aes_xcbc_init2, .cra_exit = nx_crypto_ctx_exit, } }; diff --git a/drivers/crypto/nx/nx-sha256.c b/drivers/crypto/nx/nx-sha256.c index 4e91bdb83c59..08f8d5cd6334 100644 --- a/drivers/crypto/nx/nx-sha256.c +++ b/drivers/crypto/nx/nx-sha256.c @@ -29,34 +29,28 @@ #include "nx.h" -static int nx_sha256_init(struct shash_desc *desc) +static int nx_crypto_ctx_sha256_init(struct crypto_tfm *tfm) { - struct sha256_state *sctx = shash_desc_ctx(desc); - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); - struct nx_sg *out_sg; - int len; - u32 max_sg_len; + struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(tfm); + int err; + + err = nx_crypto_ctx_sha_init(tfm); + if (err) + return err; nx_ctx_init(nx_ctx, HCOP_FC_SHA); - memset(sctx, 0, sizeof *sctx); - nx_ctx->ap = &nx_ctx->props[NX_PROPS_SHA256]; NX_CPB_SET_DIGEST_SIZE(nx_ctx->csbcpb, NX_DS_SHA256); - max_sg_len = min_t(u64, nx_ctx->ap->sglen, - nx_driver.of.max_sg_len/sizeof(struct nx_sg)); - max_sg_len = min_t(u64, max_sg_len, - nx_ctx->ap->databytelen/NX_PAGE_SIZE); + return 0; +} - len = SHA256_DIGEST_SIZE; - out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, - &len, max_sg_len); - nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); +static int nx_sha256_init(struct shash_desc *desc) { + struct sha256_state *sctx = shash_desc_ctx(desc); - if (len != SHA256_DIGEST_SIZE) - return -EINVAL; + memset(sctx, 0, sizeof *sctx); sctx->state[0] = __cpu_to_be32(SHA256_H0); sctx->state[1] = __cpu_to_be32(SHA256_H1); @@ -78,6 +72,7 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; struct nx_sg *in_sg; + struct nx_sg *out_sg; u64 to_process = 0, leftover, total; unsigned long irq_flags; int rc = 0; @@ -108,6 +103,16 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, max_sg_len = min_t(u64, max_sg_len, nx_ctx->ap->databytelen/NX_PAGE_SIZE); + data_len = SHA256_DIGEST_SIZE; + out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, + &data_len, max_sg_len); + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); + + if (data_len != SHA256_DIGEST_SIZE) { + rc = -EINVAL; + goto out; + } + do { /* * to_process: the SHA256_BLOCK_SIZE data chunk to process in @@ -282,7 +287,7 @@ struct shash_alg nx_shash_sha256_alg = { .cra_blocksize = SHA256_BLOCK_SIZE, .cra_module = THIS_MODULE, .cra_ctxsize = sizeof(struct nx_crypto_ctx), - .cra_init = nx_crypto_ctx_sha_init, + .cra_init = nx_crypto_ctx_sha256_init, .cra_exit = nx_crypto_ctx_exit, } }; diff --git a/drivers/crypto/nx/nx-sha512.c b/drivers/crypto/nx/nx-sha512.c index e6a58d2ee628..aff0fe58eac0 100644 --- a/drivers/crypto/nx/nx-sha512.c +++ b/drivers/crypto/nx/nx-sha512.c @@ -28,34 +28,29 @@ #include "nx.h" -static int nx_sha512_init(struct shash_desc *desc) +static int nx_crypto_ctx_sha512_init(struct crypto_tfm *tfm) { - struct sha512_state *sctx = shash_desc_ctx(desc); - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); - struct nx_sg *out_sg; - int len; - u32 max_sg_len; + struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(tfm); + int err; + + err = nx_crypto_ctx_sha_init(tfm); + if (err) + return err; nx_ctx_init(nx_ctx, HCOP_FC_SHA); - memset(sctx, 0, sizeof *sctx); - nx_ctx->ap = &nx_ctx->props[NX_PROPS_SHA512]; NX_CPB_SET_DIGEST_SIZE(nx_ctx->csbcpb, NX_DS_SHA512); - max_sg_len = min_t(u64, nx_ctx->ap->sglen, - nx_driver.of.max_sg_len/sizeof(struct nx_sg)); - max_sg_len = min_t(u64, max_sg_len, - nx_ctx->ap->databytelen/NX_PAGE_SIZE); + return 0; +} - len = SHA512_DIGEST_SIZE; - out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, - &len, max_sg_len); - nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); +static int nx_sha512_init(struct shash_desc *desc) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); - if (len != SHA512_DIGEST_SIZE) - return -EINVAL; + memset(sctx, 0, sizeof *sctx); sctx->state[0] = __cpu_to_be64(SHA512_H0); sctx->state[1] = __cpu_to_be64(SHA512_H1); @@ -77,6 +72,7 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; struct nx_sg *in_sg; + struct nx_sg *out_sg; u64 to_process, leftover = 0, total; unsigned long irq_flags; int rc = 0; @@ -107,6 +103,16 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, max_sg_len = min_t(u64, max_sg_len, nx_ctx->ap->databytelen/NX_PAGE_SIZE); + data_len = SHA512_DIGEST_SIZE; + out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, + &data_len, max_sg_len); + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); + + if (data_len != SHA512_DIGEST_SIZE) { + rc = -EINVAL; + goto out; + } + do { /* * to_process: the SHA512_BLOCK_SIZE data chunk to process in @@ -288,7 +294,7 @@ struct shash_alg nx_shash_sha512_alg = { .cra_blocksize = SHA512_BLOCK_SIZE, .cra_module = THIS_MODULE, .cra_ctxsize = sizeof(struct nx_crypto_ctx), - .cra_init = nx_crypto_ctx_sha_init, + .cra_init = nx_crypto_ctx_sha512_init, .cra_exit = nx_crypto_ctx_exit, } }; diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c index f6198f29a4a8..436971343ff7 100644 --- a/drivers/crypto/nx/nx.c +++ b/drivers/crypto/nx/nx.c @@ -713,12 +713,15 @@ static int nx_crypto_ctx_init(struct nx_crypto_ctx *nx_ctx, u32 fc, u32 mode) /* entry points from the crypto tfm initializers */ int nx_crypto_ctx_aes_ccm_init(struct crypto_tfm *tfm) { + crypto_aead_set_reqsize(__crypto_aead_cast(tfm), + sizeof(struct nx_ccm_rctx)); return nx_crypto_ctx_init(crypto_tfm_ctx(tfm), NX_FC_AES, NX_MODE_AES_CCM); } int nx_crypto_ctx_aes_gcm_init(struct crypto_aead *tfm) { + crypto_aead_set_reqsize(tfm, sizeof(struct nx_gcm_rctx)); return nx_crypto_ctx_init(crypto_aead_ctx(tfm), NX_FC_AES, NX_MODE_AES_GCM); } diff --git a/drivers/crypto/nx/nx.h b/drivers/crypto/nx/nx.h index de3ea8738146..cdff03a42ae7 100644 --- a/drivers/crypto/nx/nx.h +++ b/drivers/crypto/nx/nx.h @@ -2,6 +2,8 @@ #ifndef __NX_H__ #define __NX_H__ +#include + #define NX_NAME "nx-crypto" #define NX_STRING "IBM Power7+ Nest Accelerator Crypto Driver" #define NX_VERSION "1.0" @@ -91,8 +93,11 @@ struct nx_crypto_driver { #define NX_GCM4106_NONCE_LEN (4) #define NX_GCM_CTR_OFFSET (12) -struct nx_gcm_priv { +struct nx_gcm_rctx { u8 iv[16]; +}; + +struct nx_gcm_priv { u8 iauth_tag[16]; u8 nonce[NX_GCM4106_NONCE_LEN]; }; @@ -100,8 +105,11 @@ struct nx_gcm_priv { #define NX_CCM_AES_KEY_LEN (16) #define NX_CCM4309_AES_KEY_LEN (19) #define NX_CCM4309_NONCE_LEN (3) -struct nx_ccm_priv { +struct nx_ccm_rctx { u8 iv[16]; +}; + +struct nx_ccm_priv { u8 b0[16]; u8 iauth_tag[16]; u8 oauth_tag[16]; @@ -113,7 +121,7 @@ struct nx_xcbc_priv { }; struct nx_ctr_priv { - u8 iv[16]; + u8 nonce[CTR_RFC3686_NONCE_SIZE]; }; struct nx_crypto_ctx { From 91c269a0d3eadd63f7112411ee812fbc170dc488 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 6 Jul 2015 20:55:35 +0200 Subject: [PATCH 039/170] MAINTAINER: add bridge netfilter So scripts/get_maintainer.pl shows the Netfilter mailing lists. Reported-by: Julien Grall Signed-off-by: Pablo Neira Ayuso --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 993d4cfd5aa0..8183b4659a4d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6996,6 +6996,7 @@ F: include/uapi/linux/netfilter/ F: net/*/netfilter.c F: net/*/netfilter/ F: net/netfilter/ +F: net/bridge/br_netfilter*.c NETLABEL M: Paul Moore From 86e8971800381c3a8d8d9327f83b1f97ccb04a4f Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Tue, 7 Jul 2015 15:55:21 +0100 Subject: [PATCH 040/170] netfilter: bridge: Use __in6_dev_get rather than in6_dev_get in br_validate_ipv6 The commit efb6de9b4ba0092b2c55f6a52d16294a8a698edd "netfilter: bridge: forward IPv6 fragmented packets" introduced a new function br_validate_ipv6 which take a reference on the inet6 device. Although, the reference is not released at the end. This will result to the impossibility to destroy any netdevice using ipv6 and bridge. It's possible to directly retrieve the inet6 device without taking a reference as all netfilter hooks are protected by rcu_read_lock via nf_hook_slow. Spotted while trying to destroy a Xen guest on the upstream Linux: "unregister_netdevice: waiting for vif1.0 to become free. Usage count = 1" Signed-off-by: Julien Grall Cc: Bernhard Thaler Cc: Pablo Neira Ayuso Cc: fw@strlen.de Cc: ian.campbell@citrix.com Cc: wei.liu2@citrix.com Cc: Bob Liu Acked-by: Stephen Hemminger Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter_ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 6d12d2675c80..13b7d1e3d185 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -104,7 +104,7 @@ int br_validate_ipv6(struct sk_buff *skb) { const struct ipv6hdr *hdr; struct net_device *dev = skb->dev; - struct inet6_dev *idev = in6_dev_get(skb->dev); + struct inet6_dev *idev = __in6_dev_get(skb->dev); u32 pkt_len; u8 ip6h_len = sizeof(struct ipv6hdr); From 6224beb12e190ff11f3c7d4bf50cb2922878f600 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 7 Jul 2015 15:05:03 -0400 Subject: [PATCH 041/170] tracing: Have branch tracer use recursive field of task struct Fengguang Wu's tests triggered a bug in the branch tracer's start up test when CONFIG_DEBUG_PREEMPT set. This was because that config adds some debug logic in the per cpu field, which calls back into the branch tracer. The branch tracer has its own recursive checks, but uses a per cpu variable to implement it. If retrieving the per cpu variable calls back into the branch tracer, you can see how things will break. Instead of using a per cpu variable, use the trace_recursion field of the current task struct. Simply set a bit when entering the branch tracing and clear it when leaving. If the bit is set on entry, just don't do the tracing. There's also the case with lockdep, as the local_irq_save() called before the recursion can also trigger code that can call back into the function. Changing that to a raw_local_irq_save() will protect that as well. This prevents the recursion and the inevitable crash that follows. Link: http://lkml.kernel.org/r/20150630141803.GA28071@wfg-t540p.sh.intel.com Cc: stable@vger.kernel.org # 3.10+ Reported-by: Fengguang Wu Tested-by: Fengguang Wu Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 1 + kernel/trace/trace_branch.c | 17 ++++++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f060716b02ae..74bde81601a9 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -444,6 +444,7 @@ enum { TRACE_CONTROL_BIT, + TRACE_BRANCH_BIT, /* * Abuse of the trace_recursion. * As we need a way to maintain state if we are tracing the function diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index a87b43f49eb4..e2e12ad3186f 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -36,9 +36,12 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) struct trace_branch *entry; struct ring_buffer *buffer; unsigned long flags; - int cpu, pc; + int pc; const char *p; + if (current->trace_recursion & TRACE_BRANCH_BIT) + return; + /* * I would love to save just the ftrace_likely_data pointer, but * this code can also be used by modules. Ugly things can happen @@ -49,10 +52,10 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) if (unlikely(!tr)) return; - local_irq_save(flags); - cpu = raw_smp_processor_id(); - data = per_cpu_ptr(tr->trace_buffer.data, cpu); - if (atomic_inc_return(&data->disabled) != 1) + raw_local_irq_save(flags); + current->trace_recursion |= TRACE_BRANCH_BIT; + data = this_cpu_ptr(tr->trace_buffer.data); + if (atomic_read(&data->disabled)) goto out; pc = preempt_count(); @@ -81,8 +84,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) __buffer_unlock_commit(buffer, event); out: - atomic_dec(&data->disabled); - local_irq_restore(flags); + current->trace_recursion &= ~TRACE_BRANCH_BIT; + raw_local_irq_restore(flags); } static inline From 32f675bbc9be14a40d972820e190ac56341ce198 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Jul 2015 15:57:19 +0200 Subject: [PATCH 042/170] net_sched: gen_estimator: extend pps limit rate estimators are limited to 4 Mpps, which was fine years ago, but too small with current hardware generation. Lets use 2^5 scaling instead of 2^10 to get 128 Mpps new limit. On 64bit arch, use an "unsigned long" for temp storage and remove limit. (We do not expect 32bit arches to be able to reach this point) Tested: tc -s -d filter sh dev eth0 parent ffff: filter protocol ip pref 1 u32 filter protocol ip pref 1 u32 fh 800: ht divisor 1 filter protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:15 match 07000000/ff000000 at 12 action order 1: gact action drop random type none pass val 0 index 1 ref 1 bind 1 installed 166 sec Action statistics: Sent 39734251496 bytes 863788076 pkt (dropped 863788117, overlimits 0 requeues 0) rate 4067Mbit 11053596pps backlog 0b 0p requeues 0 Signed-off-by: Eric Dumazet Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/gen_estimator.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 9dfb88a933e7..92d886f4adcb 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -66,7 +66,7 @@ NOTES. - * avbps is scaled by 2^5, avpps is scaled by 2^10. + * avbps and avpps are scaled by 2^5. * both values are reported as 32 bit unsigned values. bps can overflow for fast links : max speed being 34360Mbit/sec * Minimal interval is HZ/4=250msec (it is the greatest common divisor @@ -85,10 +85,10 @@ struct gen_estimator struct gnet_stats_rate_est64 *rate_est; spinlock_t *stats_lock; int ewma_log; + u32 last_packets; + unsigned long avpps; u64 last_bytes; u64 avbps; - u32 last_packets; - u32 avpps; struct rcu_head e_rcu; struct rb_node node; struct gnet_stats_basic_cpu __percpu *cpu_bstats; @@ -118,8 +118,8 @@ static void est_timer(unsigned long arg) rcu_read_lock(); list_for_each_entry_rcu(e, &elist[idx].list, list) { struct gnet_stats_basic_packed b = {0}; + unsigned long rate; u64 brate; - u32 rate; spin_lock(e->stats_lock); read_lock(&est_lock); @@ -133,10 +133,11 @@ static void est_timer(unsigned long arg) e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log); e->rate_est->bps = (e->avbps+0xF)>>5; - rate = (b.packets - e->last_packets)<<(12 - idx); + rate = b.packets - e->last_packets; + rate <<= (7 - idx); e->last_packets = b.packets; e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log); - e->rate_est->pps = (e->avpps+0x1FF)>>10; + e->rate_est->pps = (e->avpps + 0xF) >> 5; skip: read_unlock(&est_lock); spin_unlock(e->stats_lock); From aa43c5ff737dac5dcfccd12d23abeefbdf0579c7 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Sat, 4 Jul 2015 11:22:30 +0200 Subject: [PATCH 043/170] NET: hamradio: Fix IP over bpq encapsulation. Since 1d5da757da860a6916adbf68b09e868062b4b3b8 (ax25: Stop using magic neighbour cache operations.) any attempt to transmit IP packets over a bpqether device will result in a message like "Dead loop on virtual device bpq0, fix it urgently!" Fix suggested by Eric W. Biederman . Signed-off-by: Ralf Baechle Cc: # 4.1 Signed-off-by: David S. Miller --- drivers/net/hamradio/bpqether.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index 7856b6ccf5c5..d95a50ae996d 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -482,6 +482,7 @@ static void bpq_setup(struct net_device *dev) memcpy(dev->dev_addr, &ax25_defaddr, AX25_ADDR_LEN); dev->flags = 0; + dev->features = NETIF_F_LLTX; /* Allow recursion */ #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) dev->header_ops = &ax25_header_ops; From cfa520056407acaa29f3aa7aee10c929b371c7cf Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Sun, 5 Jul 2015 12:16:27 -0500 Subject: [PATCH 044/170] net: phy: add dependency on HAS_IOMEM to MDIO_BUS_MUX_MMIOREG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On UML builds, mdio-mux-mmioreg.c fails to compile: drivers/net/phy/mdio-mux-mmioreg.c:50:3: error: implicit declaration of function ‘ioremap’ [-Werror=implicit-function-declaration] drivers/net/phy/mdio-mux-mmioreg.c:63:3: error: implicit declaration of function ‘iounmap’ [-Werror=implicit-function-declaration] This is due to CONFIG_OF now being user selectable. Add a dependency on HAS_IOMEM to fix this. Signed-off-by: Rob Herring Cc: Florian Fainelli Cc: David S. Miller Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index cf18940f4e84..cb86d7a01542 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -191,7 +191,7 @@ config MDIO_BUS_MUX_GPIO config MDIO_BUS_MUX_MMIOREG tristate "Support for MMIO device-controlled MDIO bus multiplexers" - depends on OF_MDIO + depends on OF_MDIO && HAS_IOMEM select MDIO_BUS_MUX help This module provides a driver for MDIO bus multiplexers that From f7e2965db17dd3b60f05fad88e7afc79ea75b48f Mon Sep 17 00:00:00 2001 From: Satish Ashok Date: Mon, 6 Jul 2015 05:53:35 -0700 Subject: [PATCH 045/170] bridge: mdb: start delete timer for temp static entries Start the delete timer when adding temp static entries so they can expire. Signed-off-by: Satish Ashok Signed-off-by: Nikolay Aleksandrov Fixes: ccb1c31a7a87 ("bridge: add flags to distinguish permanent mdb entires") Signed-off-by: David S. Miller --- net/bridge/br_mdb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index e29ad70b3000..3bfc675cf0d1 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -323,6 +323,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; struct net_bridge_mdb_htable *mdb; + unsigned long now = jiffies; int err; mdb = mlock_dereference(br->mdb, br); @@ -347,6 +348,8 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, if (unlikely(!p)) return -ENOMEM; rcu_assign_pointer(*pp, p); + if (state == MDB_TEMPORARY) + mod_timer(&p->timer, now + br->multicast_membership_interval); br_mdb_notify(br->dev, port, group, RTM_NEWMDB); return 0; From 142b942a75cb10ede1b42bf85368d41449ab4e3b Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Mon, 6 Jul 2015 15:51:20 +0200 Subject: [PATCH 046/170] rhashtable: fix for resize events during table walk If rhashtable_walk_next detects a resize operation in progress, it jumps to the new table and continues walking that one. But it misses to drop the reference to it's current item, leading it to continue traversing the new table's bucket in which the current item is sorted into, and after reaching that bucket's end continues traversing the new table's second bucket instead of the first one, thereby potentially missing items. This fixes the rhashtable runtime test for me. Bug probably introduced by Herbert Xu's patch eddee5ba ("rhashtable: Fix walker behaviour during rehash") although not explicitly tested. Fixes: eddee5ba ("rhashtable: Fix walker behaviour during rehash") Signed-off-by: Phil Sutter Acked-by: Herbert Xu Signed-off-by: David S. Miller --- lib/rhashtable.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/rhashtable.c b/lib/rhashtable.c index a60a6d335a91..cc0c69710dcf 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -610,6 +610,8 @@ next: iter->skip = 0; } + iter->p = NULL; + /* Ensure we see any new tables. */ smp_rmb(); @@ -620,8 +622,6 @@ next: return ERR_PTR(-EAGAIN); } - iter->p = NULL; - return NULL; } EXPORT_SYMBOL_GPL(rhashtable_walk_next); From d339727c2b1a10f25e6636670ab6e1841170e328 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jul 2015 17:13:26 +0200 Subject: [PATCH 047/170] net: graceful exit from netif_alloc_netdev_queues() User space can crash kernel with ip link add ifb10 numtxqueues 100000 type ifb We must replace a BUG_ON() by proper test and return -EINVAL for crazy values. Fixes: 60877a32bce00 ("net: allow large number of tx queues") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 6778a9999d52..0ad626214332 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6409,7 +6409,8 @@ static int netif_alloc_netdev_queues(struct net_device *dev) struct netdev_queue *tx; size_t sz = count * sizeof(*tx); - BUG_ON(count < 1 || count > 0xffff); + if (count < 1 || count > 0xffff) + return -EINVAL; tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); if (!tx) { From 95ec655bc465ccb2a3329d4aff9a45e3c8188db5 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 6 Jul 2015 17:25:10 +0200 Subject: [PATCH 048/170] Revert "dev: set iflink to 0 for virtual interfaces" This reverts commit e1622baf54df8cc958bf29d71de5ad545ea7d93c. The side effect of this commit is to add a '@NONE' after each virtual interface name with a 'ip link'. It may break existing scripts. Reported-by: Olivier Hartkopp Signed-off-by: Nicolas Dichtel Tested-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/core/dev.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 0ad626214332..1e57efda055b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -677,10 +677,6 @@ int dev_get_iflink(const struct net_device *dev) if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink) return dev->netdev_ops->ndo_get_iflink(dev); - /* If dev->rtnl_link_ops is set, it's a virtual interface. */ - if (dev->rtnl_link_ops) - return 0; - return dev->ifindex; } EXPORT_SYMBOL(dev_get_iflink); From cfbfd86bfde15020bccde377e11586ee5c8b701d Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Mon, 6 Jul 2015 11:57:37 -0500 Subject: [PATCH 049/170] amd-xgbe: Fix DMA API debug warning When running a kernel configured with CONFIG_DMA_API_DEBUG=y a warning is issued: DMA-API: device driver tries to sync DMA memory it has not allocated This warning is the result of mapping the full range of the Rx buffer pages allocated and then performing a dma_sync_single_for_cpu against a calculated DMA address. The proper thing to do is to use the dma_sync_single_range_for_cpu with a base DMA address and an offset. Reported-by: Kim Phillips Signed-off-by: Tom Lendacky Tested-by: Kim Phillips Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-desc.c | 3 ++- drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 11 +++++++---- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 17 +++++++++++------ drivers/net/ethernet/amd/xgbe/xgbe.h | 3 ++- 4 files changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c index 661cdaa7ea96..b3bc87fe3764 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c @@ -303,7 +303,8 @@ static void xgbe_set_buffer_data(struct xgbe_buffer_data *bd, get_page(pa->pages); bd->pa = *pa; - bd->dma = pa->pages_dma + pa->pages_offset; + bd->dma_base = pa->pages_dma; + bd->dma_off = pa->pages_offset; bd->dma_len = len; pa->pages_offset += len; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index 506e832c9e9a..a4473d8ff4fa 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -1110,6 +1110,7 @@ static void xgbe_rx_desc_reset(struct xgbe_prv_data *pdata, unsigned int rx_usecs = pdata->rx_usecs; unsigned int rx_frames = pdata->rx_frames; unsigned int inte; + dma_addr_t hdr_dma, buf_dma; if (!rx_usecs && !rx_frames) { /* No coalescing, interrupt for every descriptor */ @@ -1129,10 +1130,12 @@ static void xgbe_rx_desc_reset(struct xgbe_prv_data *pdata, * Set buffer 2 (hi) address to buffer dma address (hi) and * set control bits OWN and INTE */ - rdesc->desc0 = cpu_to_le32(lower_32_bits(rdata->rx.hdr.dma)); - rdesc->desc1 = cpu_to_le32(upper_32_bits(rdata->rx.hdr.dma)); - rdesc->desc2 = cpu_to_le32(lower_32_bits(rdata->rx.buf.dma)); - rdesc->desc3 = cpu_to_le32(upper_32_bits(rdata->rx.buf.dma)); + hdr_dma = rdata->rx.hdr.dma_base + rdata->rx.hdr.dma_off; + buf_dma = rdata->rx.buf.dma_base + rdata->rx.buf.dma_off; + rdesc->desc0 = cpu_to_le32(lower_32_bits(hdr_dma)); + rdesc->desc1 = cpu_to_le32(upper_32_bits(hdr_dma)); + rdesc->desc2 = cpu_to_le32(lower_32_bits(buf_dma)); + rdesc->desc3 = cpu_to_le32(upper_32_bits(buf_dma)); XGMAC_SET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, INTE, inte); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 1e9c28d19ef8..aae9d5ecd182 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1765,8 +1765,9 @@ static struct sk_buff *xgbe_create_skb(struct xgbe_prv_data *pdata, /* Start with the header buffer which may contain just the header * or the header plus data */ - dma_sync_single_for_cpu(pdata->dev, rdata->rx.hdr.dma, - rdata->rx.hdr.dma_len, DMA_FROM_DEVICE); + dma_sync_single_range_for_cpu(pdata->dev, rdata->rx.hdr.dma_base, + rdata->rx.hdr.dma_off, + rdata->rx.hdr.dma_len, DMA_FROM_DEVICE); packet = page_address(rdata->rx.hdr.pa.pages) + rdata->rx.hdr.pa.pages_offset; @@ -1778,8 +1779,11 @@ static struct sk_buff *xgbe_create_skb(struct xgbe_prv_data *pdata, len -= copy_len; if (len) { /* Add the remaining data as a frag */ - dma_sync_single_for_cpu(pdata->dev, rdata->rx.buf.dma, - rdata->rx.buf.dma_len, DMA_FROM_DEVICE); + dma_sync_single_range_for_cpu(pdata->dev, + rdata->rx.buf.dma_base, + rdata->rx.buf.dma_off, + rdata->rx.buf.dma_len, + DMA_FROM_DEVICE); skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rdata->rx.buf.pa.pages, @@ -1945,8 +1949,9 @@ read_again: if (!skb) error = 1; } else if (rdesc_len) { - dma_sync_single_for_cpu(pdata->dev, - rdata->rx.buf.dma, + dma_sync_single_range_for_cpu(pdata->dev, + rdata->rx.buf.dma_base, + rdata->rx.buf.dma_off, rdata->rx.buf.dma_len, DMA_FROM_DEVICE); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 63d72a140053..717ce21b6077 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -337,7 +337,8 @@ struct xgbe_buffer_data { struct xgbe_page_alloc pa; struct xgbe_page_alloc pa_unmap; - dma_addr_t dma; + dma_addr_t dma_base; + unsigned long dma_off; unsigned int dma_len; }; From 6c3e921b18edca290099adfddde8a50236bf2d80 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 6 Jul 2015 20:34:55 +0200 Subject: [PATCH 050/170] net: fec: Ensure clocks are enabled while using mdio bus When a switch is attached to the mdio bus, the mdio bus can be used while the interface is not open. If the IPG clock is not enabled, MDIO reads/writes will simply time out. Add support for runtime PM to control this clock. Enable/disable this clock using runtime PM, with open()/close() and mdio read()/write() function triggering runtime PM operations. Since PM is optional, the IPG clock is enabled at probe and is no longer modified by fec_enet_clk_enable(), thus if PM is not enabled in the kernel, it is guaranteed the clock is running when MDIO operations are performed. Signed-off-by: Andrew Lunn Acked-by: Fugang Duan Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 88 +++++++++++++++++++---- 1 file changed, 75 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 1f89c59b4353..42e20e5385ac 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -77,6 +78,7 @@ static void fec_enet_itr_coal_init(struct net_device *ndev); #define FEC_ENET_RAEM_V 0x8 #define FEC_ENET_RAFL_V 0x8 #define FEC_ENET_OPD_V 0xFFF0 +#define FEC_MDIO_PM_TIMEOUT 100 /* ms */ static struct platform_device_id fec_devtype[] = { { @@ -1767,7 +1769,13 @@ static void fec_enet_adjust_link(struct net_device *ndev) static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum) { struct fec_enet_private *fep = bus->priv; + struct device *dev = &fep->pdev->dev; unsigned long time_left; + int ret = 0; + + ret = pm_runtime_get_sync(dev); + if (IS_ERR_VALUE(ret)) + return ret; fep->mii_timeout = 0; init_completion(&fep->mdio_done); @@ -1783,18 +1791,30 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum) if (time_left == 0) { fep->mii_timeout = 1; netdev_err(fep->netdev, "MDIO read timeout\n"); - return -ETIMEDOUT; + ret = -ETIMEDOUT; + goto out; } - /* return value */ - return FEC_MMFR_DATA(readl(fep->hwp + FEC_MII_DATA)); + ret = FEC_MMFR_DATA(readl(fep->hwp + FEC_MII_DATA)); + +out: + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); + + return ret; } static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum, u16 value) { struct fec_enet_private *fep = bus->priv; + struct device *dev = &fep->pdev->dev; unsigned long time_left; + int ret = 0; + + ret = pm_runtime_get_sync(dev); + if (IS_ERR_VALUE(ret)) + return ret; fep->mii_timeout = 0; init_completion(&fep->mdio_done); @@ -1811,10 +1831,13 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum, if (time_left == 0) { fep->mii_timeout = 1; netdev_err(fep->netdev, "MDIO write timeout\n"); - return -ETIMEDOUT; + ret = -ETIMEDOUT; } - return 0; + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); + + return ret; } static int fec_enet_clk_enable(struct net_device *ndev, bool enable) @@ -1826,9 +1849,6 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) ret = clk_prepare_enable(fep->clk_ahb); if (ret) return ret; - ret = clk_prepare_enable(fep->clk_ipg); - if (ret) - goto failed_clk_ipg; if (fep->clk_enet_out) { ret = clk_prepare_enable(fep->clk_enet_out); if (ret) @@ -1852,7 +1872,6 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) } } else { clk_disable_unprepare(fep->clk_ahb); - clk_disable_unprepare(fep->clk_ipg); if (fep->clk_enet_out) clk_disable_unprepare(fep->clk_enet_out); if (fep->clk_ptp) { @@ -1874,8 +1893,6 @@ failed_clk_ptp: if (fep->clk_enet_out) clk_disable_unprepare(fep->clk_enet_out); failed_clk_enet_out: - clk_disable_unprepare(fep->clk_ipg); -failed_clk_ipg: clk_disable_unprepare(fep->clk_ahb); return ret; @@ -2847,10 +2864,14 @@ fec_enet_open(struct net_device *ndev) struct fec_enet_private *fep = netdev_priv(ndev); int ret; + ret = pm_runtime_get_sync(&fep->pdev->dev); + if (IS_ERR_VALUE(ret)) + return ret; + pinctrl_pm_select_default_state(&fep->pdev->dev); ret = fec_enet_clk_enable(ndev, true); if (ret) - return ret; + goto clk_enable; /* I should reset the ring buffers here, but I don't yet know * a simple way to do that. @@ -2881,6 +2902,9 @@ err_enet_mii_probe: fec_enet_free_buffers(ndev); err_enet_alloc: fec_enet_clk_enable(ndev, false); +clk_enable: + pm_runtime_mark_last_busy(&fep->pdev->dev); + pm_runtime_put_autosuspend(&fep->pdev->dev); pinctrl_pm_select_sleep_state(&fep->pdev->dev); return ret; } @@ -2903,6 +2927,9 @@ fec_enet_close(struct net_device *ndev) fec_enet_clk_enable(ndev, false); pinctrl_pm_select_sleep_state(&fep->pdev->dev); + pm_runtime_mark_last_busy(&fep->pdev->dev); + pm_runtime_put_autosuspend(&fep->pdev->dev); + fec_enet_free_buffers(ndev); return 0; @@ -3388,6 +3415,10 @@ fec_probe(struct platform_device *pdev) if (ret) goto failed_clk; + ret = clk_prepare_enable(fep->clk_ipg); + if (ret) + goto failed_clk_ipg; + fep->reg_phy = devm_regulator_get(&pdev->dev, "phy"); if (!IS_ERR(fep->reg_phy)) { ret = regulator_enable(fep->reg_phy); @@ -3434,6 +3465,8 @@ fec_probe(struct platform_device *pdev) netif_carrier_off(ndev); fec_enet_clk_enable(ndev, false); pinctrl_pm_select_sleep_state(&pdev->dev); + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); ret = register_netdev(ndev); if (ret) @@ -3447,6 +3480,12 @@ fec_probe(struct platform_device *pdev) fep->rx_copybreak = COPYBREAK_DEFAULT; INIT_WORK(&fep->tx_timeout_work, fec_enet_timeout_work); + + pm_runtime_set_autosuspend_delay(&pdev->dev, FEC_MDIO_PM_TIMEOUT); + pm_runtime_use_autosuspend(&pdev->dev); + pm_runtime_mark_last_busy(&pdev->dev); + pm_runtime_put_autosuspend(&pdev->dev); + return 0; failed_register: @@ -3457,6 +3496,8 @@ failed_init: if (fep->reg_phy) regulator_disable(fep->reg_phy); failed_regulator: + clk_disable_unprepare(fep->clk_ipg); +failed_clk_ipg: fec_enet_clk_enable(ndev, false); failed_clk: failed_phy: @@ -3568,7 +3609,28 @@ failed_clk: return ret; } -static SIMPLE_DEV_PM_OPS(fec_pm_ops, fec_suspend, fec_resume); +static int __maybe_unused fec_runtime_suspend(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct fec_enet_private *fep = netdev_priv(ndev); + + clk_disable_unprepare(fep->clk_ipg); + + return 0; +} + +static int __maybe_unused fec_runtime_resume(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct fec_enet_private *fep = netdev_priv(ndev); + + return clk_prepare_enable(fep->clk_ipg); +} + +static const struct dev_pm_ops fec_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(fec_suspend, fec_resume) + SET_RUNTIME_PM_OPS(fec_runtime_suspend, fec_runtime_resume, NULL) +}; static struct platform_driver fec_driver = { .driver = { From 4f7d2cdfdde71ffe962399b7020c674050329423 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 7 Jul 2015 00:07:52 +0200 Subject: [PATCH 051/170] rtnetlink: verify IFLA_VF_INFO attributes before passing them to driver Jason Gunthorpe reported that since commit c02db8c6290b ("rtnetlink: make SR-IOV VF interface symmetric"), we don't verify IFLA_VF_INFO attributes anymore with respect to their policy, that is, ifla_vfinfo_policy[]. Before, they were part of ifla_policy[], but they have been nested since placed under IFLA_VFINFO_LIST, that contains the attribute IFLA_VF_INFO, which is another nested attribute for the actual VF attributes such as IFLA_VF_MAC, IFLA_VF_VLAN, etc. Despite the policy being split out from ifla_policy[] in this commit, it's never applied anywhere. nla_for_each_nested() only does basic nla_ok() testing for struct nlattr, but it doesn't know about the data context and their requirements. Fix, on top of Jason's initial work, does 1) parsing of the attributes with the right policy, and 2) using the resulting parsed attribute table from 1) instead of the nla_for_each_nested() loop (just like we used to do when still part of ifla_policy[]). Reference: http://thread.gmane.org/gmane.linux.network/368913 Fixes: c02db8c6290b ("rtnetlink: make SR-IOV VF interface symmetric") Reported-by: Jason Gunthorpe Cc: Chris Wright Cc: Sucheta Chakraborty Cc: Greg Rose Cc: Jeff Kirsher Cc: Rony Efraim Cc: Vlad Zolotarov Cc: Nicolas Dichtel Cc: Thomas Graf Signed-off-by: Jason Gunthorpe Signed-off-by: Daniel Borkmann Acked-by: Vlad Zolotarov Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 187 ++++++++++++++++++++++--------------------- 1 file changed, 96 insertions(+), 91 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 01ced4a889e0..9e433d58d265 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1328,10 +1328,6 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { [IFLA_INFO_SLAVE_DATA] = { .type = NLA_NESTED }, }; -static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = { - [IFLA_VF_INFO] = { .type = NLA_NESTED }, -}; - static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { [IFLA_VF_MAC] = { .len = sizeof(struct ifla_vf_mac) }, [IFLA_VF_VLAN] = { .len = sizeof(struct ifla_vf_vlan) }, @@ -1488,96 +1484,98 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) return 0; } -static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) +static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) { - int rem, err = -EINVAL; - struct nlattr *vf; const struct net_device_ops *ops = dev->netdev_ops; + int err = -EINVAL; - nla_for_each_nested(vf, attr, rem) { - switch (nla_type(vf)) { - case IFLA_VF_MAC: { - struct ifla_vf_mac *ivm; - ivm = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_mac) - err = ops->ndo_set_vf_mac(dev, ivm->vf, - ivm->mac); - break; - } - case IFLA_VF_VLAN: { - struct ifla_vf_vlan *ivv; - ivv = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_vlan) - err = ops->ndo_set_vf_vlan(dev, ivv->vf, - ivv->vlan, - ivv->qos); - break; - } - case IFLA_VF_TX_RATE: { - struct ifla_vf_tx_rate *ivt; - struct ifla_vf_info ivf; - ivt = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_get_vf_config) - err = ops->ndo_get_vf_config(dev, ivt->vf, - &ivf); - if (err) - break; - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_rate) - err = ops->ndo_set_vf_rate(dev, ivt->vf, - ivf.min_tx_rate, - ivt->rate); - break; - } - case IFLA_VF_RATE: { - struct ifla_vf_rate *ivt; - ivt = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_rate) - err = ops->ndo_set_vf_rate(dev, ivt->vf, - ivt->min_tx_rate, - ivt->max_tx_rate); - break; - } - case IFLA_VF_SPOOFCHK: { - struct ifla_vf_spoofchk *ivs; - ivs = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_spoofchk) - err = ops->ndo_set_vf_spoofchk(dev, ivs->vf, - ivs->setting); - break; - } - case IFLA_VF_LINK_STATE: { - struct ifla_vf_link_state *ivl; - ivl = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_link_state) - err = ops->ndo_set_vf_link_state(dev, ivl->vf, - ivl->link_state); - break; - } - case IFLA_VF_RSS_QUERY_EN: { - struct ifla_vf_rss_query_en *ivrssq_en; + if (tb[IFLA_VF_MAC]) { + struct ifla_vf_mac *ivm = nla_data(tb[IFLA_VF_MAC]); - ivrssq_en = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_rss_query_en) - err = ops->ndo_set_vf_rss_query_en(dev, - ivrssq_en->vf, - ivrssq_en->setting); - break; - } - default: - err = -EINVAL; - break; - } - if (err) - break; + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_mac) + err = ops->ndo_set_vf_mac(dev, ivm->vf, + ivm->mac); + if (err < 0) + return err; } + + if (tb[IFLA_VF_VLAN]) { + struct ifla_vf_vlan *ivv = nla_data(tb[IFLA_VF_VLAN]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_vlan) + err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan, + ivv->qos); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_TX_RATE]) { + struct ifla_vf_tx_rate *ivt = nla_data(tb[IFLA_VF_TX_RATE]); + struct ifla_vf_info ivf; + + err = -EOPNOTSUPP; + if (ops->ndo_get_vf_config) + err = ops->ndo_get_vf_config(dev, ivt->vf, &ivf); + if (err < 0) + return err; + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_rate) + err = ops->ndo_set_vf_rate(dev, ivt->vf, + ivf.min_tx_rate, + ivt->rate); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_RATE]) { + struct ifla_vf_rate *ivt = nla_data(tb[IFLA_VF_RATE]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_rate) + err = ops->ndo_set_vf_rate(dev, ivt->vf, + ivt->min_tx_rate, + ivt->max_tx_rate); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_SPOOFCHK]) { + struct ifla_vf_spoofchk *ivs = nla_data(tb[IFLA_VF_SPOOFCHK]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_spoofchk) + err = ops->ndo_set_vf_spoofchk(dev, ivs->vf, + ivs->setting); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_LINK_STATE]) { + struct ifla_vf_link_state *ivl = nla_data(tb[IFLA_VF_LINK_STATE]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_link_state) + err = ops->ndo_set_vf_link_state(dev, ivl->vf, + ivl->link_state); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_RSS_QUERY_EN]) { + struct ifla_vf_rss_query_en *ivrssq_en; + + err = -EOPNOTSUPP; + ivrssq_en = nla_data(tb[IFLA_VF_RSS_QUERY_EN]); + if (ops->ndo_set_vf_rss_query_en) + err = ops->ndo_set_vf_rss_query_en(dev, ivrssq_en->vf, + ivrssq_en->setting); + if (err < 0) + return err; + } + return err; } @@ -1773,14 +1771,21 @@ static int do_setlink(const struct sk_buff *skb, } if (tb[IFLA_VFINFO_LIST]) { + struct nlattr *vfinfo[IFLA_VF_MAX + 1]; struct nlattr *attr; int rem; + nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) { - if (nla_type(attr) != IFLA_VF_INFO) { + if (nla_type(attr) != IFLA_VF_INFO || + nla_len(attr) < NLA_HDRLEN) { err = -EINVAL; goto errout; } - err = do_setvfinfo(dev, attr); + err = nla_parse_nested(vfinfo, IFLA_VF_MAX, attr, + ifla_vf_policy); + if (err < 0) + goto errout; + err = do_setvfinfo(dev, vfinfo); if (err < 0) goto errout; status |= DO_SETLINK_NOTIFY; From fc24f2b2094366da8786f59f2606307e934cea17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Tue, 7 Jul 2015 08:34:13 +0300 Subject: [PATCH 052/170] ip_tunnel: fix ipv4 pmtu check to honor inner ip header df MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Frag needed should be sent only if the inner header asked to not fragment. Currently fragmentation is broken if the tunnel has df set, but df was not asked in the original packet. The tunnel's df needs to be still checked to update internally the pmtu cache. Commit 23a3647bc4f93bac broke it, and this commit fixes the ipv4 df check back to the way it was. Fixes: 23a3647bc4f93bac ("ip_tunnels: Use skb-len to PMTU check.") Cc: Pravin B Shelar Signed-off-by: Timo Teräs Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 4c2c3ba4ba65..626d9e56a6bd 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -586,7 +586,8 @@ int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, EXPORT_SYMBOL(ip_tunnel_encap); static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, - struct rtable *rt, __be16 df) + struct rtable *rt, __be16 df, + const struct iphdr *inner_iph) { struct ip_tunnel *tunnel = netdev_priv(dev); int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len; @@ -603,7 +604,8 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, if (skb->protocol == htons(ETH_P_IP)) { if (!skb_is_gso(skb) && - (df & htons(IP_DF)) && mtu < pkt_size) { + (inner_iph->frag_off & htons(IP_DF)) && + mtu < pkt_size) { memset(IPCB(skb), 0, sizeof(*IPCB(skb))); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); return -E2BIG; @@ -737,7 +739,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } - if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) { + if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) { ip_rt_put(rt); goto tx_error; } From b5a983f3141239b5b0b4a4e66efa31f8a26354b8 Mon Sep 17 00:00:00 2001 From: Mazhar Rana Date: Tue, 7 Jul 2015 15:04:50 +0530 Subject: [PATCH 053/170] bonding: "primary_reselect" with "failure" is not working properly When "primary_reselect" is set to "failure", primary interface should not become active until current active slave is down. But if we set first member of bond device as a "primary" interface and "primary_reselect" is set to "failure" then whenever primary interface's link get back(up) it become active slave even if current active slave is still up. With this patch, "bond_find_best_slave" will not traverse members if primary interface is not candidate for failover/reselection and current active slave is still up. Signed-off-by: Mazhar Rana Signed-off-by: Jay Vosburgh Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 51 ++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 19eb990d398c..317a49480475 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -689,40 +689,57 @@ out: } -static bool bond_should_change_active(struct bonding *bond) +static struct slave *bond_choose_primary_or_current(struct bonding *bond) { struct slave *prim = rtnl_dereference(bond->primary_slave); struct slave *curr = rtnl_dereference(bond->curr_active_slave); - if (!prim || !curr || curr->link != BOND_LINK_UP) - return true; + if (!prim || prim->link != BOND_LINK_UP) { + if (!curr || curr->link != BOND_LINK_UP) + return NULL; + return curr; + } + if (bond->force_primary) { bond->force_primary = false; - return true; + return prim; + } + + if (!curr || curr->link != BOND_LINK_UP) + return prim; + + /* At this point, prim and curr are both up */ + switch (bond->params.primary_reselect) { + case BOND_PRI_RESELECT_ALWAYS: + return prim; + case BOND_PRI_RESELECT_BETTER: + if (prim->speed < curr->speed) + return curr; + if (prim->speed == curr->speed && prim->duplex <= curr->duplex) + return curr; + return prim; + case BOND_PRI_RESELECT_FAILURE: + return curr; + default: + netdev_err(bond->dev, "impossible primary_reselect %d\n", + bond->params.primary_reselect); + return curr; } - if (bond->params.primary_reselect == BOND_PRI_RESELECT_BETTER && - (prim->speed < curr->speed || - (prim->speed == curr->speed && prim->duplex <= curr->duplex))) - return false; - if (bond->params.primary_reselect == BOND_PRI_RESELECT_FAILURE) - return false; - return true; } /** - * find_best_interface - select the best available slave to be the active one + * bond_find_best_slave - select the best available slave to be the active one * @bond: our bonding struct */ static struct slave *bond_find_best_slave(struct bonding *bond) { - struct slave *slave, *bestslave = NULL, *primary; + struct slave *slave, *bestslave = NULL; struct list_head *iter; int mintime = bond->params.updelay; - primary = rtnl_dereference(bond->primary_slave); - if (primary && primary->link == BOND_LINK_UP && - bond_should_change_active(bond)) - return primary; + slave = bond_choose_primary_or_current(bond); + if (slave) + return slave; bond_for_each_slave(bond, slave, iter) { if (slave->link == BOND_LINK_UP) From 9e9f665a18008999e749bd41394efcbf5c37a726 Mon Sep 17 00:00:00 2001 From: Daniel Pieczko Date: Tue, 7 Jul 2015 11:37:00 +0100 Subject: [PATCH 054/170] sfc: refactor code in efx_ef10_set_mac_address() Re-organize the structure of error handling to avoid having to duplicate the netif_err() around the ifdefs. The only change to the behaviour of the error-handling is that the PF's data structure to record VF details should only be updated if the original command succeeded. Signed-off-by: Shradha Shah Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 45 +++++++++++++++------------------ 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 847643455468..9740cd02f81d 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -3829,38 +3829,27 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) efx_net_open(efx->net_dev); netif_device_attach(efx->net_dev); -#if !defined(CONFIG_SFC_SRIOV) - if (rc == -EPERM) - netif_err(efx, drv, efx->net_dev, - "Cannot change MAC address; use sfboot to enable mac-spoofing" - " on this interface\n"); -#else - if (rc == -EPERM) { +#ifdef CONFIG_SFC_SRIOV + if (efx->pci_dev->is_virtfn && efx->pci_dev->physfn) { struct pci_dev *pci_dev_pf = efx->pci_dev->physfn; - /* Switch to PF and change MAC address on vport */ - if (efx->pci_dev->is_virtfn && pci_dev_pf) { - struct efx_nic *efx_pf = pci_get_drvdata(pci_dev_pf); + if (rc == -EPERM) { + struct efx_nic *efx_pf; - if (!efx_ef10_sriov_set_vf_mac(efx_pf, + /* Switch to PF and change MAC address on vport */ + efx_pf = pci_get_drvdata(pci_dev_pf); + + rc = efx_ef10_sriov_set_vf_mac(efx_pf, nic_data->vf_index, - efx->net_dev->dev_addr)) - return 0; - } - netif_err(efx, drv, efx->net_dev, - "Cannot change MAC address; use sfboot to enable mac-spoofing" - " on this interface\n"); - } else if (efx->pci_dev->is_virtfn) { - /* Successfully changed by VF (with MAC spoofing), so update the - * parent PF if possible. - */ - struct pci_dev *pci_dev_pf = efx->pci_dev->physfn; - - if (pci_dev_pf) { + efx->net_dev->dev_addr); + } else if (!rc) { struct efx_nic *efx_pf = pci_get_drvdata(pci_dev_pf); struct efx_ef10_nic_data *nic_data = efx_pf->nic_data; unsigned int i; + /* MAC address successfully changed by VF (with MAC + * spoofing) so update the parent PF if possible. + */ for (i = 0; i < efx_pf->vf_count; ++i) { struct ef10_vf *vf = nic_data->vf + i; @@ -3871,8 +3860,14 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) } } } - } + } else #endif + if (rc == -EPERM) { + netif_err(efx, drv, efx->net_dev, + "Cannot change MAC address; use sfboot to enable" + " mac-spoofing on this interface\n"); + } + return rc; } From 7a186f4703de6f4b7feb5d09735b096145b8918c Mon Sep 17 00:00:00 2001 From: Daniel Pieczko Date: Tue, 7 Jul 2015 11:37:19 +0100 Subject: [PATCH 055/170] sfc: add legacy method for changing a PF's MAC address Some versions of MCFW do not support the MC_CMD_VADAPTOR_SET_MAC command, and ENOSYS will be returned. If the PF created its own vport, the function's datapath must be stopped and the vport can be reconfigured to reflect the new MAC address. If the MCFW created the vport for the PF (which is the case when the nic_data->vport_mac is blank), nothing further needs to be done as the vport is not under the control of the PF. This only applies to PFs because the MCFW in question does not support VFs. Signed-off-by: Shradha Shah Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 120 ++++++++++++++++++++++++++ drivers/net/ethernet/sfc/ef10_sriov.c | 42 --------- drivers/net/ethernet/sfc/ef10_sriov.h | 6 ++ 3 files changed, 126 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 9740cd02f81d..e0cb3617f549 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -101,6 +101,11 @@ static unsigned int efx_ef10_mem_map_size(struct efx_nic *efx) return resource_size(&efx->pci_dev->resource[bar]); } +static bool efx_ef10_is_vf(struct efx_nic *efx) +{ + return efx->type->is_vf; +} + static int efx_ef10_get_pf_index(struct efx_nic *efx) { MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_FUNCTION_INFO_OUT_LEN); @@ -677,6 +682,48 @@ static int efx_ef10_probe_pf(struct efx_nic *efx) return efx_ef10_probe(efx); } +int efx_ef10_vadaptor_alloc(struct efx_nic *efx, unsigned int port_id) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_VADAPTOR_ALLOC_IN_LEN); + + MCDI_SET_DWORD(inbuf, VADAPTOR_ALLOC_IN_UPSTREAM_PORT_ID, port_id); + return efx_mcdi_rpc(efx, MC_CMD_VADAPTOR_ALLOC, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + +int efx_ef10_vadaptor_free(struct efx_nic *efx, unsigned int port_id) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_VADAPTOR_FREE_IN_LEN); + + MCDI_SET_DWORD(inbuf, VADAPTOR_FREE_IN_UPSTREAM_PORT_ID, port_id); + return efx_mcdi_rpc(efx, MC_CMD_VADAPTOR_FREE, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + +int efx_ef10_vport_add_mac(struct efx_nic *efx, + unsigned int port_id, u8 *mac) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_VPORT_ADD_MAC_ADDRESS_IN_LEN); + + MCDI_SET_DWORD(inbuf, VPORT_ADD_MAC_ADDRESS_IN_VPORT_ID, port_id); + ether_addr_copy(MCDI_PTR(inbuf, VPORT_ADD_MAC_ADDRESS_IN_MACADDR), mac); + + return efx_mcdi_rpc(efx, MC_CMD_VPORT_ADD_MAC_ADDRESS, inbuf, + sizeof(inbuf), NULL, 0, NULL); +} + +int efx_ef10_vport_del_mac(struct efx_nic *efx, + unsigned int port_id, u8 *mac) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_VPORT_DEL_MAC_ADDRESS_IN_LEN); + + MCDI_SET_DWORD(inbuf, VPORT_DEL_MAC_ADDRESS_IN_VPORT_ID, port_id); + ether_addr_copy(MCDI_PTR(inbuf, VPORT_DEL_MAC_ADDRESS_IN_MACADDR), mac); + + return efx_mcdi_rpc(efx, MC_CMD_VPORT_DEL_MAC_ADDRESS, inbuf, + sizeof(inbuf), NULL, 0, NULL); +} + #ifdef CONFIG_SFC_SRIOV static int efx_ef10_probe_vf(struct efx_nic *efx) { @@ -3804,6 +3851,72 @@ static void efx_ef10_filter_sync_rx_mode(struct efx_nic *efx) WARN_ON(remove_failed); } +static int efx_ef10_vport_set_mac_address(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + u8 mac_old[ETH_ALEN]; + int rc, rc2; + + /* Only reconfigure a PF-created vport */ + if (is_zero_ether_addr(nic_data->vport_mac)) + return 0; + + efx_device_detach_sync(efx); + efx_net_stop(efx->net_dev); + down_write(&efx->filter_sem); + efx_ef10_filter_table_remove(efx); + up_write(&efx->filter_sem); + + rc = efx_ef10_vadaptor_free(efx, nic_data->vport_id); + if (rc) + goto restore_filters; + + ether_addr_copy(mac_old, nic_data->vport_mac); + rc = efx_ef10_vport_del_mac(efx, nic_data->vport_id, + nic_data->vport_mac); + if (rc) + goto restore_vadaptor; + + rc = efx_ef10_vport_add_mac(efx, nic_data->vport_id, + efx->net_dev->dev_addr); + if (!rc) { + ether_addr_copy(nic_data->vport_mac, efx->net_dev->dev_addr); + } else { + rc2 = efx_ef10_vport_add_mac(efx, nic_data->vport_id, mac_old); + if (rc2) { + /* Failed to add original MAC, so clear vport_mac */ + eth_zero_addr(nic_data->vport_mac); + goto reset_nic; + } + } + +restore_vadaptor: + rc2 = efx_ef10_vadaptor_alloc(efx, nic_data->vport_id); + if (rc2) + goto reset_nic; +restore_filters: + down_write(&efx->filter_sem); + rc2 = efx_ef10_filter_table_probe(efx); + up_write(&efx->filter_sem); + if (rc2) + goto reset_nic; + + rc2 = efx_net_open(efx->net_dev); + if (rc2) + goto reset_nic; + + netif_device_attach(efx->net_dev); + + return rc; + +reset_nic: + netif_err(efx, drv, efx->net_dev, + "Failed to restore when changing MAC address - scheduling reset\n"); + efx_schedule_reset(efx, RESET_TYPE_DATAPATH); + + return rc ? rc : rc2; +} + static int efx_ef10_set_mac_address(struct efx_nic *efx) { MCDI_DECLARE_BUF(inbuf, MC_CMD_VADAPTOR_SET_MAC_IN_LEN); @@ -3866,6 +3979,13 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) netif_err(efx, drv, efx->net_dev, "Cannot change MAC address; use sfboot to enable" " mac-spoofing on this interface\n"); + } else if (rc == -ENOSYS && !efx_ef10_is_vf(efx)) { + /* If the active MCFW does not support MC_CMD_VADAPTOR_SET_MAC + * fall-back to the method of changing the MAC address on the + * vport. This only applies to PFs because such versions of + * MCFW do not support VFs. + */ + rc = efx_ef10_vport_set_mac_address(efx); } return rc; diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c index 6c9b6e45509a..7485f71b4e2f 100644 --- a/drivers/net/ethernet/sfc/ef10_sriov.c +++ b/drivers/net/ethernet/sfc/ef10_sriov.c @@ -29,30 +29,6 @@ static int efx_ef10_evb_port_assign(struct efx_nic *efx, unsigned int port_id, NULL, 0, NULL); } -static int efx_ef10_vport_add_mac(struct efx_nic *efx, - unsigned int port_id, u8 *mac) -{ - MCDI_DECLARE_BUF(inbuf, MC_CMD_VPORT_ADD_MAC_ADDRESS_IN_LEN); - - MCDI_SET_DWORD(inbuf, VPORT_ADD_MAC_ADDRESS_IN_VPORT_ID, port_id); - ether_addr_copy(MCDI_PTR(inbuf, VPORT_ADD_MAC_ADDRESS_IN_MACADDR), mac); - - return efx_mcdi_rpc(efx, MC_CMD_VPORT_ADD_MAC_ADDRESS, inbuf, - sizeof(inbuf), NULL, 0, NULL); -} - -static int efx_ef10_vport_del_mac(struct efx_nic *efx, - unsigned int port_id, u8 *mac) -{ - MCDI_DECLARE_BUF(inbuf, MC_CMD_VPORT_DEL_MAC_ADDRESS_IN_LEN); - - MCDI_SET_DWORD(inbuf, VPORT_DEL_MAC_ADDRESS_IN_VPORT_ID, port_id); - ether_addr_copy(MCDI_PTR(inbuf, VPORT_DEL_MAC_ADDRESS_IN_MACADDR), mac); - - return efx_mcdi_rpc(efx, MC_CMD_VPORT_DEL_MAC_ADDRESS, inbuf, - sizeof(inbuf), NULL, 0, NULL); -} - static int efx_ef10_vswitch_alloc(struct efx_nic *efx, unsigned int port_id, unsigned int vswitch_type) { @@ -136,24 +112,6 @@ static int efx_ef10_vport_free(struct efx_nic *efx, unsigned int port_id) NULL, 0, NULL); } -static int efx_ef10_vadaptor_alloc(struct efx_nic *efx, unsigned int port_id) -{ - MCDI_DECLARE_BUF(inbuf, MC_CMD_VADAPTOR_ALLOC_IN_LEN); - - MCDI_SET_DWORD(inbuf, VADAPTOR_ALLOC_IN_UPSTREAM_PORT_ID, port_id); - return efx_mcdi_rpc(efx, MC_CMD_VADAPTOR_ALLOC, inbuf, sizeof(inbuf), - NULL, 0, NULL); -} - -static int efx_ef10_vadaptor_free(struct efx_nic *efx, unsigned int port_id) -{ - MCDI_DECLARE_BUF(inbuf, MC_CMD_VADAPTOR_FREE_IN_LEN); - - MCDI_SET_DWORD(inbuf, VADAPTOR_FREE_IN_UPSTREAM_PORT_ID, port_id); - return efx_mcdi_rpc(efx, MC_CMD_VADAPTOR_FREE, inbuf, sizeof(inbuf), - NULL, 0, NULL); -} - static void efx_ef10_sriov_free_vf_vports(struct efx_nic *efx) { struct efx_ef10_nic_data *nic_data = efx->nic_data; diff --git a/drivers/net/ethernet/sfc/ef10_sriov.h b/drivers/net/ethernet/sfc/ef10_sriov.h index db4ef537c610..6d25b92cb45e 100644 --- a/drivers/net/ethernet/sfc/ef10_sriov.h +++ b/drivers/net/ethernet/sfc/ef10_sriov.h @@ -65,5 +65,11 @@ int efx_ef10_vswitching_restore_pf(struct efx_nic *efx); int efx_ef10_vswitching_restore_vf(struct efx_nic *efx); void efx_ef10_vswitching_remove_pf(struct efx_nic *efx); void efx_ef10_vswitching_remove_vf(struct efx_nic *efx); +int efx_ef10_vport_add_mac(struct efx_nic *efx, + unsigned int port_id, u8 *mac); +int efx_ef10_vport_del_mac(struct efx_nic *efx, + unsigned int port_id, u8 *mac); +int efx_ef10_vadaptor_alloc(struct efx_nic *efx, unsigned int port_id); +int efx_ef10_vadaptor_free(struct efx_nic *efx, unsigned int port_id); #endif /* EF10_SRIOV_H */ From 535a61777f44eebcb71f2a6866f95b17ee0f13c7 Mon Sep 17 00:00:00 2001 From: Daniel Pieczko Date: Tue, 7 Jul 2015 11:37:33 +0100 Subject: [PATCH 056/170] sfc: suppress handled MCDI failures when changing the MAC address Signed-off-by: Shradha Shah Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index e0cb3617f549..605cc8948594 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -3933,8 +3933,8 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) efx->net_dev->dev_addr); MCDI_SET_DWORD(inbuf, VADAPTOR_SET_MAC_IN_UPSTREAM_PORT_ID, nic_data->vport_id); - rc = efx_mcdi_rpc(efx, MC_CMD_VADAPTOR_SET_MAC, inbuf, - sizeof(inbuf), NULL, 0, NULL); + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_VADAPTOR_SET_MAC, inbuf, + sizeof(inbuf), NULL, 0, NULL); efx_ef10_filter_table_probe(efx); up_write(&efx->filter_sem); @@ -3986,6 +3986,9 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) * MCFW do not support VFs. */ rc = efx_ef10_vport_set_mac_address(efx); + } else { + efx_mcdi_display_error(efx, MC_CMD_VADAPTOR_SET_MAC, + sizeof(inbuf), NULL, 0, rc); } return rc; From fdd75ea8df370f206a8163786e7470c1277a5064 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Tue, 7 Jul 2015 09:43:45 -0400 Subject: [PATCH 057/170] net/tipc: initialize security state for new connection socket Calling connect() with an AF_TIPC socket would trigger a series of error messages from SELinux along the lines of: SELinux: Invalid class 0 type=AVC msg=audit(1434126658.487:34500): avc: denied { } for pid=292 comm="kworker/u16:5" scontext=system_u:system_r:kernel_t:s0 tcontext=system_u:object_r:unlabeled_t:s0 tclass= permissive=0 This was due to a failure to initialize the security state of the new connection sock by the tipc code, leaving it with junk in the security class field and an unlabeled secid. Add a call to security_sk_clone() to inherit the security state from the parent socket. Reported-by: Tim Shearer Signed-off-by: Stephen Smalley Acked-by: Paul Moore Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/socket.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 46b6ed534ef2..3a7567f690f3 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2007,6 +2007,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1); if (res) goto exit; + security_sk_clone(sock->sk, new_sock->sk); new_sk = new_sock->sk; new_tsock = tipc_sk(new_sk); From 1973db0df7c3bd69de2a1041d3364567287771d9 Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Tue, 7 Jul 2015 18:30:39 +0530 Subject: [PATCH 058/170] drivers: net: cpsw: fix crash while accessing second slave ethernet interface When cpsw's number of slave is set to 1 in device tree and while accessing second slave ndev and priv in cpsw_tx_interrupt(), there is a kernel crash. This is due to cpsw_get_slave_priv() not verifying number of slaves while retriving netdev priv and returns a invalid memory region. Fixing the issue by introducing number of slave check in cpsw_get_slave_priv() and cpsw_get_slave_ndev(). [ 15.879589] Unable to handle kernel paging request at virtual address 0f0e142c [ 15.888540] pgd = ed374000 [ 15.891359] [0f0e142c] *pgd=00000000 [ 15.895105] Internal error: Oops: 5 [#1] SMP ARM [ 15.899936] Modules linked in: [ 15.903139] CPU: 0 PID: 593 Comm: udhcpc Tainted: G W 4.1.0-12205-gfda8b18-dirty #10 [ 15.912386] Hardware name: Generic AM43 (Flattened Device Tree) [ 15.918557] task: ed2a2e00 ti: ed3fe000 task.ti: ed3fe000 [ 15.924187] PC is at cpsw_tx_interrupt+0x30/0x44 [ 15.929008] LR is at _raw_spin_unlock_irqrestore+0x40/0x44 [ 15.934726] pc : [] lr : [] psr: 20000193 [ 15.934726] sp : ed3ffc08 ip : ed2a2e40 fp : 00000000 [ 15.946685] r10: c0969ce8 r9 : c0969cfc r8 : 00000000 [ 15.952129] r7 : 000000c6 r6 : ee54ab00 r5 : ee169c64 r4 : ee534e00 [ 15.958932] r3 : 0f0e0d0c r2 : 00000000 r1 : ed3ffbc0 r0 : 00000001 [ 15.965735] Flags: nzCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment user [ 15.973261] Control: 10c5387d Table: ad374059 DAC: 00000015 [ 15.979246] Process udhcpc (pid: 593, stack limit = 0xed3fe218) [ 15.985414] Stack: (0xed3ffc08 to 0xed400000) [ 15.989954] fc00: ee54ab00 c009928c c0a9e648 60000193 000032e4 ee169c00 [ 15.998478] fc20: ee169c64 ee169c00 ee169c64 ee54ab00 00000001 00000001 ee67e268 ee008800 [ 16.006995] fc40: ee534800 c009946c ee169c00 ee169c64 c08bd660 c009c370 c009c2a4 000000c6 [ 16.015513] fc60: c08b75c4 c08b0854 00000000 c0098b3c 000000c6 c0098c50 ed3ffcb0 0000003a [ 16.024033] fc80: ed3ffcb0 fa24010c c08b7800 fa240100 ee7e9880 c00094c4 c05ef4e8 60000013 [ 16.032556] fca0: ffffffff ed3ffce4 ee7e9880 c05ef964 00000001 ed2a33d8 00000000 ed2a2e00 [ 16.041080] fcc0: 60000013 ee536bf8 60000013 ee51b800 ee7e9880 ee67e268 ee7e9880 ee534800 [ 16.049603] fce0: c0ad0768 ed3ffcf8 c008e910 c05ef4e8 60000013 ffffffff 00000001 00000001 [ 16.058121] fd00: ee536bf8 c0487a04 00000000 00000000 ee534800 00000000 00000156 c048c990 [ 16.066645] fd20: 00000000 00000000 c0969f40 00000000 00000000 c05000e8 00000001 00000000 [ 16.075167] fd40: 00000000 c051eefc 00000000 ee67e268 00000000 00000000 ee51b800 ed3ffd9c [ 16.083690] fd60: 00000000 ee67e200 ee51b800 ee7e9880 ee67e268 00000000 00000000 ee67e200 [ 16.092211] fd80: ee51b800 ee7e9880 ee67e268 ee534800 ee67e200 c051eedc ee67e268 00000010 [ 16.100727] fda0: 00000000 00000000 ee7e9880 ee534800 00000000 ee67e268 ee51b800 c05006fc [ 16.109247] fdc0: ee67e268 00000001 c0500488 00000156 ee7e9880 00000000 ed3fe000 fffffff4 [ 16.117771] fde0: ed3fff1c ee7e9880 ee534800 00000148 00000000 ed1f8340 00000000 00000000 [ 16.126289] fe00: 00000000 c05a9054 00000000 00000000 00000156 c0ab62a8 00000010 ed3e7000 [ 16.134812] fe20: 00000000 00000008 edcfb700 ed3fff1c c0fb5f94 ed2a2e00 c0fb5f64 000005d8 [ 16.143336] fe40: c0a9b3b8 00000000 ed3e7070 00000000 00000000 00000000 00009f40 00000000 [ 16.151858] fe60: 00000000 00020022 00110008 00000000 00000000 43004400 00000000 ffffffff [ 16.160374] fe80: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 16.168898] fea0: edcfb700 bee5f380 00000014 00000000 ed3fe000 00000000 00004400 c04e2b64 [ 16.177415] fec0: 00000002 c04e3b00 ed3ffeec 00000001 0000011a 00000000 00000000 bee5f394 [ 16.185937] fee0: 00000148 ed3fff10 00000014 00000001 00000000 00000000 ed3ffee4 00000000 [ 16.194459] ff00: 00000000 00000000 00000000 c04e3664 00080011 00000002 06000000 ffffffff [ 16.202980] ff20: 0000ffff ffffffff 0000ffff c008dd54 ee5a6f08 ee636e80 c096972d c0089c14 [ 16.211499] ff40: 00000000 60000013 ee5a6f40 60000013 00000000 ee5a6f40 00000002 00000006 [ 16.220023] ff60: 00000000 edcfb700 00000001 ed2a2e00 c000f60c 00000001 0000011a c008ea34 [ 16.228540] ff80: 00000006 00000000 bee5f380 00000014 bee5f380 00000014 bee5f380 00000122 [ 16.237059] ffa0: c000f7c4 c000f5e0 bee5f380 00000014 00000006 bee5f394 00000148 00000000 [ 16.245581] ffc0: bee5f380 00000014 bee5f380 00000122 fffffd6e 00004300 00004800 00004400 [ 16.254104] ffe0: bee5f378 bee5f36c 000307ec b6f39044 40000010 00000006 ed36fa40 00000000 [ 16.262642] [] (cpsw_tx_interrupt) from [] (handle_irq_event_percpu+0x64/0x204) [ 16.272076] [] (handle_irq_event_percpu) from [] (handle_irq_event+0x40/0x64) [ 16.281330] [] (handle_irq_event) from [] (handle_fasteoi_irq+0xcc/0x1a8) [ 16.290220] [] (handle_fasteoi_irq) from [] (generic_handle_irq+0x20/0x30) [ 16.299197] [] (generic_handle_irq) from [] (__handle_domain_irq+0x64/0xdc) [ 16.308273] [] (__handle_domain_irq) from [] (gic_handle_irq+0x20/0x60) [ 16.316987] [] (gic_handle_irq) from [] (__irq_svc+0x44/0x5c) [ 16.324779] Exception stack(0xed3ffcb0 to 0xed3ffcf8) [ 16.330044] fca0: 00000001 ed2a33d8 00000000 ed2a2e00 [ 16.338567] fcc0: 60000013 ee536bf8 60000013 ee51b800 ee7e9880 ee67e268 ee7e9880 ee534800 [ 16.347090] fce0: c0ad0768 ed3ffcf8 c008e910 c05ef4e8 60000013 ffffffff [ 16.353987] [] (__irq_svc) from [] (_raw_spin_unlock_irqrestore+0x34/0x44) [ 16.362973] [] (_raw_spin_unlock_irqrestore) from [] (cpdma_check_free_tx_desc+0x60/0x6c) [ 16.373311] [] (cpdma_check_free_tx_desc) from [] (cpsw_ndo_start_xmit+0xb4/0x1ac) [ 16.383017] [] (cpsw_ndo_start_xmit) from [] (dev_hard_start_xmit+0x2a4/0x4c0) [ 16.392364] [] (dev_hard_start_xmit) from [] (sch_direct_xmit+0xf4/0x210) [ 16.401246] [] (sch_direct_xmit) from [] (__dev_queue_xmit+0x2ac/0x7bc) [ 16.409960] [] (__dev_queue_xmit) from [] (packet_sendmsg+0xc68/0xeb4) [ 16.418585] [] (packet_sendmsg) from [] (sock_sendmsg+0x14/0x24) [ 16.426663] [] (sock_sendmsg) from [] (SyS_sendto+0xb4/0xe0) [ 16.434377] [] (SyS_sendto) from [] (ret_fast_syscall+0x0/0x54) [ 16.442360] Code: e5943118 e593303c e3530000 0a000002 (e5930720) [ 16.448716] ---[ end trace a68159f094d85ba6 ]--- [ 16.453526] Kernel panic - not syncing: Fatal exception in interrupt [ 16.460149] ---[ end Kernel panic - not syncing: Fatal exception in interrupt Signed-off-by: Mugunthan V N Cc: # v3.8+ Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 462820514fae..e778703880d3 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -509,9 +509,11 @@ static const struct cpsw_stats cpsw_gstrings_stats[] = { (func)(slave++, ##arg); \ } while (0) #define cpsw_get_slave_ndev(priv, __slave_no__) \ - (priv->slaves[__slave_no__].ndev) + ((__slave_no__ < priv->data.slaves) ? \ + priv->slaves[__slave_no__].ndev : NULL) #define cpsw_get_slave_priv(priv, __slave_no__) \ - ((priv->slaves[__slave_no__].ndev) ? \ + (((__slave_no__ < priv->data.slaves) && \ + (priv->slaves[__slave_no__].ndev)) ? \ netdev_priv(priv->slaves[__slave_no__].ndev) : NULL) \ #define cpsw_dual_emac_src_port_detect(status, priv, ndev, skb) \ From f1158b74e54f2e2462ba5e2f45a118246d9d5b43 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 7 Jul 2015 15:55:56 +0200 Subject: [PATCH 059/170] bridge: mdb: zero out the local br_ip variable before use Since commit b0e9a30dd669 ("bridge: Add vlan id to multicast groups") there's a check in br_ip_equal() for a matching vlan id, but the mdb functions were not modified to use (or at least zero it) so when an entry was added it would have a garbage vlan id (from the local br_ip variable in __br_mdb_add/del) and this would prevent it from being matched and also deleted. So zero out the whole local ip var to protect ourselves from future changes and also to fix the current bug, since there's no vlan id support in the mdb uapi - use always vlan id 0. Example before patch: root@debian:~# bridge mdb add dev br0 port eth1 grp 239.0.0.1 permanent root@debian:~# bridge mdb dev br0 port eth1 grp 239.0.0.1 permanent root@debian:~# bridge mdb del dev br0 port eth1 grp 239.0.0.1 permanent RTNETLINK answers: Invalid argument After patch: root@debian:~# bridge mdb add dev br0 port eth1 grp 239.0.0.1 permanent root@debian:~# bridge mdb dev br0 port eth1 grp 239.0.0.1 permanent root@debian:~# bridge mdb del dev br0 port eth1 grp 239.0.0.1 permanent root@debian:~# bridge mdb Signed-off-by: Nikolay Aleksandrov Fixes: b0e9a30dd669 ("bridge: Add vlan id to multicast groups") Signed-off-by: David S. Miller --- net/bridge/br_mdb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 3bfc675cf0d1..60868c212679 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -374,6 +374,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, if (!p || p->br != br || p->state == BR_STATE_DISABLED) return -EINVAL; + memset(&ip, 0, sizeof(ip)); ip.proto = entry->addr.proto; if (ip.proto == htons(ETH_P_IP)) ip.u.ip4 = entry->addr.u.ip4; @@ -420,6 +421,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) if (!netif_running(br->dev) || br->multicast_disabled) return -EINVAL; + memset(&ip, 0, sizeof(ip)); ip.proto = entry->addr.proto; if (ip.proto == htons(ETH_P_IP)) { if (timer_pending(&br->ip4_other_query.timer)) From 671b53eec2edcbfac3e53d02cf3d0c6d9ecc07de Mon Sep 17 00:00:00 2001 From: Shradha Shah Date: Wed, 8 Jul 2015 10:12:45 +0100 Subject: [PATCH 060/170] sfc: Ensure down_write(&filter_sem) and up_write() are matched before calling efx_net_open() This patch avoids the double up_write to filter_sem if efx_net_open() fails. Resolves: 2d432f20d27c1813a2746008e16dd6ce12a14dc1 Signed-off-by: Shradha Shah Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10_sriov.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c index 7485f71b4e2f..3c17f274e802 100644 --- a/drivers/net/ethernet/sfc/ef10_sriov.c +++ b/drivers/net/ethernet/sfc/ef10_sriov.c @@ -598,21 +598,21 @@ int efx_ef10_sriov_set_vf_vlan(struct efx_nic *efx, int vf_i, u16 vlan, MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_NORMAL, vf->vlan, &vf->vport_id); if (rc) - goto reset_nic; + goto reset_nic_up_write; restore_mac: if (!is_zero_ether_addr(vf->mac)) { rc2 = efx_ef10_vport_add_mac(efx, vf->vport_id, vf->mac); if (rc2) { eth_zero_addr(vf->mac); - goto reset_nic; + goto reset_nic_up_write; } } restore_evb_port: rc2 = efx_ef10_evb_port_assign(efx, vf->vport_id, vf_i); if (rc2) - goto reset_nic; + goto reset_nic_up_write; else vf->vport_assigned = 1; @@ -620,14 +620,16 @@ restore_vadaptor: if (vf->efx) { rc2 = efx_ef10_vadaptor_alloc(vf->efx, EVB_PORT_ID_ASSIGNED); if (rc2) - goto reset_nic; + goto reset_nic_up_write; } restore_filters: if (vf->efx) { rc2 = vf->efx->type->filter_table_probe(vf->efx); if (rc2) - goto reset_nic; + goto reset_nic_up_write; + + up_write(&vf->efx->filter_sem); up_write(&vf->efx->filter_sem); @@ -639,9 +641,12 @@ restore_filters: } return rc; +reset_nic_up_write: + if (vf->efx) + up_write(&vf->efx->filter_sem); + reset_nic: if (vf->efx) { - up_write(&vf->efx->filter_sem); netif_err(efx, drv, efx->net_dev, "Failed to restore VF - scheduling reset.\n"); efx_schedule_reset(vf->efx, RESET_TYPE_DATAPATH); From 974d7af5fcc295dcf8315255142b2fe44fd74b0c Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Tue, 7 Jul 2015 13:56:57 -0400 Subject: [PATCH 061/170] ipv4: add support for linkdown sysctl to netconf This kernel patch exports the value of the new ignore_routes_with_linkdown via netconf. v2: changes to notify userspace via netlink when sysctl values change and proposed for 'net' since this could be considered a bugfix Signed-off-by: Andy Gospodarek Suggested-by: Nicolas Dichtel Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/uapi/linux/netconf.h | 1 + net/ipv4/devinet.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/include/uapi/linux/netconf.h b/include/uapi/linux/netconf.h index 669a1f0b1d97..23cbd34e4ac7 100644 --- a/include/uapi/linux/netconf.h +++ b/include/uapi/linux/netconf.h @@ -15,6 +15,7 @@ enum { NETCONFA_RP_FILTER, NETCONFA_MC_FORWARDING, NETCONFA_PROXY_NEIGH, + NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, __NETCONFA_MAX }; #define NETCONFA_MAX (__NETCONFA_MAX - 1) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 7498716e8f54..e813196c91c7 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1740,6 +1740,8 @@ static int inet_netconf_msgsize_devconf(int type) size += nla_total_size(4); if (type == -1 || type == NETCONFA_PROXY_NEIGH) size += nla_total_size(4); + if (type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) + size += nla_total_size(4); return size; } @@ -1780,6 +1782,10 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, nla_put_s32(skb, NETCONFA_PROXY_NEIGH, IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0) goto nla_put_failure; + if ((type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) && + nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, + IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0) + goto nla_put_failure; nlmsg_end(skb, nlh); return 0; @@ -1819,6 +1825,7 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = { [NETCONFA_FORWARDING] = { .len = sizeof(int) }, [NETCONFA_RP_FILTER] = { .len = sizeof(int) }, [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) }, + [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) }, }; static int inet_netconf_get_devconf(struct sk_buff *in_skb, @@ -2048,6 +2055,12 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write, inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, ifindex, cnf); } + if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 && + new_value != old_value) { + ifindex = devinet_conf_ifindex(net, cnf); + inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, + ifindex, cnf); + } } return ret; From 0769636cb5b95665ebadcd1a41c46f331f5a397d Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 7 Jul 2015 14:02:18 -0400 Subject: [PATCH 062/170] vmxnet3: prevent receive getting out of sequence on napi poll vmxnet3's current napi path is built to count every rx descriptor we recieve, and use that as a count of the napi budget. That means its possible to return from a napi poll halfway through recieving a fragmented packet accross multiple dma descriptors. If that happens, the next napi poll will start with the descriptor ring in an improper state (e.g. the first descriptor we look at may have the end-of-packet bit set), which will cause a BUG halt in the driver. Fix the issue by only counting whole received packets in the napi poll and returning that value, rather than the descriptor count. Tested by the reporter and myself, successfully Signed-off-by: Neil Horman CC: Shreyas Bhatewara CC: "David S. Miller" Acked-by: Andy Gospodarek Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index da11bb5e9c7f..46f4caddccbe 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1216,7 +1216,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, static const u32 rxprod_reg[2] = { VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2 }; - u32 num_rxd = 0; + u32 num_pkts = 0; bool skip_page_frags = false; struct Vmxnet3_RxCompDesc *rcd; struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx; @@ -1235,13 +1235,12 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, struct Vmxnet3_RxDesc *rxd; u32 idx, ring_idx; struct vmxnet3_cmd_ring *ring = NULL; - if (num_rxd >= quota) { + if (num_pkts >= quota) { /* we may stop even before we see the EOP desc of * the current pkt */ break; } - num_rxd++; BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2); idx = rcd->rxdIdx; ring_idx = rcd->rqID < adapter->num_rx_queues ? 0 : 1; @@ -1413,6 +1412,7 @@ not_lro: napi_gro_receive(&rq->napi, skb); ctx->skb = NULL; + num_pkts++; } rcd_done: @@ -1443,7 +1443,7 @@ rcd_done: &rq->comp_ring.base[rq->comp_ring.next2proc].rcd, &rxComp); } - return num_rxd; + return num_pkts; } From 4eed4d8ff984abcb983ada5b3dbf56fce35f1068 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 7 Jul 2015 20:48:55 +0200 Subject: [PATCH 063/170] 3c59x: Fix shared IRQ handling As its first order of business, boomerang_interrupt() checks whether the device really has any pending interrupts. If it does not, it does nothing and returns, but it still returns IRQ_HANDLED. This is wrong: interrupt was not handled, IRQ handlers of other devices sharing this IRQ line need to be called. vortex_interrupt() has it right: it returns IRQ_NONE in this case via IRQ_RETVAL(0). Do the same in boomerang_interrupt(). Signed-off-by: Denys Vlasenko CC: David S. Miller CC: linux-kernel@vger.kernel.org CC: netdev@vger.kernel.org Signed-off-by: David S. Miller --- drivers/net/ethernet/3com/3c59x.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index 41095ebad97f..2d1ce3c5d0dd 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -2382,6 +2382,7 @@ boomerang_interrupt(int irq, void *dev_id) void __iomem *ioaddr; int status; int work_done = max_interrupt_work; + int handled = 0; ioaddr = vp->ioaddr; @@ -2400,6 +2401,7 @@ boomerang_interrupt(int irq, void *dev_id) if ((status & IntLatch) == 0) goto handler_exit; /* No interrupt: shared IRQs can cause this */ + handled = 1; if (status == 0xffff) { /* h/w no longer present (hotplug)? */ if (vortex_debug > 1) @@ -2501,7 +2503,7 @@ boomerang_interrupt(int irq, void *dev_id) handler_exit: vp->handling_irq = 0; spin_unlock(&vp->lock); - return IRQ_HANDLED; + return IRQ_RETVAL(handled); } static int vortex_rx(struct net_device *dev) From d065c3c17dae95832badf6329512dd057c905890 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Tue, 7 Jul 2015 13:54:12 -0700 Subject: [PATCH 064/170] drivers/net/usb: add device id for NVIDIA Tegra USB 3.0 Ethernet This device is sold as 'NVIDIA Tegra USB 3.0 Ethernet'. Chipset is RTL8153 and works with r8152. Signed-off-by: Zheng Liu Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ether.c | 8 ++++++++ drivers/net/usb/r8152.c | 2 ++ 2 files changed, 10 insertions(+) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 4545e78840b0..35a2bffe848a 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -523,6 +523,7 @@ static const struct driver_info wwan_info = { #define REALTEK_VENDOR_ID 0x0bda #define SAMSUNG_VENDOR_ID 0x04e8 #define LENOVO_VENDOR_ID 0x17ef +#define NVIDIA_VENDOR_ID 0x0955 static const struct usb_device_id products[] = { /* BLACKLIST !! @@ -710,6 +711,13 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, +/* NVIDIA Tegra USB 3.0 Ethernet Adapters (based on Realtek RTL8153) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(NVIDIA_VENDOR_ID, 0x09ff, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* WHITELIST!!! * * CDC Ether uses two interfaces, not necessarily consecutive. diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index aafa1a1898e4..7f6419ebb5e1 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -494,6 +494,7 @@ enum rtl8152_flags { #define VENDOR_ID_REALTEK 0x0bda #define VENDOR_ID_SAMSUNG 0x04e8 #define VENDOR_ID_LENOVO 0x17ef +#define VENDOR_ID_NVIDIA 0x0955 #define MCU_TYPE_PLA 0x0100 #define MCU_TYPE_USB 0x0000 @@ -4117,6 +4118,7 @@ static struct usb_device_id rtl8152_table[] = { {REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7205)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x304f)}, + {REALTEK_USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff)}, {} }; From c936835c1ec6f871f32c9b87a7708700320075b3 Mon Sep 17 00:00:00 2001 From: Peter Dunning Date: Wed, 8 Jul 2015 10:05:10 +0100 Subject: [PATCH 065/170] sfc: Report TX completions to BQL after all TX events in interrupt The limit for BQL is updated each time we call netdev_tx_completed_queue. Without this patch the BQL limit was updated for every TX event we see. The issue was that this only updated the limit to handle the data we complete in two events as the first event wouldn't show that enough traffic had been processed between them. This was OK when interrupt moderation was off but not when it was on as more data had to be completed in a single interrupt. The patch changes this so that we do report the completion to BQL only when all the TX events in the interrupt have been processed. Signed-off-by: Shradha Shah Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/efx.c | 14 ++++++++++++++ drivers/net/ethernet/sfc/net_driver.h | 2 ++ drivers/net/ethernet/sfc/tx.c | 3 ++- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 804b9ad553d3..03bc03b67f08 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -245,11 +245,17 @@ static int efx_check_disabled(struct efx_nic *efx) */ static int efx_process_channel(struct efx_channel *channel, int budget) { + struct efx_tx_queue *tx_queue; int spent; if (unlikely(!channel->enabled)) return 0; + efx_for_each_channel_tx_queue(tx_queue, channel) { + tx_queue->pkts_compl = 0; + tx_queue->bytes_compl = 0; + } + spent = efx_nic_process_eventq(channel, budget); if (spent && efx_channel_has_rx_queue(channel)) { struct efx_rx_queue *rx_queue = @@ -259,6 +265,14 @@ static int efx_process_channel(struct efx_channel *channel, int budget) efx_fast_push_rx_descriptors(rx_queue, true); } + /* Update BQL */ + efx_for_each_channel_tx_queue(tx_queue, channel) { + if (tx_queue->bytes_compl) { + netdev_tx_completed_queue(tx_queue->core_txq, + tx_queue->pkts_compl, tx_queue->bytes_compl); + } + } + return spent; } diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index d72f522bf9c3..47d1e3a96522 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -241,6 +241,8 @@ struct efx_tx_queue { unsigned int read_count ____cacheline_aligned_in_smp; unsigned int old_write_count; unsigned int merge_events; + unsigned int bytes_compl; + unsigned int pkts_compl; /* Members used only on the xmit path */ unsigned int insert_count ____cacheline_aligned_in_smp; diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index aaf2987512b5..1833a0146571 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -617,7 +617,8 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) EFX_BUG_ON_PARANOID(index > tx_queue->ptr_mask); efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl); - netdev_tx_completed_queue(tx_queue->core_txq, pkts_compl, bytes_compl); + tx_queue->pkts_compl += pkts_compl; + tx_queue->bytes_compl += bytes_compl; if (pkts_compl > 1) ++tx_queue->merge_events; From d5de1987853a7778bb048a9e52b3486eb9aceb17 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 8 Jul 2015 17:16:49 +0200 Subject: [PATCH 066/170] macvtap: Destroy minor_idr on module_exit Destroy minor_idr on module_exit, reclaiming the allocated memory. This was detected by the following semantic patch (written by Luis Rodriguez ) @ defines_module_init @ declarer name module_init, module_exit; declarer name DEFINE_IDR; identifier init; @@ module_init(init); @ defines_module_exit @ identifier exit; @@ module_exit(exit); @ declares_idr depends on defines_module_init && defines_module_exit @ identifier idr; @@ DEFINE_IDR(idr); @ on_exit_calls_destroy depends on declares_idr && defines_module_exit @ identifier declares_idr.idr, defines_module_exit.exit; @@ exit(void) { ... idr_destroy(&idr); ... } @ missing_module_idr_destroy depends on declares_idr && defines_module_exit && !on_exit_calls_destroy @ identifier declares_idr.idr, defines_module_exit.exit; @@ exit(void) { ... +idr_destroy(&idr); } Signed-off-by: Johannes Thumshirn Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 6a64197f5bce..b4b61910f616 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -1294,6 +1294,7 @@ static void macvtap_exit(void) class_unregister(macvtap_class); cdev_del(&macvtap_cdev); unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS); + idr_destroy(&minor_idr); } module_exit(macvtap_exit); From e2fc61f384c9224b55990a644bbcf68c25e20203 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Mon, 29 Jun 2015 19:15:03 +0300 Subject: [PATCH 067/170] ARCv2: [axs103] bump CPU frequency from 75 to 90 MHZ With up-to-date FPGA builds ARC cores are supposed to correctly operate even with 90 MHz clock (which is a target frequency for AXS103 release). Signed-off-by: Alexey Brodkin Cc: arc-linux-dev@synopsys.com --- arch/arc/boot/dts/axc003.dtsi | 2 +- arch/arc/boot/dts/axc003_idu.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi index 15c8d6226c9d..1cd5e82f5dc2 100644 --- a/arch/arc/boot/dts/axc003.dtsi +++ b/arch/arc/boot/dts/axc003.dtsi @@ -12,7 +12,7 @@ / { compatible = "snps,arc"; - clock-frequency = <75000000>; + clock-frequency = <90000000>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi index 199d42820eca..2f0b33257db2 100644 --- a/arch/arc/boot/dts/axc003_idu.dtsi +++ b/arch/arc/boot/dts/axc003_idu.dtsi @@ -12,7 +12,7 @@ / { compatible = "snps,arc"; - clock-frequency = <75000000>; + clock-frequency = <90000000>; #address-cells = <1>; #size-cells = <1>; From 80f420842ff42ad61f84584716d74ef635f13892 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 3 Jul 2015 11:26:22 +0530 Subject: [PATCH 068/170] ARC: Make ARC bitops "safer" (add anti-optimization) ARCompact/ARCv2 ISA provide that any instructions which deals with bitpos/count operand ASL, LSL, BSET, BCLR, BMSK .... will only consider lower 5 bits. i.e. auto-clamp the pos to 0-31. ARC Linux bitops exploited this fact by NOT explicitly masking out upper bits for @nr operand in general, saving a bunch of AND/BMSK instructions in generated code around bitops. While this micro-optimization has worked well over years it is NOT safe as shifting a number with a value, greater than native size is "undefined" per "C" spec. So as it turns outm EZChip ran into this eventually, in their massive muti-core SMP build with 64 cpus. There was a test_bit() inside a loop from 63 to 0 and gcc was weirdly optimizing away the first iteration (so it was really adhering to standard by implementing undefined behaviour vs. removing all the iterations which were phony i.e. (1 << [63..32]) | for i = 63 to 0 | X = ( 1 << i ) | if X == 0 | continue So fix the code to do the explicit masking at the expense of generating additional instructions. Fortunately, this can be mitigated to a large extent as gcc has SHIFT_COUNT_TRUNCATED which allows combiner to fold masking into shift operation itself. It is currently not enabled in ARC gcc backend, but could be done after a bit of testing. Fixes STAR 9000866918 ("unsafe "undefined behavior" code in kernel") Reported-by: Noam Camus Cc: Signed-off-by: Vineet Gupta --- arch/arc/include/asm/bitops.h | 35 +++++++++-------------------------- 1 file changed, 9 insertions(+), 26 deletions(-) diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index 99fe118d3730..57c1f33844d4 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -50,8 +50,7 @@ static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\ * done for const @nr, but no code is generated due to gcc \ * const prop. \ */ \ - if (__builtin_constant_p(nr)) \ - nr &= 0x1f; \ + nr &= 0x1f; \ \ __asm__ __volatile__( \ "1: llock %0, [%1] \n" \ @@ -82,8 +81,7 @@ static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long * \ m += nr >> 5; \ \ - if (__builtin_constant_p(nr)) \ - nr &= 0x1f; \ + nr &= 0x1f; \ \ /* \ * Explicit full memory barrier needed before/after as \ @@ -129,16 +127,13 @@ static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\ unsigned long temp, flags; \ m += nr >> 5; \ \ - if (__builtin_constant_p(nr)) \ - nr &= 0x1f; \ - \ /* \ * spin lock/unlock provide the needed smp_mb() before/after \ */ \ bitops_lock(flags); \ \ temp = *m; \ - *m = temp c_op (1UL << nr); \ + *m = temp c_op (1UL << (nr & 0x1f)); \ \ bitops_unlock(flags); \ } @@ -149,17 +144,14 @@ static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long * unsigned long old, flags; \ m += nr >> 5; \ \ - if (__builtin_constant_p(nr)) \ - nr &= 0x1f; \ - \ bitops_lock(flags); \ \ old = *m; \ - *m = old c_op (1 << nr); \ + *m = old c_op (1UL << (nr & 0x1f)); \ \ bitops_unlock(flags); \ \ - return (old & (1 << nr)) != 0; \ + return (old & (1UL << (nr & 0x1f))) != 0; \ } #endif /* CONFIG_ARC_HAS_LLSC */ @@ -174,11 +166,8 @@ static inline void __##op##_bit(unsigned long nr, volatile unsigned long *m) \ unsigned long temp; \ m += nr >> 5; \ \ - if (__builtin_constant_p(nr)) \ - nr &= 0x1f; \ - \ temp = *m; \ - *m = temp c_op (1UL << nr); \ + *m = temp c_op (1UL << (nr & 0x1f)); \ } #define __TEST_N_BIT_OP(op, c_op, asm_op) \ @@ -187,13 +176,10 @@ static inline int __test_and_##op##_bit(unsigned long nr, volatile unsigned long unsigned long old; \ m += nr >> 5; \ \ - if (__builtin_constant_p(nr)) \ - nr &= 0x1f; \ - \ old = *m; \ - *m = old c_op (1 << nr); \ + *m = old c_op (1UL << (nr & 0x1f)); \ \ - return (old & (1 << nr)) != 0; \ + return (old & (1UL << (nr & 0x1f))) != 0; \ } #define BIT_OPS(op, c_op, asm_op) \ @@ -224,10 +210,7 @@ test_bit(unsigned int nr, const volatile unsigned long *addr) addr += nr >> 5; - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - mask = 1 << nr; + mask = 1UL << (nr & 0x1f); return ((mask & *addr) != 0); } From 70d93d89416562c32adc9444a15677bdf25a72ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=ABl=20Porquet?= Date: Tue, 7 Jul 2015 17:14:56 -0400 Subject: [PATCH 069/170] arc:irqchip: prepare for drivers/irqchip/irqchip.h removal The IRQCHIP_DECLARE macro migrated to 'include/linux/irqchip.h'. See commit 91e20b5040c67c51aad88cf87db4305c5bd7f79d ("irqchip: Move IRQCHIP_DECLARE macro to include/linux/irqchip.h"). This patch removes the inclusions of private header 'drivers/irqchip/irqchip.h' and if necessary replaces them with inclusions of 'include/linux/irqchip.h'. Signed-off-by: Joel Porquet Signed-off-by: Vineet Gupta --- arch/arc/kernel/intc-arcv2.c | 1 - arch/arc/kernel/intc-compact.c | 1 - arch/arc/kernel/mcip.c | 1 - 3 files changed, 3 deletions(-) diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c index 6208c630abed..26c156827479 100644 --- a/arch/arc/kernel/intc-arcv2.c +++ b/arch/arc/kernel/intc-arcv2.c @@ -12,7 +12,6 @@ #include #include #include -#include "../../drivers/irqchip/irqchip.h" #include /* diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c index fcdddb631766..039fac30b5c1 100644 --- a/arch/arc/kernel/intc-compact.c +++ b/arch/arc/kernel/intc-compact.c @@ -12,7 +12,6 @@ #include #include #include -#include "../../drivers/irqchip/irqchip.h" #include /* diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index 6fb0a2fd1d28..2fb86589054d 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -175,7 +175,6 @@ void mcip_init_early_smp(void) #include #include #include -#include "../../drivers/irqchip/irqchip.h" /* * Set the DEST for @cmn_irq to @cpu_mask (1 bit per core) From 9138d4138d8e1f0acf734d2fb3455108bf43380f Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 25 Jun 2015 13:46:57 +0530 Subject: [PATCH 070/170] ARC: Add llock/scond to futex backend Signed-off-by: Vineet Gupta --- arch/arc/include/asm/futex.h | 48 +++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/arch/arc/include/asm/futex.h b/arch/arc/include/asm/futex.h index 05b5aaf5b0f9..70cfe16b742d 100644 --- a/arch/arc/include/asm/futex.h +++ b/arch/arc/include/asm/futex.h @@ -16,12 +16,15 @@ #include #include +#ifdef CONFIG_ARC_HAS_LLSC + #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)\ \ __asm__ __volatile__( \ - "1: ld %1, [%2] \n" \ + "1: llock %1, [%2] \n" \ insn "\n" \ - "2: st %0, [%2] \n" \ + "2: scond %0, [%2] \n" \ + " bnz 1b \n" \ " mov %0, 0 \n" \ "3: \n" \ " .section .fixup,\"ax\" \n" \ @@ -39,6 +42,33 @@ : "r" (uaddr), "r" (oparg), "ir" (-EFAULT) \ : "cc", "memory") +#else /* !CONFIG_ARC_HAS_LLSC */ + +#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)\ + \ + __asm__ __volatile__( \ + "1: ld %1, [%2] \n" \ + insn "\n" \ + "2: st %0, [%2] \n" \ + " mov %0, 0 \n" \ + "3: \n" \ + " .section .fixup,\"ax\" \n" \ + " .align 4 \n" \ + "4: mov %0, %4 \n" \ + " b 3b \n" \ + " .previous \n" \ + " .section __ex_table,\"a\" \n" \ + " .align 4 \n" \ + " .word 1b, 4b \n" \ + " .word 2b, 4b \n" \ + " .previous \n" \ + \ + : "=&r" (ret), "=&r" (oldval) \ + : "r" (uaddr), "r" (oparg), "ir" (-EFAULT) \ + : "cc", "memory") + +#endif + static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) { int op = (encoded_op >> 28) & 7; @@ -123,11 +153,17 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, pagefault_disable(); - /* TBD : can use llock/scond */ __asm__ __volatile__( - "1: ld %0, [%3] \n" - " brne %0, %1, 3f \n" - "2: st %2, [%3] \n" +#ifdef CONFIG_ARC_HAS_LLSC + "1: llock %0, [%3] \n" + " brne %0, %1, 3f \n" + "2: scond %2, [%3] \n" + " bnz 1b \n" +#else + "1: ld %0, [%3] \n" + " brne %0, %1, 3f \n" + "2: st %2, [%3] \n" +#endif "3: \n" " .section .fixup,\"ax\" \n" "4: mov %0, %4 \n" From b631788ab42ea4ba97dc30b3f47bad0edb0ddb4b Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 6 Jul 2015 15:25:21 +0530 Subject: [PATCH 071/170] ARC: slightly refactor macros for boot logging Signed-off-by: Vineet Gupta --- arch/arc/kernel/setup.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index a3d186211ed3..08d98eede6e1 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -150,9 +150,10 @@ static const struct cpuinfo_data arc_cpu_tbl[] = { { {0x00, NULL } } }; -#define IS_AVAIL1(v, str) ((v) ? str : "") -#define IS_USED(cfg) (IS_ENABLED(cfg) ? "" : "(not used) ") -#define IS_AVAIL2(v, str, cfg) IS_AVAIL1(v, str), IS_AVAIL1(v, IS_USED(cfg)) +#define IS_AVAIL1(v, s) ((v) ? s : "") +#define IS_USED_RUN(v) ((v) ? "" : "(not used) ") +#define IS_USED_CFG(cfg) IS_USED_RUN(IS_ENABLED(cfg)) +#define IS_AVAIL2(v, s, cfg) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg)) static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) { @@ -226,7 +227,7 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) n += scnprintf(buf + n, len - n, "mpy[opt %d] ", opt); } n += scnprintf(buf + n, len - n, "%s", - IS_USED(CONFIG_ARC_HAS_HW_MPY)); + IS_USED_CFG(CONFIG_ARC_HAS_HW_MPY)); } n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s%s%s\n", From 02941007f59ce015233d4c0f7047776960bf0c17 Mon Sep 17 00:00:00 2001 From: "qipeng.zha" Date: Thu, 9 Jul 2015 00:14:15 +0800 Subject: [PATCH 072/170] intel_pmc_ipc: Update kerneldoc formatting Update kerneldoc formatting per Documentation/kernel-dec-nano-HOWTO.txt. Signed-off-by: qipeng.zha Signed-off-by: Darren Hart --- arch/x86/include/asm/intel_pmc_ipc.h | 27 ------------- drivers/platform/x86/intel_pmc_ipc.c | 57 +++++++++++++++++----------- 2 files changed, 35 insertions(+), 49 deletions(-) diff --git a/arch/x86/include/asm/intel_pmc_ipc.h b/arch/x86/include/asm/intel_pmc_ipc.h index 200ec2e7821d..cd0310e186f4 100644 --- a/arch/x86/include/asm/intel_pmc_ipc.h +++ b/arch/x86/include/asm/intel_pmc_ipc.h @@ -25,36 +25,9 @@ #if IS_ENABLED(CONFIG_INTEL_PMC_IPC) -/* - * intel_pmc_ipc_simple_command - * @cmd: command - * @sub: sub type - */ int intel_pmc_ipc_simple_command(int cmd, int sub); - -/* - * intel_pmc_ipc_raw_cmd - * @cmd: command - * @sub: sub type - * @in: input data - * @inlen: input length in bytes - * @out: output data - * @outlen: output length in dwords - * @sptr: data writing to SPTR register - * @dptr: data writing to DPTR register - */ int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen, u32 *out, u32 outlen, u32 dptr, u32 sptr); - -/* - * intel_pmc_ipc_command - * @cmd: command - * @sub: sub type - * @in: input data - * @inlen: input length in bytes - * @out: output data - * @outlen: output length in dwords - */ int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen, u32 *out, u32 outlen); diff --git a/drivers/platform/x86/intel_pmc_ipc.c b/drivers/platform/x86/intel_pmc_ipc.c index 69d43b8fe9ba..105cfffe82c6 100644 --- a/drivers/platform/x86/intel_pmc_ipc.c +++ b/drivers/platform/x86/intel_pmc_ipc.c @@ -210,10 +210,15 @@ static int intel_pmc_ipc_check_status(void) return ret; } -/* - * intel_pmc_ipc_simple_command - * @cmd: command - * @sub: sub type +/** + * intel_pmc_ipc_simple_command() - Simple IPC command + * @cmd: IPC command code. + * @sub: IPC command sub type. + * + * Send a simple IPC command to PMC when don't need to specify + * input/output data and source/dest pointers. + * + * Return: an IPC error code or 0 on success. */ int intel_pmc_ipc_simple_command(int cmd, int sub) { @@ -232,16 +237,20 @@ int intel_pmc_ipc_simple_command(int cmd, int sub) } EXPORT_SYMBOL_GPL(intel_pmc_ipc_simple_command); -/* - * intel_pmc_ipc_raw_cmd - * @cmd: command - * @sub: sub type - * @in: input data - * @inlen: input length in bytes - * @out: output data - * @outlen: output length in dwords - * @sptr: data writing to SPTR register - * @dptr: data writing to DPTR register +/** + * intel_pmc_ipc_raw_cmd() - IPC command with data and pointers + * @cmd: IPC command code. + * @sub: IPC command sub type. + * @in: input data of this IPC command. + * @inlen: input data length in bytes. + * @out: output data of this IPC command. + * @outlen: output data length in dwords. + * @sptr: data writing to SPTR register. + * @dptr: data writing to DPTR register. + * + * Send an IPC command to PMC with input/output data and source/dest pointers. + * + * Return: an IPC error code or 0 on success. */ int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen, u32 *out, u32 outlen, u32 dptr, u32 sptr) @@ -278,14 +287,18 @@ int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen, u32 *out, } EXPORT_SYMBOL_GPL(intel_pmc_ipc_raw_cmd); -/* - * intel_pmc_ipc_command - * @cmd: command - * @sub: sub type - * @in: input data - * @inlen: input length in bytes - * @out: output data - * @outlen: output length in dwords +/** + * intel_pmc_ipc_command() - IPC command with input/output data + * @cmd: IPC command code. + * @sub: IPC command sub type. + * @in: input data of this IPC command. + * @inlen: input data length in bytes. + * @out: output data of this IPC command. + * @outlen: output data length in dwords. + * + * Send an IPC command to PMC with input/output data. + * + * Return: an IPC error code or 0 on success. */ int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen, u32 *out, u32 outlen) From 838f13bf4b6737d4aec508558e45f81798fc2677 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 9 Jul 2015 16:39:47 -0400 Subject: [PATCH 073/170] blkcg: allow blkcg_pol_mutex to be grabbed from cgroup [file] methods blkcg_pol_mutex primarily protects the blkcg_policy array. It also protects cgroup file type [un]registration during policy addition / removal. This puts blkcg_pol_mutex outside cgroup internal synchronization and in turn makes it impossible to grab from blkcg's cgroup methods as that leads to cyclic dependency. Another problematic dependency arising from this is through cgroup interface file deactivation. Removing a cftype requires removing all files of the type which in turn involves draining all on-going invocations of the file methods. This means that an interface file implementation can't grab blkcg_pol_mutex as draining can lead to AA deadlock. blkcg_reset_stats() is already in this situation. It currently trylocks blkcg_pol_mutex and then unwinds and retries the whole operation on failure, which is cumbersome at best. It has a lengthy comment explaining how cgroup internal synchronization is involved and expected to be updated but as explained above this doesn't need cgroup internal locking to deadlock. It's a self-contained AA deadlock. The described circular dependencies can be easily broken by moving cftype [un]registration out of blkcg_pol_mutex and protect them with an outer mutex. This patch introduces blkcg_pol_register_mutex which wraps entire policy [un]registration including cftype operations and shrinks blkcg_pol_mutex critical section. This also makes the trylock dancing in blkcg_reset_stats() unnecessary. Removed. This patch is necessary for the following blkcg_policy_data allocation bug fixes. Signed-off-by: Tejun Heo Cc: Vivek Goyal Cc: Arianna Avanzini Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 5e2723f2c6a3..2ff74ffcbb27 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -29,6 +29,14 @@ #define MAX_KEY_LEN 100 +/* + * blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation. + * blkcg_pol_register_mutex nests outside of it and synchronizes entire + * policy [un]register operations including cgroup file additions / + * removals. Putting cgroup file registration outside blkcg_pol_mutex + * allows grabbing it from cgroup callbacks. + */ +static DEFINE_MUTEX(blkcg_pol_register_mutex); static DEFINE_MUTEX(blkcg_pol_mutex); struct blkcg blkcg_root; @@ -453,20 +461,7 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css, struct blkcg_gq *blkg; int i; - /* - * XXX: We invoke cgroup_add/rm_cftypes() under blkcg_pol_mutex - * which ends up putting cgroup's internal cgroup_tree_mutex under - * it; however, cgroup_tree_mutex is nested above cgroup file - * active protection and grabbing blkcg_pol_mutex from a cgroup - * file operation creates a possible circular dependency. cgroup - * internal locking is planned to go through further simplification - * and this issue should go away soon. For now, let's trylock - * blkcg_pol_mutex and restart the write on failure. - * - * http://lkml.kernel.org/g/5363C04B.4010400@oracle.com - */ - if (!mutex_trylock(&blkcg_pol_mutex)) - return restart_syscall(); + mutex_lock(&blkcg_pol_mutex); spin_lock_irq(&blkcg->lock); /* @@ -1190,6 +1185,7 @@ int blkcg_policy_register(struct blkcg_policy *pol) if (WARN_ON(pol->pd_size < sizeof(struct blkg_policy_data))) return -EINVAL; + mutex_lock(&blkcg_pol_register_mutex); mutex_lock(&blkcg_pol_mutex); /* find an empty slot */ @@ -1198,19 +1194,23 @@ int blkcg_policy_register(struct blkcg_policy *pol) if (!blkcg_policy[i]) break; if (i >= BLKCG_MAX_POLS) - goto out_unlock; + goto err_unlock; /* register and update blkgs */ pol->plid = i; blkcg_policy[i] = pol; + mutex_unlock(&blkcg_pol_mutex); /* everything is in place, add intf files for the new policy */ if (pol->cftypes) WARN_ON(cgroup_add_legacy_cftypes(&blkio_cgrp_subsys, pol->cftypes)); - ret = 0; -out_unlock: + mutex_unlock(&blkcg_pol_register_mutex); + return 0; + +err_unlock: mutex_unlock(&blkcg_pol_mutex); + mutex_unlock(&blkcg_pol_register_mutex); return ret; } EXPORT_SYMBOL_GPL(blkcg_policy_register); @@ -1223,7 +1223,7 @@ EXPORT_SYMBOL_GPL(blkcg_policy_register); */ void blkcg_policy_unregister(struct blkcg_policy *pol) { - mutex_lock(&blkcg_pol_mutex); + mutex_lock(&blkcg_pol_register_mutex); if (WARN_ON(blkcg_policy[pol->plid] != pol)) goto out_unlock; @@ -1233,8 +1233,10 @@ void blkcg_policy_unregister(struct blkcg_policy *pol) cgroup_rm_cftypes(pol->cftypes); /* unregister and update blkgs */ + mutex_lock(&blkcg_pol_mutex); blkcg_policy[pol->plid] = NULL; -out_unlock: mutex_unlock(&blkcg_pol_mutex); +out_unlock: + mutex_unlock(&blkcg_pol_register_mutex); } EXPORT_SYMBOL_GPL(blkcg_policy_unregister); From 144232b34258c1fc19729e077c6fb161e30da07b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 9 Jul 2015 16:39:48 -0400 Subject: [PATCH 074/170] blkcg: blkcg_css_alloc() should grab blkcg_pol_mutex while iterating blkcg_policy[] An entry in blkcg_policy[] is stable while there are non-bypassing in-flight IOs on a request_queue which has the policy activated. This is why most derefs of blkcg_policy[] don't need explicit locking; however, blkcg_css_alloc() isn't invoked from IO path and thus doesn't have this protection and may race policies being added and removed. Fix it by adding explicit blkcg_pol_mutex protection around blkcg_policy[] iteration in blkcg_css_alloc(). Signed-off-by: Tejun Heo Fixes: e48453c386f3 ("block, cgroup: implement policy-specific per-blkcg data") Cc: Vivek Goyal Cc: Arianna Avanzini Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 2ff74ffcbb27..05b893de516b 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -844,6 +844,8 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) goto free_blkcg; } + mutex_lock(&blkcg_pol_mutex); + for (i = 0; i < BLKCG_MAX_POLS ; i++) { struct blkcg_policy *pol = blkcg_policy[i]; struct blkcg_policy_data *cpd; @@ -860,6 +862,7 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) BUG_ON(blkcg->pd[i]); cpd = kzalloc(pol->cpd_size, GFP_KERNEL); if (!cpd) { + mutex_unlock(&blkcg_pol_mutex); ret = ERR_PTR(-ENOMEM); goto free_pd_blkcg; } @@ -868,6 +871,7 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) pol->cpd_init_fn(blkcg); } + mutex_unlock(&blkcg_pol_mutex); done: spin_lock_init(&blkcg->lock); INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC); From 7876f930d0e78addc6bbdbba0d6c196a0788d545 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 9 Jul 2015 16:39:49 -0400 Subject: [PATCH 075/170] blkcg: implement all_blkcgs list Add all_blkcgs list goes through blkcg->all_blkcgs_node and is protected by blkcg_pol_mutex. This will be used to fix blkcg_policy_data allocation bug. Signed-off-by: Tejun Heo Cc: Vivek Goyal Cc: Arianna Avanzini Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 17 ++++++++++++----- include/linux/blk-cgroup.h | 1 + 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 05b893de516b..42ff436ffaf4 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -46,6 +46,8 @@ struct cgroup_subsys_state * const blkcg_root_css = &blkcg_root.css; static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; +static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */ + static bool blkcg_policy_enabled(struct request_queue *q, const struct blkcg_policy *pol) { @@ -817,6 +819,10 @@ static void blkcg_css_free(struct cgroup_subsys_state *css) { struct blkcg *blkcg = css_to_blkcg(css); + mutex_lock(&blkcg_pol_mutex); + list_del(&blkcg->all_blkcgs_node); + mutex_unlock(&blkcg_pol_mutex); + if (blkcg != &blkcg_root) { int i; @@ -833,6 +839,8 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) struct cgroup_subsys_state *ret; int i; + mutex_lock(&blkcg_pol_mutex); + if (!parent_css) { blkcg = &blkcg_root; goto done; @@ -844,8 +852,6 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) goto free_blkcg; } - mutex_lock(&blkcg_pol_mutex); - for (i = 0; i < BLKCG_MAX_POLS ; i++) { struct blkcg_policy *pol = blkcg_policy[i]; struct blkcg_policy_data *cpd; @@ -862,7 +868,6 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) BUG_ON(blkcg->pd[i]); cpd = kzalloc(pol->cpd_size, GFP_KERNEL); if (!cpd) { - mutex_unlock(&blkcg_pol_mutex); ret = ERR_PTR(-ENOMEM); goto free_pd_blkcg; } @@ -871,7 +876,6 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) pol->cpd_init_fn(blkcg); } - mutex_unlock(&blkcg_pol_mutex); done: spin_lock_init(&blkcg->lock); INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC); @@ -879,14 +883,17 @@ done: #ifdef CONFIG_CGROUP_WRITEBACK INIT_LIST_HEAD(&blkcg->cgwb_list); #endif + list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs); + + mutex_unlock(&blkcg_pol_mutex); return &blkcg->css; free_pd_blkcg: for (i--; i >= 0; i--) kfree(blkcg->pd[i]); - free_blkcg: kfree(blkcg); + mutex_unlock(&blkcg_pol_mutex); return ret; } diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 58cfab80dd70..cf3e7bc22ef3 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -47,6 +47,7 @@ struct blkcg { struct blkcg_policy_data *pd[BLKCG_MAX_POLS]; + struct list_head all_blkcgs_node; #ifdef CONFIG_CGROUP_WRITEBACK struct list_head cgwb_list; #endif From 06b285bd11257bccc5a1b85a835507e33656aff2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 9 Jul 2015 16:39:50 -0400 Subject: [PATCH 076/170] blkcg: fix blkcg_policy_data allocation bug e48453c386f3 ("block, cgroup: implement policy-specific per-blkcg data") updated per-blkcg policy data to be dynamically allocated. When a policy is registered, its policy data aren't created. Instead, when the policy is activated on a queue, the policy data are allocated if there are blkg's (blkcg_gq's) which are attached to a given blkcg. This is buggy. Consider the following scenario. 1. A blkcg is created. No blkg's attached yet. 2. The policy is registered. No policy data is allocated. 3. The policy is activated on a queue. As the above blkcg doesn't have any blkg's, it won't allocate the matching blkcg_policy_data. 4. An IO is issued from the blkcg and blkg is created and the blkcg still doesn't have the matching policy data allocated. With cfq-iosched, this leads to an oops. It also doesn't free policy data on policy unregistration assuming that freeing of all policy data on blkcg destruction should take care of it; however, this also is incorrect. 1. A blkcg has policy data. 2. The policy gets unregistered but the policy data remains. 3. Another policy gets registered on the same slot. 4. Later, the new policy tries to allocate policy data on the previous blkcg but the slot is already occupied and gets skipped. The policy ends up operating on the policy data of the previous policy. There's no reason to manage blkcg_policy_data lazily. The reason we do lazy allocation of blkg's is that the number of all possible blkg's is the product of cgroups and block devices which can reach a surprising level. blkcg_policy_data is contrained by the number of cgroups and shouldn't be a problem. This patch makes blkcg_policy_data to be allocated for all existing blkcg's on policy registration and freed on unregistration and removes blkcg_policy_data handling from policy [de]activation paths. This makes that blkcg_policy_data are created and removed with the policy they belong to and fixes the above described problems. Signed-off-by: Tejun Heo Fixes: e48453c386f3 ("block, cgroup: implement policy-specific per-blkcg data") Cc: Vivek Goyal Cc: Arianna Avanzini Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 78 ++++++++++++++++++++------------------ include/linux/blk-cgroup.h | 10 +---- 2 files changed, 44 insertions(+), 44 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 42ff436ffaf4..9da02c021ebe 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1048,10 +1048,8 @@ int blkcg_activate_policy(struct request_queue *q, const struct blkcg_policy *pol) { LIST_HEAD(pds); - LIST_HEAD(cpds); struct blkcg_gq *blkg; struct blkg_policy_data *pd, *nd; - struct blkcg_policy_data *cpd, *cnd; int cnt = 0, ret; if (blkcg_policy_enabled(q, pol)) @@ -1064,10 +1062,7 @@ int blkcg_activate_policy(struct request_queue *q, cnt++; spin_unlock_irq(q->queue_lock); - /* - * Allocate per-blkg and per-blkcg policy data - * for all existing blkgs. - */ + /* allocate per-blkg policy data for all existing blkgs */ while (cnt--) { pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node); if (!pd) { @@ -1075,15 +1070,6 @@ int blkcg_activate_policy(struct request_queue *q, goto out_free; } list_add_tail(&pd->alloc_node, &pds); - - if (!pol->cpd_size) - continue; - cpd = kzalloc_node(pol->cpd_size, GFP_KERNEL, q->node); - if (!cpd) { - ret = -ENOMEM; - goto out_free; - } - list_add_tail(&cpd->alloc_node, &cpds); } /* @@ -1093,32 +1079,17 @@ int blkcg_activate_policy(struct request_queue *q, spin_lock_irq(q->queue_lock); list_for_each_entry(blkg, &q->blkg_list, q_node) { - if (WARN_ON(list_empty(&pds)) || - WARN_ON(pol->cpd_size && list_empty(&cpds))) { + if (WARN_ON(list_empty(&pds))) { /* umm... this shouldn't happen, just abort */ ret = -ENOMEM; goto out_unlock; } - cpd = list_first_entry(&cpds, struct blkcg_policy_data, - alloc_node); - list_del_init(&cpd->alloc_node); pd = list_first_entry(&pds, struct blkg_policy_data, alloc_node); list_del_init(&pd->alloc_node); /* grab blkcg lock too while installing @pd on @blkg */ spin_lock(&blkg->blkcg->lock); - if (!pol->cpd_size) - goto no_cpd; - if (!blkg->blkcg->pd[pol->plid]) { - /* Per-policy per-blkcg data */ - blkg->blkcg->pd[pol->plid] = cpd; - cpd->plid = pol->plid; - pol->cpd_init_fn(blkg->blkcg); - } else { /* must free it as it has already been extracted */ - kfree(cpd); - } -no_cpd: blkg->pd[pol->plid] = pd; pd->blkg = blkg; pd->plid = pol->plid; @@ -1135,8 +1106,6 @@ out_free: blk_queue_bypass_end(q); list_for_each_entry_safe(pd, nd, &pds, alloc_node) kfree(pd); - list_for_each_entry_safe(cpd, cnd, &cpds, alloc_node) - kfree(cpd); return ret; } EXPORT_SYMBOL_GPL(blkcg_activate_policy); @@ -1191,6 +1160,7 @@ EXPORT_SYMBOL_GPL(blkcg_deactivate_policy); */ int blkcg_policy_register(struct blkcg_policy *pol) { + struct blkcg *blkcg; int i, ret; if (WARN_ON(pol->pd_size < sizeof(struct blkg_policy_data))) @@ -1207,9 +1177,27 @@ int blkcg_policy_register(struct blkcg_policy *pol) if (i >= BLKCG_MAX_POLS) goto err_unlock; - /* register and update blkgs */ + /* register @pol */ pol->plid = i; - blkcg_policy[i] = pol; + blkcg_policy[pol->plid] = pol; + + /* allocate and install cpd's */ + if (pol->cpd_size) { + list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) { + struct blkcg_policy_data *cpd; + + cpd = kzalloc(pol->cpd_size, GFP_KERNEL); + if (!cpd) { + mutex_unlock(&blkcg_pol_mutex); + goto err_free_cpds; + } + + blkcg->pd[pol->plid] = cpd; + cpd->plid = pol->plid; + pol->cpd_init_fn(blkcg); + } + } + mutex_unlock(&blkcg_pol_mutex); /* everything is in place, add intf files for the new policy */ @@ -1219,6 +1207,14 @@ int blkcg_policy_register(struct blkcg_policy *pol) mutex_unlock(&blkcg_pol_register_mutex); return 0; +err_free_cpds: + if (pol->cpd_size) { + list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) { + kfree(blkcg->pd[pol->plid]); + blkcg->pd[pol->plid] = NULL; + } + } + blkcg_policy[pol->plid] = NULL; err_unlock: mutex_unlock(&blkcg_pol_mutex); mutex_unlock(&blkcg_pol_register_mutex); @@ -1234,6 +1230,8 @@ EXPORT_SYMBOL_GPL(blkcg_policy_register); */ void blkcg_policy_unregister(struct blkcg_policy *pol) { + struct blkcg *blkcg; + mutex_lock(&blkcg_pol_register_mutex); if (WARN_ON(blkcg_policy[pol->plid] != pol)) @@ -1243,9 +1241,17 @@ void blkcg_policy_unregister(struct blkcg_policy *pol) if (pol->cftypes) cgroup_rm_cftypes(pol->cftypes); - /* unregister and update blkgs */ + /* remove cpds and unregister */ mutex_lock(&blkcg_pol_mutex); + + if (pol->cpd_size) { + list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) { + kfree(blkcg->pd[pol->plid]); + blkcg->pd[pol->plid] = NULL; + } + } blkcg_policy[pol->plid] = NULL; + mutex_unlock(&blkcg_pol_mutex); out_unlock: mutex_unlock(&blkcg_pol_register_mutex); diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index cf3e7bc22ef3..1b62d768c7df 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -89,18 +89,12 @@ struct blkg_policy_data { * Policies that need to keep per-blkcg data which is independent * from any request_queue associated to it must specify its size * with the cpd_size field of the blkcg_policy structure and - * embed a blkcg_policy_data in it. blkcg core allocates - * policy-specific per-blkcg structures lazily the first time - * they are actually needed, so it handles them together with - * blkgs. cpd_init() is invoked to let each policy handle - * per-blkcg data. + * embed a blkcg_policy_data in it. cpd_init() is invoked to let + * each policy handle per-blkcg data. */ struct blkcg_policy_data { /* the policy id this per-policy data belongs to */ int plid; - - /* used during policy activation */ - struct list_head alloc_node; }; /* association between a blk cgroup and a request queue */ From ae0afb4f5d44d17a0fd135ae000e2acf12c53617 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 9 Jul 2015 22:59:49 +0200 Subject: [PATCH 077/170] suspend-to-idle: Prevent RCU from complaining about tick_freeze() Put tick_freeze() under RCU_NONIDLE() to prevent RCU from complaining about suspicious RCU usage in idle by trace_suspend_resume() called from there. While at it, fix a comment related to another usage of RCU_NONIDLE() in enter_freeze_proper(). Signed-off-by: Rafael J. Wysocki Reviewed-by: Paul E. McKenney --- drivers/cpuidle/cpuidle.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index e8e2775c3821..48b7228563ad 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -112,7 +112,12 @@ int cpuidle_find_deepest_state(struct cpuidle_driver *drv, static void enter_freeze_proper(struct cpuidle_driver *drv, struct cpuidle_device *dev, int index) { - tick_freeze(); + /* + * trace_suspend_resume() called by tick_freeze() for the last CPU + * executing it contains RCU usage regarded as invalid in the idle + * context, so tell RCU about that. + */ + RCU_NONIDLE(tick_freeze()); /* * The state used here cannot be a "coupled" one, because the "coupled" * cpuidle mechanism enables interrupts and doing that with timekeeping @@ -122,7 +127,7 @@ static void enter_freeze_proper(struct cpuidle_driver *drv, WARN_ON(!irqs_disabled()); /* * timekeeping_resume() that will be called by tick_unfreeze() for the - * last CPU executing it calls functions containing RCU read-side + * first CPU executing it calls functions containing RCU read-side * critical sections, so tell RCU about that. */ RCU_NONIDLE(tick_unfreeze()); From adb3505086def6d3e37013ec6cf8313cc719cbb6 Mon Sep 17 00:00:00 2001 From: Vaishali Thakkar Date: Wed, 8 Jul 2015 10:49:30 +0530 Subject: [PATCH 078/170] net: systemport: Use eth_hw_addr_random Use eth_hw_addr_random() instead of calling random_ether_addr(). Here, this change is setting addr_assign_type to NET_ADDR_RANDOM. The Coccinelle semantic patch that performs this transformation is as follows: @@ identifier a,b; @@ -random_ether_addr(a->b); +eth_hw_addr_random(a); Signed-off-by: Vaishali Thakkar Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 909ad7a0d480..4566cdf0bc39 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1793,7 +1793,7 @@ static int bcm_sysport_probe(struct platform_device *pdev) macaddr = of_get_mac_address(dn); if (!macaddr || !is_valid_ether_addr(macaddr)) { dev_warn(&pdev->dev, "using random Ethernet MAC\n"); - random_ether_addr(dev->dev_addr); + eth_hw_addr_random(dev); } else { ether_addr_copy(dev->dev_addr, macaddr); } From 5a0266af10246c84a6c712f365788800c7e23fd4 Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Wed, 8 Jul 2015 14:35:22 +0530 Subject: [PATCH 079/170] drivers: net: cpsw: fix disabling of tx interrupt in rx isr In commit 'c03abd84634d ("net: ethernet: cpsw: don't requests IRQs we don't use")', common isr is split into tx and rx, but in rx isr tx interrupt is also disabledi in cpsw_disable_irq(). So tx interrupts are not handled during rx interrupts and rx napi completion and results in poor tx performance by 40Mbps. Fixing by disabling only rx interrupt in rx isr. Cc: Felipe Balbi Cc: # v4.0+ Signed-off-by: Mugunthan V N Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index e778703880d3..f335bf119ab5 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -138,19 +138,6 @@ do { \ #define CPSW_CMINTMAX_INTVL (1000 / CPSW_CMINTMIN_CNT) #define CPSW_CMINTMIN_INTVL ((1000 / CPSW_CMINTMAX_CNT) + 1) -#define cpsw_enable_irq(priv) \ - do { \ - u32 i; \ - for (i = 0; i < priv->num_irqs; i++) \ - enable_irq(priv->irqs_table[i]); \ - } while (0) -#define cpsw_disable_irq(priv) \ - do { \ - u32 i; \ - for (i = 0; i < priv->num_irqs; i++) \ - disable_irq_nosync(priv->irqs_table[i]); \ - } while (0) - #define cpsw_slave_index(priv) \ ((priv->data.dual_emac) ? priv->emac_port : \ priv->data.active_slave) @@ -783,7 +770,7 @@ static irqreturn_t cpsw_rx_interrupt(int irq, void *dev_id) cpsw_intr_disable(priv); if (priv->irq_enabled == true) { - cpsw_disable_irq(priv); + disable_irq_nosync(priv->irqs_table[0]); priv->irq_enabled = false; } @@ -819,7 +806,7 @@ static int cpsw_poll(struct napi_struct *napi, int budget) prim_cpsw = cpsw_get_slave_priv(priv, 0); if (prim_cpsw->irq_enabled == false) { prim_cpsw->irq_enabled = true; - cpsw_enable_irq(priv); + enable_irq(priv->irqs_table[0]); } } @@ -1335,7 +1322,7 @@ static int cpsw_ndo_open(struct net_device *ndev) if (prim_cpsw->irq_enabled == false) { if ((priv == prim_cpsw) || !netif_running(prim_cpsw->ndev)) { prim_cpsw->irq_enabled = true; - cpsw_enable_irq(prim_cpsw); + enable_irq(prim_cpsw->irqs_table[0]); } } From 4a0e3e989d66bb7204b163d9cfaa7fa96d0f2023 Mon Sep 17 00:00:00 2001 From: Enrico Mioso Date: Wed, 8 Jul 2015 13:05:57 +0200 Subject: [PATCH 080/170] cdc_ncm: Add support for moving NDP to end of NCM frame NCM specs are not actually mandating a specific position in the frame for the NDP (Network Datagram Pointer). However, some Huawei devices will ignore our aggregates if it is not placed after the datagrams it points to. Add support for doing just this, in a per-device configurable way. While at it, update NCM subdrivers, disabling this functionality in all of them, except in huawei_cdc_ncm where it is enabled instead. We aren't making any distinction between different Huawei NCM devices, based on what the vendor driver does. Standard NCM devices are left unaffected: if they are compliant, they should be always usable, still stay on the safe side. This change has been tested and working with a Huawei E3131 device (which works regardless of NDP position), a Huawei E3531 (also working both ways) and an E3372 (which mandates NDP to be after indexed datagrams). V1->V2: - corrected wrong NDP acronym definition - fixed possible NULL pointer dereference - patch cleanup V2->V3: - Properly account for the NDP size when writing new packets to SKB Signed-off-by: Enrico Mioso Signed-off-by: David S. Miller --- drivers/net/usb/cdc_mbim.c | 2 +- drivers/net/usb/cdc_ncm.c | 61 ++++++++++++++++++++++++++++---- drivers/net/usb/huawei_cdc_ncm.c | 7 ++-- include/linux/usb/cdc_ncm.h | 7 +++- 4 files changed, 67 insertions(+), 10 deletions(-) diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index e4b7a47a825c..efc18e05af0a 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -158,7 +158,7 @@ static int cdc_mbim_bind(struct usbnet *dev, struct usb_interface *intf) if (!cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting)) goto err; - ret = cdc_ncm_bind_common(dev, intf, data_altsetting); + ret = cdc_ncm_bind_common(dev, intf, data_altsetting, 0); if (ret) goto err; diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 8067b8fbb0ee..1991e4a24657 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -684,10 +684,12 @@ static void cdc_ncm_free(struct cdc_ncm_ctx *ctx) ctx->tx_curr_skb = NULL; } + kfree(ctx->delayed_ndp16); + kfree(ctx); } -int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting) +int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting, int drvflags) { const struct usb_cdc_union_desc *union_desc = NULL; struct cdc_ncm_ctx *ctx; @@ -855,6 +857,17 @@ advance: /* finish setting up the device specific data */ cdc_ncm_setup(dev); + /* Device-specific flags */ + ctx->drvflags = drvflags; + + /* Allocate the delayed NDP if needed. */ + if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) { + ctx->delayed_ndp16 = kzalloc(ctx->max_ndp_size, GFP_KERNEL); + if (!ctx->delayed_ndp16) + goto error2; + dev_info(&intf->dev, "NDP will be placed at end of frame for this device."); + } + /* override ethtool_ops */ dev->net->ethtool_ops = &cdc_ncm_ethtool_ops; @@ -954,8 +967,11 @@ static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf) if (cdc_ncm_select_altsetting(intf) != CDC_NCM_COMM_ALTSETTING_NCM) return -ENODEV; - /* The NCM data altsetting is fixed */ - ret = cdc_ncm_bind_common(dev, intf, CDC_NCM_DATA_ALTSETTING_NCM); + /* The NCM data altsetting is fixed, so we hard-coded it. + * Additionally, generic NCM devices are assumed to accept arbitrarily + * placed NDP. + */ + ret = cdc_ncm_bind_common(dev, intf, CDC_NCM_DATA_ALTSETTING_NCM, 0); /* * We should get an event when network connection is "connected" or @@ -986,6 +1002,14 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_ struct usb_cdc_ncm_nth16 *nth16 = (void *)skb->data; size_t ndpoffset = le16_to_cpu(nth16->wNdpIndex); + /* If NDP should be moved to the end of the NCM package, we can't follow the + * NTH16 header as we would normally do. NDP isn't written to the SKB yet, and + * the wNdpIndex field in the header is actually not consistent with reality. It will be later. + */ + if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) + if (ctx->delayed_ndp16->dwSignature == sign) + return ctx->delayed_ndp16; + /* follow the chain of NDPs, looking for a match */ while (ndpoffset) { ndp16 = (struct usb_cdc_ncm_ndp16 *)(skb->data + ndpoffset); @@ -995,7 +1019,8 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_ } /* align new NDP */ - cdc_ncm_align_tail(skb, ctx->tx_ndp_modulus, 0, ctx->tx_max); + if (!(ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END)) + cdc_ncm_align_tail(skb, ctx->tx_ndp_modulus, 0, ctx->tx_max); /* verify that there is room for the NDP and the datagram (reserve) */ if ((ctx->tx_max - skb->len - reserve) < ctx->max_ndp_size) @@ -1008,7 +1033,11 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_ nth16->wNdpIndex = cpu_to_le16(skb->len); /* push a new empty NDP */ - ndp16 = (struct usb_cdc_ncm_ndp16 *)memset(skb_put(skb, ctx->max_ndp_size), 0, ctx->max_ndp_size); + if (!(ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END)) + ndp16 = (struct usb_cdc_ncm_ndp16 *)memset(skb_put(skb, ctx->max_ndp_size), 0, ctx->max_ndp_size); + else + ndp16 = ctx->delayed_ndp16; + ndp16->dwSignature = sign; ndp16->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp16) + sizeof(struct usb_cdc_ncm_dpe16)); return ndp16; @@ -1023,6 +1052,15 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) struct sk_buff *skb_out; u16 n = 0, index, ndplen; u8 ready2send = 0; + u32 delayed_ndp_size; + + /* When our NDP gets written in cdc_ncm_ndp(), then skb_out->len gets updated + * accordingly. Otherwise, we should check here. + */ + if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) + delayed_ndp_size = ctx->max_ndp_size; + else + delayed_ndp_size = 0; /* if there is a remaining skb, it gets priority */ if (skb != NULL) { @@ -1077,7 +1115,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) cdc_ncm_align_tail(skb_out, ctx->tx_modulus, ctx->tx_remainder, ctx->tx_max); /* check if we had enough room left for both NDP and frame */ - if (!ndp16 || skb_out->len + skb->len > ctx->tx_max) { + if (!ndp16 || skb_out->len + skb->len + delayed_ndp_size > ctx->tx_max) { if (n == 0) { /* won't fit, MTU problem? */ dev_kfree_skb_any(skb); @@ -1150,6 +1188,17 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) /* variables will be reset at next call */ } + /* If requested, put NDP at end of frame. */ + if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) { + nth16 = (struct usb_cdc_ncm_nth16 *)skb_out->data; + cdc_ncm_align_tail(skb_out, ctx->tx_ndp_modulus, 0, ctx->tx_max); + nth16->wNdpIndex = cpu_to_le16(skb_out->len); + memcpy(skb_put(skb_out, ctx->max_ndp_size), ctx->delayed_ndp16, ctx->max_ndp_size); + + /* Zero out delayed NDP - signature checking will naturally fail. */ + ndp16 = memset(ctx->delayed_ndp16, 0, ctx->max_ndp_size); + } + /* If collected data size is less or equal ctx->min_tx_pkt * bytes, we send buffers as it is. If we get more data, it * would be more efficient for USB HS mobile device with DMA diff --git a/drivers/net/usb/huawei_cdc_ncm.c b/drivers/net/usb/huawei_cdc_ncm.c index 735f7dadb9a0..2680a65cd5e4 100644 --- a/drivers/net/usb/huawei_cdc_ncm.c +++ b/drivers/net/usb/huawei_cdc_ncm.c @@ -73,11 +73,14 @@ static int huawei_cdc_ncm_bind(struct usbnet *usbnet_dev, struct usb_driver *subdriver = ERR_PTR(-ENODEV); int ret = -ENODEV; struct huawei_cdc_ncm_state *drvstate = (void *)&usbnet_dev->data; + int drvflags = 0; /* altsetting should always be 1 for NCM devices - so we hard-coded - * it here + * it here. Some huawei devices will need the NDP part of the NCM package to + * be at the end of the frame. */ - ret = cdc_ncm_bind_common(usbnet_dev, intf, 1); + drvflags |= CDC_NCM_FLAG_NDP_TO_END; + ret = cdc_ncm_bind_common(usbnet_dev, intf, 1, drvflags); if (ret) goto err; diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 7c9b484735c5..1f6526c76ee8 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -80,6 +80,9 @@ #define CDC_NCM_TIMER_INTERVAL_MIN 5UL #define CDC_NCM_TIMER_INTERVAL_MAX (U32_MAX / NSEC_PER_USEC) +/* Driver flags */ +#define CDC_NCM_FLAG_NDP_TO_END 0x02 /* NDP is placed at end of frame */ + #define cdc_ncm_comm_intf_is_mbim(x) ((x)->desc.bInterfaceSubClass == USB_CDC_SUBCLASS_MBIM && \ (x)->desc.bInterfaceProtocol == USB_CDC_PROTO_NONE) #define cdc_ncm_data_intf_is_mbim(x) ((x)->desc.bInterfaceProtocol == USB_CDC_MBIM_PROTO_NTB) @@ -103,9 +106,11 @@ struct cdc_ncm_ctx { spinlock_t mtx; atomic_t stop; + int drvflags; u32 timer_interval; u32 max_ndp_size; + struct usb_cdc_ncm_ndp16 *delayed_ndp16; u32 tx_timer_pending; u32 tx_curr_frame_num; @@ -133,7 +138,7 @@ struct cdc_ncm_ctx { }; u8 cdc_ncm_select_altsetting(struct usb_interface *intf); -int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting); +int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting, int drvflags); void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf); struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign); int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in); From fecdf8be2d91e04b0a9a4f79ff06499a36f5d14f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 8 Jul 2015 21:42:11 +0200 Subject: [PATCH 081/170] net: pktgen: fix race between pktgen_thread_worker() and kthread_stop() pktgen_thread_worker() is obviously racy, kthread_stop() can come between the kthread_should_stop() check and set_current_state(). Signed-off-by: Oleg Nesterov Reported-by: Jan Stancek Reported-by: Marcelo Leitner Signed-off-by: David S. Miller --- net/core/pktgen.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 05badbb58865..41489e3c69af 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3572,8 +3572,10 @@ static int pktgen_thread_worker(void *arg) pktgen_rem_thread(t); /* Wait for kthread_stop */ - while (!kthread_should_stop()) { + for (;;) { set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; schedule(); } __set_current_state(TASK_RUNNING); From 1fbe4b46caca5b01b070af93d513031ffbcc480c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 8 Jul 2015 21:42:13 +0200 Subject: [PATCH 082/170] net: pktgen: kill the "Wait for kthread_stop" code in pktgen_thread_worker() pktgen_thread_worker() doesn't need to wait for kthread_stop(), it can simply exit. Just pktgen_create_thread() and pg_net_exit() should do get_task_struct()/put_task_struct(). kthread_stop(dead_thread) is fine. Signed-off-by: Oleg Nesterov Signed-off-by: David S. Miller --- net/core/pktgen.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 41489e3c69af..1ebdf1c0d118 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3571,15 +3571,6 @@ static int pktgen_thread_worker(void *arg) pr_debug("%s removing thread\n", t->tsk->comm); pktgen_rem_thread(t); - /* Wait for kthread_stop */ - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - if (kthread_should_stop()) - break; - schedule(); - } - __set_current_state(TASK_RUNNING); - return 0; } @@ -3771,6 +3762,7 @@ static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn) } t->net = pn; + get_task_struct(p); wake_up_process(p); wait_for_completion(&t->start_done); @@ -3893,6 +3885,7 @@ static void __net_exit pg_net_exit(struct net *net) t = list_entry(q, struct pktgen_thread, th_list); list_del(&t->th_list); kthread_stop(t->tsk); + put_task_struct(t->tsk); kfree(t); } From 35afd02e30d6368073df604920c4ea7cddadf5d0 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 10 Jul 2015 01:36:27 +0200 Subject: [PATCH 083/170] cpufreq: Initialize the governor again while restoring policy When all CPUs of a policy are hot-unplugged, we EXIT the governor but don't mark policy->governor as NULL. This was done in order to keep last used governor's information intact in sysfs, while the CPUs are offline. But we also need to clear policy->governor when restoring the policy. Because policy->governor still points to the last governor while policy is restored, following sequence of event happens: - cpufreq_init_policy() called while restoring policy - find_governor() matches last_governor string for present governors and returns last used governor's pointer, say ondemand. policy->governor already has the same address, unless the governor was removed in between. - cpufreq_set_policy() is called with both old/new policies governor set as ondemand. - Because governors matched, we skip governor initialization and return after calling __cpufreq_governor(CPUFREQ_GOV_LIMITS). Because the governor wasn't initialized for this policy, it returned -EBUSY. - cpufreq_init_policy() exits the policy on this error, but doesn't destroy it properly (should be fixed separately). - And so we enter a scenario where the policy isn't completely initialized but used. Fix this by setting policy->governor to NULL while restoring the policy. Reported-and-tested-by: Pi-Cheng Chen Reported-and-tested-by: "Jon Medhurst (Tixy)" Reported-and-tested-by: Steven Rostedt Fixes: 18bf3a124ef8 (cpufreq: Mark policy->governor = NULL for inactive policies) Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index b612411655f9..2c22e3902e72 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1132,6 +1132,7 @@ static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu) down_write(&policy->rwsem); policy->cpu = cpu; + policy->governor = NULL; up_write(&policy->rwsem); } From 5a31d594a9732a2fa2eb83b0c4dcba75da2dff5d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 10 Jul 2015 01:43:27 +0200 Subject: [PATCH 084/170] cpufreq: Allow freq_table to be obtained for offline CPUs Users of freq table may want to access it for any CPU from policy->related_cpus mask. One such user is cpu-cooling layer. It gets a list of 'clip_cpus' (equivalent to policy->related_cpus) during registration and tries to get freq_table for the first CPU of this mask. If the CPU, for which it tries to fetch freq_table, is offline, cpufreq_frequency_get_table() fails. This happens because it relies on cpufreq_cpu_get_raw() for its functioning which returns policy only for online CPUs. The fix is to access the policy data structure for the given CPU directly (which also returns a valid policy for offline CPUs), but the policy itself has to be active (meaning that at least one CPU using it is online) for the frequency table to be returned. Because we will be using 'cpufreq_cpu_data' now, which is internal to the cpufreq core, move cpufreq_frequency_get_table() to cpufreq.c. Reported-and-tested-by: Pi-Cheng Chen Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 9 +++++++++ drivers/cpufreq/freq_table.c | 9 --------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 2c22e3902e72..26063afb3eba 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -169,6 +169,15 @@ struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy) } EXPORT_SYMBOL_GPL(get_governor_parent_kobj); +struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu) +{ + struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); + + return policy && !policy_is_inactive(policy) ? + policy->freq_table : NULL; +} +EXPORT_SYMBOL_GPL(cpufreq_frequency_get_table); + static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall) { u64 idle_time; diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index df14766a8e06..dfbbf981ed56 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -297,15 +297,6 @@ int cpufreq_table_validate_and_show(struct cpufreq_policy *policy, } EXPORT_SYMBOL_GPL(cpufreq_table_validate_and_show); -struct cpufreq_policy *cpufreq_cpu_get_raw(unsigned int cpu); - -struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu) -{ - struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); - return policy ? policy->freq_table : NULL; -} -EXPORT_SYMBOL_GPL(cpufreq_frequency_get_table); - MODULE_AUTHOR("Dominik Brodowski "); MODULE_DESCRIPTION("CPUfreq frequency table helpers"); MODULE_LICENSE("GPL"); From 1fb01ca93a1348a1469b8777326cd7632483de77 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 8 Jul 2015 15:26:39 +0800 Subject: [PATCH 085/170] ACPI / PCI: Fix regressions caused by resource_size_t overflow with 32-bit kernel Zoltan Boszormenyi reported this regression: "There's a Realtek RTL8111/8168/8411 (PCI ID 10ec:8168, Subsystem ID 1565:230e) network chip on the mainboard. After the r8169 driver loaded the IRQs in the machine went berserk. Keyboard keypressed arrived with considerable latency and duplicated, so no real work was possible. The machine responded to the power button but didn't actually power down. It just stuck at the powering down message. I had to press the power button for 4 seconds to power it down. The computer is a POS machine with a big battery inside. Because of this, either ACPI or the Realtek chip kept the bad state and after rebooting, the network chip didn't even show up in lspci. Not even the PXE ROM announced itself during boot. I had to disconnect the battery to beat some sense back to the computer. The regression happens with 4.0.5, 4.1.0-rc8 and 4.1.0-final. 3.18.16 was good." The regression is caused by commit 593669c2ac0f (x86/PCI/ACPI: Use common ACPI resource interfaces to simplify implementation). Since commit 593669c2ac0f, x86 PCI ACPI host bridge driver validates ACPI resources by first converting an ACPI resource to a 'struct resource' structure and then applying checks against the converted resource structure. The 'start' and 'end' fields in 'struct resource' are defined to be type of resource_size_t, which may be 32 bits or 64 bits depending on CONFIG_PHYS_ADDR_T_64BIT. This may cause incorrect resource validation results with 32-bit kernels because 64-bit ACPI resource descriptors may get truncated when converting to 32-bit 'start' and 'end' fields in 'struct resource'. It eventually affects PCI resource allocation subsystem and makes some PCI devices and the system behave abnormally due to incorrect resource assignment. So enhance the ACPI resource parsing interfaces to ignore ACPI resource descriptors with address/offset above 4G when running in 32-bit mode. With the fix applied, the behavior of the machine was restored to how 3.18.16 worked, i.e. the memory range that is over 4GB is ignored again, and lspci -vvxxx shows that everything is at the same memory window as they were with 3.18.16. Reported-and-tested-by: Boszormenyi Zoltan Fixes: 593669c2ac0f (x86/PCI/ACPI: Use common ACPI resource interfaces to simplify implementation) Signed-off-by: Jiang Liu Cc: 4.0+ # 4.0+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/resource.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 10561ce16ed1..e8d281739cbc 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -194,6 +194,7 @@ static bool acpi_decode_space(struct resource_win *win, u8 iodec = attr->granularity == 0xfff ? ACPI_DECODE_10 : ACPI_DECODE_16; bool wp = addr->info.mem.write_protect; u64 len = attr->address_length; + u64 start, end, offset = 0; struct resource *res = &win->res; /* @@ -205,9 +206,6 @@ static bool acpi_decode_space(struct resource_win *win, pr_debug("ACPI: Invalid address space min_addr_fix %d, max_addr_fix %d, len %llx\n", addr->min_address_fixed, addr->max_address_fixed, len); - res->start = attr->minimum; - res->end = attr->maximum; - /* * For bridges that translate addresses across the bridge, * translation_offset is the offset that must be added to the @@ -215,12 +213,22 @@ static bool acpi_decode_space(struct resource_win *win, * primary side. Non-bridge devices must list 0 for all Address * Translation offset bits. */ - if (addr->producer_consumer == ACPI_PRODUCER) { - res->start += attr->translation_offset; - res->end += attr->translation_offset; - } else if (attr->translation_offset) { + if (addr->producer_consumer == ACPI_PRODUCER) + offset = attr->translation_offset; + else if (attr->translation_offset) pr_debug("ACPI: translation_offset(%lld) is invalid for non-bridge device.\n", attr->translation_offset); + start = attr->minimum + offset; + end = attr->maximum + offset; + + win->offset = offset; + res->start = start; + res->end = end; + if (sizeof(resource_size_t) < sizeof(u64) && + (offset != win->offset || start != res->start || end != res->end)) { + pr_warn("acpi resource window ([%#llx-%#llx] ignored, not CPU addressable)\n", + attr->minimum, attr->maximum); + return false; } switch (addr->resource_type) { @@ -237,8 +245,6 @@ static bool acpi_decode_space(struct resource_win *win, return false; } - win->offset = attr->translation_offset; - if (addr->producer_consumer == ACPI_PRODUCER) res->flags |= IORESOURCE_WINDOW; From fcc028c106e5750aa66dc43595b3f29f88801ff0 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 9 Jul 2015 22:21:20 +0900 Subject: [PATCH 086/170] net: axienet: Fix devm_ioremap_resource return value check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Value returned by devm_ioremap_resource() was checked for non-NULL but devm_ioremap_resource() returns IOMEM_ERR_PTR, not NULL. In case of error this could lead to dereference of ERR_PTR. Signed-off-by: Krzysztof Kozlowski Cc: Fixes: 46aa27df8853 ("net: axienet: Use devm_* calls") Reviewed-by: Sören Brinkmann Signed-off-by: David S. Miller --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 4208dd7ef101..d95f9aae95e7 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1530,9 +1530,9 @@ static int axienet_probe(struct platform_device *pdev) /* Map device registers */ ethres = platform_get_resource(pdev, IORESOURCE_MEM, 0); lp->regs = devm_ioremap_resource(&pdev->dev, ethres); - if (!lp->regs) { + if (IS_ERR(lp->regs)) { dev_err(&pdev->dev, "could not map Axi Ethernet regs.\n"); - ret = -ENOMEM; + ret = PTR_ERR(lp->regs); goto free_netdev; } @@ -1599,9 +1599,9 @@ static int axienet_probe(struct platform_device *pdev) goto free_netdev; } lp->dma_regs = devm_ioremap_resource(&pdev->dev, &dmares); - if (!lp->dma_regs) { + if (IS_ERR(lp->dma_regs)) { dev_err(&pdev->dev, "could not map DMA regs\n"); - ret = -ENOMEM; + ret = PTR_ERR(lp->dma_regs); goto free_netdev; } lp->rx_irq = irq_of_parse_and_map(np, 1); From a7d35f9d73e9ffa74a02304b817e579eec632f67 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Jul 2015 18:56:07 +0200 Subject: [PATCH 087/170] bridge: fix potential crash in __netdev_pick_tx() Commit c29390c6dfee ("xps: must clear sender_cpu before forwarding") fixed an issue in normal forward path, caused by sender_cpu & napi_id skb fields being an union. Bridge is another point where skb can be forwarded, so we need the same cure. Bug triggers if packet was received on a NIC using skb_mark_napi_id() Fixes: 2bd82484bb4c ("xps: fix xps for stacked devices") Signed-off-by: Eric Dumazet Reported-by: Bob Liu Tested-by: Bob Liu Signed-off-by: David S. Miller --- net/bridge/br_forward.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index e97572b5d2cc..0ff6e1bbca91 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -42,6 +42,7 @@ int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb) } else { skb_push(skb, ETH_HLEN); br_drop_fake_rtable(skb); + skb_sender_cpu_clear(skb); dev_queue_xmit(skb); } From 5d75a747596be046546eeb9d6ba39a3af851a1af Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 7 Jul 2015 12:17:36 +0200 Subject: [PATCH 088/170] x86: hyperv: add CPUID bit for crash handlers Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/hyperv.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 8fba544e9cc4..f36d56bd7632 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -108,6 +108,8 @@ #define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE (1 << 4) /* Support for a virtual guest idle state is available */ #define HV_X64_GUEST_IDLE_STATE_AVAILABLE (1 << 5) +/* Guest crash data handler available */ +#define HV_X64_GUEST_CRASH_MSR_AVAILABLE (1 << 10) /* * Implementation recommendations. Indicates which behaviors the hypervisor From d1fe9219551e914f26219afaca1063b280f25963 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 7 Jul 2015 15:03:18 +0200 Subject: [PATCH 089/170] KVM: x86: reintroduce kvm_is_mmio_pfn The call to get_mt_mask was really using kvm_is_reserved_pfn to detect an MMIO-backed page. In this case, we want "false" to be returned for the zero page. Reintroduce a separate kvm_is_mmio_pfn predicate for this use only. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f807496b62c2..44171462bd2a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2479,6 +2479,14 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, return 0; } +static bool kvm_is_mmio_pfn(pfn_t pfn) +{ + if (pfn_valid(pfn)) + return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn)); + + return true; +} + static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, int level, gfn_t gfn, pfn_t pfn, bool speculative, @@ -2506,7 +2514,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, spte |= PT_PAGE_SIZE_MASK; if (tdp_enabled) spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, - kvm_is_reserved_pfn(pfn)); + kvm_is_mmio_pfn(pfn)); if (host_writable) spte |= SPTE_HOST_WRITEABLE; From 370777daab3f024f1645177039955088e2e9ae73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Fri, 3 Jul 2015 15:49:28 +0200 Subject: [PATCH 090/170] KVM: VMX: fix vmwrite to invalid VMCS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fpu_activate is called outside of vcpu_load(), which means it should not touch VMCS, but fpu_activate needs to. Avoid the call by moving it to a point where we know that the guest needs eager FPU and VMCS is loaded. This will get rid of the following trace vmwrite error: reg 6800 value 0 (err 1) [] dump_stack+0x19/0x1b [] vmwrite_error+0x2c/0x2e [kvm_intel] [] vmcs_writel+0x1f/0x30 [kvm_intel] [] vmx_fpu_activate.part.61+0x45/0xb0 [kvm_intel] [] vmx_fpu_activate+0x15/0x20 [kvm_intel] [] kvm_arch_vcpu_create+0x51/0x70 [kvm] [] kvm_vm_ioctl+0x1c1/0x760 [kvm] [] ? handle_mm_fault+0x49a/0xec0 [] do_vfs_ioctl+0x2e5/0x4c0 [] ? file_has_perm+0xae/0xc0 [] SyS_ioctl+0xa1/0xc0 [] system_call_fastpath+0x16/0x1b (Note: we also unconditionally activate FPU in vmx_vcpu_reset(), so the removed code added nothing.) Fixes: c447e76b4cab ("kvm/fpu: Enable eager restore kvm FPU for MPX") Cc: Reported-by: Vlastimil Holer Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 2 ++ arch/x86/kvm/x86.c | 5 ----- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 64dd46793099..2fbea2544f24 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -98,6 +98,8 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) best->ebx = xstate_required_size(vcpu->arch.xcr0, true); vcpu->arch.eager_fpu = use_eager_fpu() || guest_cpuid_has_mpx(vcpu); + if (vcpu->arch.eager_fpu) + kvm_x86_ops->fpu_activate(vcpu); /* * The existing code assumes virtual address is 48-bit in the canonical diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bbaf44e8f0d3..6bd19c7abc65 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7315,11 +7315,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu = kvm_x86_ops->vcpu_create(kvm, id); - /* - * Activate fpu unconditionally in case the guest needs eager FPU. It will be - * deactivated soon if it doesn't. - */ - kvm_x86_ops->fpu_activate(vcpu); return vcpu; } From 5544eb9b81940647b8fad1f251b37cbe2819ce44 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 7 Jul 2015 15:41:58 +0200 Subject: [PATCH 091/170] KVM: count number of assigned devices If there are no assigned devices, the guest PAT are not providing any useful information and can be overridden to writeback; VMX always does this because it has the "IPAT" bit in its extended page table entries, but SVM does not have anything similar. Hook into VFIO and legacy device assignment so that they provide this information to KVM. Reviewed-by: Alex Williamson Tested-by: Joerg Roedel Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/iommu.c | 2 ++ arch/x86/kvm/x86.c | 18 ++++++++++++++++++ include/linux/kvm_host.h | 18 ++++++++++++++++++ virt/kvm/vfio.c | 5 +++++ 5 files changed, 45 insertions(+) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2a7f5d782c33..49ec9038ec14 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -604,6 +604,8 @@ struct kvm_arch { bool iommu_noncoherent; #define __KVM_HAVE_ARCH_NONCOHERENT_DMA atomic_t noncoherent_dma_count; +#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE + atomic_t assigned_device_count; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; diff --git a/arch/x86/kvm/iommu.c b/arch/x86/kvm/iommu.c index 7dbced309ddb..5c520ebf6343 100644 --- a/arch/x86/kvm/iommu.c +++ b/arch/x86/kvm/iommu.c @@ -200,6 +200,7 @@ int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev) goto out_unmap; } + kvm_arch_start_assignment(kvm); pci_set_dev_assigned(pdev); dev_info(&pdev->dev, "kvm assign device\n"); @@ -224,6 +225,7 @@ int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev) iommu_detach_device(domain, &pdev->dev); pci_clear_dev_assigned(pdev); + kvm_arch_end_assignment(kvm); dev_info(&pdev->dev, "kvm deassign device\n"); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6bd19c7abc65..0024968b342d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8213,6 +8213,24 @@ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) kvm_x86_ops->interrupt_allowed(vcpu); } +void kvm_arch_start_assignment(struct kvm *kvm) +{ + atomic_inc(&kvm->arch.assigned_device_count); +} +EXPORT_SYMBOL_GPL(kvm_arch_start_assignment); + +void kvm_arch_end_assignment(struct kvm *kvm) +{ + atomic_dec(&kvm->arch.assigned_device_count); +} +EXPORT_SYMBOL_GPL(kvm_arch_end_assignment); + +bool kvm_arch_has_assigned_device(struct kvm *kvm) +{ + return atomic_read(&kvm->arch.assigned_device_count); +} +EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device); + void kvm_arch_register_noncoherent_dma(struct kvm *kvm) { atomic_inc(&kvm->arch.noncoherent_dma_count); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9564fd78c547..05e99b8ef465 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -734,6 +734,24 @@ static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) return false; } #endif +#ifdef __KVM_HAVE_ARCH_ASSIGNED_DEVICE +void kvm_arch_start_assignment(struct kvm *kvm); +void kvm_arch_end_assignment(struct kvm *kvm); +bool kvm_arch_has_assigned_device(struct kvm *kvm); +#else +static inline void kvm_arch_start_assignment(struct kvm *kvm) +{ +} + +static inline void kvm_arch_end_assignment(struct kvm *kvm) +{ +} + +static inline bool kvm_arch_has_assigned_device(struct kvm *kvm) +{ + return false; +} +#endif static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) { diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index 620e37f741b8..1dd087da6f31 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -155,6 +155,8 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) list_add_tail(&kvg->node, &kv->group_list); kvg->vfio_group = vfio_group; + kvm_arch_start_assignment(dev->kvm); + mutex_unlock(&kv->lock); kvm_vfio_update_coherency(dev); @@ -190,6 +192,8 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) break; } + kvm_arch_end_assignment(dev->kvm); + mutex_unlock(&kv->lock); kvm_vfio_group_put_external_user(vfio_group); @@ -239,6 +243,7 @@ static void kvm_vfio_destroy(struct kvm_device *dev) kvm_vfio_group_put_external_user(kvg->vfio_group); list_del(&kvg->node); kfree(kvg); + kvm_arch_end_assignment(dev->kvm); } kvm_vfio_update_coherency(dev); From 3c2e7f7de3240216042b61073803b61b9b3cfb22 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 7 Jul 2015 14:32:17 +0200 Subject: [PATCH 092/170] KVM: SVM: use NPT page attributes Right now, NPT page attributes are not used, and the final page attribute depends solely on gPAT (which however is not synced correctly), the guest MTRRs and the guest page attributes. However, we can do better by mimicking what is done for VMX. In the absence of PCI passthrough, the guest PAT can be ignored and the page attributes can be just WB. If passthrough is being used, instead, keep respecting the guest PAT, and emulate the guest MTRRs through the PAT field of the nested page tables. The only snag is that WP memory cannot be emulated correctly, because Linux's default PAT setting only includes the other types. Tested-by: Joerg Roedel Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 101 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 96 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 602b974a60a6..414ec25b673e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -865,6 +865,64 @@ static void svm_disable_lbrv(struct vcpu_svm *svm) set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0); } +#define MTRR_TYPE_UC_MINUS 7 +#define MTRR2PROTVAL_INVALID 0xff + +static u8 mtrr2protval[8]; + +static u8 fallback_mtrr_type(int mtrr) +{ + /* + * WT and WP aren't always available in the host PAT. Treat + * them as UC and UC- respectively. Everything else should be + * there. + */ + switch (mtrr) + { + case MTRR_TYPE_WRTHROUGH: + return MTRR_TYPE_UNCACHABLE; + case MTRR_TYPE_WRPROT: + return MTRR_TYPE_UC_MINUS; + default: + BUG(); + } +} + +static void build_mtrr2protval(void) +{ + int i; + u64 pat; + + for (i = 0; i < 8; i++) + mtrr2protval[i] = MTRR2PROTVAL_INVALID; + + /* Ignore the invalid MTRR types. */ + mtrr2protval[2] = 0; + mtrr2protval[3] = 0; + + /* + * Use host PAT value to figure out the mapping from guest MTRR + * values to nested page table PAT/PCD/PWT values. We do not + * want to change the host PAT value every time we enter the + * guest. + */ + rdmsrl(MSR_IA32_CR_PAT, pat); + for (i = 0; i < 8; i++) { + u8 mtrr = pat >> (8 * i); + + if (mtrr2protval[mtrr] == MTRR2PROTVAL_INVALID) + mtrr2protval[mtrr] = __cm_idx2pte(i); + } + + for (i = 0; i < 8; i++) { + if (mtrr2protval[i] == MTRR2PROTVAL_INVALID) { + u8 fallback = fallback_mtrr_type(i); + mtrr2protval[i] = mtrr2protval[fallback]; + BUG_ON(mtrr2protval[i] == MTRR2PROTVAL_INVALID); + } + } +} + static __init int svm_hardware_setup(void) { int cpu; @@ -931,6 +989,7 @@ static __init int svm_hardware_setup(void) } else kvm_disable_tdp(); + build_mtrr2protval(); return 0; err: @@ -1085,6 +1144,42 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) return target_tsc - tsc; } +static void svm_set_guest_pat(struct vcpu_svm *svm, u64 *g_pat) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + + /* Unlike Intel, AMD takes the guest's CR0.CD into account. + * + * AMD does not have IPAT. To emulate it for the case of guests + * with no assigned devices, just set everything to WB. If guests + * have assigned devices, however, we cannot force WB for RAM + * pages only, so use the guest PAT directly. + */ + if (!kvm_arch_has_assigned_device(vcpu->kvm)) + *g_pat = 0x0606060606060606; + else + *g_pat = vcpu->arch.pat; +} + +static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) +{ + u8 mtrr; + + /* + * 1. MMIO: always map as UC + * 2. No passthrough: always map as WB, and force guest PAT to WB as well + * 3. Passthrough: can't guarantee the result, try to trust guest. + */ + if (is_mmio) + return _PAGE_NOCACHE; + + if (!kvm_arch_has_assigned_device(vcpu->kvm)) + return 0; + + mtrr = kvm_mtrr_get_guest_memory_type(vcpu, gfn); + return mtrr2protval[mtrr]; +} + static void init_vmcb(struct vcpu_svm *svm, bool init_event) { struct vmcb_control_area *control = &svm->vmcb->control; @@ -1180,6 +1275,7 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event) clr_cr_intercept(svm, INTERCEPT_CR3_READ); clr_cr_intercept(svm, INTERCEPT_CR3_WRITE); save->g_pat = svm->vcpu.arch.pat; + svm_set_guest_pat(svm, &save->g_pat); save->cr3 = 0; save->cr4 = 0; } @@ -4088,11 +4184,6 @@ static bool svm_has_high_real_mode_segbase(void) return true; } -static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) -{ - return 0; -} - static void svm_cpuid_update(struct kvm_vcpu *vcpu) { } From e098223b789b4a618dacd79e5e0dad4a9d5018d1 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 20 Apr 2015 19:25:31 +0200 Subject: [PATCH 093/170] KVM: SVM: Sync g_pat with guest-written PAT value When hardware supports the g_pat VMCB field, we can use it for emulating the PAT configuration that the guest configures by writing to the corresponding MSR. Signed-off-by: Jan Kiszka Tested-by: Joerg Roedel Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 414ec25b673e..d36cfaf5a97a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3350,6 +3350,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_VM_IGNNE: vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); break; + case MSR_IA32_CR_PAT: + if (npt_enabled) { + if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) + return 1; + vcpu->arch.pat = data; + svm_set_guest_pat(svm, &svm->vmcb->save.g_pat); + mark_dirty(svm->vmcb, VMCB_NPT); + break; + } + /* fall through */ default: return kvm_set_msr_common(vcpu, msr); } From fd717f11015f673487ffc826e59b2bad69d20fe5 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 7 Jul 2015 14:38:13 +0200 Subject: [PATCH 094/170] KVM: x86: apply guest MTRR virtualization on host reserved pages Currently guest MTRR is avoided if kvm_is_reserved_pfn returns true. However, the guest could prefer a different page type than UC for such pages. A good example is that pass-throughed VGA frame buffer is not always UC as host expected. This patch enables full use of virtual guest MTRRs. Suggested-by: Xiao Guangrong Tested-by: Joerg Roedel (on AMD) Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 7 ++----- arch/x86/kvm/vmx.c | 11 +++-------- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d36cfaf5a97a..bbc678a66b18 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1166,14 +1166,11 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) u8 mtrr; /* - * 1. MMIO: always map as UC + * 1. MMIO: trust guest MTRR, so same as item 3. * 2. No passthrough: always map as WB, and force guest PAT to WB as well * 3. Passthrough: can't guarantee the result, try to trust guest. */ - if (is_mmio) - return _PAGE_NOCACHE; - - if (!kvm_arch_has_assigned_device(vcpu->kvm)) + if (!is_mmio && !kvm_arch_has_assigned_device(vcpu->kvm)) return 0; mtrr = kvm_mtrr_get_guest_memory_type(vcpu, gfn); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e856dd566f4c..5b4e9384717a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8632,22 +8632,17 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) u64 ipat = 0; /* For VT-d and EPT combination - * 1. MMIO: always map as UC + * 1. MMIO: guest may want to apply WC, trust it. * 2. EPT with VT-d: * a. VT-d without snooping control feature: can't guarantee the - * result, try to trust guest. + * result, try to trust guest. So the same as item 1. * b. VT-d with snooping control feature: snooping control feature of * VT-d engine can guarantee the cache correctness. Just set it * to WB to keep consistent with host. So the same as item 3. * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep * consistent with host MTRR */ - if (is_mmio) { - cache = MTRR_TYPE_UNCACHABLE; - goto exit; - } - - if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) { + if (!is_mmio && !kvm_arch_has_noncoherent_dma(vcpu->kvm)) { ipat = VMX_EPT_IPAT_BIT; cache = MTRR_TYPE_WRBACK; goto exit; From ee4100da1616d6d151f97862b87e8a4e43d14b4c Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 9 Jul 2015 15:44:52 +0800 Subject: [PATCH 095/170] kvm: x86: fix load xsave feature warning [ 68.196974] WARNING: CPU: 1 PID: 2140 at arch/x86/kvm/x86.c:3161 kvm_arch_vcpu_ioctl+0xe88/0x1340 [kvm]() [ 68.196975] Modules linked in: snd_hda_codec_hdmi i915 rfcomm bnep bluetooth i2c_algo_bit rfkill nfsd drm_kms_helper nfs_acl nfs drm lockd grace sunrpc fscache snd_hda_codec_realtek snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_seq_dummy snd_seq_oss x86_pkg_temp_thermal snd_seq_midi kvm_intel snd_seq_midi_event snd_rawmidi kvm snd_seq ghash_clmulni_intel fuse snd_timer aesni_intel parport_pc ablk_helper snd_seq_device cryptd ppdev snd lp parport lrw dcdbas gf128mul i2c_core glue_helper lpc_ich video shpchp mfd_core soundcore serio_raw acpi_cpufreq ext4 mbcache jbd2 sd_mod crc32c_intel ahci libahci libata e1000e ptp pps_core [ 68.197005] CPU: 1 PID: 2140 Comm: qemu-system-x86 Not tainted 4.2.0-rc1+ #2 [ 68.197006] Hardware name: Dell Inc. OptiPlex 7020/0F5C5X, BIOS A03 01/08/2015 [ 68.197007] ffffffffa03b0657 ffff8800d984bca8 ffffffff815915a2 0000000000000000 [ 68.197009] 0000000000000000 ffff8800d984bce8 ffffffff81057c0a 00007ff6d0001000 [ 68.197010] 0000000000000002 ffff880211c1a000 0000000000000004 ffff8800ce0288c0 [ 68.197012] Call Trace: [ 68.197017] [] dump_stack+0x45/0x57 [ 68.197020] [] warn_slowpath_common+0x8a/0xc0 [ 68.197022] [] warn_slowpath_null+0x1a/0x20 [ 68.197029] [] kvm_arch_vcpu_ioctl+0xe88/0x1340 [kvm] [ 68.197035] [] ? kvm_arch_vcpu_load+0x4e/0x1c0 [kvm] [ 68.197040] [] kvm_vcpu_ioctl+0xc6/0x5c0 [kvm] [ 68.197043] [] ? perf_pmu_enable+0x22/0x30 [ 68.197044] [] ? perf_event_context_sched_in+0x7e/0xb0 [ 68.197048] [] do_vfs_ioctl+0x2c2/0x4a0 [ 68.197050] [] ? finish_task_switch+0x173/0x220 [ 68.197053] [] ? selinux_file_ioctl+0x4f/0xd0 [ 68.197055] [] ? security_file_ioctl+0x43/0x60 [ 68.197057] [] SyS_ioctl+0x79/0x90 [ 68.197060] [] entry_SYSCALL_64_fastpath+0x12/0x6a [ 68.197061] ---[ end trace 558a5ebf9445fc80 ]--- After commit (0c4109bec0 'x86/fpu/xstate: Fix up bad get_xsave_addr() assumptions'), there is no assumption an xsave bit is present in the hardware (pcntxt_mask) that it is always present in a given xsave buffer. An enabled state to be present on 'pcntxt_mask', but *not* in 'xstate_bv' could happen when the last 'xsave' did not request that this feature be saved (unlikely) or because the "init optimization" caused it to not be saved. This patch kill the assumption. Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0024968b342d..5ef2560075bf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3157,8 +3157,7 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) cpuid_count(XSTATE_CPUID, index, &size, &offset, &ecx, &edx); memcpy(dest, src + offset, size); - } else - WARN_ON_ONCE(1); + } valid -= feature; } From e9e4dd3267d0c5234c5c0f47440456b10875dec9 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 9 Jul 2015 09:59:09 +0300 Subject: [PATCH 096/170] net: do not process device backlog during unregistration commit 381c759d9916 ("ipv4: Avoid crashing in ip_error") fixes a problem where processed packet comes from device with destroyed inetdev (dev->ip_ptr). This is not expected because inetdev_destroy is called in NETDEV_UNREGISTER phase and packets should not be processed after dev_close_many() and synchronize_net(). Above fix is still required because inetdev_destroy can be called for other reasons. But it shows the real problem: backlog can keep packets for long time and they do not hold reference to device. Such packets are then delivered to upper levels at the same time when device is unregistered. Calling flush_backlog after NETDEV_UNREGISTER_FINAL still accounts all packets from backlog but before that some packets continue to be delivered to upper levels long after the synchronize_net call which is supposed to wait the last ones. Also, as Eric pointed out, processed packets, mostly from other devices, can continue to add new packets to backlog. Fix the problem by moving flush_backlog early, after the device driver is stopped and before the synchronize_net() call. Then use netif_running check to make sure we do not add more packets to backlog. We have to do it in enqueue_to_backlog context when the local IRQ is disabled. As result, after the flush_backlog and synchronize_net sequence all packets should be accounted. Thanks to Eric W. Biederman for the test script and his valuable feedback! Reported-by: Vittorio Gambaletta Fixes: 6e583ce5242f ("net: eliminate refcounting in backlog queue") Cc: Eric W. Biederman Cc: Stephen Hemminger Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/core/dev.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 1e57efda055b..6dd126a69aa0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3448,6 +3448,8 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, local_irq_save(flags); rps_lock(sd); + if (!netif_running(skb->dev)) + goto drop; qlen = skb_queue_len(&sd->input_pkt_queue); if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) { if (qlen) { @@ -3469,6 +3471,7 @@ enqueue: goto enqueue; } +drop: sd->dropped++; rps_unlock(sd); @@ -6135,6 +6138,7 @@ static void rollback_registered_many(struct list_head *head) unlist_netdevice(dev); dev->reg_state = NETREG_UNREGISTERING; + on_each_cpu(flush_backlog, dev, 1); } synchronize_net(); @@ -6770,8 +6774,6 @@ void netdev_run_todo(void) dev->reg_state = NETREG_UNREGISTERED; - on_each_cpu(flush_backlog, dev, 1); - netdev_wait_allrefs(dev); /* paranoia */ From 2c17d27c36dcce2b6bf689f41a46b9e909877c21 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 9 Jul 2015 09:59:10 +0300 Subject: [PATCH 097/170] net: call rcu_read_lock early in process_backlog Incoming packet should be either in backlog queue or in RCU read-side section. Otherwise, the final sequence of flush_backlog() and synchronize_net() may miss packets that can run without device reference: CPU 1 CPU 2 skb->dev: no reference process_backlog:__skb_dequeue process_backlog:local_irq_enable on_each_cpu for flush_backlog => IPI(hardirq): flush_backlog - packet not found in backlog CPU delayed ... synchronize_net - no ongoing RCU read-side sections netdev_run_todo, rcu_barrier: no ongoing callbacks __netif_receive_skb_core:rcu_read_lock - too late free dev process packet for freed dev Fixes: 6e583ce5242f ("net: eliminate refcounting in backlog queue") Cc: Eric W. Biederman Cc: Stephen Hemminger Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/core/dev.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 6dd126a69aa0..a8e4dd430285 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3774,8 +3774,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) pt_prev = NULL; - rcu_read_lock(); - another_round: skb->skb_iif = skb->dev->ifindex; @@ -3785,7 +3783,7 @@ another_round: skb->protocol == cpu_to_be16(ETH_P_8021AD)) { skb = skb_vlan_untag(skb); if (unlikely(!skb)) - goto unlock; + goto out; } #ifdef CONFIG_NET_CLS_ACT @@ -3815,10 +3813,10 @@ skip_taps: if (static_key_false(&ingress_needed)) { skb = handle_ing(skb, &pt_prev, &ret, orig_dev); if (!skb) - goto unlock; + goto out; if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0) - goto unlock; + goto out; } #endif #ifdef CONFIG_NET_CLS_ACT @@ -3836,7 +3834,7 @@ ncls: if (vlan_do_receive(&skb)) goto another_round; else if (unlikely(!skb)) - goto unlock; + goto out; } rx_handler = rcu_dereference(skb->dev->rx_handler); @@ -3848,7 +3846,7 @@ ncls: switch (rx_handler(&skb)) { case RX_HANDLER_CONSUMED: ret = NET_RX_SUCCESS; - goto unlock; + goto out; case RX_HANDLER_ANOTHER: goto another_round; case RX_HANDLER_EXACT: @@ -3902,8 +3900,7 @@ drop: ret = NET_RX_DROP; } -unlock: - rcu_read_unlock(); +out: return ret; } @@ -3934,29 +3931,30 @@ static int __netif_receive_skb(struct sk_buff *skb) static int netif_receive_skb_internal(struct sk_buff *skb) { + int ret; + net_timestamp_check(netdev_tstamp_prequeue, skb); if (skb_defer_rx_timestamp(skb)) return NET_RX_SUCCESS; + rcu_read_lock(); + #ifdef CONFIG_RPS if (static_key_false(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; - int cpu, ret; - - rcu_read_lock(); - - cpu = get_rps_cpu(skb->dev, skb, &rflow); + int cpu = get_rps_cpu(skb->dev, skb, &rflow); if (cpu >= 0) { ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); rcu_read_unlock(); return ret; } - rcu_read_unlock(); } #endif - return __netif_receive_skb(skb); + ret = __netif_receive_skb(skb); + rcu_read_unlock(); + return ret; } /** @@ -4501,8 +4499,10 @@ static int process_backlog(struct napi_struct *napi, int quota) struct sk_buff *skb; while ((skb = __skb_dequeue(&sd->process_queue))) { + rcu_read_lock(); local_irq_enable(); __netif_receive_skb(skb); + rcu_read_unlock(); local_irq_disable(); input_queue_head_incr(sd); if (++work >= quota) { From 51ed7f3e7d33824820837ad784801973f147c51a Mon Sep 17 00:00:00 2001 From: Satish Ashok Date: Thu, 9 Jul 2015 04:12:45 -0700 Subject: [PATCH 098/170] bridge: mdb: allow the user to delete mdb entry if there's a querier Until now when a querier was present static entries couldn't be deleted. Fix this and allow the user to manipulate the mdb with or without a querier. Signed-off-by: Satish Ashok Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_mdb.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 60868c212679..c11cf2611db0 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -423,19 +423,12 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) memset(&ip, 0, sizeof(ip)); ip.proto = entry->addr.proto; - if (ip.proto == htons(ETH_P_IP)) { - if (timer_pending(&br->ip4_other_query.timer)) - return -EBUSY; - + if (ip.proto == htons(ETH_P_IP)) ip.u.ip4 = entry->addr.u.ip4; #if IS_ENABLED(CONFIG_IPV6) - } else { - if (timer_pending(&br->ip6_other_query.timer)) - return -EBUSY; - + else ip.u.ip6 = entry->addr.u.ip6; #endif - } spin_lock_bh(&br->multicast_lock); mdb = mlock_dereference(br->mdb, br); From 8220ea23243178c16b87fc3ccadf071647b64e04 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 10 Jul 2015 11:39:57 +0200 Subject: [PATCH 099/170] net: inet_diag: always export IPV6_V6ONLY sockopt for listening sockets Reconsidering my commit 20462155 "net: inet_diag: export IPV6_V6ONLY sockopt", I am not happy with the limitations it causes for socket analysing code in userspace. Exporting the value only if it is set makes it hard for userspace to decide whether the option is not set or the kernel does not support exporting the option at all. >From an auditor's perspective, the interesting question for listening AF_INET6 sockets is: "Does it NOT have IPV6_V6ONLY set?" Because it is the unexpected case. This patch allows to answer this question reliably. Signed-off-by: Phil Sutter Cc: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_diag.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 9bc26677058e..c3b1f3a0f4cf 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -152,8 +152,8 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, inet6_sk(sk)->tclass) < 0) goto errout; - if (ipv6_only_sock(sk) && - nla_put_u8(skb, INET_DIAG_SKV6ONLY, 1)) + if (((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && + nla_put_u8(skb, INET_DIAG_SKV6ONLY, ipv6_only_sock(sk))) goto errout; } #endif From 145c37084e3e6584b95f82361922965cc3af41bf Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Fri, 10 Jul 2015 21:20:28 +0900 Subject: [PATCH 100/170] Doc: z8530book: Fix typo in API-z8530-sync-txdma-open.html This patch fix a spelling typo found in API-z8530-sync-txdma-open.html. It is because this file was generated from comment in source, I have to fix comment in source. Signed-off-by: Masanari Iida Signed-off-by: David S. Miller --- drivers/net/wan/z85230.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wan/z85230.c b/drivers/net/wan/z85230.c index feacc3b994b7..2f0bd6955f33 100644 --- a/drivers/net/wan/z85230.c +++ b/drivers/net/wan/z85230.c @@ -1044,7 +1044,7 @@ EXPORT_SYMBOL(z8530_sync_dma_close); * @dev: The network device to attach * @c: The Z8530 channel to configure in sync DMA mode. * - * Set up a Z85x30 device for synchronous DMA tranmission. One + * Set up a Z85x30 device for synchronous DMA transmission. One * ISA DMA channel must be available for this to work. The receive * side is run in PIO mode, but then it has the bigger FIFO. */ From 77b5a08427e87514c33730afc18cd02c9475e2c3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 6 Mar 2015 08:37:46 -0700 Subject: [PATCH 101/170] bcache: don't embed 'return' statements in closure macros This is horribly confusing, it breaks the flow of the code without it being apparent in the caller. Signed-off-by: Jens Axboe Acked-by: Christoph Hellwig --- drivers/md/bcache/closure.h | 3 --- drivers/md/bcache/io.c | 1 + drivers/md/bcache/journal.c | 2 ++ drivers/md/bcache/request.c | 14 +++++++++++--- 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h index a08e3eeac3c5..79a6d63e8ed3 100644 --- a/drivers/md/bcache/closure.h +++ b/drivers/md/bcache/closure.h @@ -320,7 +320,6 @@ static inline void closure_wake_up(struct closure_waitlist *list) do { \ set_closure_fn(_cl, _fn, _wq); \ closure_sub(_cl, CLOSURE_RUNNING + 1); \ - return; \ } while (0) /** @@ -349,7 +348,6 @@ do { \ do { \ set_closure_fn(_cl, _fn, _wq); \ closure_queue(_cl); \ - return; \ } while (0) /** @@ -365,7 +363,6 @@ do { \ do { \ set_closure_fn(_cl, _destructor, NULL); \ closure_sub(_cl, CLOSURE_RUNNING - CLOSURE_DESTRUCTOR + 1); \ - return; \ } while (0) /** diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index cb64e64a4789..bf6a9ca18403 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -105,6 +105,7 @@ void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p) } while (n != bio); continue_at(&s->cl, bch_bio_submit_split_done, NULL); + return; submit: generic_make_request(bio); } diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index ce64fc851251..418607a6ba33 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -592,12 +592,14 @@ static void journal_write_unlocked(struct closure *cl) if (!w->need_write) { closure_return_with_destructor(cl, journal_write_unlock); + return; } else if (journal_full(&c->journal)) { journal_reclaim(c); spin_unlock(&c->journal.lock); btree_flush_write(c); continue_at(cl, journal_write, system_wq); + return; } c->journal.blocks_free -= set_blocks(w->data, block_bytes(c)); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 4afb2d26b148..f292790997d7 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -88,8 +88,10 @@ static void bch_data_insert_keys(struct closure *cl) if (journal_ref) atomic_dec_bug(journal_ref); - if (!op->insert_data_done) + if (!op->insert_data_done) { continue_at(cl, bch_data_insert_start, op->wq); + return; + } bch_keylist_free(&op->insert_keys); closure_return(cl); @@ -216,8 +218,10 @@ static void bch_data_insert_start(struct closure *cl) /* 1 for the device pointer and 1 for the chksum */ if (bch_keylist_realloc(&op->insert_keys, 3 + (op->csum ? 1 : 0), - op->c)) + op->c)) { continue_at(cl, bch_data_insert_keys, op->wq); + return; + } k = op->insert_keys.top; bkey_init(k); @@ -255,6 +259,7 @@ static void bch_data_insert_start(struct closure *cl) op->insert_data_done = true; continue_at(cl, bch_data_insert_keys, op->wq); + return; err: /* bch_alloc_sectors() blocks if s->writeback = true */ BUG_ON(op->writeback); @@ -576,8 +581,10 @@ static void cache_lookup(struct closure *cl) ret = bch_btree_map_keys(&s->op, s->iop.c, &KEY(s->iop.inode, bio->bi_iter.bi_sector, 0), cache_lookup_fn, MAP_END_KEY); - if (ret == -EAGAIN) + if (ret == -EAGAIN) { continue_at(cl, cache_lookup, bcache_wq); + return; + } closure_return(cl); } @@ -1085,6 +1092,7 @@ static void flash_dev_make_request(struct request_queue *q, struct bio *bio) continue_at_nobarrier(&s->cl, flash_dev_nodata, bcache_wq); + return; } else if (rw) { bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys, &KEY(d->id, bio->bi_iter.bi_sector, 0), From 22401ff17f993bda9acd14ac39213e6f9bb985d0 Mon Sep 17 00:00:00 2001 From: Enrico Mioso Date: Sat, 11 Jul 2015 17:30:01 +0200 Subject: [PATCH 102/170] cdc_ncm: update specs URL Update referenced specs link to reflect actual file version and location. Signed-off-by: Enrico Mioso Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 1991e4a24657..db40175b1a0b 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -6,7 +6,7 @@ * Original author: Hans Petter Selasky * * USB Host Driver for Network Control Model (NCM) - * http://www.usb.org/developers/devclass_docs/NCM10.zip + * http://www.usb.org/developers/docs/devclass_docs/NCM10_012011.zip * * The NCM encoding, decoding and initialization logic * derives from FreeBSD 8.x. if_cdce.c and if_cdcereg.h From c590032f9ab8d03aab235a7601db22a267c2d958 Mon Sep 17 00:00:00 2001 From: Petri Gynther Date: Fri, 10 Jul 2015 16:20:00 -0700 Subject: [PATCH 103/170] net: bcmgenet: fix accounting of packet drops vs errors bcmgenet driver needs to separate packet drops from packet errors. When the driver has to drop a *good* packet, due to lack of buffers or replacement skbs, increment only dev->stats.[rx|tx]_dropped. When the driver encounters a bad Rx packet or Tx error, increment only dev->stats.[rx|tx]_errors + relevant detailed error counter. Signed-off-by: Petri Gynther Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index b43b2cb9b830..64c1e9db6b0b 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1230,7 +1230,6 @@ static struct sk_buff *bcmgenet_put_tx_csum(struct net_device *dev, new_skb = skb_realloc_headroom(skb, sizeof(*status)); dev_kfree_skb(skb); if (!new_skb) { - dev->stats.tx_errors++; dev->stats.tx_dropped++; return NULL; } @@ -1465,7 +1464,6 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, if (unlikely(!skb)) { dev->stats.rx_dropped++; - dev->stats.rx_errors++; goto next; } @@ -1493,7 +1491,6 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, if (unlikely(!(dma_flag & DMA_EOP) || !(dma_flag & DMA_SOP))) { netif_err(priv, rx_status, dev, "dropping fragmented packet!\n"); - dev->stats.rx_dropped++; dev->stats.rx_errors++; dev_kfree_skb_any(skb); goto next; @@ -1515,7 +1512,6 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, dev->stats.rx_frame_errors++; if (dma_flag & DMA_RX_LG) dev->stats.rx_length_errors++; - dev->stats.rx_dropped++; dev->stats.rx_errors++; dev_kfree_skb_any(skb); goto next; From 2ee94014d9bd3868b1c0d17405f96d63bec83f28 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 10 Jul 2015 19:48:58 -0400 Subject: [PATCH 104/170] net: switchdev: don't abort unsupported operations There is no need to abort attribute setting or object addition, if the prepare phase returned operation not supported. Thus, abort these two transactions only if the error is not -EOPNOTSUPP. Signed-off-by: Vivien Didelot Acked-by: Jiri Pirko Acked-by: Scott Feldman Signed-off-by: David S. Miller --- net/switchdev/switchdev.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 84f77a054025..9f2add3cba26 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -171,8 +171,10 @@ int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr) * released. */ - attr->trans = SWITCHDEV_TRANS_ABORT; - __switchdev_port_attr_set(dev, attr); + if (err != -EOPNOTSUPP) { + attr->trans = SWITCHDEV_TRANS_ABORT; + __switchdev_port_attr_set(dev, attr); + } return err; } @@ -249,8 +251,10 @@ int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj) * released. */ - obj->trans = SWITCHDEV_TRANS_ABORT; - __switchdev_port_obj_add(dev, obj); + if (err != -EOPNOTSUPP) { + obj->trans = SWITCHDEV_TRANS_ABORT; + __switchdev_port_obj_add(dev, obj); + } return err; } From 8f5063e97f393d49611151d3cf7dcbeb41397f12 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 11 Jul 2015 18:02:10 -0700 Subject: [PATCH 105/170] net: dsa: Test array index before use port_index is used an index into an array, and this information comes from Device Tree, make sure that port_index is not equal to the array size before using it. Move the check against port_index earlier in the loop. Fixes: 5e95329b701c: ("dsa: add device tree bindings to register DSA switches") Reported-by: Dan Carpenter Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 392e29a0227d..52beeb8829dc 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -642,6 +642,8 @@ static int dsa_of_probe(struct device *dev) continue; port_index = be32_to_cpup(port_reg); + if (port_index >= DSA_MAX_PORTS) + break; port_name = of_get_property(port, "label", NULL); if (!port_name) @@ -666,8 +668,6 @@ static int dsa_of_probe(struct device *dev) goto out_free_chip; } - if (port_index == DSA_MAX_PORTS) - break; } } From c8cf89f73f3d9ecbdea479778f0ac714be79be33 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 11 Jul 2015 18:02:11 -0700 Subject: [PATCH 106/170] net: dsa: Fix off-by-one in switch address parsing cd->sw_addr is used as a MDIO bus address, which cannot exceed PHY_MAX_ADDR (32), our check was off-by-one. Fixes: 5e95329b701c ("dsa: add device tree bindings to register DSA switches") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 52beeb8829dc..b445d492c115 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -630,7 +630,7 @@ static int dsa_of_probe(struct device *dev) continue; cd->sw_addr = be32_to_cpup(sw_addr); - if (cd->sw_addr > PHY_MAX_ADDR) + if (cd->sw_addr >= PHY_MAX_ADDR) continue; if (!of_property_read_u32(child, "eeprom-length", &eeprom_len)) From 5e63e6baa159fa8c787cf783dbf3d77fbea97331 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 20 Jun 2015 03:32:46 +0300 Subject: [PATCH 107/170] can: rcar_can: fix IRQ check rcar_can_probe() regards 0 as a wrong IRQ #, despite platform_get_irq() that it calls returns negative error code in that case. This leads to the following being printed to the console when attempting to open the device: error requesting interrupt fffffffa because rcar_can_open() calls request_irq() with a negative IRQ #, and that function naturally fails with -EINVAL. Check for the negative error codes instead and propagate them upstream instead of just returning -ENODEV. Fixes: fd1159318e55 ("can: add Renesas R-Car CAN driver") Signed-off-by: Sergei Shtylyov Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rcar_can.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c index 7deb80dcbe8c..93017c09cfc3 100644 --- a/drivers/net/can/rcar_can.c +++ b/drivers/net/can/rcar_can.c @@ -758,8 +758,9 @@ static int rcar_can_probe(struct platform_device *pdev) } irq = platform_get_irq(pdev, 0); - if (!irq) { + if (irq < 0) { dev_err(&pdev->dev, "No IRQ resource\n"); + err = irq; goto fail; } From c1a4c87b06fa564d6e2760a12d4e5a09badc684b Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 20 Jun 2015 03:33:53 +0300 Subject: [PATCH 108/170] can: rcar_can: print signed IRQ # Printing IRQ # using "%x" and "%u" unsigned formats isn't quite correct as 'ndev->irq' is of type *int*, so the "%d" format needs to be used instead. While fixing this, beautify the dev_info() message in rcar_can_probe() a bit. Fixes: fd1159318e55 ("can: add Renesas R-Car CAN driver") Signed-off-by: Sergei Shtylyov Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rcar_can.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c index 93017c09cfc3..2f9ebad4ff56 100644 --- a/drivers/net/can/rcar_can.c +++ b/drivers/net/can/rcar_can.c @@ -526,7 +526,7 @@ static int rcar_can_open(struct net_device *ndev) napi_enable(&priv->napi); err = request_irq(ndev->irq, rcar_can_interrupt, 0, ndev->name, ndev); if (err) { - netdev_err(ndev, "error requesting interrupt %x\n", ndev->irq); + netdev_err(ndev, "error requesting interrupt %d\n", ndev->irq); goto out_close; } can_led_event(ndev, CAN_LED_EVENT_OPEN); @@ -824,7 +824,7 @@ static int rcar_can_probe(struct platform_device *pdev) devm_can_led_init(ndev); - dev_info(&pdev->dev, "device registered (reg_base=%p, irq=%u)\n", + dev_info(&pdev->dev, "device registered (regs @ %p, IRQ%d)\n", priv->regs, ndev->irq); return 0; From 3255f68c132a8ed784b9fe1804ef4642a1d16b9a Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 20 Jun 2015 03:34:55 +0300 Subject: [PATCH 109/170] can: rcar_can: fix typo in error message Fix typo in the first error message printed by rcar_can_open(). Based on the original patch by Vladimir Barinov. Fixes: 862e2b6af941 ("can: rcar_can: support all input clocks") Reported-by: Vladimir Barinov Signed-off-by: Sergei Shtylyov Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rcar_can.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c index 2f9ebad4ff56..310a0cd20679 100644 --- a/drivers/net/can/rcar_can.c +++ b/drivers/net/can/rcar_can.c @@ -508,7 +508,8 @@ static int rcar_can_open(struct net_device *ndev) err = clk_prepare_enable(priv->clk); if (err) { - netdev_err(ndev, "failed to enable periperal clock, error %d\n", + netdev_err(ndev, + "failed to enable peripheral clock, error %d\n", err); goto out; } From ae185f19662565f5320e49247faab8752a977642 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 20 Jun 2015 03:50:35 +0300 Subject: [PATCH 110/170] can: rcar_can: print request_irq() error code Also print the error code when the request_irq() call fails in rcar_can_open(), rewording the error message... Signed-off-by: Sergei Shtylyov Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rcar_can.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c index 310a0cd20679..5e81afffc073 100644 --- a/drivers/net/can/rcar_can.c +++ b/drivers/net/can/rcar_can.c @@ -527,7 +527,8 @@ static int rcar_can_open(struct net_device *ndev) napi_enable(&priv->napi); err = request_irq(ndev->irq, rcar_can_interrupt, 0, ndev->name, ndev); if (err) { - netdev_err(ndev, "error requesting interrupt %d\n", ndev->irq); + netdev_err(ndev, "request_irq(%d) failed, error %d\n", + ndev->irq, err); goto out_close; } can_led_event(ndev, CAN_LED_EVENT_OPEN); From 585bc2ac4cc0e2398f5b128018247f51fa3e0e12 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 20 Jun 2015 03:51:34 +0300 Subject: [PATCH 111/170] can: rcar_can: unify error messages All the error messages in the driver but the ones from devm_clk_get() failures use similar format. Make those two messages consitent with others. Signed-off-by: Sergei Shtylyov Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rcar_can.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c index 5e81afffc073..7bd54191f962 100644 --- a/drivers/net/can/rcar_can.c +++ b/drivers/net/can/rcar_can.c @@ -785,7 +785,8 @@ static int rcar_can_probe(struct platform_device *pdev) priv->clk = devm_clk_get(&pdev->dev, "clkp1"); if (IS_ERR(priv->clk)) { err = PTR_ERR(priv->clk); - dev_err(&pdev->dev, "cannot get peripheral clock: %d\n", err); + dev_err(&pdev->dev, "cannot get peripheral clock, error %d\n", + err); goto fail_clk; } @@ -797,7 +798,7 @@ static int rcar_can_probe(struct platform_device *pdev) priv->can_clk = devm_clk_get(&pdev->dev, clock_names[clock_select]); if (IS_ERR(priv->can_clk)) { err = PTR_ERR(priv->can_clk); - dev_err(&pdev->dev, "cannot get CAN clock: %d\n", err); + dev_err(&pdev->dev, "cannot get CAN clock, error %d\n", err); goto fail_clk; } From 033365191136c97f88c81b7bd0011414db28bb4e Mon Sep 17 00:00:00 2001 From: "J.D. Schroeder" Date: Wed, 8 Jul 2015 14:38:12 +0300 Subject: [PATCH 112/170] can: c_can: Fix default pinmux glitch at init The previous change 3973c526ae9c (net: can: c_can: Disable pins when CAN interface is down) causes a slight glitch on the pinctrl settings when used. Since commit ab78029 (drivers/pinctrl: grab default handles from device core), the device core will automatically set the default pins. This causes the pins to be momentarily set to the default and then to the sleep state in register_c_can_dev(). By adding an optional "enable" state, boards can set the default pin state to be disabled and avoid the glitch when the switch from default to sleep first occurs. If the "enable" state is not available c_can_pinctrl_select_state() falls back to using the "default" pinctrl state. [Roger Q] - Forward port to v4.2 and use pinctrl_get_select(). Signed-off-by: J.D. Schroeder Signed-off-by: Roger Quadros Reviewed-by: Grygorii Strashko Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/c_can/c_can.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index 041525d2595c..5d214d135332 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -592,6 +592,7 @@ static int c_can_start(struct net_device *dev) { struct c_can_priv *priv = netdev_priv(dev); int err; + struct pinctrl *p; /* basic c_can configuration */ err = c_can_chip_config(dev); @@ -604,8 +605,13 @@ static int c_can_start(struct net_device *dev) priv->can.state = CAN_STATE_ERROR_ACTIVE; - /* activate pins */ - pinctrl_pm_select_default_state(dev->dev.parent); + /* Attempt to use "active" if available else use "default" */ + p = pinctrl_get_select(priv->device, "active"); + if (!IS_ERR(p)) + pinctrl_put(p); + else + pinctrl_pm_select_default_state(priv->device); + return 0; } From 2acb5c301edf39ab6d066687ce70da1166e4de9e Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Tue, 7 Jul 2015 17:27:57 +0300 Subject: [PATCH 113/170] ARM: dts: dra7x-evm: Prevent glitch on DCAN1 pinmux Driver core sets "default" pinmux on on probe and CAN driver sets "sleep" pinmux during register. This causes a small window where the CAN pins are in "default" state with the DCAN module being disabled. Change the "default" state to be like sleep so this glitch is avoided. Add a new "active" state that is used by the driver when CAN is actually active. Signed-off-by: Roger Quadros Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- arch/arm/boot/dts/dra7-evm.dts | 5 +++-- arch/arm/boot/dts/dra72-evm.dts | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/dra7-evm.dts b/arch/arm/boot/dts/dra7-evm.dts index aa465904f6cc..096f68be99e2 100644 --- a/arch/arm/boot/dts/dra7-evm.dts +++ b/arch/arm/boot/dts/dra7-evm.dts @@ -686,7 +686,8 @@ &dcan1 { status = "ok"; - pinctrl-names = "default", "sleep"; - pinctrl-0 = <&dcan1_pins_default>; + pinctrl-names = "default", "sleep", "active"; + pinctrl-0 = <&dcan1_pins_sleep>; pinctrl-1 = <&dcan1_pins_sleep>; + pinctrl-2 = <&dcan1_pins_default>; }; diff --git a/arch/arm/boot/dts/dra72-evm.dts b/arch/arm/boot/dts/dra72-evm.dts index 4e1b60581782..803738414086 100644 --- a/arch/arm/boot/dts/dra72-evm.dts +++ b/arch/arm/boot/dts/dra72-evm.dts @@ -587,9 +587,10 @@ &dcan1 { status = "ok"; - pinctrl-names = "default", "sleep"; - pinctrl-0 = <&dcan1_pins_default>; + pinctrl-names = "default", "sleep", "active"; + pinctrl-0 = <&dcan1_pins_sleep>; pinctrl-1 = <&dcan1_pins_sleep>; + pinctrl-2 = <&dcan1_pins_default>; }; &qspi { From d3b58c47d330de8c29898fe9746f7530408f8a59 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 26 Jun 2015 11:58:19 +0200 Subject: [PATCH 114/170] can: replace timestamp as unique skb attribute Commit 514ac99c64b "can: fix multiple delivery of a single CAN frame for overlapping CAN filters" requires the skb->tstamp to be set to check for identical CAN skbs. Without timestamping to be required by user space applications this timestamp was not generated which lead to commit 36c01245eb8 "can: fix loss of CAN frames in raw_rcv" - which forces the timestamp to be set in all CAN related skbuffs by introducing several __net_timestamp() calls. This forces e.g. out of tree drivers which are not using alloc_can{,fd}_skb() to add __net_timestamp() after skbuff creation to prevent the frame loss fixed in mainline Linux. This patch removes the timestamp dependency and uses an atomic counter to create an unique identifier together with the skbuff pointer. Btw: the new skbcnt element introduced in struct can_skb_priv has to be initialized with zero in out-of-tree drivers which are not using alloc_can{,fd}_skb() too. Signed-off-by: Oliver Hartkopp Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 7 ++----- drivers/net/can/slcan.c | 2 +- drivers/net/can/vcan.c | 3 --- include/linux/can/skb.h | 2 ++ net/can/af_can.c | 12 +++++++----- net/can/bcm.c | 2 ++ net/can/raw.c | 7 ++++--- 7 files changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index e9b1810d319f..aede704605c6 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -440,9 +440,6 @@ unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx) struct can_frame *cf = (struct can_frame *)skb->data; u8 dlc = cf->can_dlc; - if (!(skb->tstamp.tv64)) - __net_timestamp(skb); - netif_rx(priv->echo_skb[idx]); priv->echo_skb[idx] = NULL; @@ -578,7 +575,6 @@ struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf) if (unlikely(!skb)) return NULL; - __net_timestamp(skb); skb->protocol = htons(ETH_P_CAN); skb->pkt_type = PACKET_BROADCAST; skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -589,6 +585,7 @@ struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf) can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; *cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame)); memset(*cf, 0, sizeof(struct can_frame)); @@ -607,7 +604,6 @@ struct sk_buff *alloc_canfd_skb(struct net_device *dev, if (unlikely(!skb)) return NULL; - __net_timestamp(skb); skb->protocol = htons(ETH_P_CANFD); skb->pkt_type = PACKET_BROADCAST; skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -618,6 +614,7 @@ struct sk_buff *alloc_canfd_skb(struct net_device *dev, can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; *cfd = (struct canfd_frame *)skb_put(skb, sizeof(struct canfd_frame)); memset(*cfd, 0, sizeof(struct canfd_frame)); diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index f64f5290d6f8..a23a7af8eb9a 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -207,7 +207,6 @@ static void slc_bump(struct slcan *sl) if (!skb) return; - __net_timestamp(skb); skb->dev = sl->dev; skb->protocol = htons(ETH_P_CAN); skb->pkt_type = PACKET_BROADCAST; @@ -215,6 +214,7 @@ static void slc_bump(struct slcan *sl) can_skb_reserve(skb); can_skb_prv(skb)->ifindex = sl->dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; memcpy(skb_put(skb, sizeof(struct can_frame)), &cf, sizeof(struct can_frame)); diff --git a/drivers/net/can/vcan.c b/drivers/net/can/vcan.c index 0ce868de855d..674f367087c5 100644 --- a/drivers/net/can/vcan.c +++ b/drivers/net/can/vcan.c @@ -78,9 +78,6 @@ static void vcan_rx(struct sk_buff *skb, struct net_device *dev) skb->dev = dev; skb->ip_summed = CHECKSUM_UNNECESSARY; - if (!(skb->tstamp.tv64)) - __net_timestamp(skb); - netif_rx_ni(skb); } diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index b6a52a4b457a..51bb6532785c 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -27,10 +27,12 @@ /** * struct can_skb_priv - private additional data inside CAN sk_buffs * @ifindex: ifindex of the first interface the CAN frame appeared on + * @skbcnt: atomic counter to have an unique id together with skb pointer * @cf: align to the following CAN frame at skb->data */ struct can_skb_priv { int ifindex; + int skbcnt; struct can_frame cf[0]; }; diff --git a/net/can/af_can.c b/net/can/af_can.c index 7933e62a7318..166d436196c1 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -89,6 +89,8 @@ struct timer_list can_stattimer; /* timer for statistics update */ struct s_stats can_stats; /* packet statistics */ struct s_pstats can_pstats; /* receive list statistics */ +static atomic_t skbcounter = ATOMIC_INIT(0); + /* * af_can socket functions */ @@ -310,12 +312,8 @@ int can_send(struct sk_buff *skb, int loop) return err; } - if (newskb) { - if (!(newskb->tstamp.tv64)) - __net_timestamp(newskb); - + if (newskb) netif_rx_ni(newskb); - } /* update statistics */ can_stats.tx_frames++; @@ -683,6 +681,10 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev) can_stats.rx_frames++; can_stats.rx_frames_delta++; + /* create non-zero unique skb identifier together with *skb */ + while (!(can_skb_prv(skb)->skbcnt)) + can_skb_prv(skb)->skbcnt = atomic_inc_return(&skbcounter); + rcu_read_lock(); /* deliver the packet to sockets listening on all devices */ diff --git a/net/can/bcm.c b/net/can/bcm.c index b523453585be..a1ba6875c2a2 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -261,6 +261,7 @@ static void bcm_can_tx(struct bcm_op *op) can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; memcpy(skb_put(skb, CFSIZ), cf, CFSIZ); @@ -1217,6 +1218,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk) } can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; skb->dev = dev; can_skb_set_owner(skb, sk); err = can_send(skb, 1); /* send with loopback */ diff --git a/net/can/raw.c b/net/can/raw.c index 31b9748cbb4e..2e67b1423cd3 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -75,7 +75,7 @@ MODULE_ALIAS("can-proto-1"); */ struct uniqframe { - ktime_t tstamp; + int skbcnt; const struct sk_buff *skb; unsigned int join_rx_count; }; @@ -133,7 +133,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data) /* eliminate multiple filter matches for the same skb */ if (this_cpu_ptr(ro->uniq)->skb == oskb && - ktime_equal(this_cpu_ptr(ro->uniq)->tstamp, oskb->tstamp)) { + this_cpu_ptr(ro->uniq)->skbcnt == can_skb_prv(oskb)->skbcnt) { if (ro->join_filters) { this_cpu_inc(ro->uniq->join_rx_count); /* drop frame until all enabled filters matched */ @@ -144,7 +144,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data) } } else { this_cpu_ptr(ro->uniq)->skb = oskb; - this_cpu_ptr(ro->uniq)->tstamp = oskb->tstamp; + this_cpu_ptr(ro->uniq)->skbcnt = can_skb_prv(oskb)->skbcnt; this_cpu_ptr(ro->uniq)->join_rx_count = 1; /* drop first frame to check all enabled filters? */ if (ro->join_filters && ro->count > 1) @@ -749,6 +749,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; err = memcpy_from_msg(skb_put(skb, size), msg, size); if (err < 0) From d9ee489619744ee5ac246b8fb3dd65bb078d2f0a Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 7 Jul 2015 14:21:21 +1000 Subject: [PATCH 115/170] m68knommu: force setting of CONFIG_CLOCK_FREQ for ColdFire It is possible to disable the clock selection at configuration time, but for ColdFire targets we always expect a clock frequency to be selected. This results in the following compile time error: CC arch/m68k/kernel/asm-offsets.s In file included from ./arch/m68k/include/asm/timex.h:14:0, from include/linux/timex.h:65, from include/linux/sched.h:19, from arch/m68k/kernel/asm-offsets.c:14: ./arch/m68k/include/asm/coldfire.h:25:2: error: #error "Don't know what your ColdFire CPU clock frequency is??" Remove CONFIG_CLOCK_SELECT completely and always enable CONFIG_CLOCK_FREQ for ColdFire. Signed-off-by: Greg Ungerer Acked-by: Geert Uytterhoeven --- arch/m68k/Kconfig.cpu | 14 +------------- arch/m68k/include/asm/coldfire.h | 2 +- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index 33013dfcd3e1..7da594994765 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -416,22 +416,10 @@ config HAVE_MBAR config HAVE_IPSBAR bool -config CLOCK_SET - bool "Enable setting the CPU clock frequency" - depends on COLDFIRE - default n - help - On some CPU's you do not need to know what the core CPU clock - frequency is. On these you can disable clock setting. On some - traditional 68K parts, and on all ColdFire parts you need to set - the appropriate CPU clock frequency. On these devices many of the - onboard peripherals derive their timing from the master CPU clock - frequency. - config CLOCK_FREQ int "Set the core clock frequency" default "66666666" - depends on CLOCK_SET + depends on COLDFIRE help Define the CPU clock frequency in use. This is the core clock frequency, it may or may not be the same as the external clock diff --git a/arch/m68k/include/asm/coldfire.h b/arch/m68k/include/asm/coldfire.h index c94557b91448..50aa4dac9ca2 100644 --- a/arch/m68k/include/asm/coldfire.h +++ b/arch/m68k/include/asm/coldfire.h @@ -19,7 +19,7 @@ * in any case new boards come along from time to time that have yet * another different clocking frequency. */ -#ifdef CONFIG_CLOCK_SET +#ifdef CONFIG_CLOCK_FREQ #define MCF_CLK CONFIG_CLOCK_FREQ #else #error "Don't know what your ColdFire CPU clock frequency is??" From 15c2ca4e98a3cd265b17db126e2e6b752f99014d Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 7 Jul 2015 15:01:53 +1000 Subject: [PATCH 116/170] m68knommu: improve the clock configuration defaults Create some intelligent default settings for each ColdFire SoC type in the configuration entry for CONFIG_CLOCK_FREQ. The ColdFire clock frequency is configurable at build time. There is a lot of variation in the frequency of operation on specific ColdFire based boards. But we can choose a default that matches the maximum frequency of clock operation for a particular ColdFire part. That is typically the most common clock setting. Signed-off-by: Greg Ungerer Acked-by: Geert Uytterhoeven --- arch/m68k/Kconfig.cpu | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index 7da594994765..62f590e41880 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -418,6 +418,14 @@ config HAVE_IPSBAR config CLOCK_FREQ int "Set the core clock frequency" + default "25000000" if M5206 + default "54000000" if M5206e + default "166666666" if M520x + default "140000000" if M5249 + default "150000000" if M527x || M523x + default "90000000" if M5307 + default "50000000" if M5407 + default "266000000" if M54xx default "66666666" depends on COLDFIRE help From fa95a1dd0819c9041a873b10a6d83b5134964154 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 7 Jul 2015 15:44:02 +1000 Subject: [PATCH 117/170] m68knommu: make ColdFire SoC selection a choice It would be nice if we could support multiple ColdFire SoC types in a single binary - but currently the code simply does not support it. Change the SoC selection config options to be a choice instead of individual selectable entries. This fixes problems with building allnoconfig, and means that a sane linux kernel is generated for a single ColdFire SoC type. Signed-off-by: Greg Ungerer Acked-by: Geert Uytterhoeven --- arch/m68k/Kconfig.cpu | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index 62f590e41880..c496d48a8c8d 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -125,6 +125,13 @@ endif # M68KCLASSIC if COLDFIRE +choice + prompt "ColdFire SoC type" + default M520x + help + Select the type of ColdFire System-on-Chip (SoC) that you want + to build for. + config M5206 bool "MCF5206" depends on !MMU @@ -174,9 +181,6 @@ config M525x help Freescale (Motorola) Coldfire 5251/5253 processor support. -config M527x - bool - config M5271 bool "MCF5271" depends on !MMU @@ -223,9 +227,6 @@ config M5307 help Motorola ColdFire 5307 processor support. -config M53xx - bool - config M532x bool "MCF532x" depends on !MMU @@ -251,9 +252,6 @@ config M5407 help Motorola ColdFire 5407 processor support. -config M54xx - bool - config M547x bool "MCF547x" select M54xx @@ -280,6 +278,17 @@ config M5441x help Freescale Coldfire 54410/54415/54416/54417/54418 processor support. +endchoice + +config M527x + bool + +config M53xx + bool + +config M54xx + bool + endif # COLDFIRE From bfd302acc57ceed1b4d37926c8bbe3cb9d9f1098 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 7 Jul 2015 16:27:29 +1000 Subject: [PATCH 118/170] m68knommu: update defconfig for m5208evb No change to active configuration settings, updated to match current Kconfigs only. Signed-off-by: Greg Ungerer --- arch/m68k/configs/m5208evb_defconfig | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/arch/m68k/configs/m5208evb_defconfig b/arch/m68k/configs/m5208evb_defconfig index e7292f460af4..4c7b7938d53a 100644 --- a/arch/m68k/configs/m5208evb_defconfig +++ b/arch/m68k/configs/m5208evb_defconfig @@ -1,10 +1,6 @@ -# CONFIG_MMU is not set -CONFIG_EXPERIMENTAL=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set -# CONFIG_HOTPLUG is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set @@ -16,17 +12,12 @@ CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set -CONFIG_M520x=y -CONFIG_CLOCK_SET=y -CONFIG_CLOCK_FREQ=166666666 -CONFIG_CLOCK_DIV=2 -CONFIG_M5208EVB=y +# CONFIG_MMU is not set # CONFIG_4KSTACKS is not set CONFIG_RAMBASE=0x40000000 CONFIG_RAMSIZE=0x2000000 CONFIG_VECTORBASE=0x40000000 CONFIG_KERNELBASE=0x40020000 -CONFIG_RAM16BIT=y CONFIG_BINFMT_FLAT=y CONFIG_NET=y CONFIG_PACKET=y @@ -40,24 +31,19 @@ CONFIG_INET=y # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_RAM=y CONFIG_MTD_UCLINUX=y CONFIG_BLK_DEV_RAM=y -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y CONFIG_FEC=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_UNIX98_PTYS is not set CONFIG_SERIAL_MCF=y CONFIG_SERIAL_MCF_BAUDRATE=115200 CONFIG_SERIAL_MCF_CONSOLE=y -# CONFIG_UNIX98_PTYS is not set # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set # CONFIG_USB_SUPPORT is not set @@ -68,8 +54,6 @@ CONFIG_EXT2_FS=y CONFIG_ROMFS_FS=y CONFIG_ROMFS_BACKED_BY_MTD=y # CONFIG_NETWORK_FILESYSTEMS is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_FULLDEBUG=y CONFIG_BOOTPARAM=y CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0" +CONFIG_FULLDEBUG=y From 0f28b05a4b127dd786d589a44d49ed6ade9a66fe Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 7 Jul 2015 16:29:17 +1000 Subject: [PATCH 119/170] m68knommu: update defconfig for ColdFire m5249evb No change to active configuration settings, updated to match current Kconfigs only. Signed-off-by: Greg Ungerer --- arch/m68k/configs/m5249evb_defconfig | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/arch/m68k/configs/m5249evb_defconfig b/arch/m68k/configs/m5249evb_defconfig index 0cd4b39f325b..a782f368650f 100644 --- a/arch/m68k/configs/m5249evb_defconfig +++ b/arch/m68k/configs/m5249evb_defconfig @@ -1,10 +1,6 @@ -# CONFIG_MMU is not set -CONFIG_EXPERIMENTAL=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set -# CONFIG_HOTPLUG is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set @@ -16,10 +12,8 @@ CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set +# CONFIG_MMU is not set CONFIG_M5249=y -CONFIG_CLOCK_SET=y -CONFIG_CLOCK_FREQ=140000000 -CONFIG_CLOCK_DIV=2 CONFIG_M5249C3=y CONFIG_RAMBASE=0x00000000 CONFIG_RAMSIZE=0x00800000 @@ -38,23 +32,18 @@ CONFIG_INET=y # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_RAM=y CONFIG_MTD_UCLINUX=y CONFIG_BLK_DEV_RAM=y -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_PPP=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_UNIX98_PTYS is not set CONFIG_SERIAL_MCF=y CONFIG_SERIAL_MCF_CONSOLE=y -# CONFIG_UNIX98_PTYS is not set # CONFIG_HWMON is not set # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y @@ -62,7 +51,5 @@ CONFIG_EXT2_FS=y CONFIG_ROMFS_FS=y CONFIG_ROMFS_BACKED_BY_MTD=y # CONFIG_NETWORK_FILESYSTEMS is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_BOOTPARAM=y CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0" -# CONFIG_CRC32 is not set From 2e27f44383fb64db65eeeba6b25f9cc986284103 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 7 Jul 2015 16:30:58 +1000 Subject: [PATCH 120/170] m68knommu: update defconfig for ColdFire m5272c3 No change to active configuration settings, updated to match current Kconfigs only. Signed-off-by: Greg Ungerer --- arch/m68k/configs/m5272c3_defconfig | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/arch/m68k/configs/m5272c3_defconfig b/arch/m68k/configs/m5272c3_defconfig index a60cb3509135..6f5fb92f5cbf 100644 --- a/arch/m68k/configs/m5272c3_defconfig +++ b/arch/m68k/configs/m5272c3_defconfig @@ -1,10 +1,6 @@ -# CONFIG_MMU is not set -CONFIG_EXPERIMENTAL=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set -# CONFIG_HOTPLUG is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set @@ -16,8 +12,8 @@ CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set +# CONFIG_MMU is not set CONFIG_M5272=y -CONFIG_CLOCK_SET=y CONFIG_M5272C3=y CONFIG_RAMBASE=0x00000000 CONFIG_RAMSIZE=0x00800000 @@ -36,23 +32,18 @@ CONFIG_INET=y # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_RAM=y CONFIG_MTD_UCLINUX=y CONFIG_BLK_DEV_RAM=y -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y CONFIG_FEC=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_UNIX98_PTYS is not set CONFIG_SERIAL_MCF=y CONFIG_SERIAL_MCF_CONSOLE=y -# CONFIG_UNIX98_PTYS is not set # CONFIG_HWMON is not set # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y @@ -61,6 +52,5 @@ CONFIG_EXT2_FS=y CONFIG_ROMFS_FS=y CONFIG_ROMFS_BACKED_BY_MTD=y # CONFIG_NETWORK_FILESYSTEMS is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_BOOTPARAM=y CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0" From 6845f6e10290877a820000c3647129837ef93c1b Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 7 Jul 2015 16:31:30 +1000 Subject: [PATCH 121/170] m68knommu: update defconfig for ColdFire m5275evb No change to active configuration settings, updated to match current Kconfigs only. Signed-off-by: Greg Ungerer --- arch/m68k/configs/m5275evb_defconfig | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/arch/m68k/configs/m5275evb_defconfig b/arch/m68k/configs/m5275evb_defconfig index e6502ab7cb2f..b5d7cd1ce856 100644 --- a/arch/m68k/configs/m5275evb_defconfig +++ b/arch/m68k/configs/m5275evb_defconfig @@ -1,10 +1,6 @@ -# CONFIG_MMU is not set -CONFIG_EXPERIMENTAL=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set -# CONFIG_HOTPLUG is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set @@ -16,11 +12,8 @@ CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set +# CONFIG_MMU is not set CONFIG_M5275=y -CONFIG_CLOCK_SET=y -CONFIG_CLOCK_FREQ=150000000 -CONFIG_CLOCK_DIV=2 -CONFIG_M5275EVB=y # CONFIG_4KSTACKS is not set CONFIG_RAMBASE=0x00000000 CONFIG_RAMSIZE=0x00000000 @@ -39,24 +32,19 @@ CONFIG_INET=y # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_RAM=y CONFIG_MTD_UCLINUX=y CONFIG_BLK_DEV_RAM=y -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y CONFIG_FEC=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_PPP=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_UNIX98_PTYS is not set CONFIG_SERIAL_MCF=y CONFIG_SERIAL_MCF_CONSOLE=y -# CONFIG_UNIX98_PTYS is not set # CONFIG_HWMON is not set # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y @@ -65,8 +53,5 @@ CONFIG_EXT2_FS=y CONFIG_ROMFS_FS=y CONFIG_ROMFS_BACKED_BY_MTD=y # CONFIG_NETWORK_FILESYSTEMS is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BOOTPARAM=y CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0" -# CONFIG_CRC32 is not set From 59c024b742f4b9ef9be08baebabf15ca7e843b5d Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 7 Jul 2015 16:31:56 +1000 Subject: [PATCH 122/170] m68knommu: update defconfig for ColdFire m5307c3 No change to active configuration settings, updated to match current Kconfigs only. Signed-off-by: Greg Ungerer --- arch/m68k/configs/m5307c3_defconfig | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/arch/m68k/configs/m5307c3_defconfig b/arch/m68k/configs/m5307c3_defconfig index 023812abd2e6..1b4c09461c40 100644 --- a/arch/m68k/configs/m5307c3_defconfig +++ b/arch/m68k/configs/m5307c3_defconfig @@ -1,10 +1,6 @@ -# CONFIG_MMU is not set -CONFIG_EXPERIMENTAL=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set -# CONFIG_HOTPLUG is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set @@ -16,10 +12,8 @@ CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set +# CONFIG_MMU is not set CONFIG_M5307=y -CONFIG_CLOCK_SET=y -CONFIG_CLOCK_FREQ=90000000 -CONFIG_CLOCK_DIV=2 CONFIG_M5307C3=y CONFIG_RAMBASE=0x00000000 CONFIG_RAMSIZE=0x00800000 @@ -38,16 +32,11 @@ CONFIG_INET=y # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_RAM=y CONFIG_MTD_UCLINUX=y CONFIG_BLK_DEV_RAM=y -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_PPP=y CONFIG_SLIP=y CONFIG_SLIP_COMPRESSED=y @@ -56,21 +45,17 @@ CONFIG_SLIP_COMPRESSED=y # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_MCF=y CONFIG_SERIAL_MCF_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set -# CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y # CONFIG_DNOTIFY is not set CONFIG_ROMFS_FS=y CONFIG_ROMFS_BACKED_BY_MTD=y # CONFIG_NETWORK_FILESYSTEMS is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_FULLDEBUG=y CONFIG_BOOTPARAM=y CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0" -# CONFIG_CRC32 is not set +CONFIG_FULLDEBUG=y From fee539223f72c0f969711673fd1ddf613a522d6e Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 7 Jul 2015 16:33:08 +1000 Subject: [PATCH 123/170] m68knommu: update defconfig for ColdFire m5407c3 No change to active configuration settings, updated to match current Kconfigs only. Signed-off-by: Greg Ungerer --- arch/m68k/configs/m5407c3_defconfig | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/arch/m68k/configs/m5407c3_defconfig b/arch/m68k/configs/m5407c3_defconfig index 557b39f3be90..275ad543d4bc 100644 --- a/arch/m68k/configs/m5407c3_defconfig +++ b/arch/m68k/configs/m5407c3_defconfig @@ -1,10 +1,6 @@ -# CONFIG_MMU is not set -CONFIG_EXPERIMENTAL=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set -# CONFIG_HOTPLUG is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set @@ -17,9 +13,8 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set +# CONFIG_MMU is not set CONFIG_M5407=y -CONFIG_CLOCK_SET=y -CONFIG_CLOCK_FREQ=50000000 CONFIG_M5407C3=y CONFIG_RAMBASE=0x00000000 CONFIG_RAMSIZE=0x00000000 @@ -38,22 +33,17 @@ CONFIG_INET=y # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_RAM=y CONFIG_MTD_UCLINUX=y CONFIG_BLK_DEV_RAM=y -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_PPP=y # CONFIG_INPUT is not set # CONFIG_VT is not set +# CONFIG_UNIX98_PTYS is not set CONFIG_SERIAL_MCF=y CONFIG_SERIAL_MCF_CONSOLE=y -# CONFIG_UNIX98_PTYS is not set # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set # CONFIG_USB_SUPPORT is not set @@ -63,8 +53,5 @@ CONFIG_EXT2_FS=y CONFIG_ROMFS_FS=y CONFIG_ROMFS_BACKED_BY_MTD=y # CONFIG_NETWORK_FILESYSTEMS is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BOOTPARAM=y CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0" -# CONFIG_CRC32 is not set From 8700f09495dec922600836400d5e26a22f50bf19 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 7 Jul 2015 16:34:34 +1000 Subject: [PATCH 124/170] m68knommu: update defconfig for ColdFire m5475evb No change to active configuration settings, updated to match current Kconfigs only. Signed-off-by: Greg Ungerer --- arch/m68k/configs/m5475evb_defconfig | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/m68k/configs/m5475evb_defconfig b/arch/m68k/configs/m5475evb_defconfig index c5018a68819b..78665b5bc2a9 100644 --- a/arch/m68k/configs/m5475evb_defconfig +++ b/arch/m68k/configs/m5475evb_defconfig @@ -1,11 +1,7 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_SWAP is not set CONFIG_LOG_BUF_SHIFT=14 -CONFIG_SYSFS_DEPRECATED=y -CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_SYSCTL_SYSCALL=y # CONFIG_KALLSYMS is not set -# CONFIG_HOTPLUG is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set @@ -20,9 +16,6 @@ CONFIG_MODULES=y # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set CONFIG_COLDFIRE=y -CONFIG_M547x=y -CONFIG_CLOCK_SET=y -CONFIG_CLOCK_FREQ=266000000 # CONFIG_4KSTACKS is not set CONFIG_RAMBASE=0x0 CONFIG_RAMSIZE=0x2000000 @@ -32,7 +25,6 @@ CONFIG_KERNELBASE=0x20000 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y From 03aa29f80e371c75370e83eded7b551951385122 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Wed, 8 Jul 2015 16:54:13 +1000 Subject: [PATCH 125/170] m68k: fix io functions for ColdFire/MMU/PCI case The inb/outb/... family of IO methods end up being multiply defined when building PCI support for the ColdFire. Compiling gives this: CC init/main.o In file included from ./arch/m68k/include/asm/io.h:4:0, from include/linux/bio.h:30, from include/linux/blkdev.h:18, from init/main.c:75: ./arch/m68k/include/asm/io_mm.h:420:0: warning: "inb" redefined ./arch/m68k/include/asm/io_mm.h:108:0: note: this is the location of the previous definition ... The ColdFire/PCI case defines its own IO access methods, so no others should be defined or used in this case. Conditionally disable other definitions that clash with it. Signed-off-by: Greg Ungerer --- arch/m68k/include/asm/io_mm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/m68k/include/asm/io_mm.h b/arch/m68k/include/asm/io_mm.h index 618c85d3c786..f55cad529400 100644 --- a/arch/m68k/include/asm/io_mm.h +++ b/arch/m68k/include/asm/io_mm.h @@ -413,7 +413,8 @@ static inline void isa_delay(void) #define writew(val, addr) out_le16((addr), (val)) #endif /* CONFIG_ATARI_ROM_ISA */ -#if !defined(CONFIG_ISA) && !defined(CONFIG_ATARI_ROM_ISA) +#if !defined(CONFIG_ISA) && !defined(CONFIG_ATARI_ROM_ISA) && \ + !(defined(CONFIG_PCI) && defined(CONFIG_COLDFIRE)) /* * We need to define dummy functions for GENERIC_IOMAP support. */ From 67592f699cb309559575b89d727d510ea076521c Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Wed, 8 Jul 2015 17:02:08 +1000 Subject: [PATCH 126/170] m68k: enable PCI support for m5475evb defconfig The ColdFire M5475 on the m5475evb board supports a PCI bus, lets enable it for the defconfig to get better build and test coverage. Signed-off-by: Greg Ungerer --- arch/m68k/configs/m5475evb_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/m68k/configs/m5475evb_defconfig b/arch/m68k/configs/m5475evb_defconfig index 78665b5bc2a9..4f4ccd13c11b 100644 --- a/arch/m68k/configs/m5475evb_defconfig +++ b/arch/m68k/configs/m5475evb_defconfig @@ -22,6 +22,7 @@ CONFIG_RAMSIZE=0x2000000 CONFIG_VECTORBASE=0x0 CONFIG_MBAR=0xff000000 CONFIG_KERNELBASE=0x20000 +CONFIG_PCI=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y From f51e2f1911122879eefefa4c592dea8bf794b39c Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Mon, 13 Jul 2015 10:25:17 +0300 Subject: [PATCH 127/170] ARC: make sure instruction_pointer() returns unsigned value Currently instruction_pointer() returns pt_regs->ret and so return value is of type "long", which implicitly stands for "signed long". While that's perfectly fine when dealing with 32-bit values if return value of instruction_pointer() gets assigned to 64-bit variable sign extension may happen. And at least in one real use-case it happens already. In perf_prepare_sample() return value of perf_instruction_pointer() (which is an alias to instruction_pointer() in case of ARC) is assigned to (struct perf_sample_data)->ip (which type is "u64"). And what we see if instuction pointer points to user-space application that in case of ARC lays below 0x8000_0000 "ip" gets set properly with leading 32 zeros. But if instruction pointer points to kernel address space that starts from 0x8000_0000 then "ip" is set with 32 leadig "f"-s. I.e. id instruction_pointer() returns 0x8100_0000, "ip" will be assigned with 0xffff_ffff__8100_0000. Which is obviously wrong. In particular that issuse broke output of perf, because perf was unable to associate addresses like 0xffff_ffff__8100_0000 with anything from /proc/kallsyms. That's what we used to see: ----------->8---------- 6.27% ls [unknown] [k] 0xffffffff8046c5cc 2.96% ls libuClibc-0.9.34-git.so [.] memcpy 2.25% ls libuClibc-0.9.34-git.so [.] memset 1.66% ls [unknown] [k] 0xffffffff80666536 1.54% ls libuClibc-0.9.34-git.so [.] 0x000224d6 1.18% ls libuClibc-0.9.34-git.so [.] 0x00022472 ----------->8---------- With that change perf output looks much better now: ----------->8---------- 8.21% ls [kernel.kallsyms] [k] memset 3.52% ls libuClibc-0.9.34-git.so [.] memcpy 2.11% ls libuClibc-0.9.34-git.so [.] malloc 1.88% ls libuClibc-0.9.34-git.so [.] memset 1.64% ls [kernel.kallsyms] [k] _raw_spin_unlock_irqrestore 1.41% ls [kernel.kallsyms] [k] __d_lookup_rcu ----------->8---------- Signed-off-by: Alexey Brodkin Cc: arc-linux-dev@synopsys.com Cc: stable@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Vineet Gupta --- arch/arc/include/asm/ptrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h index 91755972b9a2..91694ec1ce95 100644 --- a/arch/arc/include/asm/ptrace.h +++ b/arch/arc/include/asm/ptrace.h @@ -106,7 +106,7 @@ struct callee_regs { long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13; }; -#define instruction_pointer(regs) ((regs)->ret) +#define instruction_pointer(regs) (unsigned long)((regs)->ret) #define profile_pc(regs) instruction_pointer(regs) /* return 1 if user mode or 0 if kernel mode */ From 624b71ee20acba269e348eb6bdd516d47b9d30fa Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Sun, 12 Jul 2015 13:16:50 +0530 Subject: [PATCH 128/170] ARCv2: support HS38 releases Signed-off-by: Vineet Gupta --- arch/arc/kernel/setup.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 08d98eede6e1..18cc01591c96 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -142,11 +142,15 @@ static void read_arc_build_cfg_regs(void) } static const struct cpuinfo_data arc_cpu_tbl[] = { +#ifdef CONFIG_ISA_ARCOMPACT { {0x20, "ARC 600" }, 0x2F}, { {0x30, "ARC 700" }, 0x33}, { {0x34, "ARC 700 R4.10"}, 0x34}, { {0x35, "ARC 700 R4.11"}, 0x35}, - { {0x50, "ARC HS38" }, 0x51}, +#else + { {0x50, "ARC HS38 R2.0"}, 0x51}, + { {0x52, "ARC HS38 R2.1"}, 0x52}, +#endif { {0x00, NULL } } }; From f81a49d13b3014c2b7c424628779a8af93f25c04 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Fri, 10 Jul 2015 10:47:09 +0200 Subject: [PATCH 129/170] s390/dasd: fix kernel panic when alias is set offline The dasd device driver selects which (alias or base) device is used for a given requests when the request is build. If the chosen alias device is set offline before the request gets queued to the device queue the starting function may use device structures that are already freed. This might lead to a hanging offline process or a kernel panic. Add a check to the starting function that returns the request to the upper layer if the device is already in offline processing. In addition to that prevent that an alias device that's already in offline processing gets chosen as start device. Reviewed-by: Sebastian Ott Reviewed-by: Peter Oberparleiter Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 36 ++++++++++++++++++++++++++------- drivers/s390/block/dasd_alias.c | 3 ++- 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 1aec8ff0b587..f73d2f579a7e 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1862,6 +1862,33 @@ static void __dasd_device_check_expire(struct dasd_device *device) } } +/* + * return 1 when device is not eligible for IO + */ +static int __dasd_device_is_unusable(struct dasd_device *device, + struct dasd_ccw_req *cqr) +{ + int mask = ~(DASD_STOPPED_DC_WAIT | DASD_UNRESUMED_PM); + + if (test_bit(DASD_FLAG_OFFLINE, &device->flags)) { + /* dasd is being set offline. */ + return 1; + } + if (device->stopped) { + if (device->stopped & mask) { + /* stopped and CQR will not change that. */ + return 1; + } + if (!test_bit(DASD_CQR_VERIFY_PATH, &cqr->flags)) { + /* CQR is not able to change device to + * operational. */ + return 1; + } + /* CQR required to get device operational. */ + } + return 0; +} + /* * Take a look at the first request on the ccw queue and check * if it needs to be started. @@ -1876,13 +1903,8 @@ static void __dasd_device_start_head(struct dasd_device *device) cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, devlist); if (cqr->status != DASD_CQR_QUEUED) return; - /* when device is stopped, return request to previous layer - * exception: only the disconnect or unresumed bits are set and the - * cqr is a path verification request - */ - if (device->stopped && - !(!(device->stopped & ~(DASD_STOPPED_DC_WAIT | DASD_UNRESUMED_PM)) - && test_bit(DASD_CQR_VERIFY_PATH, &cqr->flags))) { + /* if device is not usable return request to upper layer */ + if (__dasd_device_is_unusable(device, cqr)) { cqr->intrc = -EAGAIN; cqr->status = DASD_CQR_CLEARED; dasd_schedule_device_bh(device); diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index a2597e683e79..ee3a6faae22a 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -699,7 +699,8 @@ struct dasd_device *dasd_alias_get_start_dev(struct dasd_device *base_device) struct dasd_device, alias_list); spin_unlock_irqrestore(&lcu->lock, flags); alias_priv = (struct dasd_eckd_private *) alias_device->private; - if ((alias_priv->count < private->count) && !alias_device->stopped) + if ((alias_priv->count < private->count) && !alias_device->stopped && + !test_bit(DASD_FLAG_OFFLINE, &alias_device->flags)) return alias_device; else return NULL; From e47994dd44bcb4a77b4152bd0eada585934703c0 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 6 Jul 2015 15:02:37 +0200 Subject: [PATCH 130/170] s390/process: fix sfpc inline assembly The sfpc inline assembly within execve_tail() may incorrectly set bits 28-31 of the sfpc instruction to a value which is not zero. These bits however are currently unused and therefore should be zero so we won't get surprised if these bits will be used in the future. Therefore remove the second operand from the inline assembly. Cc: Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index dc5edc29b73a..8f587d871b9f 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -163,7 +163,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, asmlinkage void execve_tail(void) { current->thread.fp_regs.fpc = 0; - asm volatile("sfpc %0,%0" : : "d" (0)); + asm volatile("sfpc %0" : : "d" (0)); } /* From cad49cfc44a5160e3fa09b18e4e7f7cacd13f27d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 7 Jul 2015 08:40:49 +0200 Subject: [PATCH 131/170] s390/nmi: fix vector register corruption If a machine check happens, the machine has the vector facility installed and the extended save area exists, the cpu will save vector register contents into the extended save area. This is regardless of control register 0 contents, which enables and disables the vector facility during runtime. On each machine check we should validate the vector registers. The current code however tries to validate the registers only if the running task is using vector registers in user space. However even the current code is broken and causes vector register corruption on machine checks, if user space uses them: the prefix area contains a pointer (absolute address) to the machine check extended save area. In order to save some space the save area was put into an unused area of the second prefix page. When validating vector register contents the code uses the absolute address of the extended save area, which is wrong. Due to prefixing the vector instructions will then access contents using absolute addresses instead of real addresses, where the machine stored the contents. If the above would work there is still the problem that register validition would only happen if user space uses vector registers. If kernel space uses them also, this may also lead to vector register content corruption: if the kernel makes use of vector instructions, but the current running user space context does not, the machine check handler will validate floating point registers instead of vector registers. Given the fact that writing to a floating point register may change the upper halve of the corresponding vector register, we also experience vector register corruption in this case. Fix all of these issues, and always validate vector registers on each machine check, if the machine has the vector facility installed and the extended save area is defined. Cc: # 4.1+ Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ctl_reg.h | 5 +++- arch/s390/kernel/nmi.c | 51 +++++++++++++++++++-------------- 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h index cfad7fca01d6..d7697ab802f6 100644 --- a/arch/s390/include/asm/ctl_reg.h +++ b/arch/s390/include/asm/ctl_reg.h @@ -57,7 +57,10 @@ union ctlreg0 { unsigned long lap : 1; /* Low-address-protection control */ unsigned long : 4; unsigned long edat : 1; /* Enhanced-DAT-enablement control */ - unsigned long : 23; + unsigned long : 4; + unsigned long afp : 1; /* AFP-register control */ + unsigned long vx : 1; /* Vector enablement control */ + unsigned long : 17; }; }; diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 505c17c0ae1a..56b550893593 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -21,6 +21,7 @@ #include #include #include +#include struct mcck_struct { int kill_task; @@ -129,26 +130,30 @@ static int notrace s390_revalidate_registers(struct mci *mci) } else asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area)); - asm volatile( - " ld 0,0(%0)\n" - " ld 1,8(%0)\n" - " ld 2,16(%0)\n" - " ld 3,24(%0)\n" - " ld 4,32(%0)\n" - " ld 5,40(%0)\n" - " ld 6,48(%0)\n" - " ld 7,56(%0)\n" - " ld 8,64(%0)\n" - " ld 9,72(%0)\n" - " ld 10,80(%0)\n" - " ld 11,88(%0)\n" - " ld 12,96(%0)\n" - " ld 13,104(%0)\n" - " ld 14,112(%0)\n" - " ld 15,120(%0)\n" - : : "a" (fpt_save_area)); - /* Revalidate vector registers */ - if (MACHINE_HAS_VX && current->thread.vxrs) { + if (!MACHINE_HAS_VX) { + /* Revalidate floating point registers */ + asm volatile( + " ld 0,0(%0)\n" + " ld 1,8(%0)\n" + " ld 2,16(%0)\n" + " ld 3,24(%0)\n" + " ld 4,32(%0)\n" + " ld 5,40(%0)\n" + " ld 6,48(%0)\n" + " ld 7,56(%0)\n" + " ld 8,64(%0)\n" + " ld 9,72(%0)\n" + " ld 10,80(%0)\n" + " ld 11,88(%0)\n" + " ld 12,96(%0)\n" + " ld 13,104(%0)\n" + " ld 14,112(%0)\n" + " ld 15,120(%0)\n" + : : "a" (fpt_save_area)); + } else { + /* Revalidate vector registers */ + union ctlreg0 cr0; + if (!mci->vr) { /* * Vector registers can't be restored and therefore @@ -156,8 +161,12 @@ static int notrace s390_revalidate_registers(struct mci *mci) */ kill_task = 1; } + cr0.val = S390_lowcore.cregs_save_area[0]; + cr0.afp = cr0.vx = 1; + __ctl_load(cr0.val, 0, 0); restore_vx_regs((__vector128 *) - S390_lowcore.vector_save_area_addr); + &S390_lowcore.vector_save_area); + __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0); } /* Revalidate access registers */ asm volatile( From ed056764271e71004b3118619de1ebfefb2acf6b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 11 Jul 2015 06:43:01 -0400 Subject: [PATCH 132/170] Revert "nfs: take extra reference to fl->fl_file when running a LOCKU operation" This reverts commit db2efec0caba4f81a22d95a34da640b86c313c8e. William reported that he was seeing instability with this patch, which is likely due to the fact that it can cause the kernel to take a new reference to a filp after the last reference has already been put. Revert this patch for now, as we'll need to fix this in another way. Cc: stable@vger.kernel.org Reported-by: William Dauchy Signed-off-by: Jeff Layton Reviewed-by: "J. Bruce Fields" Tested-by: "J. Bruce Fields" --- fs/nfs/nfs4proc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6f228b5af819..60be01f69b84 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5484,7 +5484,6 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, atomic_inc(&lsp->ls_count); /* Ensure we don't close file until we're done freeing locks! */ p->ctx = get_nfs_open_context(ctx); - get_file(fl->fl_file); memcpy(&p->fl, fl, sizeof(p->fl)); p->server = NFS_SERVER(inode); return p; @@ -5496,7 +5495,6 @@ static void nfs4_locku_release_calldata(void *data) nfs_free_seqid(calldata->arg.seqid); nfs4_put_lock_state(calldata->lsp); put_nfs_open_context(calldata->ctx); - fput(calldata->fl.fl_file); kfree(calldata); } From bcd7f78d078ff6197715c1ed070c92aca57ec12c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 11 Jul 2015 06:43:02 -0400 Subject: [PATCH 133/170] locks: have flock_lock_file take an inode pointer instead of a filp ...and rename it to better describe how it works. In order to fix a use-after-free in NFS, we need to be able to remove locks from an inode after the filp associated with them may have already been freed. flock_lock_file already only dereferences the filp to get to the inode, so just change it so the callers do that. All of the callers already pass in a lock request that has the fl_file set properly, so we don't need to pass it in individually. With that change it now only dereferences the filp to get to the inode, so just push that out to the callers. Signed-off-by: Jeff Layton Reviewed-by: "J. Bruce Fields" Tested-by: "J. Bruce Fields" --- fs/locks.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index 653faabb07f4..4366b7c54e6d 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -862,12 +862,11 @@ static int posix_locks_deadlock(struct file_lock *caller_fl, * whether or not a lock was successfully freed by testing the return * value for -ENOENT. */ -static int flock_lock_file(struct file *filp, struct file_lock *request) +static int flock_lock_inode(struct inode *inode, struct file_lock *request) { struct file_lock *new_fl = NULL; struct file_lock *fl; struct file_lock_context *ctx; - struct inode *inode = file_inode(filp); int error = 0; bool found = false; LIST_HEAD(dispose); @@ -890,7 +889,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) goto find_conflict; list_for_each_entry(fl, &ctx->flc_flock, fl_list) { - if (filp != fl->fl_file) + if (request->fl_file != fl->fl_file) continue; if (request->fl_type == fl->fl_type) goto out; @@ -1862,7 +1861,7 @@ int flock_lock_file_wait(struct file *filp, struct file_lock *fl) int error; might_sleep(); for (;;) { - error = flock_lock_file(filp, fl); + error = flock_lock_inode(file_inode(filp), fl); if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); @@ -2401,7 +2400,8 @@ locks_remove_flock(struct file *filp) .fl_type = F_UNLCK, .fl_end = OFFSET_MAX, }; - struct file_lock_context *flctx = file_inode(filp)->i_flctx; + struct inode *inode = file_inode(filp); + struct file_lock_context *flctx = inode->i_flctx; if (list_empty(&flctx->flc_flock)) return; @@ -2409,7 +2409,7 @@ locks_remove_flock(struct file *filp) if (filp->f_op->flock) filp->f_op->flock(filp, F_SETLKW, &fl); else - flock_lock_file(filp, &fl); + flock_lock_inode(inode, &fl); if (fl.fl_ops && fl.fl_ops->fl_release_private) fl.fl_ops->fl_release_private(&fl); From 29d01b22eaa18d8b46091d3c98c6001c49f78e4a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 11 Jul 2015 06:43:02 -0400 Subject: [PATCH 134/170] locks: new helpers - flock_lock_inode_wait and posix_lock_inode_wait Allow callers to pass in an inode instead of a filp. Signed-off-by: Jeff Layton Reviewed-by: "J. Bruce Fields" Tested-by: "J. Bruce Fields" --- fs/locks.c | 50 +++++++++++++++++++++++++++++++++++----------- include/linux/fs.h | 14 +++++++++++++ 2 files changed, 52 insertions(+), 12 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index 4366b7c54e6d..ba268a503c1b 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1163,20 +1163,19 @@ int posix_lock_file(struct file *filp, struct file_lock *fl, EXPORT_SYMBOL(posix_lock_file); /** - * posix_lock_file_wait - Apply a POSIX-style lock to a file - * @filp: The file to apply the lock to + * posix_lock_inode_wait - Apply a POSIX-style lock to a file + * @inode: inode of file to which lock request should be applied * @fl: The lock to be applied * - * Add a POSIX style lock to a file. - * We merge adjacent & overlapping locks whenever possible. - * POSIX locks are sorted by owner task, then by starting address + * Variant of posix_lock_file_wait that does not take a filp, and so can be + * used after the filp has already been torn down. */ -int posix_lock_file_wait(struct file *filp, struct file_lock *fl) +int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl) { int error; might_sleep (); for (;;) { - error = posix_lock_file(filp, fl, NULL); + error = __posix_lock_file(inode, fl, NULL); if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); @@ -1188,6 +1187,21 @@ int posix_lock_file_wait(struct file *filp, struct file_lock *fl) } return error; } +EXPORT_SYMBOL(posix_lock_inode_wait); + +/** + * posix_lock_file_wait - Apply a POSIX-style lock to a file + * @filp: The file to apply the lock to + * @fl: The lock to be applied + * + * Add a POSIX style lock to a file. + * We merge adjacent & overlapping locks whenever possible. + * POSIX locks are sorted by owner task, then by starting address + */ +int posix_lock_file_wait(struct file *filp, struct file_lock *fl) +{ + return posix_lock_inode_wait(file_inode(filp), fl); +} EXPORT_SYMBOL(posix_lock_file_wait); /** @@ -1850,18 +1864,18 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg) } /** - * flock_lock_file_wait - Apply a FLOCK-style lock to a file - * @filp: The file to apply the lock to + * flock_lock_inode_wait - Apply a FLOCK-style lock to a file + * @inode: inode of the file to apply to * @fl: The lock to be applied * - * Add a FLOCK style lock to a file. + * Apply a FLOCK style lock request to an inode. */ -int flock_lock_file_wait(struct file *filp, struct file_lock *fl) +int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl) { int error; might_sleep(); for (;;) { - error = flock_lock_inode(file_inode(filp), fl); + error = flock_lock_inode(inode, fl); if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); @@ -1873,7 +1887,19 @@ int flock_lock_file_wait(struct file *filp, struct file_lock *fl) } return error; } +EXPORT_SYMBOL(flock_lock_inode_wait); +/** + * flock_lock_file_wait - Apply a FLOCK-style lock to a file + * @filp: The file to apply the lock to + * @fl: The lock to be applied + * + * Add a FLOCK style lock to a file. + */ +int flock_lock_file_wait(struct file *filp, struct file_lock *fl) +{ + return flock_lock_inode_wait(file_inode(filp), fl); +} EXPORT_SYMBOL(flock_lock_file_wait); /** diff --git a/include/linux/fs.h b/include/linux/fs.h index a0653e560c26..4c990edd1377 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1046,11 +1046,13 @@ extern void locks_remove_file(struct file *); extern void locks_release_private(struct file_lock *); extern void posix_test_lock(struct file *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); +extern int posix_lock_inode_wait(struct inode *, struct file_lock *); extern int posix_lock_file_wait(struct file *, struct file_lock *); extern int posix_unblock_lock(struct file_lock *); extern int vfs_test_lock(struct file *, struct file_lock *); extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); +extern int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl); extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); extern void lease_get_mtime(struct inode *, struct timespec *time); @@ -1137,6 +1139,12 @@ static inline int posix_lock_file(struct file *filp, struct file_lock *fl, return -ENOLCK; } +static inline int posix_lock_inode_wait(struct inode *inode, + struct file_lock *fl) +{ + return -ENOLCK; +} + static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) { return -ENOLCK; @@ -1163,6 +1171,12 @@ static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) return 0; } +static inline int flock_lock_inode_wait(struct inode *inode, + struct file_lock *request) +{ + return -ENOLCK; +} + static inline int flock_lock_file_wait(struct file *filp, struct file_lock *request) { From 83bfff23e9ed19f37c4ef0bba84e75bd88e5cf21 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 11 Jul 2015 06:43:03 -0400 Subject: [PATCH 135/170] nfs4: have do_vfs_lock take an inode pointer Now that we have file locking helpers that can deal with an inode instead of a filp, we can change the NFSv4 locking code to use that instead. This should fix the case where we have a filp that is closed while flock or OFD locks are set on it, and the task is signaled so that it doesn't wait for the LOCKU reply to come in before the filp is freed. At that point we can end up with a use-after-free with the current code, which relies on dereferencing the fl_file in the lock request. Signed-off-by: Jeff Layton Reviewed-by: "J. Bruce Fields" Tested-by: "J. Bruce Fields" --- fs/nfs/nfs4proc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 60be01f69b84..8bee93469617 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5439,15 +5439,15 @@ static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock * return err; } -static int do_vfs_lock(struct file *file, struct file_lock *fl) +static int do_vfs_lock(struct inode *inode, struct file_lock *fl) { int res = 0; switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) { case FL_POSIX: - res = posix_lock_file_wait(file, fl); + res = posix_lock_inode_wait(inode, fl); break; case FL_FLOCK: - res = flock_lock_file_wait(file, fl); + res = flock_lock_inode_wait(inode, fl); break; default: BUG(); @@ -5507,7 +5507,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) switch (task->tk_status) { case 0: renew_lease(calldata->server, calldata->timestamp); - do_vfs_lock(calldata->fl.fl_file, &calldata->fl); + do_vfs_lock(calldata->lsp->ls_state->inode, &calldata->fl); if (nfs4_update_lock_stateid(calldata->lsp, &calldata->res.stateid)) break; @@ -5615,7 +5615,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * mutex_lock(&sp->so_delegreturn_mutex); /* Exclude nfs4_reclaim_open_stateid() - note nesting! */ down_read(&nfsi->rwsem); - if (do_vfs_lock(request->fl_file, request) == -ENOENT) { + if (do_vfs_lock(inode, request) == -ENOENT) { up_read(&nfsi->rwsem); mutex_unlock(&sp->so_delegreturn_mutex); goto out; @@ -5756,7 +5756,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) data->timestamp); if (data->arg.new_lock) { data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS); - if (do_vfs_lock(data->fl.fl_file, &data->fl) < 0) { + if (do_vfs_lock(lsp->ls_state->inode, &data->fl) < 0) { rpc_restart_call_prepare(task); break; } @@ -5998,7 +5998,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock if (status != 0) goto out; request->fl_flags |= FL_ACCESS; - status = do_vfs_lock(request->fl_file, request); + status = do_vfs_lock(state->inode, request); if (status < 0) goto out; down_read(&nfsi->rwsem); @@ -6006,7 +6006,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock /* Yes: cache locks! */ /* ...but avoid races with delegation recall... */ request->fl_flags = fl_flags & ~FL_SLEEP; - status = do_vfs_lock(request->fl_file, request); + status = do_vfs_lock(state->inode, request); up_read(&nfsi->rwsem); goto out; } From ee296d7c5709440f8abd36b5b65c6b3e388538d9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 11 Jul 2015 06:43:03 -0400 Subject: [PATCH 136/170] locks: inline posix_lock_file_wait and flock_lock_file_wait They just call file_inode and then the corresponding *_inode_file_wait function. Just make them static inlines instead. Signed-off-by: Jeff Layton --- fs/locks.c | 28 ---------------------------- include/linux/fs.h | 32 ++++++++++++++------------------ 2 files changed, 14 insertions(+), 46 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index ba268a503c1b..d3d558ba4da7 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1189,21 +1189,6 @@ int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl) } EXPORT_SYMBOL(posix_lock_inode_wait); -/** - * posix_lock_file_wait - Apply a POSIX-style lock to a file - * @filp: The file to apply the lock to - * @fl: The lock to be applied - * - * Add a POSIX style lock to a file. - * We merge adjacent & overlapping locks whenever possible. - * POSIX locks are sorted by owner task, then by starting address - */ -int posix_lock_file_wait(struct file *filp, struct file_lock *fl) -{ - return posix_lock_inode_wait(file_inode(filp), fl); -} -EXPORT_SYMBOL(posix_lock_file_wait); - /** * locks_mandatory_locked - Check for an active lock * @file: the file to check @@ -1889,19 +1874,6 @@ int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl) } EXPORT_SYMBOL(flock_lock_inode_wait); -/** - * flock_lock_file_wait - Apply a FLOCK-style lock to a file - * @filp: The file to apply the lock to - * @fl: The lock to be applied - * - * Add a FLOCK style lock to a file. - */ -int flock_lock_file_wait(struct file *filp, struct file_lock *fl) -{ - return flock_lock_inode_wait(file_inode(filp), fl); -} -EXPORT_SYMBOL(flock_lock_file_wait); - /** * sys_flock: - flock() system call. * @fd: the file descriptor to lock. diff --git a/include/linux/fs.h b/include/linux/fs.h index 4c990edd1377..cc008c338f5a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1047,13 +1047,11 @@ extern void locks_release_private(struct file_lock *); extern void posix_test_lock(struct file *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); extern int posix_lock_inode_wait(struct inode *, struct file_lock *); -extern int posix_lock_file_wait(struct file *, struct file_lock *); extern int posix_unblock_lock(struct file_lock *); extern int vfs_test_lock(struct file *, struct file_lock *); extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); extern int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl); -extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); extern void lease_get_mtime(struct inode *, struct timespec *time); extern int generic_setlease(struct file *, long, struct file_lock **, void **priv); @@ -1145,11 +1143,6 @@ static inline int posix_lock_inode_wait(struct inode *inode, return -ENOLCK; } -static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) -{ - return -ENOLCK; -} - static inline int posix_unblock_lock(struct file_lock *waiter) { return -ENOENT; @@ -1177,12 +1170,6 @@ static inline int flock_lock_inode_wait(struct inode *inode, return -ENOLCK; } -static inline int flock_lock_file_wait(struct file *filp, - struct file_lock *request) -{ - return -ENOLCK; -} - static inline int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) { return 0; @@ -1216,6 +1203,20 @@ static inline void show_fd_locks(struct seq_file *f, struct file *filp, struct files_struct *files) {} #endif /* !CONFIG_FILE_LOCKING */ +static inline struct inode *file_inode(const struct file *f) +{ + return f->f_inode; +} + +static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) +{ + return posix_lock_inode_wait(file_inode(filp), fl); +} + +static inline int flock_lock_file_wait(struct file *filp, struct file_lock *fl) +{ + return flock_lock_inode_wait(file_inode(filp), fl); +} struct fasync_struct { spinlock_t fa_lock; @@ -2025,11 +2026,6 @@ extern void ihold(struct inode * inode); extern void iput(struct inode *); extern int generic_update_time(struct inode *, struct timespec *, int); -static inline struct inode *file_inode(const struct file *f) -{ - return f->f_inode; -} - /* /sys/fs */ extern struct kobject *fs_kobj; From ba0ef85479c46a2ab354c2220bdb6152f7f4baf3 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 30 Jun 2015 13:15:31 -0600 Subject: [PATCH 137/170] tpm: Fix initialization of the cdev When a cdev is contained in a dynamic structure the cdev parent kobj should be set to the kobj that controls the lifetime of the enclosing structure. In TPM's case this is the embedded struct device. Also, cdev_init 0's the whole structure, so all sets must be after, not before. This fixes module ref counting and cdev. Cc: Fixes: 313d21eeab92 ("tpm: device class for tpm") Signed-off-by: Jason Gunthorpe Reviewed-by: Dmitry Torokhov Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm-chip.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index 283f00a7f036..1082d4bb016a 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -129,8 +129,9 @@ struct tpm_chip *tpmm_chip_alloc(struct device *dev, device_initialize(&chip->dev); - chip->cdev.owner = chip->pdev->driver->owner; cdev_init(&chip->cdev, &tpm_fops); + chip->cdev.owner = chip->pdev->driver->owner; + chip->cdev.kobj.parent = &chip->dev.kobj; return chip; } From b371616b8537d6450ebca0819defbf53452bebf3 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Wed, 24 Jun 2015 17:14:55 +0300 Subject: [PATCH 138/170] tpm, tpm_crb: fail when TPM2 ACPI table contents look corrupted At least some versions of AMI BIOS have corrupted contents in the TPM2 ACPI table and namely the physical address of the control area is set to zero. This patch changes the driver to fail gracefully when we observe a zero address instead of continuing to ioremap. Cc: Signed-off-by: Jarkko Sakkinen Reviewed-by: Peter Huewe Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm_crb.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c index 44f9d20c19ac..1267322595da 100644 --- a/drivers/char/tpm/tpm_crb.c +++ b/drivers/char/tpm/tpm_crb.c @@ -233,6 +233,14 @@ static int crb_acpi_add(struct acpi_device *device) return -ENODEV; } + /* At least some versions of AMI BIOS have a bug that TPM2 table has + * zero address for the control area and therefore we must fail. + */ + if (!buf->control_area_pa) { + dev_err(dev, "TPM2 ACPI table has a zero address for the control area\n"); + return -EINVAL; + } + if (buf->hdr.length < sizeof(struct acpi_tpm2)) { dev_err(dev, "TPM2 ACPI table has wrong size"); return -EINVAL; From 4139032b4860c06ff3a7687041f06535fed901ed Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Mon, 29 Jun 2015 09:57:00 -0400 Subject: [PATCH 139/170] IB: Add rdma_cap_ib_switch helper and use where appropriate Persuant to Liran's comments on node_type on linux-rdma mailing list: In an effort to reform the RDMA core and ULPs to minimize use of node_type in struct ib_device, an additional bit is added to struct ib_device for is_switch (IB switch). This is needed to be initialized by any IB switch device driver. This is a NEW requirement on such device drivers which are all "out of tree". In addition, an ib_switch helper was added to ib_verbs.h based on the is_switch device bit rather than node_type (although those should be consistent). The RDMA core (MAD, SMI, agent, sa_query, multicast, sysfs) as well as (IPoIB and SRP) ULPs are updated where appropriate to use this new helper. In some cases, the helper is now used under the covers of using rdma_[start end]_port rather than the open coding previously used. Reviewed-by: Sean Hefty Reviewed-By: Jason Gunthorpe Reviewed-by: Ira Weiny Tested-by: Ira Weiny Signed-off-by: Hal Rosenstock Signed-off-by: Doug Ledford --- drivers/infiniband/core/agent.c | 4 +- drivers/infiniband/core/mad.c | 45 ++++++++--------------- drivers/infiniband/core/multicast.c | 8 +--- drivers/infiniband/core/opa_smi.h | 4 +- drivers/infiniband/core/sa_query.c | 8 +--- drivers/infiniband/core/smi.c | 37 +++++++++---------- drivers/infiniband/core/smi.h | 4 +- drivers/infiniband/core/sysfs.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_main.c | 12 +----- drivers/infiniband/ulp/srp/ib_srp.c | 12 +----- include/rdma/ib_verbs.h | 20 ++++++++-- 11 files changed, 66 insertions(+), 90 deletions(-) diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index c7dcfe4ca5f1..0429040304fd 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -88,7 +88,7 @@ void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh * struct ib_ah *ah; struct ib_mad_send_wr_private *mad_send_wr; - if (device->node_type == RDMA_NODE_IB_SWITCH) + if (rdma_cap_ib_switch(device)) port_priv = ib_get_agent_port(device, 0); else port_priv = ib_get_agent_port(device, port_num); @@ -122,7 +122,7 @@ void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh * memcpy(send_buf->mad, mad_hdr, resp_mad_len); send_buf->ah = ah; - if (device->node_type == RDMA_NODE_IB_SWITCH) { + if (rdma_cap_ib_switch(device)) { mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private, send_buf); diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index a4b1466c1bf6..c82d751aede1 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -769,7 +769,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, bool opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device, mad_agent_priv->qp_info->port_priv->port_num); - if (device->node_type == RDMA_NODE_IB_SWITCH && + if (rdma_cap_ib_switch(device) && smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) port_num = send_wr->wr.ud.port_num; else @@ -787,7 +787,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, if ((opa_get_smp_direction(opa_smp) ? opa_smp->route.dr.dr_dlid : opa_smp->route.dr.dr_slid) == OPA_LID_PERMISSIVE && - opa_smi_handle_dr_smp_send(opa_smp, device->node_type, + opa_smi_handle_dr_smp_send(opa_smp, + rdma_cap_ib_switch(device), port_num) == IB_SMI_DISCARD) { ret = -EINVAL; dev_err(&device->dev, "OPA Invalid directed route\n"); @@ -810,7 +811,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, } else { if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) == IB_LID_PERMISSIVE && - smi_handle_dr_smp_send(smp, device->node_type, port_num) == + smi_handle_dr_smp_send(smp, rdma_cap_ib_switch(device), port_num) == IB_SMI_DISCARD) { ret = -EINVAL; dev_err(&device->dev, "Invalid directed route\n"); @@ -2030,7 +2031,7 @@ static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv struct ib_smp *smp = (struct ib_smp *)recv->mad; if (smi_handle_dr_smp_recv(smp, - port_priv->device->node_type, + rdma_cap_ib_switch(port_priv->device), port_num, port_priv->device->phys_port_cnt) == IB_SMI_DISCARD) @@ -2042,13 +2043,13 @@ static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv if (retsmi == IB_SMI_SEND) { /* don't forward */ if (smi_handle_dr_smp_send(smp, - port_priv->device->node_type, + rdma_cap_ib_switch(port_priv->device), port_num) == IB_SMI_DISCARD) return IB_SMI_DISCARD; if (smi_check_local_smp(smp, port_priv->device) == IB_SMI_DISCARD) return IB_SMI_DISCARD; - } else if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) { + } else if (rdma_cap_ib_switch(port_priv->device)) { /* forward case for switches */ memcpy(response, recv, mad_priv_size(response)); response->header.recv_wc.wc = &response->header.wc; @@ -2115,7 +2116,7 @@ handle_opa_smi(struct ib_mad_port_private *port_priv, struct opa_smp *smp = (struct opa_smp *)recv->mad; if (opa_smi_handle_dr_smp_recv(smp, - port_priv->device->node_type, + rdma_cap_ib_switch(port_priv->device), port_num, port_priv->device->phys_port_cnt) == IB_SMI_DISCARD) @@ -2127,7 +2128,7 @@ handle_opa_smi(struct ib_mad_port_private *port_priv, if (retsmi == IB_SMI_SEND) { /* don't forward */ if (opa_smi_handle_dr_smp_send(smp, - port_priv->device->node_type, + rdma_cap_ib_switch(port_priv->device), port_num) == IB_SMI_DISCARD) return IB_SMI_DISCARD; @@ -2135,7 +2136,7 @@ handle_opa_smi(struct ib_mad_port_private *port_priv, IB_SMI_DISCARD) return IB_SMI_DISCARD; - } else if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) { + } else if (rdma_cap_ib_switch(port_priv->device)) { /* forward case for switches */ memcpy(response, recv, mad_priv_size(response)); response->header.recv_wc.wc = &response->header.wc; @@ -2235,7 +2236,7 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, goto out; } - if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) + if (rdma_cap_ib_switch(port_priv->device)) port_num = wc->port_num; else port_num = port_priv->port_num; @@ -3297,17 +3298,11 @@ static int ib_mad_port_close(struct ib_device *device, int port_num) static void ib_mad_init_device(struct ib_device *device) { - int start, end, i; + int start, i; - if (device->node_type == RDMA_NODE_IB_SWITCH) { - start = 0; - end = 0; - } else { - start = 1; - end = device->phys_port_cnt; - } + start = rdma_start_port(device); - for (i = start; i <= end; i++) { + for (i = start; i <= rdma_end_port(device); i++) { if (!rdma_cap_ib_mad(device, i)) continue; @@ -3342,17 +3337,9 @@ error: static void ib_mad_remove_device(struct ib_device *device) { - int start, end, i; + int i; - if (device->node_type == RDMA_NODE_IB_SWITCH) { - start = 0; - end = 0; - } else { - start = 1; - end = device->phys_port_cnt; - } - - for (i = start; i <= end; i++) { + for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) { if (!rdma_cap_ib_mad(device, i)) continue; diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 1244f02a5c6d..2cb865c7ce7a 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -812,12 +812,8 @@ static void mcast_add_one(struct ib_device *device) if (!dev) return; - if (device->node_type == RDMA_NODE_IB_SWITCH) - dev->start_port = dev->end_port = 0; - else { - dev->start_port = 1; - dev->end_port = device->phys_port_cnt; - } + dev->start_port = rdma_start_port(device); + dev->end_port = rdma_end_port(device); for (i = 0; i <= dev->end_port - dev->start_port; i++) { if (!rdma_cap_ib_mcast(device, dev->start_port + i)) diff --git a/drivers/infiniband/core/opa_smi.h b/drivers/infiniband/core/opa_smi.h index 62d91bfa4cb7..3bfab3505a29 100644 --- a/drivers/infiniband/core/opa_smi.h +++ b/drivers/infiniband/core/opa_smi.h @@ -39,12 +39,12 @@ #include "smi.h" -enum smi_action opa_smi_handle_dr_smp_recv(struct opa_smp *smp, u8 node_type, +enum smi_action opa_smi_handle_dr_smp_recv(struct opa_smp *smp, bool is_switch, int port_num, int phys_port_cnt); int opa_smi_get_fwd_port(struct opa_smp *smp); extern enum smi_forward_action opa_smi_check_forward_dr_smp(struct opa_smp *smp); extern enum smi_action opa_smi_handle_dr_smp_send(struct opa_smp *smp, - u8 node_type, int port_num); + bool is_switch, int port_num); /* * Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 0fae85062a65..ca919f429666 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1156,12 +1156,8 @@ static void ib_sa_add_one(struct ib_device *device) int s, e, i; int count = 0; - if (device->node_type == RDMA_NODE_IB_SWITCH) - s = e = 0; - else { - s = 1; - e = device->phys_port_cnt; - } + s = rdma_start_port(device); + e = rdma_end_port(device); sa_dev = kzalloc(sizeof *sa_dev + (e - s + 1) * sizeof (struct ib_sa_port), diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c index 368a561d1a5d..f19b23817c2b 100644 --- a/drivers/infiniband/core/smi.c +++ b/drivers/infiniband/core/smi.c @@ -41,7 +41,7 @@ #include "smi.h" #include "opa_smi.h" -static enum smi_action __smi_handle_dr_smp_send(u8 node_type, int port_num, +static enum smi_action __smi_handle_dr_smp_send(bool is_switch, int port_num, u8 *hop_ptr, u8 hop_cnt, const u8 *initial_path, const u8 *return_path, @@ -64,7 +64,7 @@ static enum smi_action __smi_handle_dr_smp_send(u8 node_type, int port_num, /* C14-9:2 */ if (*hop_ptr && *hop_ptr < hop_cnt) { - if (node_type != RDMA_NODE_IB_SWITCH) + if (!is_switch) return IB_SMI_DISCARD; /* return_path set when received */ @@ -77,7 +77,7 @@ static enum smi_action __smi_handle_dr_smp_send(u8 node_type, int port_num, if (*hop_ptr == hop_cnt) { /* return_path set when received */ (*hop_ptr)++; - return (node_type == RDMA_NODE_IB_SWITCH || + return (is_switch || dr_dlid_is_permissive ? IB_SMI_HANDLE : IB_SMI_DISCARD); } @@ -96,7 +96,7 @@ static enum smi_action __smi_handle_dr_smp_send(u8 node_type, int port_num, /* C14-13:2 */ if (2 <= *hop_ptr && *hop_ptr <= hop_cnt) { - if (node_type != RDMA_NODE_IB_SWITCH) + if (!is_switch) return IB_SMI_DISCARD; (*hop_ptr)--; @@ -108,7 +108,7 @@ static enum smi_action __smi_handle_dr_smp_send(u8 node_type, int port_num, if (*hop_ptr == 1) { (*hop_ptr)--; /* C14-13:3 -- SMPs destined for SM shouldn't be here */ - return (node_type == RDMA_NODE_IB_SWITCH || + return (is_switch || dr_slid_is_permissive ? IB_SMI_HANDLE : IB_SMI_DISCARD); } @@ -127,9 +127,9 @@ static enum smi_action __smi_handle_dr_smp_send(u8 node_type, int port_num, * Return IB_SMI_DISCARD if the SMP should be discarded */ enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp, - u8 node_type, int port_num) + bool is_switch, int port_num) { - return __smi_handle_dr_smp_send(node_type, port_num, + return __smi_handle_dr_smp_send(is_switch, port_num, &smp->hop_ptr, smp->hop_cnt, smp->initial_path, smp->return_path, @@ -139,9 +139,9 @@ enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp, } enum smi_action opa_smi_handle_dr_smp_send(struct opa_smp *smp, - u8 node_type, int port_num) + bool is_switch, int port_num) { - return __smi_handle_dr_smp_send(node_type, port_num, + return __smi_handle_dr_smp_send(is_switch, port_num, &smp->hop_ptr, smp->hop_cnt, smp->route.dr.initial_path, smp->route.dr.return_path, @@ -152,7 +152,7 @@ enum smi_action opa_smi_handle_dr_smp_send(struct opa_smp *smp, OPA_LID_PERMISSIVE); } -static enum smi_action __smi_handle_dr_smp_recv(u8 node_type, int port_num, +static enum smi_action __smi_handle_dr_smp_recv(bool is_switch, int port_num, int phys_port_cnt, u8 *hop_ptr, u8 hop_cnt, const u8 *initial_path, @@ -173,7 +173,7 @@ static enum smi_action __smi_handle_dr_smp_recv(u8 node_type, int port_num, /* C14-9:2 -- intermediate hop */ if (*hop_ptr && *hop_ptr < hop_cnt) { - if (node_type != RDMA_NODE_IB_SWITCH) + if (!is_switch) return IB_SMI_DISCARD; return_path[*hop_ptr] = port_num; @@ -188,7 +188,7 @@ static enum smi_action __smi_handle_dr_smp_recv(u8 node_type, int port_num, return_path[*hop_ptr] = port_num; /* hop_ptr updated when sending */ - return (node_type == RDMA_NODE_IB_SWITCH || + return (is_switch || dr_dlid_is_permissive ? IB_SMI_HANDLE : IB_SMI_DISCARD); } @@ -208,7 +208,7 @@ static enum smi_action __smi_handle_dr_smp_recv(u8 node_type, int port_num, /* C14-13:2 */ if (2 <= *hop_ptr && *hop_ptr <= hop_cnt) { - if (node_type != RDMA_NODE_IB_SWITCH) + if (!is_switch) return IB_SMI_DISCARD; /* hop_ptr updated when sending */ @@ -224,8 +224,7 @@ static enum smi_action __smi_handle_dr_smp_recv(u8 node_type, int port_num, return IB_SMI_HANDLE; } /* hop_ptr updated when sending */ - return (node_type == RDMA_NODE_IB_SWITCH ? - IB_SMI_HANDLE : IB_SMI_DISCARD); + return (is_switch ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:4 -- hop_ptr = 0 -> give to SM */ @@ -238,10 +237,10 @@ static enum smi_action __smi_handle_dr_smp_recv(u8 node_type, int port_num, * Adjust information for a received SMP * Return IB_SMI_DISCARD if the SMP should be dropped */ -enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type, +enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, bool is_switch, int port_num, int phys_port_cnt) { - return __smi_handle_dr_smp_recv(node_type, port_num, phys_port_cnt, + return __smi_handle_dr_smp_recv(is_switch, port_num, phys_port_cnt, &smp->hop_ptr, smp->hop_cnt, smp->initial_path, smp->return_path, @@ -254,10 +253,10 @@ enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type, * Adjust information for a received SMP * Return IB_SMI_DISCARD if the SMP should be dropped */ -enum smi_action opa_smi_handle_dr_smp_recv(struct opa_smp *smp, u8 node_type, +enum smi_action opa_smi_handle_dr_smp_recv(struct opa_smp *smp, bool is_switch, int port_num, int phys_port_cnt) { - return __smi_handle_dr_smp_recv(node_type, port_num, phys_port_cnt, + return __smi_handle_dr_smp_recv(is_switch, port_num, phys_port_cnt, &smp->hop_ptr, smp->hop_cnt, smp->route.dr.initial_path, smp->route.dr.return_path, diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h index aff96bac49b4..33c91c8a16e9 100644 --- a/drivers/infiniband/core/smi.h +++ b/drivers/infiniband/core/smi.h @@ -51,12 +51,12 @@ enum smi_forward_action { IB_SMI_FORWARD /* SMP should be forwarded (for switches only) */ }; -enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type, +enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, bool is_switch, int port_num, int phys_port_cnt); int smi_get_fwd_port(struct ib_smp *smp); extern enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp); extern enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp, - u8 node_type, int port_num); + bool is_switch, int port_num); /* * Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index ed6b6c85c334..0b84a9cdfe5b 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -870,7 +870,7 @@ int ib_device_register_sysfs(struct ib_device *device, goto err_put; } - if (device->node_type == RDMA_NODE_IB_SWITCH) { + if (rdma_cap_ib_switch(device)) { ret = add_port(device, 0, port_callback); if (ret) goto err_put; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index da149c278cb8..0d8336e91e40 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1684,7 +1684,7 @@ static void ipoib_add_one(struct ib_device *device) struct list_head *dev_list; struct net_device *dev; struct ipoib_dev_priv *priv; - int s, e, p; + int p; int count = 0; dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL); @@ -1693,15 +1693,7 @@ static void ipoib_add_one(struct ib_device *device) INIT_LIST_HEAD(dev_list); - if (device->node_type == RDMA_NODE_IB_SWITCH) { - s = 0; - e = 0; - } else { - s = 1; - e = device->phys_port_cnt; - } - - for (p = s; p <= e; ++p) { + for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) { if (!rdma_protocol_ib(device, p)) continue; dev = ipoib_add_port("ib%d", device, p); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 267dc4f75502..d40e321768a0 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3379,7 +3379,7 @@ static void srp_add_one(struct ib_device *device) struct srp_device *srp_dev; struct ib_device_attr *dev_attr; struct srp_host *host; - int mr_page_shift, s, e, p; + int mr_page_shift, p; u64 max_pages_per_mr; dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL); @@ -3443,15 +3443,7 @@ static void srp_add_one(struct ib_device *device) if (IS_ERR(srp_dev->mr)) goto err_pd; - if (device->node_type == RDMA_NODE_IB_SWITCH) { - s = 0; - e = 0; - } else { - s = 1; - e = device->phys_port_cnt; - } - - for (p = s; p <= e; ++p) { + for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) { host = srp_add_port(srp_dev, p); if (host) list_add_tail(&host->list, &srp_dev->dev_list); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 986fddb08579..b0f898e3b2e7 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1745,6 +1745,7 @@ struct ib_device { char node_desc[64]; __be64 node_guid; u32 local_dma_lkey; + u16 is_switch:1; u8 node_type; u8 phys_port_cnt; @@ -1823,6 +1824,20 @@ int ib_query_port(struct ib_device *device, enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_num); +/** + * rdma_cap_ib_switch - Check if the device is IB switch + * @device: Device to check + * + * Device driver is responsible for setting is_switch bit on + * in ib_device structure at init time. + * + * Return: true if the device is IB switch. + */ +static inline bool rdma_cap_ib_switch(const struct ib_device *device) +{ + return device->is_switch; +} + /** * rdma_start_port - Return the first valid port number for the device * specified @@ -1833,7 +1848,7 @@ enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, */ static inline u8 rdma_start_port(const struct ib_device *device) { - return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1; + return rdma_cap_ib_switch(device) ? 0 : 1; } /** @@ -1846,8 +1861,7 @@ static inline u8 rdma_start_port(const struct ib_device *device) */ static inline u8 rdma_end_port(const struct ib_device *device) { - return (device->node_type == RDMA_NODE_IB_SWITCH) ? - 0 : device->phys_port_cnt; + return rdma_cap_ib_switch(device) ? 0 : device->phys_port_cnt; } static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num) From cd4cd565e049c6e734a12754d91d1142c25f6a64 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 25 Jun 2015 12:04:49 -0400 Subject: [PATCH 140/170] IB/mad: Fix compare between big endian and cpu endian The define OPA_LID_PERMISSIVE is big endian and was compared to the cpu endian variable opa_drslid. Problem caught by 0-day build infrastructure. Fixes: 8e4349d13f33 (IB/mad: Add final OPA MAD processing) Signed-off-by: Ira Weiny Reviewed-by: Mike Marciniszyn Reviewed-by: John, Jubin Signed-off-by: Doug Ledford --- drivers/infiniband/core/mad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index c82d751aede1..786fc51bf04b 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -795,7 +795,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, goto out; } opa_drslid = be32_to_cpu(opa_smp->route.dr.dr_slid); - if (opa_drslid != OPA_LID_PERMISSIVE && + if (opa_drslid != be32_to_cpu(OPA_LID_PERMISSIVE) && opa_drslid & 0xffff0000) { ret = -EINVAL; dev_err(&device->dev, "OPA Invalid dr_slid 0x%x\n", From 3b8ab700af0a5c5e59c833f8a88f94b97499f5e5 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 25 Jun 2015 09:52:50 -0400 Subject: [PATCH 141/170] IB/mad: Remove improper use of BUG_ON We recently added BUG_ON's which were inappropriate for a condition which should never happen. Change these to be WARN_ON_ONCE as a debugging aid. Fixes: 4cd7c9479aff ('IB/mad: Add support for additional MAD info to/from drivers') Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ehca/ehca_sqp.c | 5 +++-- drivers/infiniband/hw/ipath/ipath_mad.c | 5 +++-- drivers/infiniband/hw/mlx4/mad.c | 5 +++-- drivers/infiniband/hw/mlx5/mad.c | 5 +++-- drivers/infiniband/hw/mthca/mthca_mad.c | 5 +++-- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 5 +++-- drivers/infiniband/hw/qib/qib_mad.c | 5 +++-- 7 files changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c index 12b5bc23832b..376b031c2c7f 100644 --- a/drivers/infiniband/hw/ehca/ehca_sqp.c +++ b/drivers/infiniband/hw/ehca/ehca_sqp.c @@ -226,8 +226,9 @@ int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - BUG_ON(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad)); + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc) return IB_MAD_RESULT_FAILURE; diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c index 948188e37f95..ad3a926ab3c5 100644 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -1499,8 +1499,9 @@ int ipath_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - BUG_ON(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad)); + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 85a50df2f203..9a810e302f6b 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -861,8 +861,9 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - BUG_ON(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad)); + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; switch (rdma_port_get_link_layer(ibdev, port_num)) { case IB_LINK_LAYER_INFINIBAND: diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 01fc97db45d6..b84d13a487cc 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -68,8 +68,9 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - BUG_ON(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad)); + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 6b2418b74c99..7c3f2fb44ba5 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -209,8 +209,9 @@ int mthca_process_mad(struct ib_device *ibdev, const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - BUG_ON(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad)); + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; /* Forward locally generated traps to the SM */ if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 4bafa15708d0..29b27675dd70 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -215,8 +215,9 @@ int ocrdma_process_mad(struct ib_device *ibdev, const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - BUG_ON(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad)); + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_PERF_MGMT: diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 05e3242d8442..9625e7c438e5 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -2412,8 +2412,9 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - BUG_ON(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad)); + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: From b356c1c1f90a347b6f67d9272dda7ecf47e0b46c Mon Sep 17 00:00:00 2001 From: Vaishali Thakkar Date: Wed, 24 Jun 2015 10:12:13 +0530 Subject: [PATCH 142/170] IB/srpt: Convert use of __constant_cpu_to_beXX to cpu_to_beXX In little endian cases, the macro cpu_to_be{16,32,64} unfolds to __swab{16,32,64} which provides special case for constants. In big endian cases, __constant_cpu_to_be{16,32,64} and cpu_to_be{16,32,64} expand directly to the same expression. So, replace __constant_cpu_to_be{16,32,64} with cpu_to_be{16,32,64} with the goal of getting rid of the definitions of __constant_cpu_to_be{16,32,64} completely. The Coccinelle semantic patch that performs this transformation is as follows: @@expression x;@@ ( - __constant_cpu_to_be16(x) + cpu_to_be16(x) | - __constant_cpu_to_be32(x) + cpu_to_be32(x) | - __constant_cpu_to_be64(x) + cpu_to_be64(x) ) Signed-off-by: Vaishali Thakkar Reviewed-by: Bart Van Assche Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srpt/ib_srpt.c | 71 +++++++++++++-------------- 1 file changed, 35 insertions(+), 36 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 82897ca17f32..60ff0a2390e5 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -302,7 +302,7 @@ static void srpt_get_iou(struct ib_dm_mad *mad) int i; ioui = (struct ib_dm_iou_info *)mad->data; - ioui->change_id = __constant_cpu_to_be16(1); + ioui->change_id = cpu_to_be16(1); ioui->max_controllers = 16; /* set present for slot 1 and empty for the rest */ @@ -330,13 +330,13 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot, if (!slot || slot > 16) { mad->mad_hdr.status - = __constant_cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD); + = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD); return; } if (slot > 2) { mad->mad_hdr.status - = __constant_cpu_to_be16(DM_MAD_STATUS_NO_IOC); + = cpu_to_be16(DM_MAD_STATUS_NO_IOC); return; } @@ -348,10 +348,10 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot, iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver); iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id); iocp->subsys_device_id = 0x0; - iocp->io_class = __constant_cpu_to_be16(SRP_REV16A_IB_IO_CLASS); - iocp->io_subclass = __constant_cpu_to_be16(SRP_IO_SUBCLASS); - iocp->protocol = __constant_cpu_to_be16(SRP_PROTOCOL); - iocp->protocol_version = __constant_cpu_to_be16(SRP_PROTOCOL_VERSION); + iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS); + iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS); + iocp->protocol = cpu_to_be16(SRP_PROTOCOL); + iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION); iocp->send_queue_depth = cpu_to_be16(sdev->srq_size); iocp->rdma_read_depth = 4; iocp->send_size = cpu_to_be32(srp_max_req_size); @@ -379,13 +379,13 @@ static void srpt_get_svc_entries(u64 ioc_guid, if (!slot || slot > 16) { mad->mad_hdr.status - = __constant_cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD); + = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD); return; } if (slot > 2 || lo > hi || hi > 1) { mad->mad_hdr.status - = __constant_cpu_to_be16(DM_MAD_STATUS_NO_IOC); + = cpu_to_be16(DM_MAD_STATUS_NO_IOC); return; } @@ -436,7 +436,7 @@ static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad, break; default: rsp_mad->mad_hdr.status = - __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR); + cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR); break; } } @@ -493,11 +493,11 @@ static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent, break; case IB_MGMT_METHOD_SET: dm_mad->mad_hdr.status = - __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR); + cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR); break; default: dm_mad->mad_hdr.status = - __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD); + cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD); break; } @@ -1535,7 +1535,7 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch, memset(srp_rsp, 0, sizeof *srp_rsp); srp_rsp->opcode = SRP_RSP; srp_rsp->req_lim_delta = - __constant_cpu_to_be32(1 + atomic_xchg(&ch->req_lim_delta, 0)); + cpu_to_be32(1 + atomic_xchg(&ch->req_lim_delta, 0)); srp_rsp->tag = tag; srp_rsp->status = status; @@ -1585,8 +1585,8 @@ static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch, memset(srp_rsp, 0, sizeof *srp_rsp); srp_rsp->opcode = SRP_RSP; - srp_rsp->req_lim_delta = __constant_cpu_to_be32(1 - + atomic_xchg(&ch->req_lim_delta, 0)); + srp_rsp->req_lim_delta = + cpu_to_be32(1 + atomic_xchg(&ch->req_lim_delta, 0)); srp_rsp->tag = tag; srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID; @@ -1630,7 +1630,7 @@ static uint64_t srpt_unpack_lun(const uint8_t *lun, int len) switch (len) { case 8: if ((*((__be64 *)lun) & - __constant_cpu_to_be64(0x0000FFFFFFFFFFFFLL)) != 0) + cpu_to_be64(0x0000FFFFFFFFFFFFLL)) != 0) goto out_err; break; case 4: @@ -2449,8 +2449,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, } if (it_iu_len > srp_max_req_size || it_iu_len < 64) { - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE); + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE); ret = -EINVAL; pr_err("rejected SRP_LOGIN_REQ because its" " length (%d bytes) is out of range (%d .. %d)\n", @@ -2459,8 +2459,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, } if (!sport->enabled) { - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); ret = -EINVAL; pr_err("rejected SRP_LOGIN_REQ because the target port" " has not yet been enabled\n"); @@ -2505,8 +2505,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, if (*(__be64 *)req->target_port_id != cpu_to_be64(srpt_service_guid) || *(__be64 *)(req->target_port_id + 8) != cpu_to_be64(srpt_service_guid)) { - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL); + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL); ret = -ENOMEM; pr_err("rejected SRP_LOGIN_REQ because it" " has an invalid target port identifier.\n"); @@ -2515,8 +2515,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, ch = kzalloc(sizeof *ch, GFP_KERNEL); if (!ch) { - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); pr_err("rejected SRP_LOGIN_REQ because no memory.\n"); ret = -ENOMEM; goto reject; @@ -2552,8 +2552,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, ret = srpt_create_ch_ib(ch); if (ret) { - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); pr_err("rejected SRP_LOGIN_REQ because creating" " a new RDMA channel failed.\n"); goto free_ring; @@ -2561,8 +2561,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, ret = srpt_ch_qp_rtr(ch, ch->qp); if (ret) { - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); pr_err("rejected SRP_LOGIN_REQ because enabling" " RTR failed (error code = %d)\n", ret); goto destroy_ib; @@ -2580,15 +2579,15 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, if (!nacl) { pr_info("Rejected login because no ACL has been" " configured yet for initiator %s.\n", ch->sess_name); - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED); + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED); goto destroy_ib; } ch->sess = transport_init_session(TARGET_PROT_NORMAL); if (IS_ERR(ch->sess)) { - rej->reason = __constant_cpu_to_be32( - SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + rej->reason = cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); pr_debug("Failed to create session\n"); goto deregister_session; } @@ -2604,8 +2603,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, rsp->max_it_iu_len = req->req_it_iu_len; rsp->max_ti_iu_len = req->req_it_iu_len; ch->max_ti_iu_len = it_iu_len; - rsp->buf_fmt = __constant_cpu_to_be16(SRP_BUF_FORMAT_DIRECT - | SRP_BUF_FORMAT_INDIRECT); + rsp->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT + | SRP_BUF_FORMAT_INDIRECT); rsp->req_lim_delta = cpu_to_be32(ch->rq_size); atomic_set(&ch->req_lim, ch->rq_size); atomic_set(&ch->req_lim_delta, 0); @@ -2655,8 +2654,8 @@ free_ch: reject: rej->opcode = SRP_LOGIN_REJ; rej->tag = req->tag; - rej->buf_fmt = __constant_cpu_to_be16(SRP_BUF_FORMAT_DIRECT - | SRP_BUF_FORMAT_INDIRECT); + rej->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT + | SRP_BUF_FORMAT_INDIRECT); ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, (void *)rej, sizeof *rej); From 3fdf70acec13efc7ecf254f5a364df06348bfd2c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 25 Jun 2015 13:34:15 +0300 Subject: [PATCH 143/170] IB/srp: Avoid using uninitialized variable We might return res which is not initialized. Also reduce code duplication by exporting srp_parse_tmo so srp_tmo_set can reuse it. Detected by Coverity. Signed-off-by: Sagi Grimberg Signed-off-by: Jenny Falkovich Reviewed-by: Bart Van Assche Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srp/ib_srp.c | 11 ++++------- drivers/scsi/scsi_transport_srp.c | 3 ++- include/scsi/scsi_transport_srp.h | 1 + 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index d40e321768a0..31a20b462266 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -161,13 +161,10 @@ static int srp_tmo_set(const char *val, const struct kernel_param *kp) { int tmo, res; - if (strncmp(val, "off", 3) != 0) { - res = kstrtoint(val, 0, &tmo); - if (res) - goto out; - } else { - tmo = -1; - } + res = srp_parse_tmo(&tmo, val); + if (res) + goto out; + if (kp->arg == &srp_reconnect_delay) res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo, srp_dev_loss_tmo); diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index a85292b1d09d..e3cd3ece4412 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -203,7 +203,7 @@ static ssize_t srp_show_tmo(char *buf, int tmo) return tmo >= 0 ? sprintf(buf, "%d\n", tmo) : sprintf(buf, "off\n"); } -static int srp_parse_tmo(int *tmo, const char *buf) +int srp_parse_tmo(int *tmo, const char *buf) { int res = 0; @@ -214,6 +214,7 @@ static int srp_parse_tmo(int *tmo, const char *buf) return res; } +EXPORT_SYMBOL(srp_parse_tmo); static ssize_t show_reconnect_delay(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/include/scsi/scsi_transport_srp.h b/include/scsi/scsi_transport_srp.h index cdb05dd1d440..d40d3ef25707 100644 --- a/include/scsi/scsi_transport_srp.h +++ b/include/scsi/scsi_transport_srp.h @@ -119,6 +119,7 @@ extern struct srp_rport *srp_rport_add(struct Scsi_Host *, extern void srp_rport_del(struct srp_rport *); extern int srp_tmo_valid(int reconnect_delay, int fast_io_fail_tmo, int dev_loss_tmo); +int srp_parse_tmo(int *tmo, const char *buf); extern int srp_reconnect_rport(struct srp_rport *rport); extern void srp_start_tl_fail_timers(struct srp_rport *rport); extern void srp_remove_host(struct Scsi_Host *); From be4b499323bf7291b491c6df51baae62f45b8404 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Thu, 25 Jun 2015 17:13:22 +0300 Subject: [PATCH 144/170] IB/cm: Do not queue work to a device that's going away Whenever ib_cm gets remove_one call, like when there is a hot-unplug event, the driver should mark itself as going_down and confirm that no new works are going to be queued for that device. so, the order of the actions are: 1. mark the going_down bit. 2. flush the wq. 3. [make sure no new works for that device.] 4. unregister mad agent. otherwise, works that are already queued can be scheduled after the mad agent was freed. Signed-off-by: Erez Shitrit Signed-off-by: Doug Ledford --- drivers/infiniband/core/cm.c | 61 ++++++++++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index dbddddd6fb5d..3a972ebf3c0d 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -169,6 +169,7 @@ struct cm_device { struct ib_device *ib_device; struct device *device; u8 ack_delay; + int going_down; struct cm_port *port[0]; }; @@ -805,6 +806,11 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv) { int wait_time; unsigned long flags; + struct cm_device *cm_dev; + + cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client); + if (!cm_dev) + return; spin_lock_irqsave(&cm.lock, flags); cm_cleanup_timewait(cm_id_priv->timewait_info); @@ -818,8 +824,14 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv) */ cm_id_priv->id.state = IB_CM_TIMEWAIT; wait_time = cm_convert_to_ms(cm_id_priv->av.timeout); - queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work, - msecs_to_jiffies(wait_time)); + + /* Check if the device started its remove_one */ + spin_lock_irq(&cm.lock); + if (!cm_dev->going_down) + queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work, + msecs_to_jiffies(wait_time)); + spin_unlock_irq(&cm.lock); + cm_id_priv->timewait_info = NULL; } @@ -3305,6 +3317,11 @@ static int cm_establish(struct ib_cm_id *cm_id) struct cm_work *work; unsigned long flags; int ret = 0; + struct cm_device *cm_dev; + + cm_dev = ib_get_client_data(cm_id->device, &cm_client); + if (!cm_dev) + return -ENODEV; work = kmalloc(sizeof *work, GFP_ATOMIC); if (!work) @@ -3343,7 +3360,17 @@ static int cm_establish(struct ib_cm_id *cm_id) work->remote_id = cm_id->remote_id; work->mad_recv_wc = NULL; work->cm_event.event = IB_CM_USER_ESTABLISHED; - queue_delayed_work(cm.wq, &work->work, 0); + + /* Check if the device started its remove_one */ + spin_lock_irq(&cm.lock); + if (!cm_dev->going_down) { + queue_delayed_work(cm.wq, &work->work, 0); + } else { + kfree(work); + ret = -ENODEV; + } + spin_unlock_irq(&cm.lock); + out: return ret; } @@ -3394,6 +3421,7 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent, enum ib_cm_event_type event; u16 attr_id; int paths = 0; + int going_down = 0; switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) { case CM_REQ_ATTR_ID: @@ -3452,7 +3480,19 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent, work->cm_event.event = event; work->mad_recv_wc = mad_recv_wc; work->port = port; - queue_delayed_work(cm.wq, &work->work, 0); + + /* Check if the device started its remove_one */ + spin_lock_irq(&cm.lock); + if (!port->cm_dev->going_down) + queue_delayed_work(cm.wq, &work->work, 0); + else + going_down = 1; + spin_unlock_irq(&cm.lock); + + if (going_down) { + kfree(work); + ib_free_recv_mad(mad_recv_wc); + } } static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, @@ -3771,7 +3811,7 @@ static void cm_add_one(struct ib_device *ib_device) cm_dev->ib_device = ib_device; cm_get_ack_delay(cm_dev); - + cm_dev->going_down = 0; cm_dev->device = device_create(&cm_class, &ib_device->dev, MKDEV(0, 0), NULL, "%s", ib_device->name); @@ -3864,14 +3904,23 @@ static void cm_remove_one(struct ib_device *ib_device) list_del(&cm_dev->list); write_unlock_irqrestore(&cm.device_lock, flags); + spin_lock_irq(&cm.lock); + cm_dev->going_down = 1; + spin_unlock_irq(&cm.lock); + for (i = 1; i <= ib_device->phys_port_cnt; i++) { if (!rdma_cap_ib_cm(ib_device, i)) continue; port = cm_dev->port[i-1]; ib_modify_port(ib_device, port->port_num, 0, &port_modify); - ib_unregister_mad_agent(port->mad_agent); + /* + * We flush the queue here after the going_down set, this + * verify that no new works will be queued in the recv handler, + * after that we can call the unregister_mad_agent + */ flush_workqueue(cm.wq); + ib_unregister_mad_agent(port->mad_agent); cm_remove_port_fs(port); } device_unregister(cm_dev->device); From 8a7ff14dcb40bade309cd2ad04c746bb1d169c4e Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Wed, 1 Jul 2015 14:31:02 +0300 Subject: [PATCH 145/170] IB/mlx4: Do not attemp to report HCA clock offset on VFs mlx4 VFs can provide CQE raw time-stamping services, but they don't have the hca core clock mapped to their PCI bars. As such, we should not attempt to query and report the clock offset to user space for VFs. Doing so causes query_device over VFs to fail with -ENOSUPP. Fixes: 4b664c4355b2 ('IB/mlx4: Add support for CQ time-stamping') Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 067a691ecbed..f419a728a22b 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -253,14 +253,15 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->hca_core_clock = dev->dev->caps.hca_core_clock * 1000UL; props->timestamp_mask = 0xFFFFFFFFFFFFULL; - err = mlx4_get_internal_clock_params(dev->dev, &clock_params); - if (err) - goto out; + if (!mlx4_is_slave(dev->dev)) + err = mlx4_get_internal_clock_params(dev->dev, &clock_params); if (uhw->outlen >= resp.response_length + sizeof(resp.hca_core_clock_offset)) { - resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE; resp.response_length += sizeof(resp.hca_core_clock_offset); - resp.comp_mask |= QUERY_DEVICE_RESP_MASK_TIMESTAMP; + if (!err && !mlx4_is_slave(dev->dev)) { + resp.comp_mask |= QUERY_DEVICE_RESP_MASK_TIMESTAMP; + resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE; + } } if (uhw->outlen) { From 58e9cc90cda7bc732856a2ad3210328fbc4f6ca2 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Wed, 1 Jul 2015 14:31:01 +0300 Subject: [PATCH 146/170] IB/IPoIB: Fix bad error flow in ipoib_add_port() Error values of ib_query_port() and ib_query_device() weren't propagated correctly. Because of that, ipoib_add_port() could return NULL value, which escaped the IS_ERR() check in ipoib_add_one() and we crashed. Signed-off-by: Amir Vadai Signed-off-by: Or Gerlitz Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 0d8336e91e40..da3e7e7d5e80 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1577,7 +1577,8 @@ static struct net_device *ipoib_add_port(const char *format, SET_NETDEV_DEV(priv->dev, hca->dma_device); priv->dev->dev_id = port - 1; - if (!ib_query_port(hca, port, &attr)) + result = ib_query_port(hca, port, &attr); + if (!result) priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); else { printk(KERN_WARNING "%s: ib_query_port %d failed\n", @@ -1598,7 +1599,8 @@ static struct net_device *ipoib_add_port(const char *format, goto device_init_failed; } - if (ipoib_set_dev_features(priv, hca)) + result = ipoib_set_dev_features(priv, hca); + if (result) goto device_init_failed; /* From a7f2f24cd716d7f1119d46929f41a8436f9c252f Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Thu, 2 Jul 2015 12:47:44 -0500 Subject: [PATCH 147/170] RDMA/core: Fixes for port mapper client registration Fixes to allow clients to make remove mapping requests, after they have provided the user space service with the mapping information, they are using when the service is restarted. 1) Adding IWPM_REG_VALID, IWPM_REG_INCOMPL and IWPM_REG_UNDEF registration types for the port mapper clients and functions to set/check the registration type. 2) If the port mapper user space service is not available to register the client, then its registration stays IWPM_REG_UNDEF and the registration isn't checked until the service becomes available (no mappings are possible, if the user space service isn't running). 3) After the service is restarted, the user space port mapper pid is set to valid and the client registration is set to IWPM_REG_INCOMPL to allow the client to make remove mapping requests. Signed-off-by: Tatyana Nikolova Reviewed-by: Steve Wise Tested-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/core/iwpm_msg.c | 33 ++++++++++++++--------------- drivers/infiniband/core/iwpm_util.c | 12 +++++++++-- drivers/infiniband/core/iwpm_util.h | 28 ++++++++++++++++++------ 3 files changed, 48 insertions(+), 25 deletions(-) diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c index e6ffa2e66c1a..22a3abee2a54 100644 --- a/drivers/infiniband/core/iwpm_msg.c +++ b/drivers/infiniband/core/iwpm_msg.c @@ -67,7 +67,8 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client) err_str = "Invalid port mapper client"; goto pid_query_error; } - if (iwpm_registered_client(nl_client)) + if (iwpm_check_registration(nl_client, IWPM_REG_VALID) || + iwpm_user_pid == IWPM_PID_UNAVAILABLE) return 0; skb = iwpm_create_nlmsg(RDMA_NL_IWPM_REG_PID, &nlh, nl_client); if (!skb) { @@ -106,7 +107,6 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client) ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_IWPM, GFP_KERNEL); if (ret) { skb = NULL; /* skb is freed in the netlink send-op handling */ - iwpm_set_registered(nl_client, 1); iwpm_user_pid = IWPM_PID_UNAVAILABLE; err_str = "Unable to send a nlmsg"; goto pid_query_error; @@ -144,12 +144,12 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) err_str = "Invalid port mapper client"; goto add_mapping_error; } - if (!iwpm_registered_client(nl_client)) { + if (!iwpm_valid_pid()) + return 0; + if (!iwpm_check_registration(nl_client, IWPM_REG_VALID)) { err_str = "Unregistered port mapper client"; goto add_mapping_error; } - if (!iwpm_valid_pid()) - return 0; skb = iwpm_create_nlmsg(RDMA_NL_IWPM_ADD_MAPPING, &nlh, nl_client); if (!skb) { err_str = "Unable to create a nlmsg"; @@ -214,12 +214,12 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) err_str = "Invalid port mapper client"; goto query_mapping_error; } - if (!iwpm_registered_client(nl_client)) { + if (!iwpm_valid_pid()) + return 0; + if (!iwpm_check_registration(nl_client, IWPM_REG_VALID)) { err_str = "Unregistered port mapper client"; goto query_mapping_error; } - if (!iwpm_valid_pid()) - return 0; ret = -ENOMEM; skb = iwpm_create_nlmsg(RDMA_NL_IWPM_QUERY_MAPPING, &nlh, nl_client); if (!skb) { @@ -288,12 +288,12 @@ int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client) err_str = "Invalid port mapper client"; goto remove_mapping_error; } - if (!iwpm_registered_client(nl_client)) { + if (!iwpm_valid_pid()) + return 0; + if (iwpm_check_registration(nl_client, IWPM_REG_UNDEF)) { err_str = "Unregistered port mapper client"; goto remove_mapping_error; } - if (!iwpm_valid_pid()) - return 0; skb = iwpm_create_nlmsg(RDMA_NL_IWPM_REMOVE_MAPPING, &nlh, nl_client); if (!skb) { ret = -ENOMEM; @@ -388,7 +388,7 @@ int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb) pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n", __func__, iwpm_user_pid); if (iwpm_valid_client(nl_client)) - iwpm_set_registered(nl_client, 1); + iwpm_set_registration(nl_client, IWPM_REG_VALID); register_pid_response_exit: nlmsg_request->request_done = 1; /* always for found nlmsg_request */ @@ -644,7 +644,6 @@ int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb) { struct nlattr *nltb[IWPM_NLA_MAPINFO_REQ_MAX]; const char *msg_type = "Mapping Info response"; - int iwpm_pid; u8 nl_client; char *iwpm_name; u16 iwpm_version; @@ -669,14 +668,14 @@ int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb) __func__, nl_client); return ret; } - iwpm_set_registered(nl_client, 0); + iwpm_set_registration(nl_client, IWPM_REG_INCOMPL); atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); + iwpm_user_pid = cb->nlh->nlmsg_pid; if (!iwpm_mapinfo_available()) return 0; - iwpm_pid = cb->nlh->nlmsg_pid; pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n", - __func__, iwpm_pid); - ret = iwpm_send_mapinfo(nl_client, iwpm_pid); + __func__, iwpm_user_pid); + ret = iwpm_send_mapinfo(nl_client, iwpm_user_pid); return ret; } EXPORT_SYMBOL(iwpm_mapping_info_cb); diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index a626795bf9c7..5fb089e91353 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -78,6 +78,7 @@ init_exit: mutex_unlock(&iwpm_admin_lock); if (!ret) { iwpm_set_valid(nl_client, 1); + iwpm_set_registration(nl_client, IWPM_REG_UNDEF); pr_debug("%s: Mapinfo and reminfo tables are created\n", __func__); } @@ -106,6 +107,7 @@ int iwpm_exit(u8 nl_client) } mutex_unlock(&iwpm_admin_lock); iwpm_set_valid(nl_client, 0); + iwpm_set_registration(nl_client, IWPM_REG_UNDEF); return 0; } EXPORT_SYMBOL(iwpm_exit); @@ -397,17 +399,23 @@ void iwpm_set_valid(u8 nl_client, int valid) } /* valid client */ -int iwpm_registered_client(u8 nl_client) +u32 iwpm_get_registration(u8 nl_client) { return iwpm_admin.reg_list[nl_client]; } /* valid client */ -void iwpm_set_registered(u8 nl_client, int reg) +void iwpm_set_registration(u8 nl_client, u32 reg) { iwpm_admin.reg_list[nl_client] = reg; } +/* valid client */ +u32 iwpm_check_registration(u8 nl_client, u32 reg) +{ + return (iwpm_get_registration(nl_client) & reg); +} + int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr, struct sockaddr_storage *b_sockaddr) { diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h index ee2d9ff095be..b7b9e194ce81 100644 --- a/drivers/infiniband/core/iwpm_util.h +++ b/drivers/infiniband/core/iwpm_util.h @@ -58,6 +58,10 @@ #define IWPM_PID_UNDEFINED -1 #define IWPM_PID_UNAVAILABLE -2 +#define IWPM_REG_UNDEF 0x01 +#define IWPM_REG_VALID 0x02 +#define IWPM_REG_INCOMPL 0x04 + struct iwpm_nlmsg_request { struct list_head inprocess_list; __u32 nlmsg_seq; @@ -88,7 +92,7 @@ struct iwpm_admin_data { atomic_t refcount; atomic_t nlmsg_seq; int client_list[RDMA_NL_NUM_CLIENTS]; - int reg_list[RDMA_NL_NUM_CLIENTS]; + u32 reg_list[RDMA_NL_NUM_CLIENTS]; }; /** @@ -159,19 +163,31 @@ int iwpm_valid_client(u8 nl_client); void iwpm_set_valid(u8 nl_client, int valid); /** - * iwpm_registered_client - Check if the port mapper client is registered + * iwpm_check_registration - Check if the client registration + * matches the given one * @nl_client: The index of the netlink client + * @reg: The given registration type to compare with * * Call iwpm_register_pid() to register a client + * Returns true if the client registration matches reg, + * otherwise returns false */ -int iwpm_registered_client(u8 nl_client); +u32 iwpm_check_registration(u8 nl_client, u32 reg); /** - * iwpm_set_registered - Set the port mapper client to registered or not + * iwpm_set_registration - Set the client registration * @nl_client: The index of the netlink client - * @reg: 1 if registered or 0 if not + * @reg: Registration type to set */ -void iwpm_set_registered(u8 nl_client, int reg); +void iwpm_set_registration(u8 nl_client, u32 reg); + +/** + * iwpm_get_registration + * @nl_client: The index of the netlink client + * + * Returns the client registration type + */ +u32 iwpm_get_registration(u8 nl_client); /** * iwpm_send_mapinfo - Send local and mapped IPv4/IPv6 address info of From 165d68228906f2866a52069ef1176ab70fa9e216 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Thu, 2 Jul 2015 12:49:40 -0500 Subject: [PATCH 148/170] RDMA/nes: Fix for resolving the neigh Neighbor resolution doesn't work without this fix Signed-off-by: Tatyana Nikolova Signed-off-by: Doug Ledford --- drivers/infiniband/hw/nes/nes_cm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 9047af429906..8a3ad170d790 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -1520,8 +1520,9 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi int rc = arpindex; struct net_device *netdev; struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter; + __be32 dst_ipaddr = htonl(dst_ip); - rt = ip_route_output(&init_net, htonl(dst_ip), 0, 0, 0); + rt = ip_route_output(&init_net, dst_ipaddr, nesvnic->local_ipaddr, 0, 0); if (IS_ERR(rt)) { printk(KERN_ERR "%s: ip_route_output_key failed for 0x%08X\n", __func__, dst_ip); @@ -1533,7 +1534,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi else netdev = nesvnic->netdev; - neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, netdev); + neigh = dst_neigh_lookup(&rt->dst, &dst_ipaddr); rcu_read_lock(); if (neigh) { From 0a691272509d59ea0adbaa0c420ca0a71c9d0419 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Thu, 2 Jul 2015 12:52:29 -0500 Subject: [PATCH 149/170] RDMA/nes: Fix for incorrect recording of the MAC address Fix for incorrect recording of the MAC address Signed-off-by: Tatyana Nikolova Signed-off-by: Doug Ledford --- drivers/infiniband/hw/nes/nes_hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 02120d340d50..4713dd7ed764 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -3861,7 +3861,7 @@ void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr, (((u32)mac_addr[2]) << 24) | (((u32)mac_addr[3]) << 16) | (((u32)mac_addr[4]) << 8) | (u32)mac_addr[5]); cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_HIGH_IDX] = cpu_to_le32( - (((u32)mac_addr[0]) << 16) | (u32)mac_addr[1]); + (((u32)mac_addr[0]) << 8) | (u32)mac_addr[1]); } else { cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_ADDR_LOW_IDX] = 0; cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_HIGH_IDX] = 0; From 4fabb59449aa44a585b3603ffdadd4c5f4d0c033 Mon Sep 17 00:00:00 2001 From: Wengang Wang Date: Mon, 6 Jul 2015 14:35:11 +0800 Subject: [PATCH 150/170] rds: rds_ib_device.refcount overflow Fixes: 3e0249f9c05c ("RDS/IB: add refcount tracking to struct rds_ib_device") There lacks a dropping on rds_ib_device.refcount in case rds_ib_alloc_fmr failed(mr pool running out). this lead to the refcount overflow. A complain in line 117(see following) is seen. From vmcore: s_ib_rdma_mr_pool_depleted is 2147485544 and rds_ibdev->refcount is -2147475448. That is the evidence the mr pool is used up. so rds_ib_alloc_fmr is very likely to return ERR_PTR(-EAGAIN). 115 void rds_ib_dev_put(struct rds_ib_device *rds_ibdev) 116 { 117 BUG_ON(atomic_read(&rds_ibdev->refcount) <= 0); 118 if (atomic_dec_and_test(&rds_ibdev->refcount)) 119 queue_work(rds_wq, &rds_ibdev->free_work); 120 } fix is to drop refcount when rds_ib_alloc_fmr failed. Signed-off-by: Wengang Wang Reviewed-by: Haggai Eran Signed-off-by: Doug Ledford --- net/rds/ib_rdma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 273b8bff6ba4..657ba9f5d308 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -759,8 +759,10 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, } ibmr = rds_ib_alloc_fmr(rds_ibdev); - if (IS_ERR(ibmr)) + if (IS_ERR(ibmr)) { + rds_ib_dev_put(rds_ibdev); return ibmr; + } ret = rds_ib_map_fmr(rds_ibdev, ibmr, sg, nents); if (ret == 0) From 31b57b87fddf547bf3e98306b41bc82e111396fa Mon Sep 17 00:00:00 2001 From: Haggai Eran Date: Tue, 7 Jul 2015 17:45:12 +0300 Subject: [PATCH 151/170] IB/ucma: Fix lockdep warning in ucma_lock_files The ucma_lock_files() locks the mut mutex on two files, e.g. for migrating an ID. Use mutex_lock_nested() to prevent the warning below. ============================================= [ INFO: possible recursive locking detected ] 4.1.0-rc6-hmm+ #40 Tainted: G O --------------------------------------------- pingpong_rpc_se/10260 is trying to acquire lock: (&file->mut){+.+.+.}, at: [] ucma_migrate_id+0xc5/0x248 [rdma_ucm] but task is already holding lock: (&file->mut){+.+.+.}, at: [] ucma_migrate_id+0xbb/0x248 [rdma_ucm] other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&file->mut); lock(&file->mut); *** DEADLOCK *** May be due to missing lock nesting notation 1 lock held by pingpong_rpc_se/10260: #0: (&file->mut){+.+.+.}, at: [] ucma_migrate_id+0xbb/0x248 [rdma_ucm] stack backtrace: CPU: 0 PID: 10260 Comm: pingpong_rpc_se Tainted: G O 4.1.0-rc6-hmm+ #40 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2007 ffff8801f85b63d0 ffff880195677b58 ffffffff81668f49 0000000000000001 ffffffff825cbbe0 ffff880195677c38 ffffffff810bb991 ffff880100000000 ffff880100000000 ffff880100000001 ffff8801f85b7010 ffffffff8121bee9 Call Trace: [] dump_stack+0x4f/0x6e [] __lock_acquire+0x741/0x1820 [] ? dput+0x29/0x320 [] lock_acquire+0xc8/0x240 [] ? ucma_migrate_id+0xc5/0x248 [rdma_ucm] [] ? mutex_lock_nested+0x291/0x3e0 [] mutex_lock_nested+0x65/0x3e0 [] ? ucma_migrate_id+0xc5/0x248 [rdma_ucm] [] ? trace_hardirqs_on+0xd/0x10 [] ? mutex_unlock+0xe/0x10 [] ucma_migrate_id+0xc5/0x248 [rdma_ucm] [] ucma_write+0xa4/0xb0 [rdma_ucm] [] __vfs_write+0x34/0x100 [] ? __audit_syscall_entry+0xac/0x110 [] ? current_kernel_time+0xc5/0xe0 [] ? security_file_permission+0x23/0x90 [] ? rw_verify_area+0x5d/0xe0 [] vfs_write+0xab/0x120 [] SyS_write+0x59/0xd0 [] ? __audit_syscall_entry+0xac/0x110 [] system_call_fastpath+0x12/0x76 Signed-off-by: Haggai Eran Signed-off-by: Doug Ledford --- drivers/infiniband/core/ucma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index ad45469f7582..a700cdb13f3d 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1354,10 +1354,10 @@ static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2) /* Acquire mutex's based on pointer comparison to prevent deadlock. */ if (file1 < file2) { mutex_lock(&file1->mut); - mutex_lock(&file2->mut); + mutex_lock_nested(&file2->mut, SINGLE_DEPTH_NESTING); } else { mutex_lock(&file2->mut); - mutex_lock(&file1->mut); + mutex_lock_nested(&file1->mut, SINGLE_DEPTH_NESTING); } } From 8b7cce0dae956b329d09099f8739821936cf7c0f Mon Sep 17 00:00:00 2001 From: Haggai Eran Date: Tue, 7 Jul 2015 17:45:13 +0300 Subject: [PATCH 152/170] IB/ipoib: Prevent lockdep warning in __ipoib_ib_dev_flush __ipoib_ib_dev_flush calls itself recursively on child devices, and lockdep complains about locking vlan_rwsem twice (see below). Use down_read_nested instead of down_read to prevent the warning. ============================================= [ INFO: possible recursive locking detected ] 4.1.0-rc4+ #36 Tainted: G O --------------------------------------------- kworker/u20:2/261 is trying to acquire lock: (&priv->vlan_rwsem){.+.+..}, at: [] __ipoib_ib_dev_flush+0x3a/0x2b0 [ib_ipoib] but task is already holding lock: (&priv->vlan_rwsem){.+.+..}, at: [] __ipoib_ib_dev_flush+0x3a/0x2b0 [ib_ipoib] other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&priv->vlan_rwsem); lock(&priv->vlan_rwsem); *** DEADLOCK *** May be due to missing lock nesting notation 3 locks held by kworker/u20:2/261: #0: ("%s""ipoib_flush"){.+.+..}, at: [] process_one_work+0x15c/0x760 #1: ((&priv->flush_heavy)){+.+...}, at: [] process_one_work+0x15c/0x760 #2: (&priv->vlan_rwsem){.+.+..}, at: [] __ipoib_ib_dev_flush+0x3a/0x2b0 [ib_ipoib] stack backtrace: CPU: 3 PID: 261 Comm: kworker/u20:2 Tainted: G O 4.1.0-rc4+ #36 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2007 Workqueue: ipoib_flush ipoib_ib_dev_flush_heavy [ib_ipoib] ffff8801c6c54790 ffff8801c9927af8 ffffffff81665238 0000000000000001 ffffffff825b5b30 ffff8801c9927bd8 ffffffff810bba51 ffff880100000000 ffffffff00000001 ffff880100000001 ffff8801c6c55428 ffff8801c6c54790 Call Trace: [] dump_stack+0x4f/0x6f [] __lock_acquire+0x741/0x1820 [] lock_acquire+0xc8/0x240 [] ? __ipoib_ib_dev_flush+0x3a/0x2b0 [ib_ipoib] [] down_read+0x4c/0x70 [] ? __ipoib_ib_dev_flush+0x3a/0x2b0 [ib_ipoib] [] __ipoib_ib_dev_flush+0x3a/0x2b0 [ib_ipoib] [] __ipoib_ib_dev_flush+0x5a/0x2b0 [ib_ipoib] [] ipoib_ib_dev_flush_heavy+0x1a/0x20 [ib_ipoib] [] process_one_work+0x201/0x760 [] ? process_one_work+0x15c/0x760 [] worker_thread+0x120/0x4d0 [] ? process_one_work+0x760/0x760 [] ? process_one_work+0x760/0x760 [] kthread+0xfe/0x120 [] ? __init_kthread_worker+0x70/0x70 [] ret_from_fork+0x42/0x70 [] ? __init_kthread_worker+0x70/0x70 Signed-off-by: Haggai Eran Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 63b92cbb29ad..058150ff5aa1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -985,20 +985,21 @@ static inline int update_child_pkey(struct ipoib_dev_priv *priv) } static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, - enum ipoib_flush_level level) + enum ipoib_flush_level level, + int nesting) { struct ipoib_dev_priv *cpriv; struct net_device *dev = priv->dev; int result; - down_read(&priv->vlan_rwsem); + down_read_nested(&priv->vlan_rwsem, nesting); /* * Flush any child interfaces too -- they might be up even if * the parent is down. */ list_for_each_entry(cpriv, &priv->child_intfs, list) - __ipoib_ib_dev_flush(cpriv, level); + __ipoib_ib_dev_flush(cpriv, level, nesting + 1); up_read(&priv->vlan_rwsem); @@ -1076,7 +1077,7 @@ void ipoib_ib_dev_flush_light(struct work_struct *work) struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, flush_light); - __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_LIGHT); + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_LIGHT, 0); } void ipoib_ib_dev_flush_normal(struct work_struct *work) @@ -1084,7 +1085,7 @@ void ipoib_ib_dev_flush_normal(struct work_struct *work) struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, flush_normal); - __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_NORMAL); + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_NORMAL, 0); } void ipoib_ib_dev_flush_heavy(struct work_struct *work) @@ -1092,7 +1093,7 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work) struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, flush_heavy); - __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY); + __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY, 0); } void ipoib_ib_dev_cleanup(struct net_device *dev) From 59d40dd92ca00df317ad44fa44c27fe38c58e477 Mon Sep 17 00:00:00 2001 From: Carol L Soto Date: Thu, 11 Jun 2015 11:06:41 -0500 Subject: [PATCH 153/170] IB/ucm: Fix bitmap wrap when devnum > IB_UCM_MAX_DEVICES ib_ucm_release_dev clears the wrong bit if devnum is greater than IB_UCM_MAX_DEVICES. Signed-off-by: Carol L Soto Signed-off-by: Doug Ledford --- drivers/infiniband/core/ucm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 62c24b1452b8..009481073644 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -1193,6 +1193,7 @@ static int ib_ucm_close(struct inode *inode, struct file *filp) return 0; } +static DECLARE_BITMAP(overflow_map, IB_UCM_MAX_DEVICES); static void ib_ucm_release_dev(struct device *dev) { struct ib_ucm_device *ucm_dev; @@ -1202,7 +1203,7 @@ static void ib_ucm_release_dev(struct device *dev) if (ucm_dev->devnum < IB_UCM_MAX_DEVICES) clear_bit(ucm_dev->devnum, dev_map); else - clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, dev_map); + clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, overflow_map); kfree(ucm_dev); } @@ -1226,7 +1227,6 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); static dev_t overflow_maj; -static DECLARE_BITMAP(overflow_map, IB_UCM_MAX_DEVICES); static int find_overflow_devnum(void) { int ret; From c42687784b9a6b9733fee701ed236a5fe088fac4 Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Sun, 12 Jul 2015 01:24:09 -0700 Subject: [PATCH 154/170] IB/ipoib: Scatter-Gather support in connected mode By default, IPoIB-CM driver uses 64k MTU. Larger MTU gives better performance. This MTU plus overhead puts the memory allocation for IP based packets at 32 4k pages (order 5), which have to be contiguous. When the system memory under pressure, it was observed that allocating 128k contiguous physical memory is difficult and causes serious errors (such as system becomes unusable). This enhancement resolve the issue by removing the physically contiguous memory requirement using Scatter/Gather feature that exists in Linux stack. With this fix Scatter-Gather will be supported also in connected mode. This change reverts some of the change made in commit e112373fd6aa ("IPoIB/cm: Reduce connected mode TX object size"). The ability to use SG in IPoIB CM is possible because the coupling between NETIF_F_SG and NETIF_F_CSUM was removed in commit ec5f06156423 ("net: Kill link between CSUM and SG features.") Signed-off-by: Yuval Shaia Acked-by: Christian Marie Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib.h | 29 +++++++++++++++++- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 33 +++++++++------------ drivers/infiniband/ulp/ipoib/ipoib_ib.c | 36 +++++++---------------- drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 +- 4 files changed, 54 insertions(+), 46 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index bd94b0a6e9e5..79859c4d43c9 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -239,7 +239,7 @@ struct ipoib_cm_tx { struct net_device *dev; struct ipoib_neigh *neigh; struct ipoib_path *path; - struct ipoib_cm_tx_buf *tx_ring; + struct ipoib_tx_buf *tx_ring; unsigned tx_head; unsigned tx_tail; unsigned long flags; @@ -504,6 +504,33 @@ int ipoib_mcast_stop_thread(struct net_device *dev); void ipoib_mcast_dev_down(struct net_device *dev); void ipoib_mcast_dev_flush(struct net_device *dev); +int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req); +void ipoib_dma_unmap_tx(struct ipoib_dev_priv *priv, + struct ipoib_tx_buf *tx_req); + +static inline void ipoib_build_sge(struct ipoib_dev_priv *priv, + struct ipoib_tx_buf *tx_req) +{ + int i, off; + struct sk_buff *skb = tx_req->skb; + skb_frag_t *frags = skb_shinfo(skb)->frags; + int nr_frags = skb_shinfo(skb)->nr_frags; + u64 *mapping = tx_req->mapping; + + if (skb_headlen(skb)) { + priv->tx_sge[0].addr = mapping[0]; + priv->tx_sge[0].length = skb_headlen(skb); + off = 1; + } else + off = 0; + + for (i = 0; i < nr_frags; ++i) { + priv->tx_sge[i + off].addr = mapping[i + off]; + priv->tx_sge[i + off].length = skb_frag_size(&frags[i]); + } + priv->tx_wr.num_sge = nr_frags + off; +} + #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev); int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index cf32a778e7d0..ee39be6ccfb0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -694,14 +694,12 @@ repost: static inline int post_send(struct ipoib_dev_priv *priv, struct ipoib_cm_tx *tx, unsigned int wr_id, - u64 addr, int len) + struct ipoib_tx_buf *tx_req) { struct ib_send_wr *bad_wr; - priv->tx_sge[0].addr = addr; - priv->tx_sge[0].length = len; + ipoib_build_sge(priv, tx_req); - priv->tx_wr.num_sge = 1; priv->tx_wr.wr_id = wr_id | IPOIB_OP_CM; return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr); @@ -710,8 +708,7 @@ static inline int post_send(struct ipoib_dev_priv *priv, void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) { struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ipoib_cm_tx_buf *tx_req; - u64 addr; + struct ipoib_tx_buf *tx_req; int rc; if (unlikely(skb->len > tx->mtu)) { @@ -735,24 +732,21 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ */ tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)]; tx_req->skb = skb; - addr = ib_dma_map_single(priv->ca, skb->data, skb->len, DMA_TO_DEVICE); - if (unlikely(ib_dma_mapping_error(priv->ca, addr))) { + + if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) { ++dev->stats.tx_errors; dev_kfree_skb_any(skb); return; } - tx_req->mapping = addr; - skb_orphan(skb); skb_dst_drop(skb); - rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), - addr, skb->len); + rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), tx_req); if (unlikely(rc)) { ipoib_warn(priv, "post_send failed, error %d\n", rc); ++dev->stats.tx_errors; - ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE); + ipoib_dma_unmap_tx(priv, tx_req); dev_kfree_skb_any(skb); } else { dev->trans_start = jiffies; @@ -777,7 +771,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_cm_tx *tx = wc->qp->qp_context; unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM; - struct ipoib_cm_tx_buf *tx_req; + struct ipoib_tx_buf *tx_req; unsigned long flags; ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n", @@ -791,7 +785,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) tx_req = &tx->tx_ring[wr_id]; - ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE); + ipoib_dma_unmap_tx(priv, tx_req); /* FIXME: is this right? Shouldn't we only increment on success? */ ++dev->stats.tx_packets; @@ -1036,6 +1030,9 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ struct ib_qp *tx_qp; + if (dev->features & NETIF_F_SG) + attr.cap.max_send_sge = MAX_SKB_FRAGS + 1; + tx_qp = ib_create_qp(priv->pd, &attr); if (PTR_ERR(tx_qp) == -EINVAL) { ipoib_warn(priv, "can't use GFP_NOIO for QPs on device %s, using GFP_KERNEL\n", @@ -1170,7 +1167,7 @@ err_tx: static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p) { struct ipoib_dev_priv *priv = netdev_priv(p->dev); - struct ipoib_cm_tx_buf *tx_req; + struct ipoib_tx_buf *tx_req; unsigned long begin; ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n", @@ -1197,8 +1194,7 @@ timeout: while ((int) p->tx_tail - (int) p->tx_head < 0) { tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)]; - ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, - DMA_TO_DEVICE); + ipoib_dma_unmap_tx(priv, tx_req); dev_kfree_skb_any(tx_req->skb); ++p->tx_tail; netif_tx_lock_bh(p->dev); @@ -1455,7 +1451,6 @@ static void ipoib_cm_stale_task(struct work_struct *work) spin_unlock_irq(&priv->lock); } - static ssize_t show_mode(struct device *d, struct device_attribute *attr, char *buf) { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 058150ff5aa1..d266667ca9b8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -263,8 +263,7 @@ repost: "for buf %d\n", wr_id); } -static int ipoib_dma_map_tx(struct ib_device *ca, - struct ipoib_tx_buf *tx_req) +int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req) { struct sk_buff *skb = tx_req->skb; u64 *mapping = tx_req->mapping; @@ -305,8 +304,8 @@ partial_error: return -EIO; } -static void ipoib_dma_unmap_tx(struct ib_device *ca, - struct ipoib_tx_buf *tx_req) +void ipoib_dma_unmap_tx(struct ipoib_dev_priv *priv, + struct ipoib_tx_buf *tx_req) { struct sk_buff *skb = tx_req->skb; u64 *mapping = tx_req->mapping; @@ -314,7 +313,8 @@ static void ipoib_dma_unmap_tx(struct ib_device *ca, int off; if (skb_headlen(skb)) { - ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE); + ib_dma_unmap_single(priv->ca, mapping[0], skb_headlen(skb), + DMA_TO_DEVICE); off = 1; } else off = 0; @@ -322,8 +322,8 @@ static void ipoib_dma_unmap_tx(struct ib_device *ca, for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) { const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - ib_dma_unmap_page(ca, mapping[i + off], skb_frag_size(frag), - DMA_TO_DEVICE); + ib_dma_unmap_page(priv->ca, mapping[i + off], + skb_frag_size(frag), DMA_TO_DEVICE); } } @@ -389,7 +389,7 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) tx_req = &priv->tx_ring[wr_id]; - ipoib_dma_unmap_tx(priv->ca, tx_req); + ipoib_dma_unmap_tx(priv, tx_req); ++dev->stats.tx_packets; dev->stats.tx_bytes += tx_req->skb->len; @@ -514,24 +514,10 @@ static inline int post_send(struct ipoib_dev_priv *priv, void *head, int hlen) { struct ib_send_wr *bad_wr; - int i, off; struct sk_buff *skb = tx_req->skb; - skb_frag_t *frags = skb_shinfo(skb)->frags; - int nr_frags = skb_shinfo(skb)->nr_frags; - u64 *mapping = tx_req->mapping; - if (skb_headlen(skb)) { - priv->tx_sge[0].addr = mapping[0]; - priv->tx_sge[0].length = skb_headlen(skb); - off = 1; - } else - off = 0; + ipoib_build_sge(priv, tx_req); - for (i = 0; i < nr_frags; ++i) { - priv->tx_sge[i + off].addr = mapping[i + off]; - priv->tx_sge[i + off].length = skb_frag_size(&frags[i]); - } - priv->tx_wr.num_sge = nr_frags + off; priv->tx_wr.wr_id = wr_id; priv->tx_wr.wr.ud.remote_qpn = qpn; priv->tx_wr.wr.ud.ah = address; @@ -617,7 +603,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, ipoib_warn(priv, "post_send failed, error %d\n", rc); ++dev->stats.tx_errors; --priv->tx_outstanding; - ipoib_dma_unmap_tx(priv->ca, tx_req); + ipoib_dma_unmap_tx(priv, tx_req); dev_kfree_skb_any(skb); if (netif_queue_stopped(dev)) netif_wake_queue(dev); @@ -868,7 +854,7 @@ int ipoib_ib_dev_stop(struct net_device *dev) while ((int) priv->tx_tail - (int) priv->tx_head < 0) { tx_req = &priv->tx_ring[priv->tx_tail & (ipoib_sendq_size - 1)]; - ipoib_dma_unmap_tx(priv->ca, tx_req); + ipoib_dma_unmap_tx(priv, tx_req); dev_kfree_skb_any(tx_req->skb); ++priv->tx_tail; --priv->tx_outstanding; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index da3e7e7d5e80..59604a7e5e3e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -190,7 +190,7 @@ static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_featu struct ipoib_dev_priv *priv = netdev_priv(dev); if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags)) - features &= ~(NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO); + features &= ~(NETIF_F_IP_CSUM | NETIF_F_TSO); return features; } From edcd2a7474ba3b47e54c3c9a300287342de74766 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Sun, 7 Jun 2015 13:36:11 +0300 Subject: [PATCH 155/170] IB/ipoib: Set MTU to max allowed by mode when mode changes When switching between modes (datagram / connected) change the MTU accordingly. datagram mode up to 4K, connected mode up to (64K - 0x10). Signed-off-by: ELi Cohen Signed-off-by: Erez Shitrit Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 59604a7e5e3e..b2943c84a5dd 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -232,6 +232,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf) ipoib_warn(priv, "enabling connected mode " "will cause multicast packet drops\n"); netdev_update_features(dev); + dev_set_mtu(dev, ipoib_cm_max_mtu(dev)); rtnl_unlock(); priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; From cb1ff431c3dd904cda37d6d07d4a3ea29840d621 Mon Sep 17 00:00:00 2001 From: Vaishali Thakkar Date: Tue, 16 Jun 2015 17:13:05 +0530 Subject: [PATCH 156/170] IB/ipath: Convert use of __constant_ to In little endian cases, the macros be16_to_cpu and cpu_to_be64 unfolds to __swab{16,64} which provides special case for constants. In big endian cases, __constant_be16_to_cpu and be16_to_cpu expand directly to the same expression. The same applies for __constant_cpu_to_be64 and cpu_to_be64. So, replace __constant_be16_to_cpu with be16_to_cpu and __constant_cpu_to_be64 with cpu_to_be64, with the goal of getting rid of the definition of __constant_be16_to_cpu and __constant_cpu_to_be64 completely. Signed-off-by: Vaishali Thakkar Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ipath/ipath_verbs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 48253b839a6f..30ba49c4a98c 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -2044,9 +2044,9 @@ int ipath_register_ib_device(struct ipath_devdata *dd) spin_lock_init(&idev->qp_table.lock); spin_lock_init(&idev->lk_table.lock); - idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE); + idev->sm_lid = be16_to_cpu(IB_LID_PERMISSIVE); /* Set the prefix to the default value (see ch. 4.1.1) */ - idev->gid_prefix = __constant_cpu_to_be64(0xfe80000000000000ULL); + idev->gid_prefix = cpu_to_be64(0xfe80000000000000ULL); ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size); if (ret) From 43bfb9729ea88d46e3f4d3ad7b17106c7b071fcb Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 25 Jun 2015 17:45:38 +0300 Subject: [PATCH 157/170] IB/mlx4: Fix use of flow-counters for process_mad For IB links, reading HCA flow counters through iboe_process_mad() should be used when mlx4_ib_process_mad() is invoked only for VFs PMA queries and exactly nothing else. Fixes: 7193a141eb74 ('IB/mlx4: Set VF to read from QP counters') Reported-by: Linus Torvalds Signed-off-by: Or Gerlitz Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/mad.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 9a810e302f6b..68b3dfa922bf 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -860,22 +860,31 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, struct mlx4_ib_dev *dev = to_mdev(ibdev); const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; + enum rdma_link_layer link = rdma_port_get_link_layer(ibdev, port_num); if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || *out_mad_size != sizeof(*out_mad))) return IB_MAD_RESULT_FAILURE; - switch (rdma_port_get_link_layer(ibdev, port_num)) { - case IB_LINK_LAYER_INFINIBAND: - if (!mlx4_is_slave(dev->dev)) - return ib_process_mad(ibdev, mad_flags, port_num, in_wc, - in_grh, in_mad, out_mad); - case IB_LINK_LAYER_ETHERNET: - return iboe_process_mad(ibdev, mad_flags, port_num, in_wc, - in_grh, in_mad, out_mad); - default: - return -EINVAL; + /* iboe_process_mad() which uses the HCA flow-counters to implement IB PMA + * queries, should be called only by VFs and for that specific purpose + */ + if (link == IB_LINK_LAYER_INFINIBAND) { + if (mlx4_is_slave(dev->dev) && + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && + in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS) + return iboe_process_mad(ibdev, mad_flags, port_num, in_wc, + in_grh, in_mad, out_mad); + + return ib_process_mad(ibdev, mad_flags, port_num, in_wc, + in_grh, in_mad, out_mad); } + + if (link == IB_LINK_LAYER_ETHERNET) + return iboe_process_mad(ibdev, mad_flags, port_num, in_wc, + in_grh, in_mad, out_mad); + + return -EINVAL; } static void send_handler(struct ib_mad_agent *agent, From a39a98ff4cc8b514fe6fa551f6ed59cd60e07da2 Mon Sep 17 00:00:00 2001 From: Maninder Singh Date: Wed, 8 Jul 2015 09:43:35 +0530 Subject: [PATCH 158/170] IB/mlx4: Optimize freeing of items on error unwind On failure, we loop through all possible pointers and test them before calling kfree. But really, why even attempt to free items we didn't allocate when we can easily loop through exactly and only the devices for which the original memory allocation succeeded and free just those. Signed-off-by: Maninder Singh Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index f419a728a22b..064454aee863 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2670,17 +2670,15 @@ static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) dm = kcalloc(ports, sizeof(*dm), GFP_ATOMIC); if (!dm) { pr_err("failed to allocate memory for tunneling qp update\n"); - goto out; + return; } for (i = 0; i < ports; i++) { dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC); if (!dm[i]) { pr_err("failed to allocate memory for tunneling qp update work struct\n"); - for (i = 0; i < dev->caps.num_ports; i++) { - if (dm[i]) - kfree(dm[i]); - } + while (--i >= 0) + kfree(dm[i]); goto out; } } From 9bbf282da87294e1bda0ccb4e351bfdf5fc076cd Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Thu, 9 Jul 2015 10:16:12 -0400 Subject: [PATCH 159/170] IB/mlx4: Fix memory leak in do_slave_init We create a number of work structs to be queued up to a workqueue, and on completion of the workqueue handler, the workqueue handler frees the allocated memory. If, however, we don't queue the work struct because the device is going down, then we need to free the memory ourselves. Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 064454aee863..1c59e4749736 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2692,6 +2692,8 @@ static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags); if (!ibdev->sriov.is_going_down) queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work); + else + kfree(dm[i]); spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags); } out: From d9a047aeffcef5755952d18f2901d8777d84019d Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Thu, 9 Jul 2015 10:21:08 -0400 Subject: [PATCH 160/170] IB/mlx4: Optimize do_slave_init There is little chance our memory allocation will fail, so we can combine initializing the work structs with allocating them instead of looping through all of them once to allocate and again to initialize. Then when we need to actually find out if our device is up or in the process of going down, have all of our work structs batched up, take the spin_lock once and only once, and do all of the batch under the one spin_lock invocation instead of incurring all of the locked memory cycles we would otherwise incur to take/release the spin_lock over and over again. Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 1c59e4749736..8be6db816460 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2681,20 +2681,22 @@ static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) kfree(dm[i]); goto out; } - } - /* initialize or tear down tunnel QPs for the slave */ - for (i = 0; i < ports; i++) { INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work); dm[i]->port = first_port + i + 1; dm[i]->slave = slave; dm[i]->do_init = do_init; dm[i]->dev = ibdev; - spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags); - if (!ibdev->sriov.is_going_down) + } + /* initialize or tear down tunnel QPs for the slave */ + spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags); + if (!ibdev->sriov.is_going_down) { + for (i = 0; i < ports; i++) queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work); - else - kfree(dm[i]); spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags); + } else { + spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags); + for (i = 0; i < ports; i++) + kfree(dm[i]); } out: kfree(dm); From 45d254206f141f560e76319191f912d0a3d27bd3 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 8 Jul 2015 17:21:15 +0200 Subject: [PATCH 161/170] IB/core: Destroy multcast_idr on module exit Destroy multcast_idr on module exit, reclaiming the allocated memory. This was detected by the following semantic patch (written by Luis Rodriguez ) @ defines_module_init @ declarer name module_init, module_exit; declarer name DEFINE_IDR; identifier init; @@ module_init(init); @ defines_module_exit @ identifier exit; @@ module_exit(exit); @ declares_idr depends on defines_module_init && defines_module_exit @ identifier idr; @@ DEFINE_IDR(idr); @ on_exit_calls_destroy depends on declares_idr && defines_module_exit @ identifier declares_idr.idr, defines_module_exit.exit; @@ exit(void) { ... idr_destroy(&idr); ... } @ missing_module_idr_destroy depends on declares_idr && defines_module_exit && !on_exit_calls_destroy @ identifier declares_idr.idr, defines_module_exit.exit; @@ exit(void) { ... +idr_destroy(&idr); } Signed-off-by: Johannes Thumshirn Signed-off-by: Doug Ledford --- drivers/infiniband/core/ucma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index a700cdb13f3d..29b21213ea75 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1616,6 +1616,7 @@ static void __exit ucma_cleanup(void) device_remove_file(ucma_misc.this_device, &dev_attr_abi_version); misc_deregister(&ucma_misc); idr_destroy(&ctx_idr); + idr_destroy(&multicast_idr); } module_init(ucma_init); From d8b2ba7c5928173fe1c12bd2545f5ed85d1c3c7a Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 8 Jul 2015 17:23:00 +0200 Subject: [PATCH 162/170] IB/core: Destroy ocrdma_dev_id IDR on module exit Destroy ocrdma_dev_id IDR on module exit, reclaiming the allocated memory. This was detected by the following semantic patch (written by Luis Rodriguez ) @ defines_module_init @ declarer name module_init, module_exit; declarer name DEFINE_IDR; identifier init; @@ module_init(init); @ defines_module_exit @ identifier exit; @@ module_exit(exit); @ declares_idr depends on defines_module_init && defines_module_exit @ identifier idr; @@ DEFINE_IDR(idr); @ on_exit_calls_destroy depends on declares_idr && defines_module_exit @ identifier declares_idr.idr, defines_module_exit.exit; @@ exit(void) { ... idr_destroy(&idr); ... } @ missing_module_idr_destroy depends on declares_idr && defines_module_exit && !on_exit_calls_destroy @ identifier declares_idr.idr, defines_module_exit.exit; @@ exit(void) { ... +idr_destroy(&idr); } Signed-off-by: Johannes Thumshirn Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 8a1398b253a2..d98a707a5eb9 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -696,6 +696,7 @@ static void __exit ocrdma_exit_module(void) ocrdma_unregister_inet6addr_notifier(); ocrdma_unregister_inetaddr_notifier(); ocrdma_rem_debugfs(); + idr_destroy(&ocrdma_dev_id); } module_init(ocrdma_init_module); From 8642d7f8364d9fa84a83629129e78402c169c54a Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 13 Jul 2015 16:44:54 +0200 Subject: [PATCH 163/170] intel_scu_ipc: move local memory initialization out of a mutex '{ }' and memset will both reset the cbuf buffer. Only once is enough and this can be done outside fo the mutex. Signed-off-by: Christophe JAILLET Signed-off-by: Darren Hart --- drivers/platform/x86/intel_scu_ipc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c index 001b199a8c33..187d1086d15c 100644 --- a/drivers/platform/x86/intel_scu_ipc.c +++ b/drivers/platform/x86/intel_scu_ipc.c @@ -216,13 +216,13 @@ static int pwr_reg_rdwr(u16 *addr, u8 *data, u32 count, u32 op, u32 id) int nc; u32 offset = 0; int err; - u8 cbuf[IPC_WWBUF_SIZE] = { }; + u8 cbuf[IPC_WWBUF_SIZE]; u32 *wbuf = (u32 *)&cbuf; - mutex_lock(&ipclock); - memset(cbuf, 0, sizeof(cbuf)); + mutex_lock(&ipclock); + if (ipcdev.pdev == NULL) { mutex_unlock(&ipclock); return -ENODEV; From 26456955719b79cc4a011b18221aa68f599f6b6c Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Wed, 15 Jul 2015 12:52:47 -0500 Subject: [PATCH 164/170] jfs: clean up jfs_rename and fix out of order unlock The end of jfs_rename(), which is also used by the error paths, included a call to IWRITE_UNLOCK(new_ip) after labels out1, out2 and out3. If we come in through these labels, IWRITE_LOCK() has not been called yet. In moving that call to the correct spot, I also moved some exceptional truncate code earlier as well, since the early error paths don't need to deal with it, and I renamed out4: to out_tx: so a future patch by Jan Kara doesn't need to deal with renumbering or confusing out-of-order labels. Signed-off-by: Dave Kleikamp --- fs/jfs/namei.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 66db7bc0ed10..f7b40370a856 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1161,7 +1161,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, rc = dtModify(tid, new_dir, &new_dname, &ino, old_ip->i_ino, JFS_RENAME); if (rc) - goto out4; + goto out_tx; drop_nlink(new_ip); if (S_ISDIR(new_ip->i_mode)) { drop_nlink(new_ip); @@ -1186,7 +1186,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, if ((new_size = commitZeroLink(tid, new_ip)) < 0) { txAbort(tid, 1); /* Marks FS Dirty */ rc = new_size; - goto out4; + goto out_tx; } tblk = tid_to_tblock(tid); tblk->xflag |= COMMIT_DELETE; @@ -1204,7 +1204,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (rc) { jfs_err("jfs_rename didn't expect dtSearch to fail " "w/rc = %d", rc); - goto out4; + goto out_tx; } ino = old_ip->i_ino; @@ -1212,7 +1212,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (rc) { if (rc == -EIO) jfs_err("jfs_rename: dtInsert returned -EIO"); - goto out4; + goto out_tx; } if (S_ISDIR(old_ip->i_mode)) inc_nlink(new_dir); @@ -1227,7 +1227,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, jfs_err("jfs_rename did not expect dtDelete to return rc = %d", rc); txAbort(tid, 1); /* Marks Filesystem dirty */ - goto out4; + goto out_tx; } if (S_ISDIR(old_ip->i_mode)) { drop_nlink(old_dir); @@ -1286,7 +1286,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, rc = txCommit(tid, ipcount, iplist, commit_flag); - out4: + out_tx: txEnd(tid); if (new_ip) mutex_unlock(&JFS_IP(new_ip)->commit_mutex); @@ -1309,13 +1309,6 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, } if (new_ip && (new_ip->i_nlink == 0)) set_cflag(COMMIT_Nolink, new_ip); - out3: - free_UCSname(&new_dname); - out2: - free_UCSname(&old_dname); - out1: - if (new_ip && !S_ISDIR(new_ip->i_mode)) - IWRITE_UNLOCK(new_ip); /* * Truncating the directory index table is not guaranteed. It * may need to be done iteratively @@ -1326,7 +1319,13 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, clear_cflag(COMMIT_Stale, old_dir); } - + if (new_ip && !S_ISDIR(new_ip->i_mode)) + IWRITE_UNLOCK(new_ip); + out3: + free_UCSname(&new_dname); + out2: + free_UCSname(&old_dname); + out1: jfs_info("jfs_rename: returning %d", rc); return rc; } From 7bee607472aa2e5a36dfe143e5a625be06125f53 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 14 Jul 2015 11:57:48 -0600 Subject: [PATCH 165/170] NVMe: Reread partitions on metadata formats This patch has the driver automatically reread partitions if a namespace has a separate metadata format. Previously revalidating a disk was sufficient to get the correct capacity set on such formatted drives, but partitions that may exist would not have been surfaced. Reported-by: Paul Grabinar Signed-off-by: Keith Busch Cc: Matthew Wilcox Tested-by: Paul Grabinar Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index d1d6141920d3..7920c2741b47 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -2108,8 +2108,17 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid) goto out_free_disk; add_disk(ns->disk); - if (ns->ms) - revalidate_disk(ns->disk); + if (ns->ms) { + struct block_device *bd = bdget_disk(ns->disk, 0); + if (!bd) + return; + if (blkdev_get(bd, FMODE_READ, NULL)) { + bdput(bd); + return; + } + blkdev_reread_part(bd); + blkdev_put(bd, FMODE_READ); + } return; out_free_disk: kfree(disk); From e56f698bd0720e17f10f39e8b0b5b446ad0ab22c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 16 Jul 2015 19:53:22 +0800 Subject: [PATCH 166/170] blk-mq: set default timeout as 30 seconds It is reasonable to set default timeout of request as 30 seconds instead of 30000 ticks, which may be 300 seconds if HZ is 100, for example, some arm64 based systems may choose 100 HZ. Signed-off-by: Ming Lei Fixes: c76cbbcf4044 ("blk-mq: put blk_queue_rq_timeout together in blk_mq_init_queue()" Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index f53779692c77..7d842db59699 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1998,7 +1998,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, goto err_hctxs; setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q); - blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30000); + blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ); q->nr_queues = nr_cpu_ids; q->nr_hw_queues = set->nr_hw_queues; From c2efefb33abfb245395199137ece3c1e3df47f51 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 17 Jul 2015 22:53:43 +0200 Subject: [PATCH 167/170] ACPI / scan: Move sysfs-related device code to a separate file To reduce the size of scan.c and improve the readability of it, move all code related to device sysfs, modalias creation etc. to a new file called device_sysfs.c. Signed-off-by: Rafael J. Wysocki --- drivers/acpi/Makefile | 2 +- drivers/acpi/device_sysfs.c | 521 ++++++++++++++++++++++++++++++++++++ drivers/acpi/internal.h | 9 + drivers/acpi/scan.c | 483 +-------------------------------- 4 files changed, 532 insertions(+), 483 deletions(-) create mode 100644 drivers/acpi/device_sysfs.c diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 8321430d7f24..08ac1100e2dc 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -24,7 +24,7 @@ acpi-y += nvs.o # Power management related files acpi-y += wakeup.o acpi-$(CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT) += sleep.o -acpi-y += device_pm.o +acpi-y += device_sysfs.o device_pm.o acpi-$(CONFIG_ACPI_SLEEP) += proc.o diff --git a/drivers/acpi/device_sysfs.c b/drivers/acpi/device_sysfs.c new file mode 100644 index 000000000000..4ab4582e586b --- /dev/null +++ b/drivers/acpi/device_sysfs.c @@ -0,0 +1,521 @@ +/* + * drivers/acpi/device_sysfs.c - ACPI device sysfs attributes and modalias. + * + * Copyright (C) 2015, Intel Corp. + * Author: Mika Westerberg + * Author: Rafael J. Wysocki + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + +#include +#include +#include +#include + +#include "internal.h" + +/** + * create_pnp_modalias - Create hid/cid(s) string for modalias and uevent + * @acpi_dev: ACPI device object. + * @modalias: Buffer to print into. + * @size: Size of the buffer. + * + * Creates hid/cid(s) string needed for modalias and uevent + * e.g. on a device with hid:IBM0001 and cid:ACPI0001 you get: + * char *modalias: "acpi:IBM0001:ACPI0001" + * Return: 0: no _HID and no _CID + * -EINVAL: output error + * -ENOMEM: output is truncated +*/ +static int create_pnp_modalias(struct acpi_device *acpi_dev, char *modalias, + int size) +{ + int len; + int count; + struct acpi_hardware_id *id; + + /* + * Since we skip ACPI_DT_NAMESPACE_HID from the modalias below, 0 should + * be returned if ACPI_DT_NAMESPACE_HID is the only ACPI/PNP ID in the + * device's list. + */ + count = 0; + list_for_each_entry(id, &acpi_dev->pnp.ids, list) + if (strcmp(id->id, ACPI_DT_NAMESPACE_HID)) + count++; + + if (!count) + return 0; + + len = snprintf(modalias, size, "acpi:"); + if (len <= 0) + return len; + + size -= len; + + list_for_each_entry(id, &acpi_dev->pnp.ids, list) { + if (!strcmp(id->id, ACPI_DT_NAMESPACE_HID)) + continue; + + count = snprintf(&modalias[len], size, "%s:", id->id); + if (count < 0) + return -EINVAL; + + if (count >= size) + return -ENOMEM; + + len += count; + size -= count; + } + modalias[len] = '\0'; + return len; +} + +/** + * create_of_modalias - Creates DT compatible string for modalias and uevent + * @acpi_dev: ACPI device object. + * @modalias: Buffer to print into. + * @size: Size of the buffer. + * + * Expose DT compatible modalias as of:NnameTCcompatible. This function should + * only be called for devices having ACPI_DT_NAMESPACE_HID in their list of + * ACPI/PNP IDs. + */ +static int create_of_modalias(struct acpi_device *acpi_dev, char *modalias, + int size) +{ + struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER }; + const union acpi_object *of_compatible, *obj; + int len, count; + int i, nval; + char *c; + + acpi_get_name(acpi_dev->handle, ACPI_SINGLE_NAME, &buf); + /* DT strings are all in lower case */ + for (c = buf.pointer; *c != '\0'; c++) + *c = tolower(*c); + + len = snprintf(modalias, size, "of:N%sT", (char *)buf.pointer); + ACPI_FREE(buf.pointer); + + if (len <= 0) + return len; + + of_compatible = acpi_dev->data.of_compatible; + if (of_compatible->type == ACPI_TYPE_PACKAGE) { + nval = of_compatible->package.count; + obj = of_compatible->package.elements; + } else { /* Must be ACPI_TYPE_STRING. */ + nval = 1; + obj = of_compatible; + } + for (i = 0; i < nval; i++, obj++) { + count = snprintf(&modalias[len], size, "C%s", + obj->string.pointer); + if (count < 0) + return -EINVAL; + + if (count >= size) + return -ENOMEM; + + len += count; + size -= count; + } + modalias[len] = '\0'; + return len; +} + +int __acpi_device_uevent_modalias(struct acpi_device *adev, + struct kobj_uevent_env *env) +{ + int len; + + if (!adev) + return -ENODEV; + + if (list_empty(&adev->pnp.ids)) + return 0; + + if (add_uevent_var(env, "MODALIAS=")) + return -ENOMEM; + + len = create_pnp_modalias(adev, &env->buf[env->buflen - 1], + sizeof(env->buf) - env->buflen); + if (len < 0) + return len; + + env->buflen += len; + if (!adev->data.of_compatible) + return 0; + + if (len > 0 && add_uevent_var(env, "MODALIAS=")) + return -ENOMEM; + + len = create_of_modalias(adev, &env->buf[env->buflen - 1], + sizeof(env->buf) - env->buflen); + if (len < 0) + return len; + + env->buflen += len; + + return 0; +} + +/** + * acpi_device_uevent_modalias - uevent modalias for ACPI-enumerated devices. + * + * Create the uevent modalias field for ACPI-enumerated devices. + * + * Because other buses do not support ACPI HIDs & CIDs, e.g. for a device with + * hid:IBM0001 and cid:ACPI0001 you get: "acpi:IBM0001:ACPI0001". + */ +int acpi_device_uevent_modalias(struct device *dev, struct kobj_uevent_env *env) +{ + return __acpi_device_uevent_modalias(acpi_companion_match(dev), env); +} +EXPORT_SYMBOL_GPL(acpi_device_uevent_modalias); + +static int __acpi_device_modalias(struct acpi_device *adev, char *buf, int size) +{ + int len, count; + + if (!adev) + return -ENODEV; + + if (list_empty(&adev->pnp.ids)) + return 0; + + len = create_pnp_modalias(adev, buf, size - 1); + if (len < 0) { + return len; + } else if (len > 0) { + buf[len++] = '\n'; + size -= len; + } + if (!adev->data.of_compatible) + return len; + + count = create_of_modalias(adev, buf + len, size - 1); + if (count < 0) { + return count; + } else if (count > 0) { + len += count; + buf[len++] = '\n'; + } + + return len; +} + +/** + * acpi_device_modalias - modalias sysfs attribute for ACPI-enumerated devices. + * + * Create the modalias sysfs attribute for ACPI-enumerated devices. + * + * Because other buses do not support ACPI HIDs & CIDs, e.g. for a device with + * hid:IBM0001 and cid:ACPI0001 you get: "acpi:IBM0001:ACPI0001". + */ +int acpi_device_modalias(struct device *dev, char *buf, int size) +{ + return __acpi_device_modalias(acpi_companion_match(dev), buf, size); +} +EXPORT_SYMBOL_GPL(acpi_device_modalias); + +static ssize_t +acpi_device_modalias_show(struct device *dev, struct device_attribute *attr, char *buf) { + return __acpi_device_modalias(to_acpi_device(dev), buf, 1024); +} +static DEVICE_ATTR(modalias, 0444, acpi_device_modalias_show, NULL); + +static ssize_t real_power_state_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_device *adev = to_acpi_device(dev); + int state; + int ret; + + ret = acpi_device_get_power(adev, &state); + if (ret) + return ret; + + return sprintf(buf, "%s\n", acpi_power_state_string(state)); +} + +static DEVICE_ATTR(real_power_state, 0444, real_power_state_show, NULL); + +static ssize_t power_state_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_device *adev = to_acpi_device(dev); + + return sprintf(buf, "%s\n", acpi_power_state_string(adev->power.state)); +} + +static DEVICE_ATTR(power_state, 0444, power_state_show, NULL); + +static ssize_t +acpi_eject_store(struct device *d, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct acpi_device *acpi_device = to_acpi_device(d); + acpi_object_type not_used; + acpi_status status; + + if (!count || buf[0] != '1') + return -EINVAL; + + if ((!acpi_device->handler || !acpi_device->handler->hotplug.enabled) + && !acpi_device->driver) + return -ENODEV; + + status = acpi_get_type(acpi_device->handle, ¬_used); + if (ACPI_FAILURE(status) || !acpi_device->flags.ejectable) + return -ENODEV; + + get_device(&acpi_device->dev); + status = acpi_hotplug_schedule(acpi_device, ACPI_OST_EC_OSPM_EJECT); + if (ACPI_SUCCESS(status)) + return count; + + put_device(&acpi_device->dev); + acpi_evaluate_ost(acpi_device->handle, ACPI_OST_EC_OSPM_EJECT, + ACPI_OST_SC_NON_SPECIFIC_FAILURE, NULL); + return status == AE_NO_MEMORY ? -ENOMEM : -EAGAIN; +} + +static DEVICE_ATTR(eject, 0200, NULL, acpi_eject_store); + +static ssize_t +acpi_device_hid_show(struct device *dev, struct device_attribute *attr, char *buf) { + struct acpi_device *acpi_dev = to_acpi_device(dev); + + return sprintf(buf, "%s\n", acpi_device_hid(acpi_dev)); +} +static DEVICE_ATTR(hid, 0444, acpi_device_hid_show, NULL); + +static ssize_t acpi_device_uid_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_device *acpi_dev = to_acpi_device(dev); + + return sprintf(buf, "%s\n", acpi_dev->pnp.unique_id); +} +static DEVICE_ATTR(uid, 0444, acpi_device_uid_show, NULL); + +static ssize_t acpi_device_adr_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_device *acpi_dev = to_acpi_device(dev); + + return sprintf(buf, "0x%08x\n", + (unsigned int)(acpi_dev->pnp.bus_address)); +} +static DEVICE_ATTR(adr, 0444, acpi_device_adr_show, NULL); + +static ssize_t +acpi_device_path_show(struct device *dev, struct device_attribute *attr, char *buf) { + struct acpi_device *acpi_dev = to_acpi_device(dev); + struct acpi_buffer path = {ACPI_ALLOCATE_BUFFER, NULL}; + int result; + + result = acpi_get_name(acpi_dev->handle, ACPI_FULL_PATHNAME, &path); + if (result) + goto end; + + result = sprintf(buf, "%s\n", (char*)path.pointer); + kfree(path.pointer); +end: + return result; +} +static DEVICE_ATTR(path, 0444, acpi_device_path_show, NULL); + +/* sysfs file that shows description text from the ACPI _STR method */ +static ssize_t description_show(struct device *dev, + struct device_attribute *attr, + char *buf) { + struct acpi_device *acpi_dev = to_acpi_device(dev); + int result; + + if (acpi_dev->pnp.str_obj == NULL) + return 0; + + /* + * The _STR object contains a Unicode identifier for a device. + * We need to convert to utf-8 so it can be displayed. + */ + result = utf16s_to_utf8s( + (wchar_t *)acpi_dev->pnp.str_obj->buffer.pointer, + acpi_dev->pnp.str_obj->buffer.length, + UTF16_LITTLE_ENDIAN, buf, + PAGE_SIZE); + + buf[result++] = '\n'; + + return result; +} +static DEVICE_ATTR(description, 0444, description_show, NULL); + +static ssize_t +acpi_device_sun_show(struct device *dev, struct device_attribute *attr, + char *buf) { + struct acpi_device *acpi_dev = to_acpi_device(dev); + acpi_status status; + unsigned long long sun; + + status = acpi_evaluate_integer(acpi_dev->handle, "_SUN", NULL, &sun); + if (ACPI_FAILURE(status)) + return -ENODEV; + + return sprintf(buf, "%llu\n", sun); +} +static DEVICE_ATTR(sun, 0444, acpi_device_sun_show, NULL); + +static ssize_t status_show(struct device *dev, struct device_attribute *attr, + char *buf) { + struct acpi_device *acpi_dev = to_acpi_device(dev); + acpi_status status; + unsigned long long sta; + + status = acpi_evaluate_integer(acpi_dev->handle, "_STA", NULL, &sta); + if (ACPI_FAILURE(status)) + return -ENODEV; + + return sprintf(buf, "%llu\n", sta); +} +static DEVICE_ATTR_RO(status); + +/** + * acpi_device_setup_files - Create sysfs attributes of an ACPI device. + * @dev: ACPI device object. + */ +int acpi_device_setup_files(struct acpi_device *dev) +{ + struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; + acpi_status status; + int result = 0; + + /* + * Devices gotten from FADT don't have a "path" attribute + */ + if (dev->handle) { + result = device_create_file(&dev->dev, &dev_attr_path); + if (result) + goto end; + } + + if (!list_empty(&dev->pnp.ids)) { + result = device_create_file(&dev->dev, &dev_attr_hid); + if (result) + goto end; + + result = device_create_file(&dev->dev, &dev_attr_modalias); + if (result) + goto end; + } + + /* + * If device has _STR, 'description' file is created + */ + if (acpi_has_method(dev->handle, "_STR")) { + status = acpi_evaluate_object(dev->handle, "_STR", + NULL, &buffer); + if (ACPI_FAILURE(status)) + buffer.pointer = NULL; + dev->pnp.str_obj = buffer.pointer; + result = device_create_file(&dev->dev, &dev_attr_description); + if (result) + goto end; + } + + if (dev->pnp.type.bus_address) + result = device_create_file(&dev->dev, &dev_attr_adr); + if (dev->pnp.unique_id) + result = device_create_file(&dev->dev, &dev_attr_uid); + + if (acpi_has_method(dev->handle, "_SUN")) { + result = device_create_file(&dev->dev, &dev_attr_sun); + if (result) + goto end; + } + + if (acpi_has_method(dev->handle, "_STA")) { + result = device_create_file(&dev->dev, &dev_attr_status); + if (result) + goto end; + } + + /* + * If device has _EJ0, 'eject' file is created that is used to trigger + * hot-removal function from userland. + */ + if (acpi_has_method(dev->handle, "_EJ0")) { + result = device_create_file(&dev->dev, &dev_attr_eject); + if (result) + return result; + } + + if (dev->flags.power_manageable) { + result = device_create_file(&dev->dev, &dev_attr_power_state); + if (result) + return result; + + if (dev->power.flags.power_resources) + result = device_create_file(&dev->dev, + &dev_attr_real_power_state); + } + +end: + return result; +} + +/** + * acpi_device_remove_files - Remove sysfs attributes of an ACPI device. + * @dev: ACPI device object. + */ +void acpi_device_remove_files(struct acpi_device *dev) +{ + if (dev->flags.power_manageable) { + device_remove_file(&dev->dev, &dev_attr_power_state); + if (dev->power.flags.power_resources) + device_remove_file(&dev->dev, + &dev_attr_real_power_state); + } + + /* + * If device has _STR, remove 'description' file + */ + if (acpi_has_method(dev->handle, "_STR")) { + kfree(dev->pnp.str_obj); + device_remove_file(&dev->dev, &dev_attr_description); + } + /* + * If device has _EJ0, remove 'eject' file. + */ + if (acpi_has_method(dev->handle, "_EJ0")) + device_remove_file(&dev->dev, &dev_attr_eject); + + if (acpi_has_method(dev->handle, "_SUN")) + device_remove_file(&dev->dev, &dev_attr_sun); + + if (dev->pnp.unique_id) + device_remove_file(&dev->dev, &dev_attr_uid); + if (dev->pnp.type.bus_address) + device_remove_file(&dev->dev, &dev_attr_adr); + device_remove_file(&dev->dev, &dev_attr_modalias); + device_remove_file(&dev->dev, &dev_attr_hid); + if (acpi_has_method(dev->handle, "_STA")) + device_remove_file(&dev->dev, &dev_attr_status); + if (dev->handle) + device_remove_file(&dev->dev, &dev_attr_path); +} diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 4683a96932b9..c529454532dc 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -93,11 +93,20 @@ int acpi_device_add(struct acpi_device *device, void (*release)(struct device *)); void acpi_init_device_object(struct acpi_device *device, acpi_handle handle, int type, unsigned long long sta); +int acpi_device_setup_files(struct acpi_device *dev); +void acpi_device_remove_files(struct acpi_device *dev); void acpi_device_add_finalize(struct acpi_device *device); void acpi_free_pnp_ids(struct acpi_device_pnp *pnp); bool acpi_device_is_present(struct acpi_device *adev); bool acpi_device_is_battery(struct acpi_device *adev); +/* -------------------------------------------------------------------------- + Device Matching and Notification + -------------------------------------------------------------------------- */ +struct acpi_device *acpi_companion_match(const struct device *dev); +int __acpi_device_uevent_modalias(struct acpi_device *adev, + struct kobj_uevent_env *env); + /* -------------------------------------------------------------------------- Power Resource -------------------------------------------------------------------------- */ diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index ec256352f423..099831fc8449 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -115,117 +115,6 @@ int acpi_scan_add_handler_with_hotplug(struct acpi_scan_handler *handler, return 0; } -/** - * create_pnp_modalias - Create hid/cid(s) string for modalias and uevent - * @acpi_dev: ACPI device object. - * @modalias: Buffer to print into. - * @size: Size of the buffer. - * - * Creates hid/cid(s) string needed for modalias and uevent - * e.g. on a device with hid:IBM0001 and cid:ACPI0001 you get: - * char *modalias: "acpi:IBM0001:ACPI0001" - * Return: 0: no _HID and no _CID - * -EINVAL: output error - * -ENOMEM: output is truncated -*/ -static int create_pnp_modalias(struct acpi_device *acpi_dev, char *modalias, - int size) -{ - int len; - int count; - struct acpi_hardware_id *id; - - /* - * Since we skip ACPI_DT_NAMESPACE_HID from the modalias below, 0 should - * be returned if ACPI_DT_NAMESPACE_HID is the only ACPI/PNP ID in the - * device's list. - */ - count = 0; - list_for_each_entry(id, &acpi_dev->pnp.ids, list) - if (strcmp(id->id, ACPI_DT_NAMESPACE_HID)) - count++; - - if (!count) - return 0; - - len = snprintf(modalias, size, "acpi:"); - if (len <= 0) - return len; - - size -= len; - - list_for_each_entry(id, &acpi_dev->pnp.ids, list) { - if (!strcmp(id->id, ACPI_DT_NAMESPACE_HID)) - continue; - - count = snprintf(&modalias[len], size, "%s:", id->id); - if (count < 0) - return -EINVAL; - - if (count >= size) - return -ENOMEM; - - len += count; - size -= count; - } - modalias[len] = '\0'; - return len; -} - -/** - * create_of_modalias - Creates DT compatible string for modalias and uevent - * @acpi_dev: ACPI device object. - * @modalias: Buffer to print into. - * @size: Size of the buffer. - * - * Expose DT compatible modalias as of:NnameTCcompatible. This function should - * only be called for devices having ACPI_DT_NAMESPACE_HID in their list of - * ACPI/PNP IDs. - */ -static int create_of_modalias(struct acpi_device *acpi_dev, char *modalias, - int size) -{ - struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER }; - const union acpi_object *of_compatible, *obj; - int len, count; - int i, nval; - char *c; - - acpi_get_name(acpi_dev->handle, ACPI_SINGLE_NAME, &buf); - /* DT strings are all in lower case */ - for (c = buf.pointer; *c != '\0'; c++) - *c = tolower(*c); - - len = snprintf(modalias, size, "of:N%sT", (char *)buf.pointer); - ACPI_FREE(buf.pointer); - - if (len <= 0) - return len; - - of_compatible = acpi_dev->data.of_compatible; - if (of_compatible->type == ACPI_TYPE_PACKAGE) { - nval = of_compatible->package.count; - obj = of_compatible->package.elements; - } else { /* Must be ACPI_TYPE_STRING. */ - nval = 1; - obj = of_compatible; - } - for (i = 0; i < nval; i++, obj++) { - count = snprintf(&modalias[len], size, "C%s", - obj->string.pointer); - if (count < 0) - return -EINVAL; - - if (count >= size) - return -ENOMEM; - - len += count; - size -= count; - } - modalias[len] = '\0'; - return len; -} - /* * acpi_companion_match() - Can we match via ACPI companion device * @dev: Device in question @@ -247,7 +136,7 @@ static int create_of_modalias(struct acpi_device *acpi_dev, char *modalias, * resources available from it but they will be matched normally using functions * provided by their bus types (and analogously for their modalias). */ -static struct acpi_device *acpi_companion_match(const struct device *dev) +struct acpi_device *acpi_companion_match(const struct device *dev) { struct acpi_device *adev; struct mutex *physical_node_lock; @@ -276,103 +165,6 @@ static struct acpi_device *acpi_companion_match(const struct device *dev) return adev; } -static int __acpi_device_uevent_modalias(struct acpi_device *adev, - struct kobj_uevent_env *env) -{ - int len; - - if (!adev) - return -ENODEV; - - if (list_empty(&adev->pnp.ids)) - return 0; - - if (add_uevent_var(env, "MODALIAS=")) - return -ENOMEM; - - len = create_pnp_modalias(adev, &env->buf[env->buflen - 1], - sizeof(env->buf) - env->buflen); - if (len < 0) - return len; - - env->buflen += len; - if (!adev->data.of_compatible) - return 0; - - if (len > 0 && add_uevent_var(env, "MODALIAS=")) - return -ENOMEM; - - len = create_of_modalias(adev, &env->buf[env->buflen - 1], - sizeof(env->buf) - env->buflen); - if (len < 0) - return len; - - env->buflen += len; - - return 0; -} - -/* - * Creates uevent modalias field for ACPI enumerated devices. - * Because the other buses does not support ACPI HIDs & CIDs. - * e.g. for a device with hid:IBM0001 and cid:ACPI0001 you get: - * "acpi:IBM0001:ACPI0001" - */ -int acpi_device_uevent_modalias(struct device *dev, struct kobj_uevent_env *env) -{ - return __acpi_device_uevent_modalias(acpi_companion_match(dev), env); -} -EXPORT_SYMBOL_GPL(acpi_device_uevent_modalias); - -static int __acpi_device_modalias(struct acpi_device *adev, char *buf, int size) -{ - int len, count; - - if (!adev) - return -ENODEV; - - if (list_empty(&adev->pnp.ids)) - return 0; - - len = create_pnp_modalias(adev, buf, size - 1); - if (len < 0) { - return len; - } else if (len > 0) { - buf[len++] = '\n'; - size -= len; - } - if (!adev->data.of_compatible) - return len; - - count = create_of_modalias(adev, buf + len, size - 1); - if (count < 0) { - return count; - } else if (count > 0) { - len += count; - buf[len++] = '\n'; - } - - return len; -} - -/* - * Creates modalias sysfs attribute for ACPI enumerated devices. - * Because the other buses does not support ACPI HIDs & CIDs. - * e.g. for a device with hid:IBM0001 and cid:ACPI0001 you get: - * "acpi:IBM0001:ACPI0001" - */ -int acpi_device_modalias(struct device *dev, char *buf, int size) -{ - return __acpi_device_modalias(acpi_companion_match(dev), buf, size); -} -EXPORT_SYMBOL_GPL(acpi_device_modalias); - -static ssize_t -acpi_device_modalias_show(struct device *dev, struct device_attribute *attr, char *buf) { - return __acpi_device_modalias(to_acpi_device(dev), buf, 1024); -} -static DEVICE_ATTR(modalias, 0444, acpi_device_modalias_show, NULL); - bool acpi_scan_is_offline(struct acpi_device *adev, bool uevent) { struct acpi_device_physical_node *pn; @@ -701,279 +493,6 @@ void acpi_device_hotplug(struct acpi_device *adev, u32 src) unlock_device_hotplug(); } -static ssize_t real_power_state_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct acpi_device *adev = to_acpi_device(dev); - int state; - int ret; - - ret = acpi_device_get_power(adev, &state); - if (ret) - return ret; - - return sprintf(buf, "%s\n", acpi_power_state_string(state)); -} - -static DEVICE_ATTR(real_power_state, 0444, real_power_state_show, NULL); - -static ssize_t power_state_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct acpi_device *adev = to_acpi_device(dev); - - return sprintf(buf, "%s\n", acpi_power_state_string(adev->power.state)); -} - -static DEVICE_ATTR(power_state, 0444, power_state_show, NULL); - -static ssize_t -acpi_eject_store(struct device *d, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct acpi_device *acpi_device = to_acpi_device(d); - acpi_object_type not_used; - acpi_status status; - - if (!count || buf[0] != '1') - return -EINVAL; - - if ((!acpi_device->handler || !acpi_device->handler->hotplug.enabled) - && !acpi_device->driver) - return -ENODEV; - - status = acpi_get_type(acpi_device->handle, ¬_used); - if (ACPI_FAILURE(status) || !acpi_device->flags.ejectable) - return -ENODEV; - - get_device(&acpi_device->dev); - status = acpi_hotplug_schedule(acpi_device, ACPI_OST_EC_OSPM_EJECT); - if (ACPI_SUCCESS(status)) - return count; - - put_device(&acpi_device->dev); - acpi_evaluate_ost(acpi_device->handle, ACPI_OST_EC_OSPM_EJECT, - ACPI_OST_SC_NON_SPECIFIC_FAILURE, NULL); - return status == AE_NO_MEMORY ? -ENOMEM : -EAGAIN; -} - -static DEVICE_ATTR(eject, 0200, NULL, acpi_eject_store); - -static ssize_t -acpi_device_hid_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct acpi_device *acpi_dev = to_acpi_device(dev); - - return sprintf(buf, "%s\n", acpi_device_hid(acpi_dev)); -} -static DEVICE_ATTR(hid, 0444, acpi_device_hid_show, NULL); - -static ssize_t acpi_device_uid_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct acpi_device *acpi_dev = to_acpi_device(dev); - - return sprintf(buf, "%s\n", acpi_dev->pnp.unique_id); -} -static DEVICE_ATTR(uid, 0444, acpi_device_uid_show, NULL); - -static ssize_t acpi_device_adr_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct acpi_device *acpi_dev = to_acpi_device(dev); - - return sprintf(buf, "0x%08x\n", - (unsigned int)(acpi_dev->pnp.bus_address)); -} -static DEVICE_ATTR(adr, 0444, acpi_device_adr_show, NULL); - -static ssize_t -acpi_device_path_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct acpi_device *acpi_dev = to_acpi_device(dev); - struct acpi_buffer path = {ACPI_ALLOCATE_BUFFER, NULL}; - int result; - - result = acpi_get_name(acpi_dev->handle, ACPI_FULL_PATHNAME, &path); - if (result) - goto end; - - result = sprintf(buf, "%s\n", (char*)path.pointer); - kfree(path.pointer); -end: - return result; -} -static DEVICE_ATTR(path, 0444, acpi_device_path_show, NULL); - -/* sysfs file that shows description text from the ACPI _STR method */ -static ssize_t description_show(struct device *dev, - struct device_attribute *attr, - char *buf) { - struct acpi_device *acpi_dev = to_acpi_device(dev); - int result; - - if (acpi_dev->pnp.str_obj == NULL) - return 0; - - /* - * The _STR object contains a Unicode identifier for a device. - * We need to convert to utf-8 so it can be displayed. - */ - result = utf16s_to_utf8s( - (wchar_t *)acpi_dev->pnp.str_obj->buffer.pointer, - acpi_dev->pnp.str_obj->buffer.length, - UTF16_LITTLE_ENDIAN, buf, - PAGE_SIZE); - - buf[result++] = '\n'; - - return result; -} -static DEVICE_ATTR(description, 0444, description_show, NULL); - -static ssize_t -acpi_device_sun_show(struct device *dev, struct device_attribute *attr, - char *buf) { - struct acpi_device *acpi_dev = to_acpi_device(dev); - acpi_status status; - unsigned long long sun; - - status = acpi_evaluate_integer(acpi_dev->handle, "_SUN", NULL, &sun); - if (ACPI_FAILURE(status)) - return -ENODEV; - - return sprintf(buf, "%llu\n", sun); -} -static DEVICE_ATTR(sun, 0444, acpi_device_sun_show, NULL); - -static ssize_t status_show(struct device *dev, struct device_attribute *attr, - char *buf) { - struct acpi_device *acpi_dev = to_acpi_device(dev); - acpi_status status; - unsigned long long sta; - - status = acpi_evaluate_integer(acpi_dev->handle, "_STA", NULL, &sta); - if (ACPI_FAILURE(status)) - return -ENODEV; - - return sprintf(buf, "%llu\n", sta); -} -static DEVICE_ATTR_RO(status); - -static int acpi_device_setup_files(struct acpi_device *dev) -{ - struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; - acpi_status status; - int result = 0; - - /* - * Devices gotten from FADT don't have a "path" attribute - */ - if (dev->handle) { - result = device_create_file(&dev->dev, &dev_attr_path); - if (result) - goto end; - } - - if (!list_empty(&dev->pnp.ids)) { - result = device_create_file(&dev->dev, &dev_attr_hid); - if (result) - goto end; - - result = device_create_file(&dev->dev, &dev_attr_modalias); - if (result) - goto end; - } - - /* - * If device has _STR, 'description' file is created - */ - if (acpi_has_method(dev->handle, "_STR")) { - status = acpi_evaluate_object(dev->handle, "_STR", - NULL, &buffer); - if (ACPI_FAILURE(status)) - buffer.pointer = NULL; - dev->pnp.str_obj = buffer.pointer; - result = device_create_file(&dev->dev, &dev_attr_description); - if (result) - goto end; - } - - if (dev->pnp.type.bus_address) - result = device_create_file(&dev->dev, &dev_attr_adr); - if (dev->pnp.unique_id) - result = device_create_file(&dev->dev, &dev_attr_uid); - - if (acpi_has_method(dev->handle, "_SUN")) { - result = device_create_file(&dev->dev, &dev_attr_sun); - if (result) - goto end; - } - - if (acpi_has_method(dev->handle, "_STA")) { - result = device_create_file(&dev->dev, &dev_attr_status); - if (result) - goto end; - } - - /* - * If device has _EJ0, 'eject' file is created that is used to trigger - * hot-removal function from userland. - */ - if (acpi_has_method(dev->handle, "_EJ0")) { - result = device_create_file(&dev->dev, &dev_attr_eject); - if (result) - return result; - } - - if (dev->flags.power_manageable) { - result = device_create_file(&dev->dev, &dev_attr_power_state); - if (result) - return result; - - if (dev->power.flags.power_resources) - result = device_create_file(&dev->dev, - &dev_attr_real_power_state); - } - -end: - return result; -} - -static void acpi_device_remove_files(struct acpi_device *dev) -{ - if (dev->flags.power_manageable) { - device_remove_file(&dev->dev, &dev_attr_power_state); - if (dev->power.flags.power_resources) - device_remove_file(&dev->dev, - &dev_attr_real_power_state); - } - - /* - * If device has _STR, remove 'description' file - */ - if (acpi_has_method(dev->handle, "_STR")) { - kfree(dev->pnp.str_obj); - device_remove_file(&dev->dev, &dev_attr_description); - } - /* - * If device has _EJ0, remove 'eject' file. - */ - if (acpi_has_method(dev->handle, "_EJ0")) - device_remove_file(&dev->dev, &dev_attr_eject); - - if (acpi_has_method(dev->handle, "_SUN")) - device_remove_file(&dev->dev, &dev_attr_sun); - - if (dev->pnp.unique_id) - device_remove_file(&dev->dev, &dev_attr_uid); - if (dev->pnp.type.bus_address) - device_remove_file(&dev->dev, &dev_attr_adr); - device_remove_file(&dev->dev, &dev_attr_modalias); - device_remove_file(&dev->dev, &dev_attr_hid); - if (acpi_has_method(dev->handle, "_STA")) - device_remove_file(&dev->dev, &dev_attr_status); - if (dev->handle) - device_remove_file(&dev->dev, &dev_attr_path); -} /* -------------------------------------------------------------------------- ACPI Bus operations -------------------------------------------------------------------------- */ From 68c6b148daa6e45a85b31ef60ed9c9bfd556fff0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 17 Jul 2015 22:53:52 +0200 Subject: [PATCH 168/170] ACPI / scan: Move device matching code to bus.c To reduce the size of scan.c and improve the readability of it, move code related device matching into drivers/acpi/bus.c. Signed-off-by: Rafael J. Wysocki --- drivers/acpi/bus.c | 194 ++++++++++++++++++++++++++++++++++++++++++++ drivers/acpi/scan.c | 189 ------------------------------------------ 2 files changed, 194 insertions(+), 189 deletions(-) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 513e7230e3d0..ce805809b00b 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -423,6 +423,200 @@ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data) acpi_evaluate_ost(handle, type, ost_code, NULL); } +/* -------------------------------------------------------------------------- + Device Matching + -------------------------------------------------------------------------- */ + +/* + * acpi_companion_match() - Can we match via ACPI companion device + * @dev: Device in question + * + * Check if the given device has an ACPI companion and if that companion has + * a valid list of PNP IDs, and if the device is the first (primary) physical + * device associated with it. Return the companion pointer if that's the case + * or NULL otherwise. + * + * If multiple physical devices are attached to a single ACPI companion, we need + * to be careful. The usage scenario for this kind of relationship is that all + * of the physical devices in question use resources provided by the ACPI + * companion. A typical case is an MFD device where all the sub-devices share + * the parent's ACPI companion. In such cases we can only allow the primary + * (first) physical device to be matched with the help of the companion's PNP + * IDs. + * + * Additional physical devices sharing the ACPI companion can still use + * resources available from it but they will be matched normally using functions + * provided by their bus types (and analogously for their modalias). + */ +struct acpi_device *acpi_companion_match(const struct device *dev) +{ + struct acpi_device *adev; + struct mutex *physical_node_lock; + + adev = ACPI_COMPANION(dev); + if (!adev) + return NULL; + + if (list_empty(&adev->pnp.ids)) + return NULL; + + physical_node_lock = &adev->physical_node_lock; + mutex_lock(physical_node_lock); + if (list_empty(&adev->physical_node_list)) { + adev = NULL; + } else { + const struct acpi_device_physical_node *node; + + node = list_first_entry(&adev->physical_node_list, + struct acpi_device_physical_node, node); + if (node->dev != dev) + adev = NULL; + } + mutex_unlock(physical_node_lock); + + return adev; +} + +/** + * acpi_of_match_device - Match device object using the "compatible" property. + * @adev: ACPI device object to match. + * @of_match_table: List of device IDs to match against. + * + * If @dev has an ACPI companion which has ACPI_DT_NAMESPACE_HID in its list of + * identifiers and a _DSD object with the "compatible" property, use that + * property to match against the given list of identifiers. + */ +static bool acpi_of_match_device(struct acpi_device *adev, + const struct of_device_id *of_match_table) +{ + const union acpi_object *of_compatible, *obj; + int i, nval; + + if (!adev) + return false; + + of_compatible = adev->data.of_compatible; + if (!of_match_table || !of_compatible) + return false; + + if (of_compatible->type == ACPI_TYPE_PACKAGE) { + nval = of_compatible->package.count; + obj = of_compatible->package.elements; + } else { /* Must be ACPI_TYPE_STRING. */ + nval = 1; + obj = of_compatible; + } + /* Now we can look for the driver DT compatible strings */ + for (i = 0; i < nval; i++, obj++) { + const struct of_device_id *id; + + for (id = of_match_table; id->compatible[0]; id++) + if (!strcasecmp(obj->string.pointer, id->compatible)) + return true; + } + + return false; +} + +static bool __acpi_match_device_cls(const struct acpi_device_id *id, + struct acpi_hardware_id *hwid) +{ + int i, msk, byte_shift; + char buf[3]; + + if (!id->cls) + return false; + + /* Apply class-code bitmask, before checking each class-code byte */ + for (i = 1; i <= 3; i++) { + byte_shift = 8 * (3 - i); + msk = (id->cls_msk >> byte_shift) & 0xFF; + if (!msk) + continue; + + sprintf(buf, "%02x", (id->cls >> byte_shift) & msk); + if (strncmp(buf, &hwid->id[(i - 1) * 2], 2)) + return false; + } + return true; +} + +static const struct acpi_device_id *__acpi_match_device( + struct acpi_device *device, + const struct acpi_device_id *ids, + const struct of_device_id *of_ids) +{ + const struct acpi_device_id *id; + struct acpi_hardware_id *hwid; + + /* + * If the device is not present, it is unnecessary to load device + * driver for it. + */ + if (!device || !device->status.present) + return NULL; + + list_for_each_entry(hwid, &device->pnp.ids, list) { + /* First, check the ACPI/PNP IDs provided by the caller. */ + for (id = ids; id->id[0] || id->cls; id++) { + if (id->id[0] && !strcmp((char *) id->id, hwid->id)) + return id; + else if (id->cls && __acpi_match_device_cls(id, hwid)) + return id; + } + + /* + * Next, check ACPI_DT_NAMESPACE_HID and try to match the + * "compatible" property if found. + * + * The id returned by the below is not valid, but the only + * caller passing non-NULL of_ids here is only interested in + * whether or not the return value is NULL. + */ + if (!strcmp(ACPI_DT_NAMESPACE_HID, hwid->id) + && acpi_of_match_device(device, of_ids)) + return id; + } + return NULL; +} + +/** + * acpi_match_device - Match a struct device against a given list of ACPI IDs + * @ids: Array of struct acpi_device_id object to match against. + * @dev: The device structure to match. + * + * Check if @dev has a valid ACPI handle and if there is a struct acpi_device + * object for that handle and use that object to match against a given list of + * device IDs. + * + * Return a pointer to the first matching ID on success or %NULL on failure. + */ +const struct acpi_device_id *acpi_match_device(const struct acpi_device_id *ids, + const struct device *dev) +{ + return __acpi_match_device(acpi_companion_match(dev), ids, NULL); +} +EXPORT_SYMBOL_GPL(acpi_match_device); + +int acpi_match_device_ids(struct acpi_device *device, + const struct acpi_device_id *ids) +{ + return __acpi_match_device(device, ids, NULL) ? 0 : -ENOENT; +} +EXPORT_SYMBOL(acpi_match_device_ids); + +bool acpi_driver_match_device(struct device *dev, + const struct device_driver *drv) +{ + if (!drv->acpi_match_table) + return acpi_of_match_device(ACPI_COMPANION(dev), + drv->of_match_table); + + return !!__acpi_match_device(acpi_companion_match(dev), + drv->acpi_match_table, drv->of_match_table); +} +EXPORT_SYMBOL_GPL(acpi_driver_match_device); + /* -------------------------------------------------------------------------- Initialization/Cleanup -------------------------------------------------------------------------- */ diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 099831fc8449..68877bc22357 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -115,55 +115,6 @@ int acpi_scan_add_handler_with_hotplug(struct acpi_scan_handler *handler, return 0; } -/* - * acpi_companion_match() - Can we match via ACPI companion device - * @dev: Device in question - * - * Check if the given device has an ACPI companion and if that companion has - * a valid list of PNP IDs, and if the device is the first (primary) physical - * device associated with it. Return the companion pointer if that's the case - * or NULL otherwise. - * - * If multiple physical devices are attached to a single ACPI companion, we need - * to be careful. The usage scenario for this kind of relationship is that all - * of the physical devices in question use resources provided by the ACPI - * companion. A typical case is an MFD device where all the sub-devices share - * the parent's ACPI companion. In such cases we can only allow the primary - * (first) physical device to be matched with the help of the companion's PNP - * IDs. - * - * Additional physical devices sharing the ACPI companion can still use - * resources available from it but they will be matched normally using functions - * provided by their bus types (and analogously for their modalias). - */ -struct acpi_device *acpi_companion_match(const struct device *dev) -{ - struct acpi_device *adev; - struct mutex *physical_node_lock; - - adev = ACPI_COMPANION(dev); - if (!adev) - return NULL; - - if (list_empty(&adev->pnp.ids)) - return NULL; - - physical_node_lock = &adev->physical_node_lock; - mutex_lock(physical_node_lock); - if (list_empty(&adev->physical_node_list)) { - adev = NULL; - } else { - const struct acpi_device_physical_node *node; - - node = list_first_entry(&adev->physical_node_list, - struct acpi_device_physical_node, node); - if (node->dev != dev) - adev = NULL; - } - mutex_unlock(physical_node_lock); - - return adev; -} bool acpi_scan_is_offline(struct acpi_device *adev, bool uevent) { @@ -497,146 +448,6 @@ void acpi_device_hotplug(struct acpi_device *adev, u32 src) ACPI Bus operations -------------------------------------------------------------------------- */ -/** - * acpi_of_match_device - Match device object using the "compatible" property. - * @adev: ACPI device object to match. - * @of_match_table: List of device IDs to match against. - * - * If @dev has an ACPI companion which has ACPI_DT_NAMESPACE_HID in its list of - * identifiers and a _DSD object with the "compatible" property, use that - * property to match against the given list of identifiers. - */ -static bool acpi_of_match_device(struct acpi_device *adev, - const struct of_device_id *of_match_table) -{ - const union acpi_object *of_compatible, *obj; - int i, nval; - - if (!adev) - return false; - - of_compatible = adev->data.of_compatible; - if (!of_match_table || !of_compatible) - return false; - - if (of_compatible->type == ACPI_TYPE_PACKAGE) { - nval = of_compatible->package.count; - obj = of_compatible->package.elements; - } else { /* Must be ACPI_TYPE_STRING. */ - nval = 1; - obj = of_compatible; - } - /* Now we can look for the driver DT compatible strings */ - for (i = 0; i < nval; i++, obj++) { - const struct of_device_id *id; - - for (id = of_match_table; id->compatible[0]; id++) - if (!strcasecmp(obj->string.pointer, id->compatible)) - return true; - } - - return false; -} - -static bool __acpi_match_device_cls(const struct acpi_device_id *id, - struct acpi_hardware_id *hwid) -{ - int i, msk, byte_shift; - char buf[3]; - - if (!id->cls) - return false; - - /* Apply class-code bitmask, before checking each class-code byte */ - for (i = 1; i <= 3; i++) { - byte_shift = 8 * (3 - i); - msk = (id->cls_msk >> byte_shift) & 0xFF; - if (!msk) - continue; - - sprintf(buf, "%02x", (id->cls >> byte_shift) & msk); - if (strncmp(buf, &hwid->id[(i - 1) * 2], 2)) - return false; - } - return true; -} - -static const struct acpi_device_id *__acpi_match_device( - struct acpi_device *device, - const struct acpi_device_id *ids, - const struct of_device_id *of_ids) -{ - const struct acpi_device_id *id; - struct acpi_hardware_id *hwid; - - /* - * If the device is not present, it is unnecessary to load device - * driver for it. - */ - if (!device || !device->status.present) - return NULL; - - list_for_each_entry(hwid, &device->pnp.ids, list) { - /* First, check the ACPI/PNP IDs provided by the caller. */ - for (id = ids; id->id[0] || id->cls; id++) { - if (id->id[0] && !strcmp((char *) id->id, hwid->id)) - return id; - else if (id->cls && __acpi_match_device_cls(id, hwid)) - return id; - } - - /* - * Next, check ACPI_DT_NAMESPACE_HID and try to match the - * "compatible" property if found. - * - * The id returned by the below is not valid, but the only - * caller passing non-NULL of_ids here is only interested in - * whether or not the return value is NULL. - */ - if (!strcmp(ACPI_DT_NAMESPACE_HID, hwid->id) - && acpi_of_match_device(device, of_ids)) - return id; - } - return NULL; -} - -/** - * acpi_match_device - Match a struct device against a given list of ACPI IDs - * @ids: Array of struct acpi_device_id object to match against. - * @dev: The device structure to match. - * - * Check if @dev has a valid ACPI handle and if there is a struct acpi_device - * object for that handle and use that object to match against a given list of - * device IDs. - * - * Return a pointer to the first matching ID on success or %NULL on failure. - */ -const struct acpi_device_id *acpi_match_device(const struct acpi_device_id *ids, - const struct device *dev) -{ - return __acpi_match_device(acpi_companion_match(dev), ids, NULL); -} -EXPORT_SYMBOL_GPL(acpi_match_device); - -int acpi_match_device_ids(struct acpi_device *device, - const struct acpi_device_id *ids) -{ - return __acpi_match_device(device, ids, NULL) ? 0 : -ENOENT; -} -EXPORT_SYMBOL(acpi_match_device_ids); - -bool acpi_driver_match_device(struct device *dev, - const struct device_driver *drv) -{ - if (!drv->acpi_match_table) - return acpi_of_match_device(ACPI_COMPANION(dev), - drv->of_match_table); - - return !!__acpi_match_device(acpi_companion_match(dev), - drv->acpi_match_table, drv->of_match_table); -} -EXPORT_SYMBOL_GPL(acpi_driver_match_device); - static void acpi_free_power_resources_lists(struct acpi_device *device) { int i; From 5894b0c46e49b5ecc25f22b2d1b8232aab00ce97 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 17 Jul 2015 22:54:01 +0200 Subject: [PATCH 169/170] ACPI / scan: Move bus operations and notification routines to bus.c To reduce the size of scan.c and improve the readability of it, move code related to device notification, the definitions of the ACPI bus operations and the driver management code to drivers/acpi/bus.c. Signed-off-by: Rafael J. Wysocki --- drivers/acpi/bus.c | 184 ++++++++++++++++++++++++++++++++++++++++++++ drivers/acpi/scan.c | 183 ------------------------------------------- 2 files changed, 184 insertions(+), 183 deletions(-) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index ce805809b00b..2f50fc4be1d4 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -423,6 +423,65 @@ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data) acpi_evaluate_ost(handle, type, ost_code, NULL); } +static void acpi_device_notify(acpi_handle handle, u32 event, void *data) +{ + struct acpi_device *device = data; + + device->driver->ops.notify(device, event); +} + +static void acpi_device_notify_fixed(void *data) +{ + struct acpi_device *device = data; + + /* Fixed hardware devices have no handles */ + acpi_device_notify(NULL, ACPI_FIXED_HARDWARE_EVENT, device); +} + +static u32 acpi_device_fixed_event(void *data) +{ + acpi_os_execute(OSL_NOTIFY_HANDLER, acpi_device_notify_fixed, data); + return ACPI_INTERRUPT_HANDLED; +} + +static int acpi_device_install_notify_handler(struct acpi_device *device) +{ + acpi_status status; + + if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) + status = + acpi_install_fixed_event_handler(ACPI_EVENT_POWER_BUTTON, + acpi_device_fixed_event, + device); + else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) + status = + acpi_install_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON, + acpi_device_fixed_event, + device); + else + status = acpi_install_notify_handler(device->handle, + ACPI_DEVICE_NOTIFY, + acpi_device_notify, + device); + + if (ACPI_FAILURE(status)) + return -EINVAL; + return 0; +} + +static void acpi_device_remove_notify_handler(struct acpi_device *device) +{ + if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) + acpi_remove_fixed_event_handler(ACPI_EVENT_POWER_BUTTON, + acpi_device_fixed_event); + else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) + acpi_remove_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON, + acpi_device_fixed_event); + else + acpi_remove_notify_handler(device->handle, ACPI_DEVICE_NOTIFY, + acpi_device_notify); +} + /* -------------------------------------------------------------------------- Device Matching -------------------------------------------------------------------------- */ @@ -617,6 +676,131 @@ bool acpi_driver_match_device(struct device *dev, } EXPORT_SYMBOL_GPL(acpi_driver_match_device); +/* -------------------------------------------------------------------------- + ACPI Driver Management + -------------------------------------------------------------------------- */ + +/** + * acpi_bus_register_driver - register a driver with the ACPI bus + * @driver: driver being registered + * + * Registers a driver with the ACPI bus. Searches the namespace for all + * devices that match the driver's criteria and binds. Returns zero for + * success or a negative error status for failure. + */ +int acpi_bus_register_driver(struct acpi_driver *driver) +{ + int ret; + + if (acpi_disabled) + return -ENODEV; + driver->drv.name = driver->name; + driver->drv.bus = &acpi_bus_type; + driver->drv.owner = driver->owner; + + ret = driver_register(&driver->drv); + return ret; +} + +EXPORT_SYMBOL(acpi_bus_register_driver); + +/** + * acpi_bus_unregister_driver - unregisters a driver with the ACPI bus + * @driver: driver to unregister + * + * Unregisters a driver with the ACPI bus. Searches the namespace for all + * devices that match the driver's criteria and unbinds. + */ +void acpi_bus_unregister_driver(struct acpi_driver *driver) +{ + driver_unregister(&driver->drv); +} + +EXPORT_SYMBOL(acpi_bus_unregister_driver); + +/* -------------------------------------------------------------------------- + ACPI Bus operations + -------------------------------------------------------------------------- */ + +static int acpi_bus_match(struct device *dev, struct device_driver *drv) +{ + struct acpi_device *acpi_dev = to_acpi_device(dev); + struct acpi_driver *acpi_drv = to_acpi_driver(drv); + + return acpi_dev->flags.match_driver + && !acpi_match_device_ids(acpi_dev, acpi_drv->ids); +} + +static int acpi_device_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + return __acpi_device_uevent_modalias(to_acpi_device(dev), env); +} + +static int acpi_device_probe(struct device *dev) +{ + struct acpi_device *acpi_dev = to_acpi_device(dev); + struct acpi_driver *acpi_drv = to_acpi_driver(dev->driver); + int ret; + + if (acpi_dev->handler && !acpi_is_pnp_device(acpi_dev)) + return -EINVAL; + + if (!acpi_drv->ops.add) + return -ENOSYS; + + ret = acpi_drv->ops.add(acpi_dev); + if (ret) + return ret; + + acpi_dev->driver = acpi_drv; + ACPI_DEBUG_PRINT((ACPI_DB_INFO, + "Driver [%s] successfully bound to device [%s]\n", + acpi_drv->name, acpi_dev->pnp.bus_id)); + + if (acpi_drv->ops.notify) { + ret = acpi_device_install_notify_handler(acpi_dev); + if (ret) { + if (acpi_drv->ops.remove) + acpi_drv->ops.remove(acpi_dev); + + acpi_dev->driver = NULL; + acpi_dev->driver_data = NULL; + return ret; + } + } + + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found driver [%s] for device [%s]\n", + acpi_drv->name, acpi_dev->pnp.bus_id)); + get_device(dev); + return 0; +} + +static int acpi_device_remove(struct device * dev) +{ + struct acpi_device *acpi_dev = to_acpi_device(dev); + struct acpi_driver *acpi_drv = acpi_dev->driver; + + if (acpi_drv) { + if (acpi_drv->ops.notify) + acpi_device_remove_notify_handler(acpi_dev); + if (acpi_drv->ops.remove) + acpi_drv->ops.remove(acpi_dev); + } + acpi_dev->driver = NULL; + acpi_dev->driver_data = NULL; + + put_device(dev); + return 0; +} + +struct bus_type acpi_bus_type = { + .name = "acpi", + .match = acpi_bus_match, + .probe = acpi_device_probe, + .remove = acpi_device_remove, + .uevent = acpi_device_uevent, +}; + /* -------------------------------------------------------------------------- Initialization/Cleanup -------------------------------------------------------------------------- */ diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 68877bc22357..f541f689ae80 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -444,10 +444,6 @@ void acpi_device_hotplug(struct acpi_device *adev, u32 src) unlock_device_hotplug(); } -/* -------------------------------------------------------------------------- - ACPI Bus operations - -------------------------------------------------------------------------- */ - static void acpi_free_power_resources_lists(struct acpi_device *device) { int i; @@ -474,144 +470,6 @@ static void acpi_device_release(struct device *dev) kfree(acpi_dev); } -static int acpi_bus_match(struct device *dev, struct device_driver *drv) -{ - struct acpi_device *acpi_dev = to_acpi_device(dev); - struct acpi_driver *acpi_drv = to_acpi_driver(drv); - - return acpi_dev->flags.match_driver - && !acpi_match_device_ids(acpi_dev, acpi_drv->ids); -} - -static int acpi_device_uevent(struct device *dev, struct kobj_uevent_env *env) -{ - return __acpi_device_uevent_modalias(to_acpi_device(dev), env); -} - -static void acpi_device_notify(acpi_handle handle, u32 event, void *data) -{ - struct acpi_device *device = data; - - device->driver->ops.notify(device, event); -} - -static void acpi_device_notify_fixed(void *data) -{ - struct acpi_device *device = data; - - /* Fixed hardware devices have no handles */ - acpi_device_notify(NULL, ACPI_FIXED_HARDWARE_EVENT, device); -} - -static u32 acpi_device_fixed_event(void *data) -{ - acpi_os_execute(OSL_NOTIFY_HANDLER, acpi_device_notify_fixed, data); - return ACPI_INTERRUPT_HANDLED; -} - -static int acpi_device_install_notify_handler(struct acpi_device *device) -{ - acpi_status status; - - if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) - status = - acpi_install_fixed_event_handler(ACPI_EVENT_POWER_BUTTON, - acpi_device_fixed_event, - device); - else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) - status = - acpi_install_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON, - acpi_device_fixed_event, - device); - else - status = acpi_install_notify_handler(device->handle, - ACPI_DEVICE_NOTIFY, - acpi_device_notify, - device); - - if (ACPI_FAILURE(status)) - return -EINVAL; - return 0; -} - -static void acpi_device_remove_notify_handler(struct acpi_device *device) -{ - if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) - acpi_remove_fixed_event_handler(ACPI_EVENT_POWER_BUTTON, - acpi_device_fixed_event); - else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) - acpi_remove_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON, - acpi_device_fixed_event); - else - acpi_remove_notify_handler(device->handle, ACPI_DEVICE_NOTIFY, - acpi_device_notify); -} - -static int acpi_device_probe(struct device *dev) -{ - struct acpi_device *acpi_dev = to_acpi_device(dev); - struct acpi_driver *acpi_drv = to_acpi_driver(dev->driver); - int ret; - - if (acpi_dev->handler && !acpi_is_pnp_device(acpi_dev)) - return -EINVAL; - - if (!acpi_drv->ops.add) - return -ENOSYS; - - ret = acpi_drv->ops.add(acpi_dev); - if (ret) - return ret; - - acpi_dev->driver = acpi_drv; - ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "Driver [%s] successfully bound to device [%s]\n", - acpi_drv->name, acpi_dev->pnp.bus_id)); - - if (acpi_drv->ops.notify) { - ret = acpi_device_install_notify_handler(acpi_dev); - if (ret) { - if (acpi_drv->ops.remove) - acpi_drv->ops.remove(acpi_dev); - - acpi_dev->driver = NULL; - acpi_dev->driver_data = NULL; - return ret; - } - } - - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found driver [%s] for device [%s]\n", - acpi_drv->name, acpi_dev->pnp.bus_id)); - get_device(dev); - return 0; -} - -static int acpi_device_remove(struct device * dev) -{ - struct acpi_device *acpi_dev = to_acpi_device(dev); - struct acpi_driver *acpi_drv = acpi_dev->driver; - - if (acpi_drv) { - if (acpi_drv->ops.notify) - acpi_device_remove_notify_handler(acpi_dev); - if (acpi_drv->ops.remove) - acpi_drv->ops.remove(acpi_dev); - } - acpi_dev->driver = NULL; - acpi_dev->driver_data = NULL; - - put_device(dev); - return 0; -} - -struct bus_type acpi_bus_type = { - .name = "acpi", - .match = acpi_bus_match, - .probe = acpi_device_probe, - .remove = acpi_device_remove, - .uevent = acpi_device_uevent, -}; - static void acpi_device_del(struct acpi_device *device) { mutex_lock(&acpi_device_lock); @@ -858,47 +716,6 @@ struct acpi_device *acpi_get_next_child(struct device *dev, return next == head ? NULL : list_entry(next, struct acpi_device, node); } -/* -------------------------------------------------------------------------- - Driver Management - -------------------------------------------------------------------------- */ -/** - * acpi_bus_register_driver - register a driver with the ACPI bus - * @driver: driver being registered - * - * Registers a driver with the ACPI bus. Searches the namespace for all - * devices that match the driver's criteria and binds. Returns zero for - * success or a negative error status for failure. - */ -int acpi_bus_register_driver(struct acpi_driver *driver) -{ - int ret; - - if (acpi_disabled) - return -ENODEV; - driver->drv.name = driver->name; - driver->drv.bus = &acpi_bus_type; - driver->drv.owner = driver->owner; - - ret = driver_register(&driver->drv); - return ret; -} - -EXPORT_SYMBOL(acpi_bus_register_driver); - -/** - * acpi_bus_unregister_driver - unregisters a driver with the ACPI bus - * @driver: driver to unregister - * - * Unregisters a driver with the ACPI bus. Searches the namespace for all - * devices that match the driver's criteria and unbinds. - */ -void acpi_bus_unregister_driver(struct acpi_driver *driver) -{ - driver_unregister(&driver->drv); -} - -EXPORT_SYMBOL(acpi_bus_unregister_driver); - /* -------------------------------------------------------------------------- Device Enumeration -------------------------------------------------------------------------- */ From 1dcc3d3362b0c97e48290f7786be85b4cec2a147 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 17 Jul 2015 22:54:09 +0200 Subject: [PATCH 170/170] ACPI / bus: Move ACPI bus type registration Move the registration of the ACPI bus type to acpi_bus_init() and avoid using ACPI going forward if it fails (too many things depend on the presence of the ACPI bus type). Signed-off-by: Rafael J. Wysocki --- drivers/acpi/bus.c | 4 +++- drivers/acpi/scan.c | 6 ------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 2f50fc4be1d4..7a3ad929f095 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -1039,7 +1039,9 @@ static int __init acpi_bus_init(void) */ acpi_root_dir = proc_mkdir(ACPI_BUS_FILE_ROOT, NULL); - return 0; + result = bus_register(&acpi_bus_type); + if (!result) + return 0; /* Mimic structured exception handling */ error1: diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index f541f689ae80..2fe5a37c385c 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1891,12 +1891,6 @@ int __init acpi_scan_init(void) { int result; - result = bus_register(&acpi_bus_type); - if (result) { - /* We don't want to quit even if we failed to add suspend/resume */ - printk(KERN_ERR PREFIX "Could not register bus type\n"); - } - acpi_pci_root_init(); acpi_pci_link_init(); acpi_processor_init();