From fe5e8a1acc7fd877b6706053cf88c418c33fe7a3 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 5 Dec 2013 15:48:01 +0530 Subject: [PATCH 01/61] RDMA/ocrdma: Fix AV_VALID bit position Fix ah->av->valid bit position and big endian portability. Signed-off-by: Devesh Sharma Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 1 + drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index ee499d942257..69da5dd1ca8a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -84,6 +84,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, memcpy((u8 *)ah->av + eth_sz, &grh, sizeof(struct ocrdma_grh)); if (vlan_enabled) ah->av->valid |= OCRDMA_AV_VLAN_VALID; + ah->av->valid = cpu_to_le32(ah->av->valid); return status; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 9f9570ec3c2e..27089032c669 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -1694,7 +1694,7 @@ struct ocrdma_grh { u16 rsvd; } __packed; -#define OCRDMA_AV_VALID Bit(0) +#define OCRDMA_AV_VALID Bit(7) #define OCRDMA_AV_VLAN_VALID Bit(1) struct ocrdma_av { From be8348df6efac6b602f2ad3210139bccf0dbe3d7 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 5 Dec 2013 09:46:07 +0530 Subject: [PATCH 02/61] RDMA/ocrdma: Fix OCRDMA_GEN2_FAMILY macro definition OCRDMA_GEN2_FAMILY is wrongly defined as 0x02 -- it should be 0x0F. Signed-off-by: Devesh Sharma Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 27089032c669..60d5ac23ea80 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -31,7 +31,7 @@ #define Bit(_b) (1 << (_b)) #define OCRDMA_GEN1_FAMILY 0xB -#define OCRDMA_GEN2_FAMILY 0x2 +#define OCRDMA_GEN2_FAMILY 0x0F #define OCRDMA_SUBSYS_ROCE 10 enum { From e3cf00d0a87f025db5855a43a67c67a41fa79fef Mon Sep 17 00:00:00 2001 From: Upinder Malhi Date: Tue, 10 Sep 2013 03:38:16 +0000 Subject: [PATCH 03/61] IB/usnic: Add Cisco VIC low-level hardware driver This adds a driver that allows userspace to use UD-like QPs over a proprietary Cisco transport with Cisco's Virtual Interface Cards (VICs), including VIC 1240 and 1280 cards. Signed-off-by: Upinder Malhi Signed-off-by: Christian Benvenuti Signed-off-by: Roland Dreier --- MAINTAINERS | 5 + drivers/infiniband/Kconfig | 1 + drivers/infiniband/Makefile | 1 + drivers/infiniband/hw/usnic/Kconfig | 10 + drivers/infiniband/hw/usnic/Makefile | 15 + drivers/infiniband/hw/usnic/usnic.h | 29 + drivers/infiniband/hw/usnic/usnic_abi.h | 56 ++ .../hw/usnic/usnic_common_pkt_hdr.h | 27 + .../infiniband/hw/usnic/usnic_common_util.h | 51 ++ drivers/infiniband/hw/usnic/usnic_debugfs.c | 71 ++ drivers/infiniband/hw/usnic/usnic_debugfs.h | 25 + drivers/infiniband/hw/usnic/usnic_fwd.c | 243 ++++++ drivers/infiniband/hw/usnic/usnic_fwd.h | 58 ++ drivers/infiniband/hw/usnic/usnic_ib.h | 115 +++ drivers/infiniband/hw/usnic/usnic_ib_main.c | 598 ++++++++++++++ drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c | 541 +++++++++++++ drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h | 95 +++ drivers/infiniband/hw/usnic/usnic_ib_sysfs.c | 351 +++++++++ drivers/infiniband/hw/usnic/usnic_ib_sysfs.h | 29 + drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 736 ++++++++++++++++++ drivers/infiniband/hw/usnic/usnic_ib_verbs.h | 72 ++ drivers/infiniband/hw/usnic/usnic_log.h | 58 ++ drivers/infiniband/hw/usnic/usnic_transport.c | 125 +++ drivers/infiniband/hw/usnic/usnic_transport.h | 28 + drivers/infiniband/hw/usnic/usnic_uiom.c | 603 ++++++++++++++ drivers/infiniband/hw/usnic/usnic_uiom.h | 80 ++ .../hw/usnic/usnic_uiom_interval_tree.c | 237 ++++++ .../hw/usnic/usnic_uiom_interval_tree.h | 74 ++ drivers/infiniband/hw/usnic/usnic_vnic.c | 467 +++++++++++ drivers/infiniband/hw/usnic/usnic_vnic.h | 103 +++ 30 files changed, 4904 insertions(+) create mode 100644 drivers/infiniband/hw/usnic/Kconfig create mode 100644 drivers/infiniband/hw/usnic/Makefile create mode 100644 drivers/infiniband/hw/usnic/usnic.h create mode 100644 drivers/infiniband/hw/usnic/usnic_abi.h create mode 100644 drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h create mode 100644 drivers/infiniband/hw/usnic/usnic_common_util.h create mode 100644 drivers/infiniband/hw/usnic/usnic_debugfs.c create mode 100644 drivers/infiniband/hw/usnic/usnic_debugfs.h create mode 100644 drivers/infiniband/hw/usnic/usnic_fwd.c create mode 100644 drivers/infiniband/hw/usnic/usnic_fwd.h create mode 100644 drivers/infiniband/hw/usnic/usnic_ib.h create mode 100644 drivers/infiniband/hw/usnic/usnic_ib_main.c create mode 100644 drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c create mode 100644 drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h create mode 100644 drivers/infiniband/hw/usnic/usnic_ib_sysfs.c create mode 100644 drivers/infiniband/hw/usnic/usnic_ib_sysfs.h create mode 100644 drivers/infiniband/hw/usnic/usnic_ib_verbs.c create mode 100644 drivers/infiniband/hw/usnic/usnic_ib_verbs.h create mode 100644 drivers/infiniband/hw/usnic/usnic_log.h create mode 100644 drivers/infiniband/hw/usnic/usnic_transport.c create mode 100644 drivers/infiniband/hw/usnic/usnic_transport.h create mode 100644 drivers/infiniband/hw/usnic/usnic_uiom.c create mode 100644 drivers/infiniband/hw/usnic/usnic_uiom.h create mode 100644 drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c create mode 100644 drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h create mode 100644 drivers/infiniband/hw/usnic/usnic_vnic.c create mode 100644 drivers/infiniband/hw/usnic/usnic_vnic.h diff --git a/MAINTAINERS b/MAINTAINERS index 31a046213274..9bc170ee9a2a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2158,6 +2158,11 @@ M: Nishank Trivedi S: Supported F: drivers/net/ethernet/cisco/enic/ +CISCO VIC LOW LATENCY NIC DRIVER +M: Upinder Malhi +S: Supported +F: drivers/infiniband/hw/usnic + CIRRUS LOGIC EP93XX ETHERNET DRIVER M: Hartley Sweeten L: netdev@vger.kernel.org diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 5ceda710f516..10219ee92191 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -53,6 +53,7 @@ source "drivers/infiniband/hw/mlx4/Kconfig" source "drivers/infiniband/hw/mlx5/Kconfig" source "drivers/infiniband/hw/nes/Kconfig" source "drivers/infiniband/hw/ocrdma/Kconfig" +source "drivers/infiniband/hw/usnic/Kconfig" source "drivers/infiniband/ulp/ipoib/Kconfig" diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile index 1fe69888515f..bf508b5550c4 100644 --- a/drivers/infiniband/Makefile +++ b/drivers/infiniband/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_MLX4_INFINIBAND) += hw/mlx4/ obj-$(CONFIG_MLX5_INFINIBAND) += hw/mlx5/ obj-$(CONFIG_INFINIBAND_NES) += hw/nes/ obj-$(CONFIG_INFINIBAND_OCRDMA) += hw/ocrdma/ +obj-$(CONFIG_INFINIBAND_USNIC) += hw/usnic/ obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/ obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/ obj-$(CONFIG_INFINIBAND_SRPT) += ulp/srpt/ diff --git a/drivers/infiniband/hw/usnic/Kconfig b/drivers/infiniband/hw/usnic/Kconfig new file mode 100644 index 000000000000..2cc8ba00b34b --- /dev/null +++ b/drivers/infiniband/hw/usnic/Kconfig @@ -0,0 +1,10 @@ +config INFINIBAND_USNIC + tristate "Verbs support for Cisco VIC" + depends on NETDEVICES && ETHERNET && PCI && INTEL_IOMMU + select ENIC + select NET_VENDOR_CISCO + select PCI_IOV + select INFINIBAND_USER_ACCESS + ---help--- + This is a low-level driver for Cisco's Virtual Interface + Cards (VICs), including the VIC 1240 and 1280 cards. diff --git a/drivers/infiniband/hw/usnic/Makefile b/drivers/infiniband/hw/usnic/Makefile new file mode 100644 index 000000000000..99fb2db47cd5 --- /dev/null +++ b/drivers/infiniband/hw/usnic/Makefile @@ -0,0 +1,15 @@ +ccflags-y := -Idrivers/net/ethernet/cisco/enic + +obj-$(CONFIG_INFINIBAND_USNIC)+= usnic_verbs.o + +usnic_verbs-y=\ +usnic_fwd.o \ +usnic_transport.o \ +usnic_uiom.o \ +usnic_uiom_interval_tree.o \ +usnic_vnic.o \ +usnic_ib_main.o \ +usnic_ib_qp_grp.o \ +usnic_ib_sysfs.o \ +usnic_ib_verbs.o \ +usnic_debugfs.o \ diff --git a/drivers/infiniband/hw/usnic/usnic.h b/drivers/infiniband/hw/usnic/usnic.h new file mode 100644 index 000000000000..d741c76bc4be --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef USNIC_H_ +#define USNIC_H_ + +#define DRV_NAME "usnic_verbs" + +#define PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC 0x00cf /* User space NIC */ + +#define DRV_VERSION "1.0.2" +#define DRV_RELDATE "September 09, 2013" + +#endif /* USNIC_H_ */ diff --git a/drivers/infiniband/hw/usnic/usnic_abi.h b/drivers/infiniband/hw/usnic/usnic_abi.h new file mode 100644 index 000000000000..510b7d702465 --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_abi.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + + +#ifndef USNIC_ABI_H +#define USNIC_ABI_H + +/* ABI between userspace and kernel */ +#define USNIC_UVERBS_ABI_VERSION 2 + +#define USNIC_QP_GRP_MAX_WQS 8 +#define USNIC_QP_GRP_MAX_RQS 8 +#define USNIC_QP_GRP_MAX_CQS 16 + +enum usnic_transport_type { + USNIC_TRANSPORT_UNKNOWN = 0, + USNIC_TRANSPORT_ROCE_CUSTOM = 1, + USNIC_TRANSPORT_MAX = 2, +}; + +/*TODO: Future - usnic_modify_qp needs to pass in generic filters */ +struct usnic_ib_create_qp_resp { + u32 vfid; + u32 qp_grp_id; + u64 bar_bus_addr; + u32 bar_len; +/* + * WQ, RQ, CQ are explicity specified bc exposing a generic resources inteface + * expands the scope of ABI to many files. + */ + u32 wq_cnt; + u32 rq_cnt; + u32 cq_cnt; + u32 wq_idx[USNIC_QP_GRP_MAX_WQS]; + u32 rq_idx[USNIC_QP_GRP_MAX_RQS]; + u32 cq_idx[USNIC_QP_GRP_MAX_CQS]; + u32 transport; + u32 reserved[9]; +}; + +#endif /* USNIC_ABI_H */ diff --git a/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h b/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h new file mode 100644 index 000000000000..393567266142 --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef USNIC_CMN_PKT_HDR_H +#define USNIC_CMN_PKT_HDR_H + +#define USNIC_ROCE_ETHERTYPE (0x8915) +#define USNIC_ROCE_GRH_VER (8) +#define USNIC_PROTO_VER (1) +#define USNIC_ROCE_GRH_VER_SHIFT (4) + +#endif /* USNIC_COMMON_PKT_HDR_H */ diff --git a/drivers/infiniband/hw/usnic/usnic_common_util.h b/drivers/infiniband/hw/usnic/usnic_common_util.h new file mode 100644 index 000000000000..128550a4f9e2 --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_common_util.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef USNIC_CMN_UTIL_H +#define USNIC_CMN_UTIL_H + +static inline void +usnic_mac_to_gid(const char *const mac, char *raw_gid) +{ + raw_gid[0] = 0xfe; + raw_gid[1] = 0x80; + memset(&raw_gid[2], 0, 6); + raw_gid[8] = mac[0]^2; + raw_gid[9] = mac[1]; + raw_gid[10] = mac[2]; + raw_gid[11] = 0xff; + raw_gid[12] = 0xfe; + raw_gid[13] = mac[3]; + raw_gid[14] = mac[4]; + raw_gid[15] = mac[5]; +} + +static inline void +usnic_write_gid_if_id_from_mac(char *mac, char *raw_gid) +{ + raw_gid[8] = mac[0]^2; + raw_gid[9] = mac[1]; + raw_gid[10] = mac[2]; + raw_gid[11] = 0xff; + raw_gid[12] = 0xfe; + raw_gid[13] = mac[3]; + raw_gid[14] = mac[4]; + raw_gid[15] = mac[5]; +} + +#endif /* USNIC_COMMON_UTIL_H */ diff --git a/drivers/infiniband/hw/usnic/usnic_debugfs.c b/drivers/infiniband/hw/usnic/usnic_debugfs.c new file mode 100644 index 000000000000..91386df025ae --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_debugfs.c @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include +#include + +#include "usnic.h" +#include "usnic_log.h" +#include "usnic_debugfs.h" + +static struct dentry *debugfs_root; + +static ssize_t usnic_debugfs_buildinfo_read(struct file *f, char __user *data, + size_t count, loff_t *ppos) +{ + char buf[500]; + int res; + + if (*ppos > 0) + return 0; + + res = scnprintf(buf, sizeof(buf), + "version: %s\n" + "build date: %s\n", + DRV_VERSION, DRV_RELDATE); + + return simple_read_from_buffer(data, count, ppos, buf, res); +} + +static const struct file_operations usnic_debugfs_buildinfo_ops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = usnic_debugfs_buildinfo_read +}; + +void usnic_debugfs_init(void) +{ + debugfs_root = debugfs_create_dir(DRV_NAME, NULL); + if (IS_ERR(debugfs_root)) { + usnic_err("Failed to create debugfs root dir, check if debugfs is enabled in kernel configuration\n"); + debugfs_root = NULL; + return; + } + + debugfs_create_file("build-info", S_IRUGO, debugfs_root, + NULL, &usnic_debugfs_buildinfo_ops); +} + +void usnic_debugfs_exit(void) +{ + if (!debugfs_root) + return; + + debugfs_remove_recursive(debugfs_root); + debugfs_root = NULL; +} diff --git a/drivers/infiniband/hw/usnic/usnic_debugfs.h b/drivers/infiniband/hw/usnic/usnic_debugfs.h new file mode 100644 index 000000000000..914a330dd5e7 --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_debugfs.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#ifndef USNIC_DEBUGFS_H_ +#define USNIC_DEBUGFS_H_ + +void usnic_debugfs_init(void); + +void usnic_debugfs_exit(void); + +#endif /*!USNIC_DEBUGFS_H_ */ diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.c b/drivers/infiniband/hw/usnic/usnic_fwd.c new file mode 100644 index 000000000000..8e42216362e7 --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_fwd.c @@ -0,0 +1,243 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include + +#include "enic_api.h" +#include "usnic_common_pkt_hdr.h" +#include "usnic_fwd.h" +#include "usnic_log.h" + +struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev) +{ + struct usnic_fwd_dev *ufdev; + + ufdev = kzalloc(sizeof(*ufdev), GFP_KERNEL); + if (!ufdev) + return NULL; + + ufdev->pdev = pdev; + ufdev->netdev = pci_get_drvdata(pdev); + spin_lock_init(&ufdev->lock); + + return ufdev; +} + +void usnic_fwd_dev_free(struct usnic_fwd_dev *ufdev) +{ + kfree(ufdev); +} + +static int usnic_fwd_devcmd(struct usnic_fwd_dev *ufdev, int vnic_idx, + enum vnic_devcmd_cmd cmd, u64 *a0, u64 *a1) +{ + int status; + struct net_device *netdev = ufdev->netdev; + + spin_lock(&ufdev->lock); + status = enic_api_devcmd_proxy_by_index(netdev, + vnic_idx, + cmd, + a0, a1, + 1000); + spin_unlock(&ufdev->lock); + if (status) { + if (status == ERR_EINVAL && cmd == CMD_DEL_FILTER) { + usnic_dbg("Dev %s vnic idx %u cmd %u already deleted", + netdev_name(netdev), vnic_idx, cmd); + } else { + usnic_err("Dev %s vnic idx %u cmd %u failed with status %d\n", + netdev_name(netdev), vnic_idx, cmd, + status); + } + } else { + usnic_dbg("Dev %s vnic idx %u cmd %u success", + netdev_name(netdev), vnic_idx, + cmd); + } + + return status; +} + +int usnic_fwd_add_usnic_filter(struct usnic_fwd_dev *ufdev, int vnic_idx, + int rq_idx, struct usnic_fwd_filter *fwd_filter, + struct usnic_fwd_filter_hndl **filter_hndl) +{ + struct filter_tlv *tlv, *tlv_va; + struct filter *filter; + struct filter_action *action; + struct pci_dev *pdev; + struct usnic_fwd_filter_hndl *usnic_filter_hndl; + int status; + u64 a0, a1; + u64 tlv_size; + dma_addr_t tlv_pa; + + pdev = ufdev->pdev; + tlv_size = (2*sizeof(struct filter_tlv) + + sizeof(struct filter) + + sizeof(struct filter_action)); + tlv = pci_alloc_consistent(pdev, tlv_size, &tlv_pa); + if (!tlv) { + usnic_err("Failed to allocate memory\n"); + return -ENOMEM; + } + + usnic_filter_hndl = kzalloc(sizeof(*usnic_filter_hndl), GFP_ATOMIC); + if (!usnic_filter_hndl) { + usnic_err("Failed to allocate memory for hndl\n"); + pci_free_consistent(pdev, tlv_size, tlv, tlv_pa); + return -ENOMEM; + } + + tlv_va = tlv; + a0 = tlv_pa; + a1 = tlv_size; + memset(tlv, 0, tlv_size); + tlv->type = CLSF_TLV_FILTER; + tlv->length = sizeof(struct filter); + filter = (struct filter *)&tlv->val; + filter->type = FILTER_USNIC_ID; + filter->u.usnic.ethtype = USNIC_ROCE_ETHERTYPE; + filter->u.usnic.flags = FILTER_FIELD_USNIC_ETHTYPE | + FILTER_FIELD_USNIC_ID | + FILTER_FIELD_USNIC_PROTO; + filter->u.usnic.proto_version = (USNIC_ROCE_GRH_VER << + USNIC_ROCE_GRH_VER_SHIFT) + | USNIC_PROTO_VER; + filter->u.usnic.usnic_id = fwd_filter->port_num; + tlv = (struct filter_tlv *)((char *)tlv + sizeof(struct filter_tlv) + + sizeof(struct filter)); + tlv->type = CLSF_TLV_ACTION; + tlv->length = sizeof(struct filter_action); + action = (struct filter_action *)&tlv->val; + action->type = FILTER_ACTION_RQ_STEERING; + action->u.rq_idx = rq_idx; + + status = usnic_fwd_devcmd(ufdev, vnic_idx, CMD_ADD_FILTER, &a0, &a1); + pci_free_consistent(pdev, tlv_size, tlv_va, tlv_pa); + if (status) { + usnic_err("VF %s Filter add failed with status:%d", + pci_name(pdev), + status); + kfree(usnic_filter_hndl); + return status; + } else { + usnic_dbg("VF %s FILTER ID:%u", + pci_name(pdev), + (u32)a0); + } + + usnic_filter_hndl->type = FILTER_USNIC_ID; + usnic_filter_hndl->id = (u32)a0; + usnic_filter_hndl->vnic_idx = vnic_idx; + usnic_filter_hndl->ufdev = ufdev; + usnic_filter_hndl->filter = fwd_filter; + *filter_hndl = usnic_filter_hndl; + + return status; +} + +int usnic_fwd_del_filter(struct usnic_fwd_filter_hndl *filter_hndl) +{ + int status; + u64 a0, a1; + struct net_device *netdev; + + netdev = filter_hndl->ufdev->netdev; + a0 = filter_hndl->id; + + status = usnic_fwd_devcmd(filter_hndl->ufdev, filter_hndl->vnic_idx, + CMD_DEL_FILTER, &a0, &a1); + if (status) { + if (status == ERR_EINVAL) { + usnic_dbg("Filter %u already deleted for VF Idx %u pf: %s status: %d", + filter_hndl->id, filter_hndl->vnic_idx, + netdev_name(netdev), status); + status = 0; + kfree(filter_hndl); + } else { + usnic_err("PF %s VF Idx %u Filter: %u FILTER DELETE failed with status %d", + netdev_name(netdev), + filter_hndl->vnic_idx, filter_hndl->id, + status); + } + } else { + usnic_dbg("PF %s VF Idx %u Filter: %u FILTER DELETED", + netdev_name(netdev), filter_hndl->vnic_idx, + filter_hndl->id); + kfree(filter_hndl); + } + + return status; +} + +int usnic_fwd_enable_rq(struct usnic_fwd_dev *ufdev, int vnic_idx, int rq_idx) +{ + int status; + struct net_device *pf_netdev; + u64 a0, a1; + + pf_netdev = ufdev->netdev; + a0 = rq_idx; + a1 = CMD_QP_RQWQ; + + status = usnic_fwd_devcmd(ufdev, vnic_idx, CMD_QP_ENABLE, &a0, &a1); + + if (status) { + usnic_err("PF %s VNIC Index %u RQ Index: %u ENABLE Failed with status %d", + netdev_name(pf_netdev), + vnic_idx, + rq_idx, + status); + } else { + usnic_dbg("PF %s VNIC Index %u RQ Index: %u ENABLED", + netdev_name(pf_netdev), + vnic_idx, rq_idx); + } + + return status; +} + +int usnic_fwd_disable_rq(struct usnic_fwd_dev *ufdev, int vnic_idx, int rq_idx) +{ + int status; + u64 a0, a1; + struct net_device *pf_netdev; + + pf_netdev = ufdev->netdev; + a0 = rq_idx; + a1 = CMD_QP_RQWQ; + + status = usnic_fwd_devcmd(ufdev, vnic_idx, CMD_QP_DISABLE, &a0, &a1); + + if (status) { + usnic_err("PF %s VNIC Index %u RQ Index: %u DISABLE Failed with status %d", + netdev_name(pf_netdev), + vnic_idx, + rq_idx, + status); + } else { + usnic_dbg("PF %s VNIC Index %u RQ Index: %u DISABLED", + netdev_name(pf_netdev), + vnic_idx, + rq_idx); + } + + return status; +} diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.h b/drivers/infiniband/hw/usnic/usnic_fwd.h new file mode 100644 index 000000000000..6973901da8af --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_fwd.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef USNIC_FWD_H_ +#define USNIC_FWD_H_ + +#include +#include +#include + +#include "usnic_abi.h" +#include "vnic_devcmd.h" + +struct usnic_fwd_dev { + struct pci_dev *pdev; + struct net_device *netdev; + spinlock_t lock; +}; + +struct usnic_fwd_filter { + enum usnic_transport_type transport; + u16 port_num; +}; + +struct usnic_fwd_filter_hndl { + enum filter_type type; + u32 id; + u32 vnic_idx; + struct usnic_fwd_dev *ufdev; + struct list_head link; + struct usnic_fwd_filter *filter; +}; + +struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev); +void usnic_fwd_dev_free(struct usnic_fwd_dev *ufdev); +int usnic_fwd_add_usnic_filter(struct usnic_fwd_dev *ufdev, int vnic_idx, + int rq_idx, struct usnic_fwd_filter *filter, + struct usnic_fwd_filter_hndl **filter_hndl); +int usnic_fwd_del_filter(struct usnic_fwd_filter_hndl *filter_hndl); +int usnic_fwd_enable_rq(struct usnic_fwd_dev *ufdev, int vnic_idx, int rq_idx); +int usnic_fwd_disable_rq(struct usnic_fwd_dev *ufdev, int vnic_idx, int rq_idx); + +#endif /* !USNIC_FWD_H_ */ diff --git a/drivers/infiniband/hw/usnic/usnic_ib.h b/drivers/infiniband/hw/usnic/usnic_ib.h new file mode 100644 index 000000000000..3511c8521f30 --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_ib.h @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef USNIC_IB_H_ +#define USNIC_IB_H_ + +#include +#include +#include + +#include + + +#include "usnic.h" +#include "usnic_abi.h" +#include "usnic_vnic.h" + +#define USNIC_IB_PORT_CNT 1 +#define USNIC_IB_NUM_COMP_VECTORS 1 + +extern unsigned int usnic_ib_share_vf; + +struct usnic_ib_ucontext { + struct ib_ucontext ibucontext; + /* Protected by usnic_ib_dev->usdev_lock */ + struct list_head qp_grp_list; + struct list_head link; +}; + +struct usnic_ib_pd { + struct ib_pd ibpd; + struct usnic_uiom_pd *umem_pd; +}; + +struct usnic_ib_mr { + struct ib_mr ibmr; + struct usnic_uiom_reg *umem; +}; + +struct usnic_ib_dev { + struct ib_device ib_dev; + struct pci_dev *pdev; + struct net_device *netdev; + struct usnic_fwd_dev *ufdev; + bool link_up; + struct list_head ib_dev_link; + struct list_head vf_dev_list; + struct list_head ctx_list; + struct mutex usdev_lock; + char mac[ETH_ALEN]; + unsigned int mtu; + + /* provisioning information */ + struct kref vf_cnt; + unsigned int vf_res_cnt[USNIC_VNIC_RES_TYPE_MAX]; + + /* sysfs vars for QPN reporting */ + struct kobject *qpn_kobj; +}; + +struct usnic_ib_vf { + struct usnic_ib_dev *pf; + spinlock_t lock; + struct usnic_vnic *vnic; + unsigned int qp_grp_ref_cnt; + struct usnic_ib_pd *pd; + struct list_head link; +}; + +static inline +struct usnic_ib_dev *to_usdev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct usnic_ib_dev, ib_dev); +} + +static inline +struct usnic_ib_ucontext *to_ucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct usnic_ib_ucontext, ibucontext); +} + +static inline +struct usnic_ib_pd *to_upd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct usnic_ib_pd, ibpd); +} + +static inline +struct usnic_ib_ucontext *to_uucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct usnic_ib_ucontext, ibucontext); +} + +static inline +struct usnic_ib_mr *to_umr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct usnic_ib_mr, ibmr); +} +void usnic_ib_log_vf(struct usnic_ib_vf *vf); +#endif /* USNIC_IB_H_ */ diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c new file mode 100644 index 000000000000..dc09c12435b9 --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -0,0 +1,598 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Author: Upinder Malhi + * Author: Anant Deepak + * Author: Cesare Cantu' + * Author: Jeff Squyres + * Author: Kiran Thirumalai + * Author: Xuyang Wang + * Author: Reese Faucette + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "usnic_abi.h" +#include "usnic_common_util.h" +#include "usnic_ib.h" +#include "usnic_ib_qp_grp.h" +#include "usnic_log.h" +#include "usnic_fwd.h" +#include "usnic_debugfs.h" +#include "usnic_ib_verbs.h" +#include "usnic_transport.h" +#include "usnic_uiom.h" +#include "usnic_ib_sysfs.h" + +unsigned int usnic_log_lvl = USNIC_LOG_LVL_ERR; +unsigned int usnic_ib_share_vf = 1; + +static const char usnic_version[] = + DRV_NAME ": Cisco VIC (USNIC) Verbs Driver v" + DRV_VERSION " (" DRV_RELDATE ")\n"; + +static DEFINE_MUTEX(usnic_ib_ibdev_list_lock); +static LIST_HEAD(usnic_ib_ibdev_list); + +/* Callback dump funcs */ +static int usnic_ib_dump_vf_hdr(void *obj, char *buf, int buf_sz) +{ + struct usnic_ib_vf *vf = obj; + return scnprintf(buf, buf_sz, "PF: %s ", vf->pf->ib_dev.name); +} +/* End callback dump funcs */ + +static void usnic_ib_dump_vf(struct usnic_ib_vf *vf, char *buf, int buf_sz) +{ + usnic_vnic_dump(vf->vnic, buf, buf_sz, vf, + usnic_ib_dump_vf_hdr, + usnic_ib_qp_grp_dump_hdr, usnic_ib_qp_grp_dump_rows); +} + +void usnic_ib_log_vf(struct usnic_ib_vf *vf) +{ + char buf[1000]; + usnic_ib_dump_vf(vf, buf, sizeof(buf)); + usnic_dbg("%s\n", buf); +} + +/* Start of netdev section */ +static inline const char *usnic_ib_netdev_event_to_string(unsigned long event) +{ + const char *event2str[] = {"NETDEV_NONE", "NETDEV_UP", "NETDEV_DOWN", + "NETDEV_REBOOT", "NETDEV_CHANGE", + "NETDEV_REGISTER", "NETDEV_UNREGISTER", "NETDEV_CHANGEMTU", + "NETDEV_CHANGEADDR", "NETDEV_GOING_DOWN", "NETDEV_FEAT_CHANGE", + "NETDEV_BONDING_FAILOVER", "NETDEV_PRE_UP", + "NETDEV_PRE_TYPE_CHANGE", "NETDEV_POST_TYPE_CHANGE", + "NETDEV_POST_INT", "NETDEV_UNREGISTER_FINAL", "NETDEV_RELEASE", + "NETDEV_NOTIFY_PEERS", "NETDEV_JOIN" + }; + + if (event >= ARRAY_SIZE(event2str)) + return "UNKNOWN_NETDEV_EVENT"; + else + return event2str[event]; +} + +static void usnic_ib_qp_grp_modify_active_to_err(struct usnic_ib_dev *us_ibdev) +{ + struct usnic_ib_ucontext *ctx; + struct usnic_ib_qp_grp *qp_grp; + enum ib_qp_state cur_state; + int status; + + BUG_ON(!mutex_is_locked(&us_ibdev->usdev_lock)); + + list_for_each_entry(ctx, &us_ibdev->ctx_list, link) { + list_for_each_entry(qp_grp, &ctx->qp_grp_list, link) { + cur_state = qp_grp->state; + if (cur_state == IB_QPS_INIT || + cur_state == IB_QPS_RTR || + cur_state == IB_QPS_RTS) { + status = usnic_ib_qp_grp_modify(qp_grp, + IB_QPS_ERR, + NULL); + if (status) { + usnic_err("Failed to transistion qp grp %u from %s to %s\n", + qp_grp->grp_id, + usnic_ib_qp_grp_state_to_string + (cur_state), + usnic_ib_qp_grp_state_to_string + (IB_QPS_ERR)); + } + } + } + } +} + +static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev, + unsigned long event) +{ + struct net_device *netdev; + struct ib_event ib_event; + + memset(&ib_event, 0, sizeof(ib_event)); + + mutex_lock(&us_ibdev->usdev_lock); + netdev = us_ibdev->netdev; + switch (event) { + case NETDEV_REBOOT: + usnic_info("PF Reset on %s\n", us_ibdev->ib_dev.name); + usnic_ib_qp_grp_modify_active_to_err(us_ibdev); + ib_event.event = IB_EVENT_PORT_ERR; + ib_event.device = &us_ibdev->ib_dev; + ib_event.element.port_num = 1; + ib_dispatch_event(&ib_event); + break; + case NETDEV_UP: + case NETDEV_DOWN: + case NETDEV_CHANGE: + if (!us_ibdev->link_up && netif_carrier_ok(netdev)) { + us_ibdev->link_up = true; + usnic_info("Link UP on %s\n", us_ibdev->ib_dev.name); + ib_event.event = IB_EVENT_PORT_ACTIVE; + ib_event.device = &us_ibdev->ib_dev; + ib_event.element.port_num = 1; + ib_dispatch_event(&ib_event); + } else if (us_ibdev->link_up && !netif_carrier_ok(netdev)) { + us_ibdev->link_up = false; + usnic_info("Link DOWN on %s\n", us_ibdev->ib_dev.name); + usnic_ib_qp_grp_modify_active_to_err(us_ibdev); + ib_event.event = IB_EVENT_PORT_ERR; + ib_event.device = &us_ibdev->ib_dev; + ib_event.element.port_num = 1; + ib_dispatch_event(&ib_event); + } else { + usnic_dbg("Ignorning %s on %s\n", + usnic_ib_netdev_event_to_string(event), + us_ibdev->ib_dev.name); + } + break; + case NETDEV_CHANGEADDR: + if (!memcmp(us_ibdev->mac, netdev->dev_addr, + sizeof(us_ibdev->mac))) { + usnic_dbg("Ignorning addr change on %s\n", + us_ibdev->ib_dev.name); + } else { + usnic_info(" %s old mac: %pM new mac: %pM\n", + us_ibdev->ib_dev.name, + us_ibdev->mac, + netdev->dev_addr); + memcpy(us_ibdev->mac, netdev->dev_addr, + sizeof(us_ibdev->mac)); + usnic_ib_qp_grp_modify_active_to_err(us_ibdev); + ib_event.event = IB_EVENT_GID_CHANGE; + ib_event.device = &us_ibdev->ib_dev; + ib_event.element.port_num = 1; + ib_dispatch_event(&ib_event); + } + + break; + case NETDEV_CHANGEMTU: + if (us_ibdev->mtu != netdev->mtu) { + usnic_info("MTU Change on %s old: %u new: %u\n", + us_ibdev->ib_dev.name, + us_ibdev->mtu, netdev->mtu); + us_ibdev->mtu = netdev->mtu; + usnic_ib_qp_grp_modify_active_to_err(us_ibdev); + } else { + usnic_dbg("Ignoring MTU change on %s\n", + us_ibdev->ib_dev.name); + } + break; + default: + usnic_dbg("Ignorning event %s on %s", + usnic_ib_netdev_event_to_string(event), + us_ibdev->ib_dev.name); + } + mutex_unlock(&us_ibdev->usdev_lock); +} + +static int usnic_ib_netdevice_event(struct notifier_block *notifier, + unsigned long event, void *ptr) +{ + struct usnic_ib_dev *us_ibdev; + + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + + mutex_lock(&usnic_ib_ibdev_list_lock); + list_for_each_entry(us_ibdev, &usnic_ib_ibdev_list, ib_dev_link) { + if (us_ibdev->netdev == netdev) { + usnic_ib_handle_usdev_event(us_ibdev, event); + break; + } + } + mutex_unlock(&usnic_ib_ibdev_list_lock); + + return NOTIFY_DONE; +} + +static struct notifier_block usnic_ib_netdevice_notifier = { + .notifier_call = usnic_ib_netdevice_event +}; +/* End of netdev section */ + +/* Start of PF discovery section */ +static void *usnic_ib_device_add(struct pci_dev *dev) +{ + struct usnic_ib_dev *us_ibdev; + union ib_gid gid; + + usnic_dbg("\n"); + + us_ibdev = (struct usnic_ib_dev *)ib_alloc_device(sizeof(*us_ibdev)); + if (IS_ERR_OR_NULL(us_ibdev)) { + usnic_err("Device %s context alloc failed\n", + netdev_name(pci_get_drvdata(dev))); + return ERR_PTR(us_ibdev ? PTR_ERR(us_ibdev) : -EFAULT); + } + + us_ibdev->ufdev = usnic_fwd_dev_alloc(dev); + if (IS_ERR_OR_NULL(us_ibdev->ufdev)) { + usnic_err("Failed to alloc ufdev for %s with err %ld\n", + pci_name(dev), PTR_ERR(us_ibdev->ufdev)); + goto err_dealloc; + } + + mutex_init(&us_ibdev->usdev_lock); + INIT_LIST_HEAD(&us_ibdev->vf_dev_list); + INIT_LIST_HEAD(&us_ibdev->ctx_list); + + us_ibdev->pdev = dev; + us_ibdev->netdev = pci_get_drvdata(dev); + us_ibdev->ib_dev.owner = THIS_MODULE; + us_ibdev->ib_dev.node_type = RDMA_NODE_USNIC; + us_ibdev->ib_dev.phys_port_cnt = USNIC_IB_PORT_CNT; + us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS; + us_ibdev->ib_dev.dma_device = &dev->dev; + us_ibdev->ib_dev.uverbs_abi_ver = USNIC_UVERBS_ABI_VERSION; + strlcpy(us_ibdev->ib_dev.name, "usnic_%d", IB_DEVICE_NAME_MAX); + + us_ibdev->ib_dev.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_OPEN_QP); + + us_ibdev->ib_dev.query_device = usnic_ib_query_device; + us_ibdev->ib_dev.query_port = usnic_ib_query_port; + us_ibdev->ib_dev.query_pkey = usnic_ib_query_pkey; + us_ibdev->ib_dev.query_gid = usnic_ib_query_gid; + us_ibdev->ib_dev.get_link_layer = usnic_ib_port_link_layer; + us_ibdev->ib_dev.alloc_pd = usnic_ib_alloc_pd; + us_ibdev->ib_dev.dealloc_pd = usnic_ib_dealloc_pd; + us_ibdev->ib_dev.create_qp = usnic_ib_create_qp; + us_ibdev->ib_dev.modify_qp = usnic_ib_modify_qp; + us_ibdev->ib_dev.query_qp = usnic_ib_query_qp; + us_ibdev->ib_dev.destroy_qp = usnic_ib_destroy_qp; + us_ibdev->ib_dev.create_cq = usnic_ib_create_cq; + us_ibdev->ib_dev.destroy_cq = usnic_ib_destroy_cq; + us_ibdev->ib_dev.reg_user_mr = usnic_ib_reg_mr; + us_ibdev->ib_dev.dereg_mr = usnic_ib_dereg_mr; + us_ibdev->ib_dev.alloc_ucontext = usnic_ib_alloc_ucontext; + us_ibdev->ib_dev.dealloc_ucontext = usnic_ib_dealloc_ucontext; + us_ibdev->ib_dev.mmap = usnic_ib_mmap; + us_ibdev->ib_dev.create_ah = usnic_ib_create_ah; + us_ibdev->ib_dev.destroy_ah = usnic_ib_destroy_ah; + us_ibdev->ib_dev.post_send = usnic_ib_post_send; + us_ibdev->ib_dev.post_recv = usnic_ib_post_recv; + us_ibdev->ib_dev.poll_cq = usnic_ib_poll_cq; + us_ibdev->ib_dev.req_notify_cq = usnic_ib_req_notify_cq; + us_ibdev->ib_dev.get_dma_mr = usnic_ib_get_dma_mr; + + + if (ib_register_device(&us_ibdev->ib_dev, NULL)) + goto err_fwd_dealloc; + + us_ibdev->link_up = netif_carrier_ok(us_ibdev->netdev); + us_ibdev->mtu = us_ibdev->netdev->mtu; + memcpy(&us_ibdev->mac, us_ibdev->netdev->dev_addr, + sizeof(us_ibdev->mac)); + usnic_mac_to_gid(us_ibdev->netdev->perm_addr, &gid.raw[0]); + memcpy(&us_ibdev->ib_dev.node_guid, &gid.global.interface_id, + sizeof(gid.global.interface_id)); + kref_init(&us_ibdev->vf_cnt); + + usnic_info("Added ibdev: %s netdev: %s with mac %pM Link: %u MTU: %u\n", + us_ibdev->ib_dev.name, netdev_name(us_ibdev->netdev), + us_ibdev->mac, us_ibdev->link_up, us_ibdev->mtu); + return us_ibdev; + +err_fwd_dealloc: + usnic_fwd_dev_free(us_ibdev->ufdev); +err_dealloc: + usnic_err("failed -- deallocing device\n"); + ib_dealloc_device(&us_ibdev->ib_dev); + return NULL; +} + +static void usnic_ib_device_remove(struct usnic_ib_dev *us_ibdev) +{ + usnic_info("Unregistering %s\n", us_ibdev->ib_dev.name); + usnic_ib_sysfs_unregister_usdev(us_ibdev); + usnic_fwd_dev_free(us_ibdev->ufdev); + ib_unregister_device(&us_ibdev->ib_dev); + ib_dealloc_device(&us_ibdev->ib_dev); +} + +static void usnic_ib_undiscover_pf(struct kref *kref) +{ + struct usnic_ib_dev *us_ibdev, *tmp; + struct pci_dev *dev; + bool found = false; + + dev = container_of(kref, struct usnic_ib_dev, vf_cnt)->pdev; + mutex_lock(&usnic_ib_ibdev_list_lock); + list_for_each_entry_safe(us_ibdev, tmp, + &usnic_ib_ibdev_list, ib_dev_link) { + if (us_ibdev->pdev == dev) { + list_del(&us_ibdev->ib_dev_link); + usnic_ib_device_remove(us_ibdev); + found = true; + break; + } + } + + WARN(!found, "Failed to remove PF %s\n", pci_name(dev)); + + mutex_unlock(&usnic_ib_ibdev_list_lock); +} + +static struct usnic_ib_dev *usnic_ib_discover_pf(struct usnic_vnic *vnic) +{ + struct usnic_ib_dev *us_ibdev; + struct pci_dev *parent_pci, *vf_pci; + int err; + + vf_pci = usnic_vnic_get_pdev(vnic); + parent_pci = pci_physfn(vf_pci); + + BUG_ON(!parent_pci); + + mutex_lock(&usnic_ib_ibdev_list_lock); + list_for_each_entry(us_ibdev, &usnic_ib_ibdev_list, ib_dev_link) { + if (us_ibdev->pdev == parent_pci) { + kref_get(&us_ibdev->vf_cnt); + goto out; + } + } + + us_ibdev = usnic_ib_device_add(parent_pci); + if (IS_ERR_OR_NULL(us_ibdev)) { + us_ibdev = (us_ibdev) ? us_ibdev : ERR_PTR(-EFAULT); + goto out; + } + + err = usnic_ib_sysfs_register_usdev(us_ibdev); + if (err) { + usnic_ib_device_remove(us_ibdev); + us_ibdev = ERR_PTR(err); + goto out; + } + + list_add(&us_ibdev->ib_dev_link, &usnic_ib_ibdev_list); +out: + mutex_unlock(&usnic_ib_ibdev_list_lock); + return us_ibdev; +} +/* End of PF discovery section */ + +/* Start of PCI section */ + +static DEFINE_PCI_DEVICE_TABLE(usnic_ib_pci_ids) = { + {PCI_DEVICE(PCI_VENDOR_ID_CISCO, PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC)}, + {0,} +}; + +static int usnic_ib_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + int err; + struct usnic_ib_dev *pf; + struct usnic_ib_vf *vf; + enum usnic_vnic_res_type res_type; + + vf = kzalloc(sizeof(*vf), GFP_KERNEL); + if (!vf) + return -ENOMEM; + + err = pci_enable_device(pdev); + if (err) { + usnic_err("Failed to enable %s with err %d\n", + pci_name(pdev), err); + goto out_clean_vf; + } + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + usnic_err("Failed to request region for %s with err %d\n", + pci_name(pdev), err); + goto out_disable_device; + } + + pci_set_master(pdev); + pci_set_drvdata(pdev, vf); + + vf->vnic = usnic_vnic_alloc(pdev); + if (IS_ERR_OR_NULL(vf->vnic)) { + err = (vf->vnic ? PTR_ERR(vf->vnic) : -ENOMEM); + usnic_err("Failed to alloc vnic for %s with err %d\n", + pci_name(pdev), err); + goto out_release_regions; + } + + pf = usnic_ib_discover_pf(vf->vnic); + if (IS_ERR_OR_NULL(pf)) { + usnic_err("Failed to discover pf of vnic %s with err%ld\n", + pci_name(pdev), PTR_ERR(pf)); + err = (pf ? PTR_ERR(pf) : -EFAULT); + goto out_clean_vnic; + } + + vf->pf = pf; + spin_lock_init(&vf->lock); + mutex_lock(&pf->usdev_lock); + list_add_tail(&vf->link, &pf->vf_dev_list); + /* + * Save max settings (will be same for each VF, easier to re-write than + * to say "if (!set) { set_values(); set=1; } + */ + for (res_type = USNIC_VNIC_RES_TYPE_EOL+1; + res_type < USNIC_VNIC_RES_TYPE_MAX; + res_type++) { + pf->vf_res_cnt[res_type] = usnic_vnic_res_cnt(vf->vnic, + res_type); + } + + mutex_unlock(&pf->usdev_lock); + + usnic_info("Registering usnic VF %s into PF %s\n", pci_name(pdev), + pf->ib_dev.name); + usnic_ib_log_vf(vf); + return 0; + +out_clean_vnic: + usnic_vnic_free(vf->vnic); +out_release_regions: + pci_set_drvdata(pdev, NULL); + pci_clear_master(pdev); + pci_release_regions(pdev); +out_disable_device: + pci_disable_device(pdev); +out_clean_vf: + kfree(vf); + return err; +} + +static void usnic_ib_pci_remove(struct pci_dev *pdev) +{ + struct usnic_ib_vf *vf = pci_get_drvdata(pdev); + struct usnic_ib_dev *pf = vf->pf; + + mutex_lock(&pf->usdev_lock); + list_del(&vf->link); + mutex_unlock(&pf->usdev_lock); + + kref_put(&pf->vf_cnt, usnic_ib_undiscover_pf); + usnic_vnic_free(vf->vnic); + pci_set_drvdata(pdev, NULL); + pci_clear_master(pdev); + pci_release_regions(pdev); + pci_disable_device(pdev); + kfree(vf); + + usnic_info("Removed VF %s\n", pci_name(pdev)); +} + +/* PCI driver entry points */ +static struct pci_driver usnic_ib_pci_driver = { + .name = DRV_NAME, + .id_table = usnic_ib_pci_ids, + .probe = usnic_ib_pci_probe, + .remove = usnic_ib_pci_remove, +}; +/* End of PCI section */ + +/* Start of module section */ +static int __init usnic_ib_init(void) +{ + int err; + + printk_once(KERN_INFO "%s", usnic_version); + + err = usnic_uiom_init(DRV_NAME); + if (err) { + usnic_err("Unable to initalize umem with err %d\n", err); + return err; + } + + if (pci_register_driver(&usnic_ib_pci_driver)) { + usnic_err("Unable to register with PCI\n"); + goto out_umem_fini; + } + + err = register_netdevice_notifier(&usnic_ib_netdevice_notifier); + if (err) { + usnic_err("Failed to register netdev notifier\n"); + goto out_pci_unreg; + } + + err = usnic_transport_init(); + if (err) { + usnic_err("Failed to initialize transport\n"); + goto out_unreg_netdev_notifier; + } + + usnic_debugfs_init(); + + return 0; + +out_unreg_netdev_notifier: + unregister_netdevice_notifier(&usnic_ib_netdevice_notifier); +out_pci_unreg: + pci_unregister_driver(&usnic_ib_pci_driver); +out_umem_fini: + usnic_uiom_fini(); + + return err; +} + +static void __exit usnic_ib_destroy(void) +{ + usnic_dbg("\n"); + usnic_debugfs_exit(); + usnic_transport_fini(); + unregister_netdevice_notifier(&usnic_ib_netdevice_notifier); + pci_unregister_driver(&usnic_ib_pci_driver); + usnic_uiom_fini(); +} + +MODULE_DESCRIPTION("Cisco VIC (usNIC) Verbs Driver"); +MODULE_AUTHOR("Upinder Malhi "); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(DRV_VERSION); +module_param(usnic_log_lvl, uint, S_IRUGO | S_IWUSR); +module_param(usnic_ib_share_vf, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(usnic_log_lvl, " Off=0, Err=1, Info=2, Debug=3"); +MODULE_PARM_DESC(usnic_ib_share_vf, "Off=0, On=1 VF sharing amongst QPs"); +MODULE_DEVICE_TABLE(pci, usnic_ib_pci_ids); + +module_init(usnic_ib_init); +module_exit(usnic_ib_destroy); +/* End of module section */ diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c new file mode 100644 index 000000000000..ca5fa6ad59ac --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c @@ -0,0 +1,541 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include + +#include "usnic_log.h" +#include "usnic_vnic.h" +#include "usnic_fwd.h" +#include "usnic_uiom.h" +#include "usnic_ib_qp_grp.h" +#include "usnic_ib_sysfs.h" +#include "usnic_transport.h" + +const char *usnic_ib_qp_grp_state_to_string(enum ib_qp_state state) +{ + switch (state) { + case IB_QPS_RESET: + return "Rst"; + case IB_QPS_INIT: + return "Init"; + case IB_QPS_RTR: + return "RTR"; + case IB_QPS_RTS: + return "RTS"; + case IB_QPS_SQD: + return "SQD"; + case IB_QPS_SQE: + return "SQE"; + case IB_QPS_ERR: + return "ERR"; + default: + return "UNKOWN STATE"; + + } +} + +int usnic_ib_qp_grp_dump_hdr(char *buf, int buf_sz) +{ + return scnprintf(buf, buf_sz, "|QPN\t|State\t|PID\t|VF Idx\t|Fil ID"); +} + +int usnic_ib_qp_grp_dump_rows(void *obj, char *buf, int buf_sz) +{ + struct usnic_ib_qp_grp *qp_grp = obj; + struct usnic_fwd_filter_hndl *default_filter_hndl; + if (obj) { + default_filter_hndl = list_first_entry(&qp_grp->filter_hndls, + struct usnic_fwd_filter_hndl, link); + return scnprintf(buf, buf_sz, "|%d\t|%s\t|%d\t|%hu\t|%d", + qp_grp->ibqp.qp_num, + usnic_ib_qp_grp_state_to_string( + qp_grp->state), + qp_grp->owner_pid, + usnic_vnic_get_index(qp_grp->vf->vnic), + default_filter_hndl->id); + } else { + return scnprintf(buf, buf_sz, "|N/A\t|N/A\t|N/A\t|N/A\t|N/A"); + } +} + +static int add_fwd_filter(struct usnic_ib_qp_grp *qp_grp, + struct usnic_fwd_filter *fwd_filter) +{ + struct usnic_fwd_filter_hndl *filter_hndl; + int status; + struct usnic_vnic_res_chunk *chunk; + int rq_idx; + + WARN_ON(!spin_is_locked(&qp_grp->lock)); + + chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ); + if (IS_ERR_OR_NULL(chunk) || chunk->cnt < 1) { + usnic_err("Failed to get RQ info for qp_grp %u\n", + qp_grp->grp_id); + return -EFAULT; + } + + rq_idx = chunk->res[0]->vnic_idx; + + switch (qp_grp->transport) { + case USNIC_TRANSPORT_ROCE_CUSTOM: + status = usnic_fwd_add_usnic_filter(qp_grp->ufdev, + usnic_vnic_get_index(qp_grp->vf->vnic), + rq_idx, + fwd_filter, + &filter_hndl); + break; + default: + usnic_err("Unable to install filter for qp_grp %u for transport %d", + qp_grp->grp_id, qp_grp->transport); + status = -EINVAL; + } + + if (status) + return status; + + list_add_tail(&filter_hndl->link, &qp_grp->filter_hndls); + return 0; +} + +static int del_all_filters(struct usnic_ib_qp_grp *qp_grp) +{ + int err, status; + struct usnic_fwd_filter_hndl *filter_hndl, *tmp; + + WARN_ON(!spin_is_locked(&qp_grp->lock)); + + status = 0; + + list_for_each_entry_safe(filter_hndl, tmp, + &qp_grp->filter_hndls, link) { + list_del(&filter_hndl->link); + err = usnic_fwd_del_filter(filter_hndl); + if (err) { + usnic_err("Failed to delete filter %u of qp_grp %d\n", + filter_hndl->id, qp_grp->grp_id); + } + status |= err; + } + + return status; +} + +static int enable_qp_grp(struct usnic_ib_qp_grp *qp_grp) +{ + + int status; + int i, vnic_idx; + struct usnic_vnic_res_chunk *res_chunk; + struct usnic_vnic_res *res; + + WARN_ON(!spin_is_locked(&qp_grp->lock)); + + vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic); + + res_chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ); + if (IS_ERR_OR_NULL(res_chunk)) { + usnic_err("Unable to get %s with err %ld\n", + usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ), + PTR_ERR(res_chunk)); + return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM; + } + + for (i = 0; i < res_chunk->cnt; i++) { + res = res_chunk->res[i]; + status = usnic_fwd_enable_rq(qp_grp->ufdev, vnic_idx, + res->vnic_idx); + if (status) { + usnic_err("Failed to enable rq %d of %s:%d\n with err %d\n", + res->vnic_idx, + netdev_name(qp_grp->ufdev->netdev), + vnic_idx, status); + goto out_err; + } + } + + return 0; + +out_err: + for (i--; i >= 0; i--) { + res = res_chunk->res[i]; + usnic_fwd_disable_rq(qp_grp->ufdev, vnic_idx, + res->vnic_idx); + } + + return status; +} + +static int disable_qp_grp(struct usnic_ib_qp_grp *qp_grp) +{ + int i, vnic_idx; + struct usnic_vnic_res_chunk *res_chunk; + struct usnic_vnic_res *res; + int status = 0; + + WARN_ON(!spin_is_locked(&qp_grp->lock)); + vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic); + + res_chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ); + if (IS_ERR_OR_NULL(res_chunk)) { + usnic_err("Unable to get %s with err %ld\n", + usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ), + PTR_ERR(res_chunk)); + return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM; + } + + for (i = 0; i < res_chunk->cnt; i++) { + res = res_chunk->res[i]; + status = usnic_fwd_disable_rq(qp_grp->ufdev, vnic_idx, + res->vnic_idx); + if (status) { + usnic_err("Failed to disable rq %d of %s:%d\n with err %d\n", + res->vnic_idx, + netdev_name(qp_grp->ufdev->netdev), + vnic_idx, status); + } + } + + return status; + +} + +int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp, + enum ib_qp_state new_state, + struct usnic_fwd_filter *fwd_filter) +{ + int status = 0; + int vnic_idx; + struct ib_event ib_event; + enum ib_qp_state old_state; + + old_state = qp_grp->state; + vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic); + + spin_lock(&qp_grp->lock); + switch (new_state) { + case IB_QPS_RESET: + switch (old_state) { + case IB_QPS_RESET: + /* NO-OP */ + break; + case IB_QPS_INIT: + status = del_all_filters(qp_grp); + break; + case IB_QPS_RTR: + case IB_QPS_RTS: + case IB_QPS_ERR: + status = disable_qp_grp(qp_grp); + status &= del_all_filters(qp_grp); + break; + default: + status = -EINVAL; + } + break; + case IB_QPS_INIT: + switch (old_state) { + case IB_QPS_RESET: + status = add_fwd_filter(qp_grp, fwd_filter); + break; + case IB_QPS_INIT: + status = add_fwd_filter(qp_grp, fwd_filter); + break; + case IB_QPS_RTR: + status = disable_qp_grp(qp_grp); + break; + case IB_QPS_RTS: + status = disable_qp_grp(qp_grp); + break; + default: + status = -EINVAL; + } + break; + case IB_QPS_RTR: + switch (old_state) { + case IB_QPS_INIT: + status = enable_qp_grp(qp_grp); + break; + default: + status = -EINVAL; + } + break; + case IB_QPS_RTS: + switch (old_state) { + case IB_QPS_RTR: + /* NO-OP FOR NOW */ + break; + default: + status = -EINVAL; + } + break; + case IB_QPS_ERR: + ib_event.device = &qp_grp->vf->pf->ib_dev; + ib_event.element.qp = &qp_grp->ibqp; + ib_event.event = IB_EVENT_QP_FATAL; + + switch (old_state) { + case IB_QPS_RESET: + qp_grp->ibqp.event_handler(&ib_event, + qp_grp->ibqp.qp_context); + break; + case IB_QPS_INIT: + status = del_all_filters(qp_grp); + qp_grp->ibqp.event_handler(&ib_event, + qp_grp->ibqp.qp_context); + break; + case IB_QPS_RTR: + case IB_QPS_RTS: + status = disable_qp_grp(qp_grp); + status &= del_all_filters(qp_grp); + qp_grp->ibqp.event_handler(&ib_event, + qp_grp->ibqp.qp_context); + break; + default: + status = -EINVAL; + } + break; + default: + status = -EINVAL; + } + spin_unlock(&qp_grp->lock); + + if (!status) { + qp_grp->state = new_state; + usnic_info("Transistioned %u from %s to %s", + qp_grp->grp_id, + usnic_ib_qp_grp_state_to_string(old_state), + usnic_ib_qp_grp_state_to_string(new_state)); + } else { + usnic_err("Failed to transistion %u from %s to %s", + qp_grp->grp_id, + usnic_ib_qp_grp_state_to_string(old_state), + usnic_ib_qp_grp_state_to_string(new_state)); + } + + return status; +} + +static struct usnic_vnic_res_chunk** +alloc_res_chunk_list(struct usnic_vnic *vnic, + struct usnic_vnic_res_spec *res_spec, void *owner_obj) +{ + enum usnic_vnic_res_type res_type; + struct usnic_vnic_res_chunk **res_chunk_list; + int err, i, res_cnt, res_lst_sz; + + for (res_lst_sz = 0; + res_spec->resources[res_lst_sz].type != USNIC_VNIC_RES_TYPE_EOL; + res_lst_sz++) { + /* Do Nothing */ + } + + res_chunk_list = kzalloc(sizeof(*res_chunk_list)*(res_lst_sz+1), + GFP_ATOMIC); + if (!res_chunk_list) + return ERR_PTR(-ENOMEM); + + for (i = 0; res_spec->resources[i].type != USNIC_VNIC_RES_TYPE_EOL; + i++) { + res_type = res_spec->resources[i].type; + res_cnt = res_spec->resources[i].cnt; + + res_chunk_list[i] = usnic_vnic_get_resources(vnic, res_type, + res_cnt, owner_obj); + if (IS_ERR_OR_NULL(res_chunk_list[i])) { + err = (res_chunk_list[i] ? + PTR_ERR(res_chunk_list[i]) : -ENOMEM); + usnic_err("Failed to get %s from %s with err %d\n", + usnic_vnic_res_type_to_str(res_type), + usnic_vnic_pci_name(vnic), + err); + goto out_free_res; + } + } + + return res_chunk_list; + +out_free_res: + for (i--; i > 0; i--) + usnic_vnic_put_resources(res_chunk_list[i]); + kfree(res_chunk_list); + return ERR_PTR(err); +} + +static void free_qp_grp_res(struct usnic_vnic_res_chunk **res_chunk_list) +{ + int i; + for (i = 0; res_chunk_list[i]; i++) + usnic_vnic_put_resources(res_chunk_list[i]); + kfree(res_chunk_list); +} + +static int qp_grp_and_vf_bind(struct usnic_ib_vf *vf, + struct usnic_ib_pd *pd, + struct usnic_ib_qp_grp *qp_grp) +{ + int err; + struct pci_dev *pdev; + + WARN_ON(!spin_is_locked(&vf->lock)); + + pdev = usnic_vnic_get_pdev(vf->vnic); + if (vf->qp_grp_ref_cnt == 0) { + err = usnic_uiom_attach_dev_to_pd(pd->umem_pd, &pdev->dev); + if (err) { + usnic_err("Failed to attach %s to domain\n", + pci_name(pdev)); + return err; + } + vf->pd = pd; + } + vf->qp_grp_ref_cnt++; + + WARN_ON(vf->pd != pd); + qp_grp->vf = vf; + + return 0; +} + +static void qp_grp_and_vf_unbind(struct usnic_ib_qp_grp *qp_grp) +{ + struct pci_dev *pdev; + struct usnic_ib_pd *pd; + + WARN_ON(!spin_is_locked(&qp_grp->vf->lock)); + + pd = qp_grp->vf->pd; + pdev = usnic_vnic_get_pdev(qp_grp->vf->vnic); + if (--qp_grp->vf->qp_grp_ref_cnt == 0) { + qp_grp->vf->pd = NULL; + usnic_uiom_detach_dev_from_pd(pd->umem_pd, &pdev->dev); + } + qp_grp->vf = NULL; +} + +static void log_spec(struct usnic_vnic_res_spec *res_spec) +{ + char buf[512]; + usnic_vnic_spec_dump(buf, sizeof(buf), res_spec); + usnic_dbg("%s\n", buf); +} + +struct usnic_ib_qp_grp * +usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, + struct usnic_ib_vf *vf, + struct usnic_ib_pd *pd, + struct usnic_vnic_res_spec *res_spec, + enum usnic_transport_type transport) +{ + struct usnic_ib_qp_grp *qp_grp; + u16 port_num; + int err; + + WARN_ON(!spin_is_locked(&vf->lock)); + + err = usnic_vnic_res_spec_satisfied(&min_transport_spec[transport], + res_spec); + if (err) { + usnic_err("Spec does not meet miniumum req for transport %d\n", + transport); + log_spec(res_spec); + return ERR_PTR(err); + } + + port_num = usnic_transport_rsrv_port(transport, 0); + if (!port_num) { + usnic_err("Unable to allocate port for %s\n", + netdev_name(ufdev->netdev)); + return ERR_PTR(-EINVAL); + } + + qp_grp = kzalloc(sizeof(*qp_grp), GFP_ATOMIC); + if (!qp_grp) { + usnic_err("Unable to alloc qp_grp - Out of memory\n"); + return NULL; + } + + qp_grp->res_chunk_list = alloc_res_chunk_list(vf->vnic, res_spec, + qp_grp); + if (IS_ERR_OR_NULL(qp_grp->res_chunk_list)) { + err = qp_grp->res_chunk_list ? + PTR_ERR(qp_grp->res_chunk_list) : -ENOMEM; + usnic_err("Unable to alloc res for %d with err %d\n", + qp_grp->grp_id, err); + goto out_free_port; + } + + INIT_LIST_HEAD(&qp_grp->filter_hndls); + spin_lock_init(&qp_grp->lock); + qp_grp->ufdev = ufdev; + qp_grp->transport = transport; + qp_grp->filters[DFLT_FILTER_IDX].transport = transport; + qp_grp->filters[DFLT_FILTER_IDX].port_num = port_num; + qp_grp->state = IB_QPS_RESET; + qp_grp->owner_pid = current->pid; + + /* qp_num is same as default filter port_num */ + qp_grp->ibqp.qp_num = qp_grp->filters[DFLT_FILTER_IDX].port_num; + qp_grp->grp_id = qp_grp->ibqp.qp_num; + + err = qp_grp_and_vf_bind(vf, pd, qp_grp); + if (err) + goto out_free_port; + + usnic_ib_sysfs_qpn_add(qp_grp); + + return qp_grp; + +out_free_port: + kfree(qp_grp); + usnic_transport_unrsrv_port(transport, port_num); + + return ERR_PTR(err); +} + +void usnic_ib_qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp) +{ + u16 default_port_num; + enum usnic_transport_type transport; + + WARN_ON(qp_grp->state != IB_QPS_RESET); + WARN_ON(!spin_is_locked(&qp_grp->vf->lock)); + + transport = qp_grp->filters[DFLT_FILTER_IDX].transport; + default_port_num = qp_grp->filters[DFLT_FILTER_IDX].port_num; + + usnic_ib_sysfs_qpn_remove(qp_grp); + qp_grp_and_vf_unbind(qp_grp); + free_qp_grp_res(qp_grp->res_chunk_list); + kfree(qp_grp); + usnic_transport_unrsrv_port(transport, default_port_num); +} + +struct usnic_vnic_res_chunk* +usnic_ib_qp_grp_get_chunk(struct usnic_ib_qp_grp *qp_grp, + enum usnic_vnic_res_type res_type) +{ + int i; + + for (i = 0; qp_grp->res_chunk_list[i]; i++) { + if (qp_grp->res_chunk_list[i]->type == res_type) + return qp_grp->res_chunk_list[i]; + } + + return ERR_PTR(-EINVAL); +} diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h new file mode 100644 index 000000000000..6416a956dc4a --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef USNIC_IB_QP_GRP_H_ +#define USNIC_IB_QP_GRP_H_ + +#include + +#include "usnic_ib.h" +#include "usnic_abi.h" +#include "usnic_fwd.h" +#include "usnic_vnic.h" + +#define MAX_QP_GRP_FILTERS 10 +#define DFLT_FILTER_IDX 0 + +/* + * The qp group struct represents all the hw resources needed to present a ib_qp + */ +struct usnic_ib_qp_grp { + struct ib_qp ibqp; + enum ib_qp_state state; + int grp_id; + + struct usnic_fwd_dev *ufdev; + short unsigned filter_cnt; + struct usnic_fwd_filter filters[MAX_QP_GRP_FILTERS]; + struct list_head filter_hndls; + enum usnic_transport_type transport; + struct usnic_ib_ucontext *ctx; + + struct usnic_vnic_res_chunk **res_chunk_list; + + pid_t owner_pid; + struct usnic_ib_vf *vf; + struct list_head link; + + spinlock_t lock; + + struct kobject kobj; +}; + +static const struct +usnic_vnic_res_spec min_transport_spec[USNIC_TRANSPORT_MAX] = { + { /*USNIC_TRANSPORT_UNKNOWN*/ + .resources = { + {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,}, + }, + }, + { /*USNIC_TRANSPORT_ROCE_CUSTOM*/ + .resources = { + {.type = USNIC_VNIC_RES_TYPE_WQ, .cnt = 1,}, + {.type = USNIC_VNIC_RES_TYPE_RQ, .cnt = 1,}, + {.type = USNIC_VNIC_RES_TYPE_CQ, .cnt = 1,}, + {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,}, + }, + }, +}; + +const char *usnic_ib_qp_grp_state_to_string(enum ib_qp_state state); +int usnic_ib_qp_grp_dump_hdr(char *buf, int buf_sz); +int usnic_ib_qp_grp_dump_rows(void *obj, char *buf, int buf_sz); +struct usnic_ib_qp_grp * +usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf, + struct usnic_ib_pd *pd, + struct usnic_vnic_res_spec *res_spec, + enum usnic_transport_type transport); +void usnic_ib_qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp); +int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp, + enum ib_qp_state new_state, + struct usnic_fwd_filter *fwd_filter); +struct usnic_vnic_res_chunk +*usnic_ib_qp_grp_get_chunk(struct usnic_ib_qp_grp *qp_grp, + enum usnic_vnic_res_type type); +static inline +struct usnic_ib_qp_grp *to_uqp_grp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct usnic_ib_qp_grp, ibqp); +} +#endif /* USNIC_IB_QP_GRP_H_ */ diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c new file mode 100644 index 000000000000..bad985e9df08 --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c @@ -0,0 +1,351 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include +#include +#include + +#include +#include + +#include "usnic_common_util.h" +#include "usnic_ib.h" +#include "usnic_ib_qp_grp.h" +#include "usnic_vnic.h" +#include "usnic_ib_verbs.h" +#include "usnic_log.h" + +#define UPDATE_PTR_LEFT(N, P, L) \ +do { \ + L -= (N); \ + P += (N); \ +} while (0) + +static ssize_t usnic_ib_show_fw_ver(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct usnic_ib_dev *us_ibdev = + container_of(device, struct usnic_ib_dev, ib_dev.dev); + struct ethtool_drvinfo info; + + mutex_lock(&us_ibdev->usdev_lock); + us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info); + mutex_unlock(&us_ibdev->usdev_lock); + + return scnprintf(buf, PAGE_SIZE, "%s\n", info.fw_version); +} + +static ssize_t usnic_ib_show_board(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct usnic_ib_dev *us_ibdev = + container_of(device, struct usnic_ib_dev, ib_dev.dev); + unsigned short subsystem_device_id; + + mutex_lock(&us_ibdev->usdev_lock); + subsystem_device_id = us_ibdev->pdev->subsystem_device; + mutex_unlock(&us_ibdev->usdev_lock); + + return scnprintf(buf, PAGE_SIZE, "%hu\n", subsystem_device_id); +} + +/* + * Report the configuration for this PF + */ +static ssize_t +usnic_ib_show_config(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct usnic_ib_dev *us_ibdev; + char *ptr; + unsigned left; + unsigned n; + enum usnic_vnic_res_type res_type; + + us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev); + + /* Buffer space limit is 1 page */ + ptr = buf; + left = PAGE_SIZE; + + mutex_lock(&us_ibdev->usdev_lock); + if (atomic_read(&us_ibdev->vf_cnt.refcount) > 0) { + char *busname; + + /* + * bus name seems to come with annoying prefix. + * Remove it if it is predictable + */ + busname = us_ibdev->pdev->bus->name; + if (strncmp(busname, "PCI Bus ", 8) == 0) + busname += 8; + + n = scnprintf(ptr, left, + "%s: %s:%d.%d, %s, %pM, %u VFs\n Per VF:", + us_ibdev->ib_dev.name, + busname, + PCI_SLOT(us_ibdev->pdev->devfn), + PCI_FUNC(us_ibdev->pdev->devfn), + netdev_name(us_ibdev->netdev), + us_ibdev->mac, + atomic_read(&us_ibdev->vf_cnt.refcount)); + UPDATE_PTR_LEFT(n, ptr, left); + + for (res_type = USNIC_VNIC_RES_TYPE_EOL; + res_type < USNIC_VNIC_RES_TYPE_MAX; + res_type++) { + if (us_ibdev->vf_res_cnt[res_type] == 0) + continue; + n = scnprintf(ptr, left, " %d %s%s", + us_ibdev->vf_res_cnt[res_type], + usnic_vnic_res_type_to_str(res_type), + (res_type < (USNIC_VNIC_RES_TYPE_MAX - 1)) ? + "," : ""); + UPDATE_PTR_LEFT(n, ptr, left); + } + n = scnprintf(ptr, left, "\n"); + UPDATE_PTR_LEFT(n, ptr, left); + } else { + n = scnprintf(ptr, left, "%s: no VFs\n", + us_ibdev->ib_dev.name); + UPDATE_PTR_LEFT(n, ptr, left); + } + mutex_unlock(&us_ibdev->usdev_lock); + + return ptr - buf; +} + +static ssize_t +usnic_ib_show_iface(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct usnic_ib_dev *us_ibdev; + + us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev); + + return scnprintf(buf, PAGE_SIZE, "%s\n", + netdev_name(us_ibdev->netdev)); +} + +static ssize_t +usnic_ib_show_max_vf(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct usnic_ib_dev *us_ibdev; + + us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev); + + return scnprintf(buf, PAGE_SIZE, "%u\n", + atomic_read(&us_ibdev->vf_cnt.refcount)); +} + +static ssize_t +usnic_ib_show_qp_per_vf(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct usnic_ib_dev *us_ibdev; + int qp_per_vf; + + us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev); + qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ], + us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]); + + return scnprintf(buf, PAGE_SIZE, + "%d\n", qp_per_vf); +} + +static ssize_t +usnic_ib_show_cq_per_vf(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct usnic_ib_dev *us_ibdev; + + us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", + us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]); +} + +static DEVICE_ATTR(fw_ver, S_IRUGO, usnic_ib_show_fw_ver, NULL); +static DEVICE_ATTR(board_id, S_IRUGO, usnic_ib_show_board, NULL); +static DEVICE_ATTR(config, S_IRUGO, usnic_ib_show_config, NULL); +static DEVICE_ATTR(iface, S_IRUGO, usnic_ib_show_iface, NULL); +static DEVICE_ATTR(max_vf, S_IRUGO, usnic_ib_show_max_vf, NULL); +static DEVICE_ATTR(qp_per_vf, S_IRUGO, usnic_ib_show_qp_per_vf, NULL); +static DEVICE_ATTR(cq_per_vf, S_IRUGO, usnic_ib_show_cq_per_vf, NULL); + +static struct device_attribute *usnic_class_attributes[] = { + &dev_attr_fw_ver, + &dev_attr_board_id, + &dev_attr_config, + &dev_attr_iface, + &dev_attr_max_vf, + &dev_attr_qp_per_vf, + &dev_attr_cq_per_vf, +}; + +struct qpn_attribute { + struct attribute attr; + ssize_t (*show)(struct usnic_ib_qp_grp *, char *buf); +}; + +/* + * Definitions for supporting QPN entries in sysfs + */ +static ssize_t +usnic_ib_qpn_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) +{ + struct usnic_ib_qp_grp *qp_grp; + struct qpn_attribute *qpn_attr; + + qp_grp = container_of(kobj, struct usnic_ib_qp_grp, kobj); + qpn_attr = container_of(attr, struct qpn_attribute, attr); + + return qpn_attr->show(qp_grp, buf); +} + +static const struct sysfs_ops usnic_ib_qpn_sysfs_ops = { + .show = usnic_ib_qpn_attr_show +}; + +#define QPN_ATTR_RO(NAME) \ +struct qpn_attribute qpn_attr_##NAME = __ATTR_RO(NAME) + +static ssize_t context_show(struct usnic_ib_qp_grp *qp_grp, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "0x%p\n", qp_grp->ctx); +} + +static ssize_t summary_show(struct usnic_ib_qp_grp *qp_grp, char *buf) +{ + int i, j, n; + int left; + char *ptr; + struct usnic_vnic_res_chunk *res_chunk; + struct usnic_fwd_filter_hndl *default_filter_hndl; + struct usnic_vnic_res *vnic_res; + + left = PAGE_SIZE; + ptr = buf; + default_filter_hndl = list_first_entry(&qp_grp->filter_hndls, + struct usnic_fwd_filter_hndl, link); + + n = scnprintf(ptr, left, + "QPN: %d State: (%s) PID: %u VF Idx: %hu Filter ID: 0x%x ", + qp_grp->ibqp.qp_num, + usnic_ib_qp_grp_state_to_string(qp_grp->state), + qp_grp->owner_pid, + usnic_vnic_get_index(qp_grp->vf->vnic), + default_filter_hndl->id); + UPDATE_PTR_LEFT(n, ptr, left); + + for (i = 0; qp_grp->res_chunk_list[i]; i++) { + res_chunk = qp_grp->res_chunk_list[i]; + for (j = 0; j < res_chunk->cnt; j++) { + vnic_res = res_chunk->res[j]; + n = scnprintf(ptr, left, "%s[%d] ", + usnic_vnic_res_type_to_str(vnic_res->type), + vnic_res->vnic_idx); + UPDATE_PTR_LEFT(n, ptr, left); + } + } + + n = scnprintf(ptr, left, "\n"); + UPDATE_PTR_LEFT(n, ptr, left); + + return ptr - buf; +} + +static QPN_ATTR_RO(context); +static QPN_ATTR_RO(summary); + +static struct attribute *usnic_ib_qpn_default_attrs[] = { + &qpn_attr_context.attr, + &qpn_attr_summary.attr, + NULL +}; + +static struct kobj_type usnic_ib_qpn_type = { + .sysfs_ops = &usnic_ib_qpn_sysfs_ops, + .default_attrs = usnic_ib_qpn_default_attrs +}; + +int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev) +{ + int i; + int err; + for (i = 0; i < ARRAY_SIZE(usnic_class_attributes); ++i) { + err = device_create_file(&us_ibdev->ib_dev.dev, + usnic_class_attributes[i]); + if (err) { + usnic_err("Failed to create device file %d for %s eith err %d", + i, us_ibdev->ib_dev.name, err); + return -EINVAL; + } + } + + /* create kernel object for looking at individual QPs */ + kobject_get(&us_ibdev->ib_dev.dev.kobj); + us_ibdev->qpn_kobj = kobject_create_and_add("qpn", + &us_ibdev->ib_dev.dev.kobj); + if (us_ibdev->qpn_kobj == NULL) { + kobject_put(&us_ibdev->ib_dev.dev.kobj); + return -ENOMEM; + } + + return 0; +} + +void usnic_ib_sysfs_unregister_usdev(struct usnic_ib_dev *us_ibdev) +{ + int i; + for (i = 0; i < ARRAY_SIZE(usnic_class_attributes); ++i) { + device_remove_file(&us_ibdev->ib_dev.dev, + usnic_class_attributes[i]); + } + + kobject_put(us_ibdev->qpn_kobj); +} + +void usnic_ib_sysfs_qpn_add(struct usnic_ib_qp_grp *qp_grp) +{ + struct usnic_ib_dev *us_ibdev; + int err; + + us_ibdev = qp_grp->vf->pf; + + err = kobject_init_and_add(&qp_grp->kobj, &usnic_ib_qpn_type, + kobject_get(us_ibdev->qpn_kobj), + "%d", qp_grp->grp_id); + if (err) { + kobject_put(us_ibdev->qpn_kobj); + return; + } +} + +void usnic_ib_sysfs_qpn_remove(struct usnic_ib_qp_grp *qp_grp) +{ + struct usnic_ib_dev *us_ibdev; + + us_ibdev = qp_grp->vf->pf; + + kobject_put(&qp_grp->kobj); + kobject_put(us_ibdev->qpn_kobj); +} diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h new file mode 100644 index 000000000000..0d09b493cd02 --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef USNIC_IB_SYSFS_H_ +#define USNIC_IB_SYSFS_H_ + +#include "usnic_ib.h" + +int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev); +void usnic_ib_sysfs_unregister_usdev(struct usnic_ib_dev *us_ibdev); +void usnic_ib_sysfs_qpn_add(struct usnic_ib_qp_grp *qp_grp); +void usnic_ib_sysfs_qpn_remove(struct usnic_ib_qp_grp *qp_grp); + +#endif /* !USNIC_IB_SYSFS_H_ */ diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c new file mode 100644 index 000000000000..8f8dfa2672b0 --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -0,0 +1,736 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include +#include + +#include +#include + +#include "usnic_abi.h" +#include "usnic_ib.h" +#include "usnic_common_util.h" +#include "usnic_ib_qp_grp.h" +#include "usnic_fwd.h" +#include "usnic_log.h" +#include "usnic_uiom.h" +#include "usnic_transport.h" + +#define USNIC_DEFAULT_TRANSPORT USNIC_TRANSPORT_ROCE_CUSTOM + +static void usnic_ib_fw_string_to_u64(char *fw_ver_str, u64 *fw_ver) +{ + *fw_ver = (u64) *fw_ver_str; +} + +static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp, + struct ib_udata *udata) +{ + struct usnic_ib_dev *us_ibdev; + struct usnic_ib_create_qp_resp resp; + struct pci_dev *pdev; + struct vnic_dev_bar *bar; + struct usnic_vnic_res_chunk *chunk; + int i, err; + + memset(&resp, 0, sizeof(resp)); + + us_ibdev = qp_grp->vf->pf; + pdev = usnic_vnic_get_pdev(qp_grp->vf->vnic); + if (!pdev) { + usnic_err("Failed to get pdev of qp_grp %d\n", + qp_grp->grp_id); + return -EFAULT; + } + + bar = usnic_vnic_get_bar(qp_grp->vf->vnic, 0); + if (!bar) { + usnic_err("Failed to get bar0 of qp_grp %d vf %s", + qp_grp->grp_id, pci_name(pdev)); + return -EFAULT; + } + + resp.vfid = usnic_vnic_get_index(qp_grp->vf->vnic); + resp.bar_bus_addr = bar->bus_addr; + resp.bar_len = bar->len; + resp.transport = qp_grp->transport; + + chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ); + if (IS_ERR_OR_NULL(chunk)) { + usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n", + usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ), + qp_grp->grp_id, + PTR_ERR(chunk)); + return chunk ? PTR_ERR(chunk) : -ENOMEM; + } + + WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_RQ); + resp.rq_cnt = chunk->cnt; + for (i = 0; i < chunk->cnt; i++) + resp.rq_idx[i] = chunk->res[i]->vnic_idx; + + chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_WQ); + if (IS_ERR_OR_NULL(chunk)) { + usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n", + usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_WQ), + qp_grp->grp_id, + PTR_ERR(chunk)); + return chunk ? PTR_ERR(chunk) : -ENOMEM; + } + + WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_WQ); + resp.wq_cnt = chunk->cnt; + for (i = 0; i < chunk->cnt; i++) + resp.wq_idx[i] = chunk->res[i]->vnic_idx; + + chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_CQ); + if (IS_ERR_OR_NULL(chunk)) { + usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n", + usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_CQ), + qp_grp->grp_id, + PTR_ERR(chunk)); + return chunk ? PTR_ERR(chunk) : -ENOMEM; + } + + WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_CQ); + resp.cq_cnt = chunk->cnt; + for (i = 0; i < chunk->cnt; i++) + resp.cq_idx[i] = chunk->res[i]->vnic_idx; + + err = ib_copy_to_udata(udata, &resp, sizeof(resp)); + if (err) { + usnic_err("Failed to copy udata for %s", us_ibdev->ib_dev.name); + return err; + } + + return 0; +} + +static struct usnic_ib_qp_grp* +find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev, + struct usnic_ib_pd *pd, + enum usnic_transport_type transport, + struct usnic_vnic_res_spec *res_spec) +{ + struct usnic_ib_vf *vf; + struct usnic_vnic *vnic; + struct usnic_ib_qp_grp *qp_grp; + struct device *dev, **dev_list; + int i, found = 0; + + BUG_ON(!mutex_is_locked(&us_ibdev->usdev_lock)); + + if (list_empty(&us_ibdev->vf_dev_list)) { + usnic_info("No vfs to allocate\n"); + return NULL; + } + + if (!us_ibdev->link_up) { + usnic_info("Cannot allocate qp b/c PF link is down\n"); + return NULL; + } + + if (usnic_ib_share_vf) { + /* Try to find resouces on a used vf which is in pd */ + dev_list = usnic_uiom_get_dev_list(pd->umem_pd); + for (i = 0; dev_list[i]; i++) { + dev = dev_list[i]; + vf = pci_get_drvdata(to_pci_dev(dev)); + spin_lock(&vf->lock); + vnic = vf->vnic; + if (!usnic_vnic_check_room(vnic, res_spec)) { + usnic_dbg("Found used vnic %s from %s\n", + us_ibdev->ib_dev.name, + pci_name(usnic_vnic_get_pdev( + vnic))); + found = 1; + break; + } + spin_unlock(&vf->lock); + + } + usnic_uiom_free_dev_list(dev_list); + } + + if (!found) { + /* Try to find resources on an unused vf */ + list_for_each_entry(vf, &us_ibdev->vf_dev_list, link) { + spin_lock(&vf->lock); + vnic = vf->vnic; + if (vf->qp_grp_ref_cnt == 0 && + usnic_vnic_check_room(vnic, res_spec) == 0) { + found = 1; + break; + } + spin_unlock(&vf->lock); + } + } + + if (!found) { + usnic_info("No free qp grp found on %s\n", + us_ibdev->ib_dev.name); + return ERR_PTR(-ENOMEM); + } + + qp_grp = usnic_ib_qp_grp_create(us_ibdev->ufdev, vf, pd, res_spec, + transport); + spin_unlock(&vf->lock); + if (IS_ERR_OR_NULL(qp_grp)) { + usnic_err("Failed to allocate qp_grp\n"); + return ERR_PTR(qp_grp ? PTR_ERR(qp_grp) : -ENOMEM); + } + + return qp_grp; +} + +static void qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp) +{ + struct usnic_ib_vf *vf = qp_grp->vf; + + WARN_ON(qp_grp->state != IB_QPS_RESET); + + spin_lock(&vf->lock); + usnic_ib_qp_grp_destroy(qp_grp); + spin_unlock(&vf->lock); +} + +static void eth_speed_to_ib_speed(int speed, u8 *active_speed, + u8 *active_width) +{ + if (speed <= 10000) { + *active_width = IB_WIDTH_1X; + *active_speed = IB_SPEED_FDR10; + } else if (speed <= 20000) { + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_DDR; + } else if (speed <= 30000) { + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_QDR; + } else if (speed <= 40000) { + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_FDR10; + } else { + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_EDR; + } +} + +/* Start of ib callback functions */ + +enum rdma_link_layer usnic_ib_port_link_layer(struct ib_device *device, + u8 port_num) +{ + return IB_LINK_LAYER_ETHERNET; +} + +int usnic_ib_query_device(struct ib_device *ibdev, + struct ib_device_attr *props) +{ + struct usnic_ib_dev *us_ibdev = to_usdev(ibdev); + union ib_gid gid; + struct ethtool_drvinfo info; + struct ethtool_cmd cmd; + int qp_per_vf; + + usnic_dbg("\n"); + mutex_lock(&us_ibdev->usdev_lock); + us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info); + us_ibdev->netdev->ethtool_ops->get_settings(us_ibdev->netdev, &cmd); + memset(props, 0, sizeof(*props)); + usnic_mac_to_gid(us_ibdev->mac, &gid.raw[0]); + memcpy(&props->sys_image_guid, &gid.global.interface_id, + sizeof(gid.global.interface_id)); + usnic_ib_fw_string_to_u64(&info.fw_version[0], &props->fw_ver); + props->max_mr_size = USNIC_UIOM_MAX_MR_SIZE; + props->page_size_cap = USNIC_UIOM_PAGE_SIZE; + props->vendor_id = PCI_VENDOR_ID_CISCO; + props->vendor_part_id = PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC; + props->hw_ver = us_ibdev->pdev->subsystem_device; + qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ], + us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]); + props->max_qp = qp_per_vf * + atomic_read(&us_ibdev->vf_cnt.refcount); + props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT | + IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; + props->max_cq = us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ] * + atomic_read(&us_ibdev->vf_cnt.refcount); + props->max_pd = USNIC_UIOM_MAX_PD_CNT; + props->max_mr = USNIC_UIOM_MAX_MR_CNT; + props->local_ca_ack_delay = 0; + props->max_pkeys = 0; + props->atomic_cap = IB_ATOMIC_NONE; + props->masked_atomic_cap = props->atomic_cap; + props->max_qp_rd_atom = 0; + props->max_qp_init_rd_atom = 0; + props->max_res_rd_atom = 0; + props->max_srq = 0; + props->max_srq_wr = 0; + props->max_srq_sge = 0; + props->max_fast_reg_page_list_len = 0; + props->max_mcast_grp = 0; + props->max_mcast_qp_attach = 0; + props->max_total_mcast_qp_attach = 0; + props->max_map_per_fmr = 0; + /* Owned by Userspace + * max_qp_wr, max_sge, max_sge_rd, max_cqe */ + mutex_unlock(&us_ibdev->usdev_lock); + + return 0; +} + +int usnic_ib_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) +{ + struct usnic_ib_dev *us_ibdev = to_usdev(ibdev); + struct ethtool_cmd cmd; + + usnic_dbg("\n"); + + mutex_lock(&us_ibdev->usdev_lock); + us_ibdev->netdev->ethtool_ops->get_settings(us_ibdev->netdev, &cmd); + memset(props, 0, sizeof(*props)); + + props->lid = 0; + props->lmc = 1; + props->sm_lid = 0; + props->sm_sl = 0; + + if (us_ibdev->link_up) { + props->state = IB_PORT_ACTIVE; + props->phys_state = 5; + } else { + props->state = IB_PORT_DOWN; + props->phys_state = 3; + } + + props->port_cap_flags = 0; + props->gid_tbl_len = 1; + props->pkey_tbl_len = 1; + props->bad_pkey_cntr = 0; + props->qkey_viol_cntr = 0; + eth_speed_to_ib_speed(cmd.speed, &props->active_speed, + &props->active_width); + props->max_mtu = IB_MTU_4096; + props->active_mtu = iboe_get_mtu(us_ibdev->mtu); + /* Userspace will adjust for hdrs */ + props->max_msg_sz = us_ibdev->mtu; + props->max_vl_num = 1; + mutex_unlock(&us_ibdev->usdev_lock); + + return 0; +} + +int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr) +{ + struct usnic_ib_qp_grp *qp_grp; + struct usnic_ib_vf *vf; + int err; + + usnic_dbg("\n"); + + memset(qp_attr, 0, sizeof(*qp_attr)); + memset(qp_init_attr, 0, sizeof(*qp_init_attr)); + + qp_grp = to_uqp_grp(qp); + vf = qp_grp->vf; + mutex_lock(&vf->pf->usdev_lock); + usnic_dbg("\n"); + qp_attr->qp_state = qp_grp->state; + qp_attr->cur_qp_state = qp_grp->state; + + switch (qp_grp->ibqp.qp_type) { + case IB_QPT_UD: + qp_attr->qkey = 0; + break; + default: + usnic_err("Unexpected qp_type %d\n", qp_grp->ibqp.qp_type); + err = -EINVAL; + goto err_out; + } + + mutex_unlock(&vf->pf->usdev_lock); + return 0; + +err_out: + mutex_unlock(&vf->pf->usdev_lock); + return err; +} + +int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) +{ + + struct usnic_ib_dev *us_ibdev = to_usdev(ibdev); + usnic_dbg("\n"); + + if (index > 1) + return -EINVAL; + + mutex_lock(&us_ibdev->usdev_lock); + memset(&(gid->raw[0]), 0, sizeof(gid->raw)); + usnic_mac_to_gid(us_ibdev->mac, &gid->raw[0]); + mutex_unlock(&us_ibdev->usdev_lock); + + return 0; +} + +int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey) +{ + if (index > 1) + return -EINVAL; + + *pkey = 0xffff; + return 0; +} + +struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct usnic_ib_pd *pd; + void *umem_pd; + + usnic_dbg("\n"); + + pd = kzalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) + return ERR_PTR(-ENOMEM); + + umem_pd = pd->umem_pd = usnic_uiom_alloc_pd(); + if (IS_ERR_OR_NULL(umem_pd)) { + kfree(pd); + return ERR_PTR(umem_pd ? PTR_ERR(umem_pd) : -ENOMEM); + } + + usnic_info("domain 0x%p allocated for context 0x%p and device %s\n", + pd, context, ibdev->name); + return &pd->ibpd; +} + +int usnic_ib_dealloc_pd(struct ib_pd *pd) +{ + usnic_info("freeing domain 0x%p\n", pd); + + usnic_uiom_dealloc_pd((to_upd(pd))->umem_pd); + kfree(pd); + return 0; +} + +struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + int err; + struct usnic_ib_dev *us_ibdev; + struct usnic_ib_qp_grp *qp_grp; + struct usnic_ib_ucontext *ucontext; + int cq_cnt; + struct usnic_vnic_res_spec res_spec; + + usnic_dbg("\n"); + + ucontext = to_uucontext(pd->uobject->context); + us_ibdev = to_usdev(pd->device); + + if (init_attr->qp_type != IB_QPT_UD) { + usnic_err("%s asked to make a non-UD QP: %d\n", + us_ibdev->ib_dev.name, init_attr->qp_type); + return ERR_PTR(-EINVAL); + } + + mutex_lock(&us_ibdev->usdev_lock); + cq_cnt = (init_attr->send_cq == init_attr->recv_cq) ? 1 : 2, + res_spec = min_transport_spec[USNIC_DEFAULT_TRANSPORT]; + usnic_vnic_res_spec_update(&res_spec, USNIC_VNIC_RES_TYPE_CQ, cq_cnt); + qp_grp = find_free_vf_and_create_qp_grp(us_ibdev, to_upd(pd), + USNIC_DEFAULT_TRANSPORT, + &res_spec); + if (IS_ERR_OR_NULL(qp_grp)) { + err = (qp_grp ? PTR_ERR(qp_grp) : -ENOMEM); + goto out_release_mutex; + } + + err = usnic_ib_fill_create_qp_resp(qp_grp, udata); + if (err) { + err = -EBUSY; + goto out_release_qp_grp; + } + + qp_grp->ctx = ucontext; + list_add_tail(&qp_grp->link, &ucontext->qp_grp_list); + usnic_ib_log_vf(qp_grp->vf); + mutex_unlock(&us_ibdev->usdev_lock); + return &qp_grp->ibqp; + +out_release_qp_grp: + qp_grp_destroy(qp_grp); +out_release_mutex: + mutex_unlock(&us_ibdev->usdev_lock); + return ERR_PTR(err); +} + +int usnic_ib_destroy_qp(struct ib_qp *qp) +{ + struct usnic_ib_qp_grp *qp_grp; + struct usnic_ib_vf *vf; + + usnic_dbg("\n"); + + qp_grp = to_uqp_grp(qp); + vf = qp_grp->vf; + mutex_lock(&vf->pf->usdev_lock); + if (usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RESET, NULL)) { + usnic_err("Failed to move qp grp %u to reset\n", + qp_grp->grp_id); + } + + list_del(&qp_grp->link); + qp_grp_destroy(qp_grp); + mutex_unlock(&vf->pf->usdev_lock); + + return 0; +} + +int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct usnic_ib_qp_grp *qp_grp; + int status; + usnic_dbg("\n"); + + qp_grp = to_uqp_grp(ibqp); + + /* TODO: Future Support All States */ + mutex_lock(&qp_grp->vf->pf->usdev_lock); + if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT) { + status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_INIT, + &qp_grp->filters[DFLT_FILTER_IDX]); + } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTR) { + status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTR, NULL); + } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTS) { + status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTS, NULL); + } else { + usnic_err("Unexpected combination mask: %u state: %u\n", + attr_mask & IB_QP_STATE, attr->qp_state); + status = -EINVAL; + } + + mutex_unlock(&qp_grp->vf->pf->usdev_lock); + return status; +} + +struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, int entries, + int vector, struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct ib_cq *cq; + + usnic_dbg("\n"); + cq = kzalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) + return ERR_PTR(-EBUSY); + + return cq; +} + +int usnic_ib_destroy_cq(struct ib_cq *cq) +{ + usnic_dbg("\n"); + kfree(cq); + return 0; +} + +struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata) +{ + struct usnic_ib_mr *mr; + int err; + + usnic_dbg("start 0x%llx va 0x%llx length 0x%llx\n", start, + virt_addr, length); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (IS_ERR_OR_NULL(mr)) + return ERR_PTR(mr ? PTR_ERR(mr) : -ENOMEM); + + mr->umem = usnic_uiom_reg_get(to_upd(pd)->umem_pd, start, length, + access_flags, 0); + if (IS_ERR_OR_NULL(mr->umem)) { + err = (mr->umem) ? PTR_ERR(mr->umem) : -EFAULT; + goto err_free; + } + + mr->ibmr.lkey = mr->ibmr.rkey = 0; + return &mr->ibmr; + +err_free: + kfree(mr); + return ERR_PTR(err); +} + +int usnic_ib_dereg_mr(struct ib_mr *ibmr) +{ + struct usnic_ib_mr *mr = to_umr(ibmr); + + usnic_dbg("va 0x%lx length 0x%zx\n", mr->umem->va, mr->umem->length); + + usnic_uiom_reg_release(mr->umem, ibmr->pd->uobject->context->closing); + kfree(mr); + return 0; +} + +struct ib_ucontext *usnic_ib_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + struct usnic_ib_ucontext *context; + struct usnic_ib_dev *us_ibdev = to_usdev(ibdev); + usnic_dbg("\n"); + + context = kmalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&context->qp_grp_list); + mutex_lock(&us_ibdev->usdev_lock); + list_add_tail(&context->link, &us_ibdev->ctx_list); + mutex_unlock(&us_ibdev->usdev_lock); + + return &context->ibucontext; +} + +int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) +{ + struct usnic_ib_ucontext *context = to_uucontext(ibcontext); + struct usnic_ib_dev *us_ibdev = to_usdev(ibcontext->device); + usnic_dbg("\n"); + + mutex_lock(&us_ibdev->usdev_lock); + BUG_ON(!list_empty(&context->qp_grp_list)); + list_del(&context->link); + mutex_unlock(&us_ibdev->usdev_lock); + kfree(context); + return 0; +} + +int usnic_ib_mmap(struct ib_ucontext *context, + struct vm_area_struct *vma) +{ + struct usnic_ib_ucontext *uctx = to_ucontext(context); + struct usnic_ib_dev *us_ibdev; + struct usnic_ib_qp_grp *qp_grp; + struct usnic_ib_vf *vf; + struct vnic_dev_bar *bar; + dma_addr_t bus_addr; + unsigned int len; + unsigned int vfid; + + usnic_dbg("\n"); + + us_ibdev = to_usdev(context->device); + vma->vm_flags |= VM_IO; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vfid = vma->vm_pgoff; + usnic_dbg("Page Offset %lu PAGE_SHIFT %u VFID %u\n", + vma->vm_pgoff, PAGE_SHIFT, vfid); + + mutex_lock(&us_ibdev->usdev_lock); + list_for_each_entry(qp_grp, &uctx->qp_grp_list, link) { + vf = qp_grp->vf; + if (usnic_vnic_get_index(vf->vnic) == vfid) { + bar = usnic_vnic_get_bar(vf->vnic, 0); + if ((vma->vm_end - vma->vm_start) != bar->len) { + usnic_err("Bar0 Len %lu - Request map %lu\n", + bar->len, + vma->vm_end - vma->vm_start); + mutex_unlock(&us_ibdev->usdev_lock); + return -EINVAL; + } + bus_addr = bar->bus_addr; + len = bar->len; + usnic_dbg("bus: %pa vaddr: %p size: %ld\n", + &bus_addr, bar->vaddr, bar->len); + mutex_unlock(&us_ibdev->usdev_lock); + + return remap_pfn_range(vma, + vma->vm_start, + bus_addr >> PAGE_SHIFT, + len, vma->vm_page_prot); + } + } + + mutex_unlock(&us_ibdev->usdev_lock); + usnic_err("No VF %u found\n", vfid); + return -EINVAL; +} + +/* In ib callbacks section - Start of stub funcs */ +struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, + struct ib_ah_attr *ah_attr) +{ + usnic_dbg("\n"); + return ERR_PTR(-EPERM); +} + +int usnic_ib_destroy_ah(struct ib_ah *ah) +{ + usnic_dbg("\n"); + return -EINVAL; +} + +int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + usnic_dbg("\n"); + return -EINVAL; +} + +int usnic_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + usnic_dbg("\n"); + return -EINVAL; +} + +int usnic_ib_poll_cq(struct ib_cq *ibcq, int num_entries, + struct ib_wc *wc) +{ + usnic_dbg("\n"); + return -EINVAL; +} + +int usnic_ib_req_notify_cq(struct ib_cq *cq, + enum ib_cq_notify_flags flags) +{ + usnic_dbg("\n"); + return -EINVAL; +} + +struct ib_mr *usnic_ib_get_dma_mr(struct ib_pd *pd, int acc) +{ + usnic_dbg("\n"); + return ERR_PTR(-ENOMEM); +} + + +/* In ib callbacks section - End of stub funcs */ +/* End of ib callbacks section */ diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h new file mode 100644 index 000000000000..bb864f5aed70 --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef USNIC_IB_VERBS_H_ +#define USNIC_IB_VERBS_H_ + +#include "usnic_ib.h" + +enum rdma_link_layer usnic_ib_port_link_layer(struct ib_device *device, + u8 port_num); +int usnic_ib_query_device(struct ib_device *ibdev, + struct ib_device_attr *props); +int usnic_ib_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props); +int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr); +int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid); +int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey); +struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata); +int usnic_ib_dealloc_pd(struct ib_pd *pd); +struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata); +int usnic_ib_destroy_qp(struct ib_qp *qp); +int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); +struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, int entries, + int vector, struct ib_ucontext *context, + struct ib_udata *udata); +int usnic_ib_destroy_cq(struct ib_cq *cq); +struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata); +int usnic_ib_dereg_mr(struct ib_mr *ibmr); +struct ib_ucontext *usnic_ib_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata); +int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext); +int usnic_ib_mmap(struct ib_ucontext *context, + struct vm_area_struct *vma); +struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, + struct ib_ah_attr *ah_attr); +int usnic_ib_destroy_ah(struct ib_ah *ah); +int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr); +int usnic_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); +int usnic_ib_poll_cq(struct ib_cq *ibcq, int num_entries, + struct ib_wc *wc); +int usnic_ib_req_notify_cq(struct ib_cq *cq, + enum ib_cq_notify_flags flags); +struct ib_mr *usnic_ib_get_dma_mr(struct ib_pd *pd, int acc); +#endif /* !USNIC_IB_VERBS_H */ diff --git a/drivers/infiniband/hw/usnic/usnic_log.h b/drivers/infiniband/hw/usnic/usnic_log.h new file mode 100644 index 000000000000..75777a66c684 --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_log.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef USNIC_LOG_H_ +#define USNIC_LOG_H_ + +#include "usnic.h" + +extern unsigned int usnic_log_lvl; + +#define USNIC_LOG_LVL_NONE (0) +#define USNIC_LOG_LVL_ERR (1) +#define USNIC_LOG_LVL_INFO (2) +#define USNIC_LOG_LVL_DBG (3) + +#define usnic_printk(lvl, args...) \ + do { \ + printk(lvl "%s:%s:%d: ", DRV_NAME, __func__, \ + __LINE__); \ + printk(args); \ + } while (0) + +#define usnic_dbg(args...) \ + do { \ + if (unlikely(usnic_log_lvl >= USNIC_LOG_LVL_DBG)) { \ + usnic_printk(KERN_INFO, args); \ + } \ +} while (0) + +#define usnic_info(args...) \ +do { \ + if (usnic_log_lvl >= USNIC_LOG_LVL_INFO) { \ + usnic_printk(KERN_INFO, args); \ + } \ +} while (0) + +#define usnic_err(args...) \ + do { \ + if (usnic_log_lvl >= USNIC_LOG_LVL_ERR) { \ + usnic_printk(KERN_ERR, args); \ + } \ + } while (0) +#endif /* !USNIC_LOG_H_ */ diff --git a/drivers/infiniband/hw/usnic/usnic_transport.c b/drivers/infiniband/hw/usnic/usnic_transport.c new file mode 100644 index 000000000000..723bd6c3a8f8 --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_transport.c @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include + +#include "usnic_transport.h" +#include "usnic_log.h" + +/* ROCE */ +static unsigned long *roce_bitmap; +static u16 roce_next_port = 1; +#define ROCE_BITMAP_SZ ((1 << (8 /*CHAR_BIT*/ * sizeof(u16)))/8 /*CHAR BIT*/) +static DEFINE_SPINLOCK(roce_bitmap_lock); + +static const char *transport_to_str(enum usnic_transport_type type) +{ + switch (type) { + case USNIC_TRANSPORT_UNKNOWN: + return "Unknown"; + case USNIC_TRANSPORT_ROCE_CUSTOM: + return "roce custom"; + case USNIC_TRANSPORT_MAX: + return "Max?"; + default: + return "Not known"; + } +} + +/* + * reserve a port number. if "0" specified, we will try to pick one + * starting at roce_next_port. roce_next_port will take on the values + * 1..4096 + */ +u16 usnic_transport_rsrv_port(enum usnic_transport_type type, u16 port_num) +{ + if (type == USNIC_TRANSPORT_ROCE_CUSTOM) { + spin_lock(&roce_bitmap_lock); + if (!port_num) { + port_num = bitmap_find_next_zero_area(roce_bitmap, + ROCE_BITMAP_SZ, + roce_next_port /* start */, + 1 /* nr */, + 0 /* align */); + roce_next_port = (port_num & 4095) + 1; + } else if (test_bit(port_num, roce_bitmap)) { + usnic_err("Failed to allocate port for %s\n", + transport_to_str(type)); + spin_unlock(&roce_bitmap_lock); + goto out_fail; + } + bitmap_set(roce_bitmap, port_num, 1); + spin_unlock(&roce_bitmap_lock); + } else { + usnic_err("Failed to allocate port - transport %s unsupported\n", + transport_to_str(type)); + goto out_fail; + } + + usnic_dbg("Allocating port %hu for %s\n", port_num, + transport_to_str(type)); + return port_num; + +out_fail: + return 0; +} + +void usnic_transport_unrsrv_port(enum usnic_transport_type type, u16 port_num) +{ + if (type == USNIC_TRANSPORT_ROCE_CUSTOM) { + spin_lock(&roce_bitmap_lock); + if (!port_num) { + usnic_err("Unreserved unvalid port num 0 for %s\n", + transport_to_str(type)); + goto out_roce_custom; + } + + if (!test_bit(port_num, roce_bitmap)) { + usnic_err("Unreserving invalid %hu for %s\n", + port_num, + transport_to_str(type)); + goto out_roce_custom; + } + bitmap_clear(roce_bitmap, port_num, 1); + usnic_dbg("Freeing port %hu for %s\n", port_num, + transport_to_str(type)); +out_roce_custom: + spin_unlock(&roce_bitmap_lock); + } else { + usnic_err("Freeing invalid port %hu for %d\n", port_num, type); + } +} + +int usnic_transport_init(void) +{ + roce_bitmap = kzalloc(ROCE_BITMAP_SZ, GFP_KERNEL); + if (!roce_bitmap) { + usnic_err("Failed to allocate bit map"); + return -ENOMEM; + } + + /* Do not ever allocate bit 0, hence set it here */ + bitmap_set(roce_bitmap, 0, 1); + return 0; +} + +void usnic_transport_fini(void) +{ + kfree(roce_bitmap); +} diff --git a/drivers/infiniband/hw/usnic/usnic_transport.h b/drivers/infiniband/hw/usnic/usnic_transport.h new file mode 100644 index 000000000000..091fdaf4d25a --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_transport.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef USNIC_TRANSPORT_H_ +#define USNIC_TRANSPORT_H_ + +#include "usnic_abi.h" + +u16 usnic_transport_rsrv_port(enum usnic_transport_type type, u16 port_num); +void usnic_transport_unrsrv_port(enum usnic_transport_type type, u16 port_num); +int usnic_transport_init(void); +void usnic_transport_fini(void); +#endif /* !USNIC_TRANSPORT_H */ diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c new file mode 100644 index 000000000000..c841a752dbd0 --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -0,0 +1,603 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2013 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "usnic_log.h" +#include "usnic_uiom.h" +#include "usnic_uiom_interval_tree.h" + +static struct workqueue_struct *usnic_uiom_wq; + +#define USNIC_UIOM_PAGE_CHUNK \ + ((PAGE_SIZE - offsetof(struct usnic_uiom_chunk, page_list)) /\ + ((void *) &((struct usnic_uiom_chunk *) 0)->page_list[1] - \ + (void *) &((struct usnic_uiom_chunk *) 0)->page_list[0])) + +static void usnic_uiom_reg_account(struct work_struct *work) +{ + struct usnic_uiom_reg *umem = container_of(work, + struct usnic_uiom_reg, work); + + down_write(&umem->mm->mmap_sem); + umem->mm->locked_vm -= umem->diff; + up_write(&umem->mm->mmap_sem); + mmput(umem->mm); + kfree(umem); +} + +static int usnic_uiom_dma_fault(struct iommu_domain *domain, + struct device *dev, + unsigned long iova, int flags, + void *token) +{ + usnic_err("Device %s iommu fault domain 0x%pK va 0x%lx flags 0x%x\n", + dev_name(dev), + domain, iova, flags); + return -ENOSYS; +} + +static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty) +{ + struct usnic_uiom_chunk *chunk, *tmp; + struct page *page; + int i; + dma_addr_t pa; + + list_for_each_entry_safe(chunk, tmp, chunk_list, list) { + for (i = 0; i < chunk->nents; i++) { + page = sg_page(&chunk->page_list[i]); + pa = sg_phys(&chunk->page_list[i]); + if (dirty) + set_page_dirty_lock(page); + put_page(page); + usnic_dbg("pa: %pa\n", &pa); + } + kfree(chunk); + } +} + +static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, + int dmasync, struct list_head *chunk_list) +{ + struct page **page_list; + struct usnic_uiom_chunk *chunk; + unsigned long locked; + unsigned long lock_limit; + unsigned long cur_base; + unsigned long npages; + int ret; + int off; + int i; + int flags; + dma_addr_t pa; + DEFINE_DMA_ATTRS(attrs); + + if (dmasync) + dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs); + + if (!can_do_mlock()) + return -EPERM; + + INIT_LIST_HEAD(chunk_list); + + page_list = (struct page **) __get_free_page(GFP_KERNEL); + if (!page_list) + return -ENOMEM; + + npages = PAGE_ALIGN(size + (addr & ~PAGE_MASK)) >> PAGE_SHIFT; + + down_write(¤t->mm->mmap_sem); + + locked = npages + current->mm->locked_vm; + lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + + if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { + ret = -ENOMEM; + goto out; + } + + flags = IOMMU_READ | IOMMU_CACHE; + flags |= (writable) ? IOMMU_WRITE : 0; + cur_base = addr & PAGE_MASK; + ret = 0; + + while (npages) { + ret = get_user_pages(current, current->mm, cur_base, + min_t(unsigned long, npages, + PAGE_SIZE / sizeof(struct page *)), + 1, !writable, page_list, NULL); + + if (ret < 0) + goto out; + + npages -= ret; + off = 0; + + while (ret) { + chunk = kmalloc(sizeof(*chunk) + + sizeof(struct scatterlist) * + min_t(int, ret, USNIC_UIOM_PAGE_CHUNK), + GFP_KERNEL); + if (!chunk) { + ret = -ENOMEM; + goto out; + } + + chunk->nents = min_t(int, ret, USNIC_UIOM_PAGE_CHUNK); + sg_init_table(chunk->page_list, chunk->nents); + for (i = 0; i < chunk->nents; ++i) { + sg_set_page(&chunk->page_list[i], + page_list[i + off], + PAGE_SIZE, 0); + pa = sg_phys(&chunk->page_list[i]); + usnic_dbg("va: 0x%lx pa: %pa\n", + cur_base + i*PAGE_SIZE, &pa); + } + cur_base += chunk->nents * PAGE_SIZE; + ret -= chunk->nents; + off += chunk->nents; + list_add_tail(&chunk->list, chunk_list); + } + + ret = 0; + } + +out: + if (ret < 0) + usnic_uiom_put_pages(chunk_list, 0); + else + current->mm->locked_vm = locked; + + up_write(¤t->mm->mmap_sem); + free_page((unsigned long) page_list); + return ret; +} + +static void usnic_uiom_unmap_sorted_intervals(struct list_head *intervals, + struct usnic_uiom_pd *pd) +{ + struct usnic_uiom_interval_node *interval, *tmp; + long unsigned va, size; + + list_for_each_entry_safe(interval, tmp, intervals, link) { + va = interval->start << PAGE_SHIFT; + size = ((interval->last - interval->start) + 1) << PAGE_SHIFT; + while (size > 0) { + /* Workaround for RH 970401 */ + usnic_dbg("va 0x%lx size 0x%lx", va, PAGE_SIZE); + iommu_unmap(pd->domain, va, PAGE_SIZE); + va += PAGE_SIZE; + size -= PAGE_SIZE; + } + } +} + +static void __usnic_uiom_reg_release(struct usnic_uiom_pd *pd, + struct usnic_uiom_reg *uiomr, + int dirty) +{ + int npages; + unsigned long vpn_start, vpn_last; + struct usnic_uiom_interval_node *interval, *tmp; + int writable = 0; + LIST_HEAD(rm_intervals); + + npages = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT; + vpn_start = (uiomr->va & PAGE_MASK) >> PAGE_SHIFT; + vpn_last = vpn_start + npages - 1; + + spin_lock(&pd->lock); + usnic_uiom_remove_interval(&pd->rb_root, vpn_start, + vpn_last, &rm_intervals); + usnic_uiom_unmap_sorted_intervals(&rm_intervals, pd); + + list_for_each_entry_safe(interval, tmp, &rm_intervals, link) { + if (interval->flags & IOMMU_WRITE) + writable = 1; + list_del(&interval->link); + kfree(interval); + } + + usnic_uiom_put_pages(&uiomr->chunk_list, dirty & writable); + spin_unlock(&pd->lock); +} + +static int usnic_uiom_map_sorted_intervals(struct list_head *intervals, + struct usnic_uiom_reg *uiomr) +{ + int i, err; + size_t size; + struct usnic_uiom_chunk *chunk; + struct usnic_uiom_interval_node *interval_node; + dma_addr_t pa; + dma_addr_t pa_start = 0; + dma_addr_t pa_end = 0; + long int va_start = -EINVAL; + struct usnic_uiom_pd *pd = uiomr->pd; + long int va = uiomr->va & PAGE_MASK; + int flags = IOMMU_READ | IOMMU_CACHE; + + flags |= (uiomr->writable) ? IOMMU_WRITE : 0; + chunk = list_first_entry(&uiomr->chunk_list, struct usnic_uiom_chunk, + list); + list_for_each_entry(interval_node, intervals, link) { +iter_chunk: + for (i = 0; i < chunk->nents; i++, va += PAGE_SIZE) { + pa = sg_phys(&chunk->page_list[i]); + if ((va >> PAGE_SHIFT) < interval_node->start) + continue; + + if ((va >> PAGE_SHIFT) == interval_node->start) { + /* First page of the interval */ + va_start = va; + pa_start = pa; + pa_end = pa; + } + + WARN_ON(va_start == -EINVAL); + + if ((pa_end + PAGE_SIZE != pa) && + (pa != pa_start)) { + /* PAs are not contiguous */ + size = pa_end - pa_start + PAGE_SIZE; + usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x", + va_start, &pa_start, size, flags); + err = iommu_map(pd->domain, va_start, pa_start, + size, flags); + if (err) { + usnic_err("Failed to map va 0x%lx pa 0x%pa size 0x%zx with err %d\n", + va_start, &pa_start, size, err); + goto err_out; + } + va_start = va; + pa_start = pa; + pa_end = pa; + } + + if ((va >> PAGE_SHIFT) == interval_node->last) { + /* Last page of the interval */ + size = pa - pa_start + PAGE_SIZE; + usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x\n", + va_start, &pa_start, size, flags); + err = iommu_map(pd->domain, va_start, pa_start, + size, flags); + if (err) { + usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n", + va_start, &pa_start, size, err); + goto err_out; + } + break; + } + + if (pa != pa_start) + pa_end += PAGE_SIZE; + } + + if (i == chunk->nents) { + /* + * Hit last entry of the chunk, + * hence advance to next chunk + */ + chunk = list_first_entry(&chunk->list, + struct usnic_uiom_chunk, + list); + goto iter_chunk; + } + } + + return 0; + +err_out: + usnic_uiom_unmap_sorted_intervals(intervals, pd); + return err; +} + +struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd, + unsigned long addr, size_t size, + int writable, int dmasync) +{ + struct usnic_uiom_reg *uiomr; + unsigned long va_base, vpn_start, vpn_last; + unsigned long npages; + int offset, err; + LIST_HEAD(sorted_diff_intervals); + + /* + * Intel IOMMU map throws an error if a translation entry is + * changed from read to write. This module may not unmap + * and then remap the entry after fixing the permission + * b/c this open up a small windows where hw DMA may page fault + * Hence, make all entries to be writable. + */ + writable = 1; + + va_base = addr & PAGE_MASK; + offset = addr & ~PAGE_MASK; + npages = PAGE_ALIGN(size + offset) >> PAGE_SHIFT; + vpn_start = (addr & PAGE_MASK) >> PAGE_SHIFT; + vpn_last = vpn_start + npages - 1; + + uiomr = kmalloc(sizeof(*uiomr), GFP_KERNEL); + if (!uiomr) + return ERR_PTR(-ENOMEM); + + uiomr->va = va_base; + uiomr->offset = offset; + uiomr->length = size; + uiomr->writable = writable; + uiomr->pd = pd; + + err = usnic_uiom_get_pages(addr, size, writable, dmasync, + &uiomr->chunk_list); + if (err) { + usnic_err("Failed get_pages vpn [0x%lx,0x%lx] err %d\n", + vpn_start, vpn_last, err); + goto out_free_uiomr; + } + + spin_lock(&pd->lock); + err = usnic_uiom_get_intervals_diff(vpn_start, vpn_last, + (writable) ? IOMMU_WRITE : 0, + IOMMU_WRITE, + &pd->rb_root, + &sorted_diff_intervals); + if (err) { + usnic_err("Failed disjoint interval vpn [0x%lx,0x%lx] err %d\n", + vpn_start, vpn_last, err); + goto out_put_pages; + } + + err = usnic_uiom_map_sorted_intervals(&sorted_diff_intervals, uiomr); + if (err) { + usnic_err("Failed map interval vpn [0x%lx,0x%lx] err %d\n", + vpn_start, vpn_last, err); + goto out_put_intervals; + + } + + err = usnic_uiom_insert_interval(&pd->rb_root, vpn_start, vpn_last, + (writable) ? IOMMU_WRITE : 0); + if (err) { + usnic_err("Failed insert interval vpn [0x%lx,0x%lx] err %d\n", + vpn_start, vpn_last, err); + goto out_unmap_intervals; + } + + usnic_uiom_put_interval_set(&sorted_diff_intervals); + spin_unlock(&pd->lock); + + return uiomr; + +out_unmap_intervals: + usnic_uiom_unmap_sorted_intervals(&sorted_diff_intervals, pd); +out_put_intervals: + usnic_uiom_put_interval_set(&sorted_diff_intervals); +out_put_pages: + usnic_uiom_put_pages(&uiomr->chunk_list, 0); + spin_unlock(&pd->lock); +out_free_uiomr: + kfree(uiomr); + return ERR_PTR(err); +} + +void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing) +{ + struct mm_struct *mm; + unsigned long diff; + + __usnic_uiom_reg_release(uiomr->pd, uiomr, 1); + + mm = get_task_mm(current); + if (!mm) { + kfree(uiomr); + return; + } + + diff = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT; + + /* + * We may be called with the mm's mmap_sem already held. This + * can happen when a userspace munmap() is the call that drops + * the last reference to our file and calls our release + * method. If there are memory regions to destroy, we'll end + * up here and not be able to take the mmap_sem. In that case + * we defer the vm_locked accounting to the system workqueue. + */ + if (closing) { + if (!down_write_trylock(&mm->mmap_sem)) { + INIT_WORK(&uiomr->work, usnic_uiom_reg_account); + uiomr->mm = mm; + uiomr->diff = diff; + + queue_work(usnic_uiom_wq, &uiomr->work); + return; + } + } else + down_write(&mm->mmap_sem); + + current->mm->locked_vm -= diff; + up_write(&mm->mmap_sem); + mmput(mm); + kfree(uiomr); +} + +struct usnic_uiom_pd *usnic_uiom_alloc_pd(void) +{ + struct usnic_uiom_pd *pd; + void *domain; + + pd = kzalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) + return ERR_PTR(-ENOMEM); + + pd->domain = domain = iommu_domain_alloc(&pci_bus_type); + if (IS_ERR_OR_NULL(domain)) { + usnic_err("Failed to allocate IOMMU domain with err %ld\n", + PTR_ERR(pd->domain)); + kfree(pd); + return ERR_PTR(domain ? PTR_ERR(domain) : -ENOMEM); + } + + iommu_set_fault_handler(pd->domain, usnic_uiom_dma_fault, NULL); + + spin_lock_init(&pd->lock); + INIT_LIST_HEAD(&pd->devs); + + return pd; +} + +void usnic_uiom_dealloc_pd(struct usnic_uiom_pd *pd) +{ + iommu_domain_free(pd->domain); + kfree(pd); +} + +int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev) +{ + struct usnic_uiom_dev *uiom_dev; + int err; + + uiom_dev = kzalloc(sizeof(*uiom_dev), GFP_KERNEL); + if (!uiom_dev) + return -ENOMEM; + uiom_dev->dev = dev; + + err = iommu_attach_device(pd->domain, dev); + if (err) + goto out_free_dev; + + if (!iommu_domain_has_cap(pd->domain, IOMMU_CAP_CACHE_COHERENCY)) { + usnic_err("IOMMU of %s does not support cache coherency\n", + dev_name(dev)); + err = -EINVAL; + goto out_detach_device; + } + + spin_lock(&pd->lock); + list_add_tail(&uiom_dev->link, &pd->devs); + pd->dev_cnt++; + spin_unlock(&pd->lock); + + return 0; + +out_detach_device: + iommu_detach_device(pd->domain, dev); +out_free_dev: + kfree(uiom_dev); + return err; +} + +void usnic_uiom_detach_dev_from_pd(struct usnic_uiom_pd *pd, struct device *dev) +{ + struct usnic_uiom_dev *uiom_dev; + int found = 0; + + spin_lock(&pd->lock); + list_for_each_entry(uiom_dev, &pd->devs, link) { + if (uiom_dev->dev == dev) { + found = 1; + break; + } + } + + if (!found) { + usnic_err("Unable to free dev %s - not found\n", + dev_name(dev)); + spin_unlock(&pd->lock); + return; + } + + list_del(&uiom_dev->link); + pd->dev_cnt--; + spin_unlock(&pd->lock); + + return iommu_detach_device(pd->domain, dev); +} + +struct device **usnic_uiom_get_dev_list(struct usnic_uiom_pd *pd) +{ + struct usnic_uiom_dev *uiom_dev; + struct device **devs; + int i = 0; + + spin_lock(&pd->lock); + devs = kcalloc(pd->dev_cnt + 1, sizeof(*devs), GFP_ATOMIC); + if (!devs) { + devs = ERR_PTR(-ENOMEM); + goto out; + } + + list_for_each_entry(uiom_dev, &pd->devs, link) { + devs[i++] = uiom_dev->dev; + } +out: + spin_unlock(&pd->lock); + return devs; +} + +void usnic_uiom_free_dev_list(struct device **devs) +{ + kfree(devs); +} + +int usnic_uiom_init(char *drv_name) +{ + if (!iommu_present(&pci_bus_type)) { + usnic_err("IOMMU required but not present or enabled. USNIC QPs will not function w/o enabling IOMMU\n"); + return -EPERM; + } + + usnic_uiom_wq = create_workqueue(drv_name); + if (!usnic_uiom_wq) { + usnic_err("Unable to alloc wq for drv %s\n", drv_name); + return -ENOMEM; + } + + return 0; +} + +void usnic_uiom_fini(void) +{ + flush_workqueue(usnic_uiom_wq); + destroy_workqueue(usnic_uiom_wq); +} diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h new file mode 100644 index 000000000000..70440996e8f2 --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_uiom.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef USNIC_UIOM_H_ +#define USNIC_UIOM_H_ + +#include +#include + +#include "usnic_uiom_interval_tree.h" + +#define USNIC_UIOM_READ (1) +#define USNIC_UIOM_WRITE (2) + +#define USNIC_UIOM_MAX_PD_CNT (1000) +#define USNIC_UIOM_MAX_MR_CNT (1000000) +#define USNIC_UIOM_MAX_MR_SIZE (~0UL) +#define USNIC_UIOM_PAGE_SIZE (PAGE_SIZE) + +struct usnic_uiom_dev { + struct device *dev; + struct list_head link; +}; + +struct usnic_uiom_pd { + struct iommu_domain *domain; + spinlock_t lock; + struct rb_root rb_root; + struct list_head devs; + int dev_cnt; +}; + +struct usnic_uiom_reg { + struct usnic_uiom_pd *pd; + unsigned long va; + size_t length; + int offset; + int page_size; + int writable; + struct list_head chunk_list; + struct work_struct work; + struct mm_struct *mm; + unsigned long diff; +}; + +struct usnic_uiom_chunk { + struct list_head list; + int nents; + struct scatterlist page_list[0]; +}; + +struct usnic_uiom_pd *usnic_uiom_alloc_pd(void); +void usnic_uiom_dealloc_pd(struct usnic_uiom_pd *pd); +int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev); +void usnic_uiom_detach_dev_from_pd(struct usnic_uiom_pd *pd, + struct device *dev); +struct device **usnic_uiom_get_dev_list(struct usnic_uiom_pd *pd); +void usnic_uiom_free_dev_list(struct device **devs); +struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd, + unsigned long addr, size_t size, + int access, int dmasync); +void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing); +int usnic_uiom_init(char *drv_name); +void usnic_uiom_fini(void); +#endif /* USNIC_UIOM_H_ */ diff --git a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c new file mode 100644 index 000000000000..7e1dafccb11e --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c @@ -0,0 +1,237 @@ +#include +#include +#include +#include +#include + +#include +#include "usnic_uiom_interval_tree.h" + +#define START(node) ((node)->start) +#define LAST(node) ((node)->last) + +#define MAKE_NODE(node, start, end, ref_cnt, flags, err, err_out) \ + do { \ + node = usnic_uiom_interval_node_alloc(start, \ + end, ref_cnt, flags); \ + if (!node) { \ + err = -ENOMEM; \ + goto err_out; \ + } \ + } while (0) + +#define MARK_FOR_ADD(node, list) (list_add_tail(&node->link, list)) + +#define MAKE_NODE_AND_APPEND(node, start, end, ref_cnt, flags, err, \ + err_out, list) \ + do { \ + MAKE_NODE(node, start, end, \ + ref_cnt, flags, err, \ + err_out); \ + MARK_FOR_ADD(node, list); \ + } while (0) + +#define FLAGS_EQUAL(flags1, flags2, mask) \ + (((flags1) & (mask)) == ((flags2) & (mask))) + +static struct usnic_uiom_interval_node* +usnic_uiom_interval_node_alloc(long int start, long int last, int ref_cnt, + int flags) +{ + struct usnic_uiom_interval_node *interval = kzalloc(sizeof(*interval), + GFP_ATOMIC); + if (!interval) + return NULL; + + interval->start = start; + interval->last = last; + interval->flags = flags; + interval->ref_cnt = ref_cnt; + + return interval; +} + +static int interval_cmp(void *priv, struct list_head *a, struct list_head *b) +{ + struct usnic_uiom_interval_node *node_a, *node_b; + + node_a = list_entry(a, struct usnic_uiom_interval_node, link); + node_b = list_entry(b, struct usnic_uiom_interval_node, link); + + /* long to int */ + if (node_a->start < node_b->start) + return -1; + else if (node_a->start > node_b->start) + return 1; + + return 0; +} + +static void +find_intervals_intersection_sorted(struct rb_root *root, unsigned long start, + unsigned long last, + struct list_head *list) +{ + struct usnic_uiom_interval_node *node; + + INIT_LIST_HEAD(list); + + for (node = usnic_uiom_interval_tree_iter_first(root, start, last); + node; + node = usnic_uiom_interval_tree_iter_next(node, start, last)) + list_add_tail(&node->link, list); + + list_sort(NULL, list, interval_cmp); +} + +int usnic_uiom_get_intervals_diff(unsigned long start, unsigned long last, + int flags, int flag_mask, + struct rb_root *root, + struct list_head *diff_set) +{ + struct usnic_uiom_interval_node *interval, *tmp; + int err = 0; + long int pivot = start; + LIST_HEAD(intersection_set); + + INIT_LIST_HEAD(diff_set); + + find_intervals_intersection_sorted(root, start, last, + &intersection_set); + + list_for_each_entry(interval, &intersection_set, link) { + if (pivot < interval->start) { + MAKE_NODE_AND_APPEND(tmp, pivot, interval->start - 1, + 1, flags, err, err_out, + diff_set); + pivot = interval->start; + } + + /* + * Invariant: Set [start, pivot] is either in diff_set or root, + * but not in both. + */ + + if (pivot > interval->last) { + continue; + } else if (pivot <= interval->last && + FLAGS_EQUAL(interval->flags, flags, + flag_mask)) { + pivot = interval->last + 1; + } + } + + if (pivot <= last) + MAKE_NODE_AND_APPEND(tmp, pivot, last, 1, flags, err, err_out, + diff_set); + + return 0; + +err_out: + list_for_each_entry_safe(interval, tmp, diff_set, link) { + list_del(&interval->link); + kfree(interval); + } + + return err; +} + +void usnic_uiom_put_interval_set(struct list_head *intervals) +{ + struct usnic_uiom_interval_node *interval, *tmp; + list_for_each_entry_safe(interval, tmp, intervals, link) + kfree(interval); +} + +int usnic_uiom_insert_interval(struct rb_root *root, unsigned long start, + unsigned long last, int flags) +{ + struct usnic_uiom_interval_node *interval, *tmp; + unsigned long istart, ilast; + int iref_cnt, iflags; + unsigned long lpivot = start; + int err = 0; + LIST_HEAD(to_add); + LIST_HEAD(intersection_set); + + find_intervals_intersection_sorted(root, start, last, + &intersection_set); + + list_for_each_entry(interval, &intersection_set, link) { + /* + * Invariant - lpivot is the left edge of next interval to be + * inserted + */ + istart = interval->start; + ilast = interval->last; + iref_cnt = interval->ref_cnt; + iflags = interval->flags; + + if (istart < lpivot) { + MAKE_NODE_AND_APPEND(tmp, istart, lpivot - 1, iref_cnt, + iflags, err, err_out, &to_add); + } else if (istart > lpivot) { + MAKE_NODE_AND_APPEND(tmp, lpivot, istart - 1, 1, flags, + err, err_out, &to_add); + lpivot = istart; + } else { + lpivot = istart; + } + + if (ilast > last) { + MAKE_NODE_AND_APPEND(tmp, lpivot, last, iref_cnt + 1, + iflags | flags, err, err_out, + &to_add); + MAKE_NODE_AND_APPEND(tmp, last + 1, ilast, iref_cnt, + iflags, err, err_out, &to_add); + } else { + MAKE_NODE_AND_APPEND(tmp, lpivot, ilast, iref_cnt + 1, + iflags | flags, err, err_out, + &to_add); + } + + lpivot = ilast + 1; + } + + if (lpivot <= last) + MAKE_NODE_AND_APPEND(tmp, lpivot, last, 1, flags, err, err_out, + &to_add); + + list_for_each_entry_safe(interval, tmp, &intersection_set, link) { + usnic_uiom_interval_tree_remove(interval, root); + kfree(interval); + } + + list_for_each_entry(interval, &to_add, link) + usnic_uiom_interval_tree_insert(interval, root); + + return 0; + +err_out: + list_for_each_entry_safe(interval, tmp, &to_add, link) + kfree(interval); + + return err; +} + +void usnic_uiom_remove_interval(struct rb_root *root, unsigned long start, + unsigned long last, struct list_head *removed) +{ + struct usnic_uiom_interval_node *interval; + + for (interval = usnic_uiom_interval_tree_iter_first(root, start, last); + interval; + interval = usnic_uiom_interval_tree_iter_next(interval, + start, + last)) { + if (--interval->ref_cnt == 0) + list_add_tail(&interval->link, removed); + } + + list_for_each_entry(interval, removed, link) + usnic_uiom_interval_tree_remove(interval, root); +} + +INTERVAL_TREE_DEFINE(struct usnic_uiom_interval_node, rb, + unsigned long, __subtree_last, + START, LAST, , usnic_uiom_interval_tree) diff --git a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h new file mode 100644 index 000000000000..030ba6e68a52 --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef USNIC_UIOM_INTERVAL_TREE_H_ +#define USNIC_UIOM_INTERVAL_TREE_H_ + +#include +#include + +struct usnic_uiom_interval_node { + struct rb_node rb; + struct list_head link; + unsigned long start; + unsigned long last; + unsigned long __subtree_last; + unsigned int ref_cnt; + int flags; +}; + +extern void +usnic_uiom_interval_tree_insert(struct usnic_uiom_interval_node *node, + struct rb_root *root); +extern void +usnic_uiom_interval_tree_remove(struct usnic_uiom_interval_node *node, + struct rb_root *root); +extern struct usnic_uiom_interval_node * +usnic_uiom_interval_tree_iter_first(struct rb_root *root, + unsigned long start, + unsigned long last); +extern struct usnic_uiom_interval_node * +usnic_uiom_interval_tree_iter_next(struct usnic_uiom_interval_node *node, + unsigned long start, unsigned long last); +/* + * Inserts {start...last} into {root}. If there are overlaps, + * nodes will be broken up and merged + */ +int usnic_uiom_insert_interval(struct rb_root *root, + unsigned long start, unsigned long last, + int flags); +/* + * Removed {start...last} from {root}. The nodes removed are returned in + * 'removed.' The caller is responsibile for freeing memory of nodes in + * 'removed.' + */ +void usnic_uiom_remove_interval(struct rb_root *root, + unsigned long start, unsigned long last, + struct list_head *removed); +/* + * Returns {start...last} - {root} (relative complement of {start...last} in + * {root}) in diff_set sorted ascendingly + */ +int usnic_uiom_get_intervals_diff(unsigned long start, + unsigned long last, int flags, + int flag_mask, + struct rb_root *root, + struct list_head *diff_set); +/* Call this to free diff_set returned by usnic_uiom_get_intervals_diff */ +void usnic_uiom_put_interval_set(struct list_head *intervals); +#endif /* USNIC_UIOM_INTERVAL_TREE_H_ */ diff --git a/drivers/infiniband/hw/usnic/usnic_vnic.c b/drivers/infiniband/hw/usnic/usnic_vnic.c new file mode 100644 index 000000000000..656b88c39eda --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_vnic.c @@ -0,0 +1,467 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include + +#include "usnic_ib.h" +#include "vnic_resource.h" +#include "usnic_log.h" +#include "usnic_vnic.h" + +struct usnic_vnic { + struct vnic_dev *vdev; + struct vnic_dev_bar bar[PCI_NUM_RESOURCES]; + struct usnic_vnic_res_chunk chunks[USNIC_VNIC_RES_TYPE_MAX]; + spinlock_t res_lock; +}; + +static enum vnic_res_type _to_vnic_res_type(enum usnic_vnic_res_type res_type) +{ +#define DEFINE_USNIC_VNIC_RES_AT(usnic_vnic_res_t, vnic_res_type, desc, val) \ + vnic_res_type, +#define DEFINE_USNIC_VNIC_RES(usnic_vnic_res_t, vnic_res_type, desc) \ + vnic_res_type, + static enum vnic_res_type usnic_vnic_type_2_vnic_type[] = { + USNIC_VNIC_RES_TYPES}; +#undef DEFINE_USNIC_VNIC_RES +#undef DEFINE_USNIC_VNIC_RES_AT + + if (res_type >= USNIC_VNIC_RES_TYPE_MAX) + return RES_TYPE_MAX; + + return usnic_vnic_type_2_vnic_type[res_type]; +} + +const char *usnic_vnic_res_type_to_str(enum usnic_vnic_res_type res_type) +{ +#define DEFINE_USNIC_VNIC_RES_AT(usnic_vnic_res_t, vnic_res_type, desc, val) \ + desc, +#define DEFINE_USNIC_VNIC_RES(usnic_vnic_res_t, vnic_res_type, desc) \ + desc, + static const char * const usnic_vnic_res_type_desc[] = { + USNIC_VNIC_RES_TYPES}; +#undef DEFINE_USNIC_VNIC_RES +#undef DEFINE_USNIC_VNIC_RES_AT + + if (res_type >= USNIC_VNIC_RES_TYPE_MAX) + return "unknown"; + + return usnic_vnic_res_type_desc[res_type]; + +} + +const char *usnic_vnic_pci_name(struct usnic_vnic *vnic) +{ + return pci_name(usnic_vnic_get_pdev(vnic)); +} + +int usnic_vnic_dump(struct usnic_vnic *vnic, char *buf, + int buf_sz, + void *hdr_obj, + int (*printtitle)(void *, char*, int), + int (*printcols)(char *, int), + int (*printrow)(void *, char *, int)) +{ + struct usnic_vnic_res_chunk *chunk; + struct usnic_vnic_res *res; + struct vnic_dev_bar *bar0; + int i, j, offset; + + offset = 0; + bar0 = usnic_vnic_get_bar(vnic, 0); + offset += scnprintf(buf + offset, buf_sz - offset, + "VF:%hu BAR0 bus_addr=%pa vaddr=0x%p size=%ld ", + usnic_vnic_get_index(vnic), + &bar0->bus_addr, + bar0->vaddr, bar0->len); + if (printtitle) + offset += printtitle(hdr_obj, buf + offset, buf_sz - offset); + offset += scnprintf(buf + offset, buf_sz - offset, "\n"); + offset += scnprintf(buf + offset, buf_sz - offset, + "|RES\t|CTRL_PIN\t\t|IN_USE\t"); + if (printcols) + offset += printcols(buf + offset, buf_sz - offset); + offset += scnprintf(buf + offset, buf_sz - offset, "\n"); + + spin_lock(&vnic->res_lock); + for (i = 0; i < ARRAY_SIZE(vnic->chunks); i++) { + chunk = &vnic->chunks[i]; + for (j = 0; j < chunk->cnt; j++) { + res = chunk->res[j]; + offset += scnprintf(buf + offset, buf_sz - offset, + "|%s[%u]\t|0x%p\t|%u\t", + usnic_vnic_res_type_to_str(res->type), + res->vnic_idx, res->ctrl, !!res->owner); + if (printrow) { + offset += printrow(res->owner, buf + offset, + buf_sz - offset); + } + offset += scnprintf(buf + offset, buf_sz - offset, + "\n"); + } + } + spin_unlock(&vnic->res_lock); + return offset; +} + +void usnic_vnic_res_spec_update(struct usnic_vnic_res_spec *spec, + enum usnic_vnic_res_type trgt_type, + u16 cnt) +{ + int i; + + for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) { + if (spec->resources[i].type == trgt_type) { + spec->resources[i].cnt = cnt; + return; + } + } + + WARN_ON(1); +} + +int usnic_vnic_res_spec_satisfied(const struct usnic_vnic_res_spec *min_spec, + struct usnic_vnic_res_spec *res_spec) +{ + int found, i, j; + + for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) { + found = 0; + + for (j = 0; j < USNIC_VNIC_RES_TYPE_MAX; j++) { + if (res_spec->resources[i].type != + min_spec->resources[i].type) + continue; + found = 1; + if (min_spec->resources[i].cnt > + res_spec->resources[i].cnt) + return -EINVAL; + break; + } + + if (!found) + return -EINVAL; + } + return 0; +} + +int usnic_vnic_spec_dump(char *buf, int buf_sz, + struct usnic_vnic_res_spec *res_spec) +{ + enum usnic_vnic_res_type res_type; + int res_cnt; + int i; + int offset = 0; + + for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) { + res_type = res_spec->resources[i].type; + res_cnt = res_spec->resources[i].cnt; + offset += scnprintf(buf + offset, buf_sz - offset, + "Res: %s Cnt: %d ", + usnic_vnic_res_type_to_str(res_type), + res_cnt); + } + + return offset; +} + +int usnic_vnic_check_room(struct usnic_vnic *vnic, + struct usnic_vnic_res_spec *res_spec) +{ + int i; + enum usnic_vnic_res_type res_type; + int res_cnt; + + for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) { + res_type = res_spec->resources[i].type; + res_cnt = res_spec->resources[i].cnt; + + if (res_type == USNIC_VNIC_RES_TYPE_EOL) + break; + + if (res_cnt > usnic_vnic_res_free_cnt(vnic, res_type)) + return -EBUSY; + } + + return 0; +} + +int usnic_vnic_res_cnt(struct usnic_vnic *vnic, + enum usnic_vnic_res_type type) +{ + return vnic->chunks[type].cnt; +} + +int usnic_vnic_res_free_cnt(struct usnic_vnic *vnic, + enum usnic_vnic_res_type type) +{ + return vnic->chunks[type].free_cnt; +} + +struct usnic_vnic_res_chunk * +usnic_vnic_get_resources(struct usnic_vnic *vnic, enum usnic_vnic_res_type type, + int cnt, void *owner) +{ + struct usnic_vnic_res_chunk *src, *ret; + struct usnic_vnic_res *res; + int i; + + if (usnic_vnic_res_free_cnt(vnic, type) < cnt || cnt < 1 || !owner) + return ERR_PTR(-EINVAL); + + ret = kzalloc(sizeof(*ret), GFP_ATOMIC); + if (!ret) { + usnic_err("Failed to allocate chunk for %s - Out of memory\n", + usnic_vnic_pci_name(vnic)); + return ERR_PTR(-ENOMEM); + } + + ret->res = kzalloc(sizeof(*(ret->res))*cnt, GFP_ATOMIC); + if (!ret->res) { + usnic_err("Failed to allocate resources for %s. Out of memory\n", + usnic_vnic_pci_name(vnic)); + kfree(ret); + return ERR_PTR(-ENOMEM); + } + + spin_lock(&vnic->res_lock); + src = &vnic->chunks[type]; + for (i = 0; i < src->cnt && ret->cnt < cnt; i++) { + res = src->res[i]; + if (!res->owner) { + src->free_cnt--; + res->owner = owner; + ret->res[ret->cnt++] = res; + } + } + + spin_unlock(&vnic->res_lock); + ret->type = type; + ret->vnic = vnic; + WARN_ON(ret->cnt != cnt); + + return ret; +} + +void usnic_vnic_put_resources(struct usnic_vnic_res_chunk *chunk) +{ + + struct usnic_vnic_res *res; + int i; + struct usnic_vnic *vnic = chunk->vnic; + + spin_lock(&vnic->res_lock); + while ((i = --chunk->cnt) >= 0) { + res = chunk->res[i]; + chunk->res[i] = NULL; + res->owner = NULL; + vnic->chunks[res->type].free_cnt++; + } + spin_unlock(&vnic->res_lock); + + kfree(chunk->res); + kfree(chunk); +} + +u16 usnic_vnic_get_index(struct usnic_vnic *vnic) +{ + return usnic_vnic_get_pdev(vnic)->devfn - 1; +} + +static int usnic_vnic_alloc_res_chunk(struct usnic_vnic *vnic, + enum usnic_vnic_res_type type, + struct usnic_vnic_res_chunk *chunk) +{ + int cnt, err, i; + struct usnic_vnic_res *res; + + cnt = vnic_dev_get_res_count(vnic->vdev, _to_vnic_res_type(type)); + if (cnt < 1) + return -EINVAL; + + chunk->cnt = chunk->free_cnt = cnt; + chunk->res = kzalloc(sizeof(*(chunk->res))*cnt, GFP_KERNEL); + if (!chunk->res) + return -ENOMEM; + + for (i = 0; i < cnt; i++) { + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) { + err = -ENOMEM; + goto fail; + } + res->type = type; + res->vnic_idx = i; + res->vnic = vnic; + res->ctrl = vnic_dev_get_res(vnic->vdev, + _to_vnic_res_type(type), i); + chunk->res[i] = res; + } + + chunk->vnic = vnic; + return 0; +fail: + for (i--; i >= 0; i--) + kfree(chunk->res[i]); + kfree(chunk->res); + return err; +} + +static void usnic_vnic_free_res_chunk(struct usnic_vnic_res_chunk *chunk) +{ + int i; + for (i = 0; i < chunk->cnt; i++) + kfree(chunk->res[i]); + kfree(chunk->res); +} + +static int usnic_vnic_discover_resources(struct pci_dev *pdev, + struct usnic_vnic *vnic) +{ + enum usnic_vnic_res_type res_type; + int i; + int err = 0; + + for (i = 0; i < ARRAY_SIZE(vnic->bar); i++) { + if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM)) + continue; + vnic->bar[i].len = pci_resource_len(pdev, i); + vnic->bar[i].vaddr = pci_iomap(pdev, i, vnic->bar[i].len); + if (!vnic->bar[i].vaddr) { + usnic_err("Cannot memory-map BAR %d, aborting\n", + i); + err = -ENODEV; + goto out_clean_bar; + } + vnic->bar[i].bus_addr = pci_resource_start(pdev, i); + } + + vnic->vdev = vnic_dev_register(NULL, pdev, pdev, vnic->bar, + ARRAY_SIZE(vnic->bar)); + if (!vnic->vdev) { + usnic_err("Failed to register device %s\n", + pci_name(pdev)); + err = -EINVAL; + goto out_clean_bar; + } + + for (res_type = USNIC_VNIC_RES_TYPE_EOL + 1; + res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++) { + err = usnic_vnic_alloc_res_chunk(vnic, res_type, + &vnic->chunks[res_type]); + if (err) { + usnic_err("Failed to alloc res %s with err %d\n", + usnic_vnic_res_type_to_str(res_type), + err); + goto out_clean_chunks; + } + } + + return 0; + +out_clean_chunks: + for (res_type--; res_type > USNIC_VNIC_RES_TYPE_EOL; res_type--) + usnic_vnic_free_res_chunk(&vnic->chunks[res_type]); + vnic_dev_unregister(vnic->vdev); +out_clean_bar: + for (i = 0; i < ARRAY_SIZE(vnic->bar); i++) { + if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM)) + continue; + if (!vnic->bar[i].vaddr) + break; + + iounmap(vnic->bar[i].vaddr); + } + + return err; +} + +struct pci_dev *usnic_vnic_get_pdev(struct usnic_vnic *vnic) +{ + return vnic_dev_get_pdev(vnic->vdev); +} + +struct vnic_dev_bar *usnic_vnic_get_bar(struct usnic_vnic *vnic, + int bar_num) +{ + return (bar_num < ARRAY_SIZE(vnic->bar)) ? &vnic->bar[bar_num] : NULL; +} + +static void usnic_vnic_release_resources(struct usnic_vnic *vnic) +{ + int i; + struct pci_dev *pdev; + enum usnic_vnic_res_type res_type; + + pdev = usnic_vnic_get_pdev(vnic); + + for (res_type = USNIC_VNIC_RES_TYPE_EOL + 1; + res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++) + usnic_vnic_free_res_chunk(&vnic->chunks[res_type]); + + vnic_dev_unregister(vnic->vdev); + + for (i = 0; i < ARRAY_SIZE(vnic->bar); i++) { + if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM)) + continue; + iounmap(vnic->bar[i].vaddr); + } +} + +struct usnic_vnic *usnic_vnic_alloc(struct pci_dev *pdev) +{ + struct usnic_vnic *vnic; + int err = 0; + + if (!pci_is_enabled(pdev)) { + usnic_err("PCI dev %s is disabled\n", pci_name(pdev)); + return ERR_PTR(-EINVAL); + } + + vnic = kzalloc(sizeof(*vnic), GFP_KERNEL); + if (!vnic) { + usnic_err("Failed to alloc vnic for %s - out of memory\n", + pci_name(pdev)); + return ERR_PTR(-ENOMEM); + } + + spin_lock_init(&vnic->res_lock); + + err = usnic_vnic_discover_resources(pdev, vnic); + if (err) { + usnic_err("Failed to discover %s resources with err %d\n", + pci_name(pdev), err); + goto out_free_vnic; + } + + usnic_dbg("Allocated vnic for %s\n", usnic_vnic_pci_name(vnic)); + + return vnic; + +out_free_vnic: + kfree(vnic); + + return ERR_PTR(err); +} + +void usnic_vnic_free(struct usnic_vnic *vnic) +{ + usnic_vnic_release_resources(vnic); + kfree(vnic); +} diff --git a/drivers/infiniband/hw/usnic/usnic_vnic.h b/drivers/infiniband/hw/usnic/usnic_vnic.h new file mode 100644 index 000000000000..14d931a8829d --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_vnic.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef USNIC_VNIC_H_ +#define USNIC_VNIC_H_ + +#include + +#include "vnic_dev.h" + +/* =USNIC_VNIC_RES_TYPE= =VNIC_RES= =DESC= */ +#define USNIC_VNIC_RES_TYPES \ + DEFINE_USNIC_VNIC_RES_AT(EOL, RES_TYPE_EOL, "EOL", 0) \ + DEFINE_USNIC_VNIC_RES(WQ, RES_TYPE_WQ, "WQ") \ + DEFINE_USNIC_VNIC_RES(RQ, RES_TYPE_RQ, "RQ") \ + DEFINE_USNIC_VNIC_RES(CQ, RES_TYPE_CQ, "CQ") \ + DEFINE_USNIC_VNIC_RES(INTR, RES_TYPE_INTR_CTRL, "INT") \ + DEFINE_USNIC_VNIC_RES(MAX, RES_TYPE_MAX, "MAX")\ + +#define DEFINE_USNIC_VNIC_RES_AT(usnic_vnic_res_t, vnic_res_type, desc, val) \ + USNIC_VNIC_RES_TYPE_##usnic_vnic_res_t = val, +#define DEFINE_USNIC_VNIC_RES(usnic_vnic_res_t, vnic_res_type, desc) \ + USNIC_VNIC_RES_TYPE_##usnic_vnic_res_t, +enum usnic_vnic_res_type { + USNIC_VNIC_RES_TYPES +}; +#undef DEFINE_USNIC_VNIC_RES +#undef DEFINE_USNIC_VNIC_RES_AT + +struct usnic_vnic_res { + enum usnic_vnic_res_type type; + unsigned int vnic_idx; + struct usnic_vnic *vnic; + void __iomem *ctrl; + void *owner; +}; + +struct usnic_vnic_res_chunk { + enum usnic_vnic_res_type type; + int cnt; + int free_cnt; + struct usnic_vnic_res **res; + struct usnic_vnic *vnic; +}; + +struct usnic_vnic_res_desc { + enum usnic_vnic_res_type type; + uint16_t cnt; +}; + +struct usnic_vnic_res_spec { + struct usnic_vnic_res_desc resources[USNIC_VNIC_RES_TYPE_MAX]; +}; + +const char *usnic_vnic_res_type_to_str(enum usnic_vnic_res_type res_type); +const char *usnic_vnic_pci_name(struct usnic_vnic *vnic); +int usnic_vnic_dump(struct usnic_vnic *vnic, char *buf, int buf_sz, + void *hdr_obj, + int (*printtitle)(void *, char*, int), + int (*printcols)(char *, int), + int (*printrow)(void *, char *, int)); +void usnic_vnic_res_spec_update(struct usnic_vnic_res_spec *spec, + enum usnic_vnic_res_type trgt_type, + u16 cnt); +int usnic_vnic_res_spec_satisfied(const struct usnic_vnic_res_spec *min_spec, + struct usnic_vnic_res_spec *res_spec); +int usnic_vnic_spec_dump(char *buf, int buf_sz, + struct usnic_vnic_res_spec *res_spec); +int usnic_vnic_check_room(struct usnic_vnic *vnic, + struct usnic_vnic_res_spec *res_spec); +int usnic_vnic_res_cnt(struct usnic_vnic *vnic, + enum usnic_vnic_res_type type); +int usnic_vnic_res_free_cnt(struct usnic_vnic *vnic, + enum usnic_vnic_res_type type); +struct usnic_vnic_res_chunk * +usnic_vnic_get_resources(struct usnic_vnic *vnic, + enum usnic_vnic_res_type type, + int cnt, + void *owner); +void usnic_vnic_put_resources(struct usnic_vnic_res_chunk *chunk); +struct pci_dev *usnic_vnic_get_pdev(struct usnic_vnic *vnic); +struct vnic_dev_bar *usnic_vnic_get_bar(struct usnic_vnic *vnic, + int bar_num); +struct usnic_vnic *usnic_vnic_alloc(struct pci_dev *pdev); +void usnic_vnic_free(struct usnic_vnic *vnic); +u16 usnic_vnic_get_index(struct usnic_vnic *vnic); + +#endif /*!USNIC_VNIC_H_*/ From 8192d4acb5c5376c0f6756f2106ab243036c8c7d Mon Sep 17 00:00:00 2001 From: Upinder Malhi Date: Thu, 9 Jan 2014 14:47:33 -0800 Subject: [PATCH 04/61] IB/usnic: Change WARN_ON to lockdep_assert_held usNIC calls WARN_ON(spin_is_locked..) at few places. In some of these instances, the call is made while holding a spinlock. Change all WARN_ON(spin_is_locked...) calls in usNIC to lockdep_assert_held to make it fool-proof bc the latter can be called while holding a spinlock and unlike spin_is_locked, lockdep_assert_held also works correctly on UP. Signed-off-by: Upinder Malhi Signed-off-by: Roland Dreier --- drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c index ca5fa6ad59ac..5a873f5c2794 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c @@ -82,7 +82,7 @@ static int add_fwd_filter(struct usnic_ib_qp_grp *qp_grp, struct usnic_vnic_res_chunk *chunk; int rq_idx; - WARN_ON(!spin_is_locked(&qp_grp->lock)); + lockdep_assert_held(&qp_grp->lock); chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ); if (IS_ERR_OR_NULL(chunk) || chunk->cnt < 1) { @@ -119,7 +119,7 @@ static int del_all_filters(struct usnic_ib_qp_grp *qp_grp) int err, status; struct usnic_fwd_filter_hndl *filter_hndl, *tmp; - WARN_ON(!spin_is_locked(&qp_grp->lock)); + lockdep_assert_held(&qp_grp->lock); status = 0; @@ -145,7 +145,7 @@ static int enable_qp_grp(struct usnic_ib_qp_grp *qp_grp) struct usnic_vnic_res_chunk *res_chunk; struct usnic_vnic_res *res; - WARN_ON(!spin_is_locked(&qp_grp->lock)); + lockdep_assert_held(&qp_grp->lock); vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic); @@ -189,7 +189,7 @@ static int disable_qp_grp(struct usnic_ib_qp_grp *qp_grp) struct usnic_vnic_res *res; int status = 0; - WARN_ON(!spin_is_locked(&qp_grp->lock)); + lockdep_assert_held(&qp_grp->lock); vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic); res_chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ); @@ -392,7 +392,7 @@ static int qp_grp_and_vf_bind(struct usnic_ib_vf *vf, int err; struct pci_dev *pdev; - WARN_ON(!spin_is_locked(&vf->lock)); + lockdep_assert_held(&vf->lock); pdev = usnic_vnic_get_pdev(vf->vnic); if (vf->qp_grp_ref_cnt == 0) { @@ -417,7 +417,7 @@ static void qp_grp_and_vf_unbind(struct usnic_ib_qp_grp *qp_grp) struct pci_dev *pdev; struct usnic_ib_pd *pd; - WARN_ON(!spin_is_locked(&qp_grp->vf->lock)); + lockdep_assert_held(&qp_grp->vf->lock); pd = qp_grp->vf->pd; pdev = usnic_vnic_get_pdev(qp_grp->vf->vnic); @@ -446,7 +446,7 @@ usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, u16 port_num; int err; - WARN_ON(!spin_is_locked(&vf->lock)); + lockdep_assert_held(&vf->lock); err = usnic_vnic_res_spec_satisfied(&min_transport_spec[transport], res_spec); @@ -514,7 +514,7 @@ void usnic_ib_qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp) enum usnic_transport_type transport; WARN_ON(qp_grp->state != IB_QPS_RESET); - WARN_ON(!spin_is_locked(&qp_grp->vf->lock)); + lockdep_assert_held(&qp_grp->vf->lock); transport = qp_grp->filters[DFLT_FILTER_IDX].transport; default_port_num = qp_grp->filters[DFLT_FILTER_IDX].port_num; From 301a0dd68e5ddd22d992a58f466b621987d9df3b Mon Sep 17 00:00:00 2001 From: Upinder Malhi Date: Thu, 9 Jan 2014 14:48:06 -0800 Subject: [PATCH 05/61] IB/usnic: Add struct usnic_transport_spec Add *struct usnic_transport_spec* for passing around transport specifications. Signed-off-by: Upinder Malhi Signed-off-by: Roland Dreier --- drivers/infiniband/hw/usnic/usnic_abi.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/infiniband/hw/usnic/usnic_abi.h b/drivers/infiniband/hw/usnic/usnic_abi.h index 510b7d702465..dd3e4c6c474f 100644 --- a/drivers/infiniband/hw/usnic/usnic_abi.h +++ b/drivers/infiniband/hw/usnic/usnic_abi.h @@ -33,6 +33,15 @@ enum usnic_transport_type { USNIC_TRANSPORT_MAX = 2, }; +struct usnic_transport_spec { + enum usnic_transport_type trans_type; + union { + struct { + uint16_t port_num; + } usnic_roce; + }; +}; + /*TODO: Future - usnic_modify_qp needs to pass in generic filters */ struct usnic_ib_create_qp_resp { u32 vfid; From 2183b990b67b761f81c68a18f60df028e080cf05 Mon Sep 17 00:00:00 2001 From: Upinder Malhi Date: Thu, 9 Jan 2014 14:48:07 -0800 Subject: [PATCH 06/61] IB/usnic: Push all forwarding state to usnic_fwd.[hc] Push all of the usnic device forwarding state - such as mtu, mac - to usnic_fwd_dev. Furthermore, usnic_fwd.h exposes a improved interface for rest of the usnic code. The primary improvement is that usnic_fwd.h's flow management interface takes in high-level *filter* and *action* structures now, instead of low-level paramaters such as vnic_idx, rq_idx. Signed-off-by: Upinder Malhi Signed-off-by: Roland Dreier --- drivers/infiniband/hw/usnic/usnic_fwd.c | 293 ++++++++++++++---------- drivers/infiniband/hw/usnic/usnic_fwd.h | 70 ++++-- 2 files changed, 225 insertions(+), 138 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.c b/drivers/infiniband/hw/usnic/usnic_fwd.c index 8e42216362e7..33fdd771e599 100644 --- a/drivers/infiniband/hw/usnic/usnic_fwd.c +++ b/drivers/infiniband/hw/usnic/usnic_fwd.c @@ -23,6 +23,49 @@ #include "usnic_fwd.h" #include "usnic_log.h" +static int usnic_fwd_devcmd_locked(struct usnic_fwd_dev *ufdev, int vnic_idx, + enum vnic_devcmd_cmd cmd, u64 *a0, + u64 *a1) +{ + int status; + struct net_device *netdev = ufdev->netdev; + + lockdep_assert_held(&ufdev->lock); + + status = enic_api_devcmd_proxy_by_index(netdev, + vnic_idx, + cmd, + a0, a1, + 1000); + if (status) { + if (status == ERR_EINVAL && cmd == CMD_DEL_FILTER) { + usnic_dbg("Dev %s vnic idx %u cmd %u already deleted", + ufdev->name, vnic_idx, cmd); + } else { + usnic_err("Dev %s vnic idx %u cmd %u failed with status %d\n", + ufdev->name, vnic_idx, cmd, + status); + } + } else { + usnic_dbg("Dev %s vnic idx %u cmd %u success", + ufdev->name, vnic_idx, cmd); + } + + return status; +} + +static int usnic_fwd_devcmd(struct usnic_fwd_dev *ufdev, int vnic_idx, + enum vnic_devcmd_cmd cmd, u64 *a0, u64 *a1) +{ + int status; + + spin_lock(&ufdev->lock); + status = usnic_fwd_devcmd_locked(ufdev, vnic_idx, cmd, a0, a1); + spin_unlock(&ufdev->lock); + + return status; +} + struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev) { struct usnic_fwd_dev *ufdev; @@ -34,6 +77,8 @@ struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev) ufdev->pdev = pdev; ufdev->netdev = pci_get_drvdata(pdev); spin_lock_init(&ufdev->lock); + strncpy(ufdev->name, netdev_name(ufdev->netdev), + sizeof(ufdev->name) - 1); return ufdev; } @@ -43,200 +88,208 @@ void usnic_fwd_dev_free(struct usnic_fwd_dev *ufdev) kfree(ufdev); } -static int usnic_fwd_devcmd(struct usnic_fwd_dev *ufdev, int vnic_idx, - enum vnic_devcmd_cmd cmd, u64 *a0, u64 *a1) +void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN]) { - int status; - struct net_device *netdev = ufdev->netdev; - spin_lock(&ufdev->lock); - status = enic_api_devcmd_proxy_by_index(netdev, - vnic_idx, - cmd, - a0, a1, - 1000); + memcpy(&ufdev->mac, mac, sizeof(ufdev->mac)); spin_unlock(&ufdev->lock); - if (status) { - if (status == ERR_EINVAL && cmd == CMD_DEL_FILTER) { - usnic_dbg("Dev %s vnic idx %u cmd %u already deleted", - netdev_name(netdev), vnic_idx, cmd); - } else { - usnic_err("Dev %s vnic idx %u cmd %u failed with status %d\n", - netdev_name(netdev), vnic_idx, cmd, - status); - } - } else { - usnic_dbg("Dev %s vnic idx %u cmd %u success", - netdev_name(netdev), vnic_idx, - cmd); - } - - return status; } -int usnic_fwd_add_usnic_filter(struct usnic_fwd_dev *ufdev, int vnic_idx, - int rq_idx, struct usnic_fwd_filter *fwd_filter, - struct usnic_fwd_filter_hndl **filter_hndl) +void usnic_fwd_carrier_up(struct usnic_fwd_dev *ufdev) { - struct filter_tlv *tlv, *tlv_va; - struct filter *filter; - struct filter_action *action; - struct pci_dev *pdev; - struct usnic_fwd_filter_hndl *usnic_filter_hndl; - int status; - u64 a0, a1; - u64 tlv_size; - dma_addr_t tlv_pa; + spin_lock(&ufdev->lock); + ufdev->link_up = 1; + spin_unlock(&ufdev->lock); +} - pdev = ufdev->pdev; - tlv_size = (2*sizeof(struct filter_tlv) + - sizeof(struct filter) + - sizeof(struct filter_action)); - tlv = pci_alloc_consistent(pdev, tlv_size, &tlv_pa); - if (!tlv) { - usnic_err("Failed to allocate memory\n"); - return -ENOMEM; - } +void usnic_fwd_carrier_down(struct usnic_fwd_dev *ufdev) +{ + spin_lock(&ufdev->lock); + ufdev->link_up = 0; + spin_unlock(&ufdev->lock); +} - usnic_filter_hndl = kzalloc(sizeof(*usnic_filter_hndl), GFP_ATOMIC); - if (!usnic_filter_hndl) { - usnic_err("Failed to allocate memory for hndl\n"); - pci_free_consistent(pdev, tlv_size, tlv, tlv_pa); - return -ENOMEM; - } +void usnic_fwd_set_mtu(struct usnic_fwd_dev *ufdev, unsigned int mtu) +{ + spin_lock(&ufdev->lock); + ufdev->mtu = mtu; + spin_unlock(&ufdev->lock); +} - tlv_va = tlv; - a0 = tlv_pa; - a1 = tlv_size; - memset(tlv, 0, tlv_size); +static int usnic_fwd_dev_ready_locked(struct usnic_fwd_dev *ufdev) +{ + lockdep_assert_held(&ufdev->lock); + + if (!ufdev->link_up) + return -EPERM; + + return 0; +} + +static void fill_tlv(struct filter_tlv *tlv, struct filter *filter, + struct filter_action *action) +{ tlv->type = CLSF_TLV_FILTER; tlv->length = sizeof(struct filter); - filter = (struct filter *)&tlv->val; - filter->type = FILTER_USNIC_ID; - filter->u.usnic.ethtype = USNIC_ROCE_ETHERTYPE; - filter->u.usnic.flags = FILTER_FIELD_USNIC_ETHTYPE | - FILTER_FIELD_USNIC_ID | - FILTER_FIELD_USNIC_PROTO; - filter->u.usnic.proto_version = (USNIC_ROCE_GRH_VER << - USNIC_ROCE_GRH_VER_SHIFT) - | USNIC_PROTO_VER; - filter->u.usnic.usnic_id = fwd_filter->port_num; + *((struct filter *)&tlv->val) = *filter; + tlv = (struct filter_tlv *)((char *)tlv + sizeof(struct filter_tlv) + sizeof(struct filter)); tlv->type = CLSF_TLV_ACTION; tlv->length = sizeof(struct filter_action); - action = (struct filter_action *)&tlv->val; - action->type = FILTER_ACTION_RQ_STEERING; - action->u.rq_idx = rq_idx; - - status = usnic_fwd_devcmd(ufdev, vnic_idx, CMD_ADD_FILTER, &a0, &a1); - pci_free_consistent(pdev, tlv_size, tlv_va, tlv_pa); - if (status) { - usnic_err("VF %s Filter add failed with status:%d", - pci_name(pdev), - status); - kfree(usnic_filter_hndl); - return status; - } else { - usnic_dbg("VF %s FILTER ID:%u", - pci_name(pdev), - (u32)a0); - } - - usnic_filter_hndl->type = FILTER_USNIC_ID; - usnic_filter_hndl->id = (u32)a0; - usnic_filter_hndl->vnic_idx = vnic_idx; - usnic_filter_hndl->ufdev = ufdev; - usnic_filter_hndl->filter = fwd_filter; - *filter_hndl = usnic_filter_hndl; - - return status; + *((struct filter_action *)&tlv->val) = *action; } -int usnic_fwd_del_filter(struct usnic_fwd_filter_hndl *filter_hndl) +struct usnic_fwd_flow* +usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter, + struct usnic_filter_action *uaction) +{ + struct filter_tlv *tlv; + struct pci_dev *pdev; + struct usnic_fwd_flow *flow; + uint64_t a0, a1; + uint64_t tlv_size; + dma_addr_t tlv_pa; + int status; + + pdev = ufdev->pdev; + tlv_size = (2*sizeof(struct filter_tlv) + sizeof(struct filter) + + sizeof(struct filter_action)); + + flow = kzalloc(sizeof(*flow), GFP_ATOMIC); + if (!flow) + return ERR_PTR(-ENOMEM); + + tlv = pci_alloc_consistent(pdev, tlv_size, &tlv_pa); + if (!tlv) { + usnic_err("Failed to allocate memory\n"); + status = -ENOMEM; + goto out_free_flow; + } + + fill_tlv(tlv, filter, &uaction->action); + + spin_lock(&ufdev->lock); + status = usnic_fwd_dev_ready_locked(ufdev); + if (status) { + usnic_err("Forwarding dev %s not ready with status %d\n", + ufdev->name, status); + goto out_free_tlv; + } + + /* Issue Devcmd */ + a0 = tlv_pa; + a1 = tlv_size; + status = usnic_fwd_devcmd_locked(ufdev, uaction->vnic_idx, + CMD_ADD_FILTER, &a0, &a1); + if (status) { + usnic_err("VF %s Filter add failed with status:%d", + ufdev->name, status); + status = -EFAULT; + goto out_free_tlv; + } else { + usnic_dbg("VF %s FILTER ID:%llu", ufdev->name, a0); + } + + flow->flow_id = (uint32_t) a0; + flow->vnic_idx = uaction->vnic_idx; + flow->ufdev = ufdev; + +out_free_tlv: + spin_unlock(&ufdev->lock); + pci_free_consistent(pdev, tlv_size, tlv, tlv_pa); + if (!status) + return flow; +out_free_flow: + kfree(flow); + return ERR_PTR(status); +} + +int usnic_fwd_dealloc_flow(struct usnic_fwd_flow *flow) { int status; u64 a0, a1; - struct net_device *netdev; - netdev = filter_hndl->ufdev->netdev; - a0 = filter_hndl->id; + a0 = flow->flow_id; - status = usnic_fwd_devcmd(filter_hndl->ufdev, filter_hndl->vnic_idx, + status = usnic_fwd_devcmd(flow->ufdev, flow->vnic_idx, CMD_DEL_FILTER, &a0, &a1); if (status) { if (status == ERR_EINVAL) { usnic_dbg("Filter %u already deleted for VF Idx %u pf: %s status: %d", - filter_hndl->id, filter_hndl->vnic_idx, - netdev_name(netdev), status); - status = 0; - kfree(filter_hndl); + flow->flow_id, flow->vnic_idx, + flow->ufdev->name, status); } else { usnic_err("PF %s VF Idx %u Filter: %u FILTER DELETE failed with status %d", - netdev_name(netdev), - filter_hndl->vnic_idx, filter_hndl->id, - status); + flow->ufdev->name, flow->vnic_idx, + flow->flow_id, status); } + status = 0; + /* + * Log the error and fake success to the caller because if + * a flow fails to be deleted in the firmware, it is an + * unrecoverable error. + */ } else { usnic_dbg("PF %s VF Idx %u Filter: %u FILTER DELETED", - netdev_name(netdev), filter_hndl->vnic_idx, - filter_hndl->id); - kfree(filter_hndl); + flow->ufdev->name, flow->vnic_idx, + flow->flow_id); } + kfree(flow); return status; } -int usnic_fwd_enable_rq(struct usnic_fwd_dev *ufdev, int vnic_idx, int rq_idx) +int usnic_fwd_enable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx) { int status; struct net_device *pf_netdev; u64 a0, a1; pf_netdev = ufdev->netdev; - a0 = rq_idx; + a0 = qp_idx; a1 = CMD_QP_RQWQ; - status = usnic_fwd_devcmd(ufdev, vnic_idx, CMD_QP_ENABLE, &a0, &a1); - + status = usnic_fwd_devcmd(ufdev, vnic_idx, CMD_QP_ENABLE, + &a0, &a1); if (status) { usnic_err("PF %s VNIC Index %u RQ Index: %u ENABLE Failed with status %d", netdev_name(pf_netdev), vnic_idx, - rq_idx, + qp_idx, status); } else { usnic_dbg("PF %s VNIC Index %u RQ Index: %u ENABLED", netdev_name(pf_netdev), - vnic_idx, rq_idx); + vnic_idx, qp_idx); } return status; } -int usnic_fwd_disable_rq(struct usnic_fwd_dev *ufdev, int vnic_idx, int rq_idx) +int usnic_fwd_disable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx) { int status; u64 a0, a1; struct net_device *pf_netdev; pf_netdev = ufdev->netdev; - a0 = rq_idx; + a0 = qp_idx; a1 = CMD_QP_RQWQ; - status = usnic_fwd_devcmd(ufdev, vnic_idx, CMD_QP_DISABLE, &a0, &a1); - + status = usnic_fwd_devcmd(ufdev, vnic_idx, CMD_QP_DISABLE, + &a0, &a1); if (status) { usnic_err("PF %s VNIC Index %u RQ Index: %u DISABLE Failed with status %d", netdev_name(pf_netdev), vnic_idx, - rq_idx, + qp_idx, status); } else { usnic_dbg("PF %s VNIC Index %u RQ Index: %u DISABLED", netdev_name(pf_netdev), vnic_idx, - rq_idx); + qp_idx); } return status; diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.h b/drivers/infiniband/hw/usnic/usnic_fwd.h index 6973901da8af..b146eb97d82c 100644 --- a/drivers/infiniband/hw/usnic/usnic_fwd.h +++ b/drivers/infiniband/hw/usnic/usnic_fwd.h @@ -20,39 +20,73 @@ #define USNIC_FWD_H_ #include +#include #include -#include +#include #include "usnic_abi.h" +#include "usnic_common_pkt_hdr.h" #include "vnic_devcmd.h" struct usnic_fwd_dev { struct pci_dev *pdev; struct net_device *netdev; spinlock_t lock; + /* + * The following fields can be read directly off the device. + * However, they should be set by a accessor function, except name, + * which cannot be changed. + */ + bool link_up; + char mac[ETH_ALEN]; + unsigned int mtu; + char name[IFNAMSIZ+1]; }; -struct usnic_fwd_filter { - enum usnic_transport_type transport; - u16 port_num; -}; - -struct usnic_fwd_filter_hndl { - enum filter_type type; - u32 id; - u32 vnic_idx; +struct usnic_fwd_flow { + uint32_t flow_id; struct usnic_fwd_dev *ufdev; - struct list_head link; - struct usnic_fwd_filter *filter; + unsigned int vnic_idx; +}; + +struct usnic_filter_action { + int vnic_idx; + struct filter_action action; }; struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev); void usnic_fwd_dev_free(struct usnic_fwd_dev *ufdev); -int usnic_fwd_add_usnic_filter(struct usnic_fwd_dev *ufdev, int vnic_idx, - int rq_idx, struct usnic_fwd_filter *filter, - struct usnic_fwd_filter_hndl **filter_hndl); -int usnic_fwd_del_filter(struct usnic_fwd_filter_hndl *filter_hndl); -int usnic_fwd_enable_rq(struct usnic_fwd_dev *ufdev, int vnic_idx, int rq_idx); -int usnic_fwd_disable_rq(struct usnic_fwd_dev *ufdev, int vnic_idx, int rq_idx); + +void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN]); +void usnic_fwd_carrier_up(struct usnic_fwd_dev *ufdev); +void usnic_fwd_carrier_down(struct usnic_fwd_dev *ufdev); +void usnic_fwd_set_mtu(struct usnic_fwd_dev *ufdev, unsigned int mtu); + +/* + * Allocate a flow on this forwarding device. Whoever calls this function, + * must monitor netdev events on ufdev's netdevice. If NETDEV_REBOOT or + * NETDEV_DOWN is seen, flow will no longer function and must be + * immediately freed by calling usnic_dealloc_flow. + */ +struct usnic_fwd_flow* +usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter, + struct usnic_filter_action *action); +int usnic_fwd_dealloc_flow(struct usnic_fwd_flow *flow); +int usnic_fwd_enable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx); +int usnic_fwd_disable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx); + +static inline void usnic_fwd_init_usnic_filter(struct filter *filter, + uint32_t usnic_id) +{ + filter->type = FILTER_USNIC_ID; + filter->u.usnic.ethtype = USNIC_ROCE_ETHERTYPE; + filter->u.usnic.flags = FILTER_FIELD_USNIC_ETHTYPE | + FILTER_FIELD_USNIC_ID | + FILTER_FIELD_USNIC_PROTO; + filter->u.usnic.proto_version = (USNIC_ROCE_GRH_VER << + USNIC_ROCE_GRH_VER_SHIFT) | + USNIC_PROTO_VER; + filter->u.usnic.usnic_id = usnic_id; +} #endif /* !USNIC_FWD_H_ */ From 8af94ac66a4d53a96278ecbb9ef2e8592f0d9ba3 Mon Sep 17 00:00:00 2001 From: Upinder Malhi Date: Thu, 9 Jan 2014 14:48:08 -0800 Subject: [PATCH 07/61] IB/usnic: Port over main.c and verbs.c to the usnic_fwd.h This patch ports usnic_ib_main.c, usnic_ib_verbs.c and usnic_ib.h to the new interface of usnic_fwd.h. Signed-off-by: Upinder Malhi Signed-off-by: Roland Dreier --- drivers/infiniband/hw/usnic/usnic_ib.h | 3 -- drivers/infiniband/hw/usnic/usnic_ib_main.c | 38 ++++++++++---------- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 37 +++++++++---------- 3 files changed, 39 insertions(+), 39 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_ib.h b/drivers/infiniband/hw/usnic/usnic_ib.h index 3511c8521f30..92d9d9a60b3b 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib.h +++ b/drivers/infiniband/hw/usnic/usnic_ib.h @@ -57,13 +57,10 @@ struct usnic_ib_dev { struct pci_dev *pdev; struct net_device *netdev; struct usnic_fwd_dev *ufdev; - bool link_up; struct list_head ib_dev_link; struct list_head vf_dev_list; struct list_head ctx_list; struct mutex usdev_lock; - char mac[ETH_ALEN]; - unsigned int mtu; /* provisioning information */ struct kref vf_cnt; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index dc09c12435b9..6ab0b41be9c5 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -150,15 +150,17 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev, case NETDEV_UP: case NETDEV_DOWN: case NETDEV_CHANGE: - if (!us_ibdev->link_up && netif_carrier_ok(netdev)) { - us_ibdev->link_up = true; + if (!us_ibdev->ufdev->link_up && + netif_carrier_ok(netdev)) { + usnic_fwd_carrier_up(us_ibdev->ufdev); usnic_info("Link UP on %s\n", us_ibdev->ib_dev.name); ib_event.event = IB_EVENT_PORT_ACTIVE; ib_event.device = &us_ibdev->ib_dev; ib_event.element.port_num = 1; ib_dispatch_event(&ib_event); - } else if (us_ibdev->link_up && !netif_carrier_ok(netdev)) { - us_ibdev->link_up = false; + } else if (us_ibdev->ufdev->link_up && + !netif_carrier_ok(netdev)) { + usnic_fwd_carrier_down(us_ibdev->ufdev); usnic_info("Link DOWN on %s\n", us_ibdev->ib_dev.name); usnic_ib_qp_grp_modify_active_to_err(us_ibdev); ib_event.event = IB_EVENT_PORT_ERR; @@ -172,17 +174,16 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev, } break; case NETDEV_CHANGEADDR: - if (!memcmp(us_ibdev->mac, netdev->dev_addr, - sizeof(us_ibdev->mac))) { + if (!memcmp(us_ibdev->ufdev->mac, netdev->dev_addr, + sizeof(us_ibdev->ufdev->mac))) { usnic_dbg("Ignorning addr change on %s\n", us_ibdev->ib_dev.name); } else { usnic_info(" %s old mac: %pM new mac: %pM\n", us_ibdev->ib_dev.name, - us_ibdev->mac, + us_ibdev->ufdev->mac, netdev->dev_addr); - memcpy(us_ibdev->mac, netdev->dev_addr, - sizeof(us_ibdev->mac)); + usnic_fwd_set_mac(us_ibdev->ufdev, netdev->dev_addr); usnic_ib_qp_grp_modify_active_to_err(us_ibdev); ib_event.event = IB_EVENT_GID_CHANGE; ib_event.device = &us_ibdev->ib_dev; @@ -192,11 +193,11 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev, break; case NETDEV_CHANGEMTU: - if (us_ibdev->mtu != netdev->mtu) { + if (us_ibdev->ufdev->mtu != netdev->mtu) { usnic_info("MTU Change on %s old: %u new: %u\n", us_ibdev->ib_dev.name, - us_ibdev->mtu, netdev->mtu); - us_ibdev->mtu = netdev->mtu; + us_ibdev->ufdev->mtu, netdev->mtu); + usnic_fwd_set_mtu(us_ibdev->ufdev, netdev->mtu); usnic_ib_qp_grp_modify_active_to_err(us_ibdev); } else { usnic_dbg("Ignoring MTU change on %s\n", @@ -320,18 +321,19 @@ static void *usnic_ib_device_add(struct pci_dev *dev) if (ib_register_device(&us_ibdev->ib_dev, NULL)) goto err_fwd_dealloc; - us_ibdev->link_up = netif_carrier_ok(us_ibdev->netdev); - us_ibdev->mtu = us_ibdev->netdev->mtu; - memcpy(&us_ibdev->mac, us_ibdev->netdev->dev_addr, - sizeof(us_ibdev->mac)); - usnic_mac_to_gid(us_ibdev->netdev->perm_addr, &gid.raw[0]); + usnic_fwd_set_mtu(us_ibdev->ufdev, us_ibdev->netdev->mtu); + usnic_fwd_set_mac(us_ibdev->ufdev, us_ibdev->netdev->dev_addr); + if (netif_carrier_ok(us_ibdev->netdev)) + usnic_fwd_carrier_up(us_ibdev->ufdev); + memcpy(&us_ibdev->ib_dev.node_guid, &gid.global.interface_id, sizeof(gid.global.interface_id)); kref_init(&us_ibdev->vf_cnt); usnic_info("Added ibdev: %s netdev: %s with mac %pM Link: %u MTU: %u\n", us_ibdev->ib_dev.name, netdev_name(us_ibdev->netdev), - us_ibdev->mac, us_ibdev->link_up, us_ibdev->mtu); + us_ibdev->ufdev->mac, us_ibdev->ufdev->link_up, + us_ibdev->ufdev->mtu); return us_ibdev; err_fwd_dealloc: diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 8f8dfa2672b0..2217bc0a6bbe 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -47,6 +47,7 @@ static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp, struct pci_dev *pdev; struct vnic_dev_bar *bar; struct usnic_vnic_res_chunk *chunk; + struct usnic_ib_qp_grp_flow *default_flow; int i, err; memset(&resp, 0, sizeof(resp)); @@ -69,7 +70,6 @@ static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp, resp.vfid = usnic_vnic_get_index(qp_grp->vf->vnic); resp.bar_bus_addr = bar->bus_addr; resp.bar_len = bar->len; - resp.transport = qp_grp->transport; chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ); if (IS_ERR_OR_NULL(chunk)) { @@ -113,6 +113,10 @@ static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp, for (i = 0; i < chunk->cnt; i++) resp.cq_idx[i] = chunk->res[i]->vnic_idx; + default_flow = list_first_entry(&qp_grp->flows_lst, + struct usnic_ib_qp_grp_flow, link); + resp.transport = default_flow->trans_type; + err = ib_copy_to_udata(udata, &resp, sizeof(resp)); if (err) { usnic_err("Failed to copy udata for %s", us_ibdev->ib_dev.name); @@ -125,7 +129,7 @@ static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp, static struct usnic_ib_qp_grp* find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev, struct usnic_ib_pd *pd, - enum usnic_transport_type transport, + struct usnic_transport_spec *trans_spec, struct usnic_vnic_res_spec *res_spec) { struct usnic_ib_vf *vf; @@ -141,11 +145,6 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev, return NULL; } - if (!us_ibdev->link_up) { - usnic_info("Cannot allocate qp b/c PF link is down\n"); - return NULL; - } - if (usnic_ib_share_vf) { /* Try to find resouces on a used vf which is in pd */ dev_list = usnic_uiom_get_dev_list(pd->umem_pd); @@ -189,7 +188,7 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev, } qp_grp = usnic_ib_qp_grp_create(us_ibdev->ufdev, vf, pd, res_spec, - transport); + trans_spec); spin_unlock(&vf->lock); if (IS_ERR_OR_NULL(qp_grp)) { usnic_err("Failed to allocate qp_grp\n"); @@ -253,7 +252,7 @@ int usnic_ib_query_device(struct ib_device *ibdev, us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info); us_ibdev->netdev->ethtool_ops->get_settings(us_ibdev->netdev, &cmd); memset(props, 0, sizeof(*props)); - usnic_mac_to_gid(us_ibdev->mac, &gid.raw[0]); + usnic_mac_to_gid(us_ibdev->ufdev->mac, &gid.raw[0]); memcpy(&props->sys_image_guid, &gid.global.interface_id, sizeof(gid.global.interface_id)); usnic_ib_fw_string_to_u64(&info.fw_version[0], &props->fw_ver); @@ -311,7 +310,7 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port, props->sm_lid = 0; props->sm_sl = 0; - if (us_ibdev->link_up) { + if (us_ibdev->ufdev->link_up) { props->state = IB_PORT_ACTIVE; props->phys_state = 5; } else { @@ -327,9 +326,9 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port, eth_speed_to_ib_speed(cmd.speed, &props->active_speed, &props->active_width); props->max_mtu = IB_MTU_4096; - props->active_mtu = iboe_get_mtu(us_ibdev->mtu); + props->active_mtu = iboe_get_mtu(us_ibdev->ufdev->mtu); /* Userspace will adjust for hdrs */ - props->max_msg_sz = us_ibdev->mtu; + props->max_msg_sz = us_ibdev->ufdev->mtu; props->max_vl_num = 1; mutex_unlock(&us_ibdev->usdev_lock); @@ -386,7 +385,7 @@ int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index, mutex_lock(&us_ibdev->usdev_lock); memset(&(gid->raw[0]), 0, sizeof(gid->raw)); - usnic_mac_to_gid(us_ibdev->mac, &gid->raw[0]); + usnic_mac_to_gid(us_ibdev->ufdev->mac, &gid->raw[0]); mutex_unlock(&us_ibdev->usdev_lock); return 0; @@ -445,6 +444,7 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, struct usnic_ib_ucontext *ucontext; int cq_cnt; struct usnic_vnic_res_spec res_spec; + struct usnic_transport_spec trans_spec; usnic_dbg("\n"); @@ -457,12 +457,14 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, return ERR_PTR(-EINVAL); } + memset(&trans_spec, 0, sizeof(trans_spec)); + trans_spec.trans_type = USNIC_TRANSPORT_ROCE_CUSTOM; mutex_lock(&us_ibdev->usdev_lock); - cq_cnt = (init_attr->send_cq == init_attr->recv_cq) ? 1 : 2, - res_spec = min_transport_spec[USNIC_DEFAULT_TRANSPORT]; + cq_cnt = (init_attr->send_cq == init_attr->recv_cq) ? 1 : 2; + res_spec = min_transport_spec[trans_spec.trans_type]; usnic_vnic_res_spec_update(&res_spec, USNIC_VNIC_RES_TYPE_CQ, cq_cnt); qp_grp = find_free_vf_and_create_qp_grp(us_ibdev, to_upd(pd), - USNIC_DEFAULT_TRANSPORT, + &trans_spec, &res_spec); if (IS_ERR_OR_NULL(qp_grp)) { err = (qp_grp ? PTR_ERR(qp_grp) : -ENOMEM); @@ -522,8 +524,7 @@ int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, /* TODO: Future Support All States */ mutex_lock(&qp_grp->vf->pf->usdev_lock); if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT) { - status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_INIT, - &qp_grp->filters[DFLT_FILTER_IDX]); + status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_INIT, NULL); } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTR) { status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTR, NULL); } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTS) { From 256d6a6ac52ee02e897cec88ecc96c3ae7f9cb88 Mon Sep 17 00:00:00 2001 From: Upinder Malhi Date: Thu, 9 Jan 2014 14:48:09 -0800 Subject: [PATCH 08/61] IB/usnic: Port over usnic_ib_qp_grp.[hc] to new usnic_fwd.h This patch ports usnic_ib_qp_grp.[hc] to the new interface of usnic_fwd.h. Signed-off-by: Upinder Malhi Signed-off-by: Roland Dreier --- drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c | 357 ++++++++++++------ drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h | 24 +- 2 files changed, 254 insertions(+), 127 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c index 5a873f5c2794..2b7e0a1a07b4 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c @@ -15,6 +15,7 @@ * SOFTWARE. * */ +#include #include #include #include @@ -27,6 +28,8 @@ #include "usnic_ib_sysfs.h" #include "usnic_transport.h" +#define DFLT_RQ_IDX 0 + const char *usnic_ib_qp_grp_state_to_string(enum ib_qp_state state) { switch (state) { @@ -58,83 +61,31 @@ int usnic_ib_qp_grp_dump_hdr(char *buf, int buf_sz) int usnic_ib_qp_grp_dump_rows(void *obj, char *buf, int buf_sz) { struct usnic_ib_qp_grp *qp_grp = obj; - struct usnic_fwd_filter_hndl *default_filter_hndl; + struct usnic_ib_qp_grp_flow *default_flow; if (obj) { - default_filter_hndl = list_first_entry(&qp_grp->filter_hndls, - struct usnic_fwd_filter_hndl, link); + default_flow = list_first_entry(&qp_grp->flows_lst, + struct usnic_ib_qp_grp_flow, link); return scnprintf(buf, buf_sz, "|%d\t|%s\t|%d\t|%hu\t|%d", qp_grp->ibqp.qp_num, usnic_ib_qp_grp_state_to_string( qp_grp->state), qp_grp->owner_pid, usnic_vnic_get_index(qp_grp->vf->vnic), - default_filter_hndl->id); + default_flow->flow->flow_id); } else { return scnprintf(buf, buf_sz, "|N/A\t|N/A\t|N/A\t|N/A\t|N/A"); } } -static int add_fwd_filter(struct usnic_ib_qp_grp *qp_grp, - struct usnic_fwd_filter *fwd_filter) +static struct usnic_vnic_res_chunk * +get_qp_res_chunk(struct usnic_ib_qp_grp *qp_grp) { - struct usnic_fwd_filter_hndl *filter_hndl; - int status; - struct usnic_vnic_res_chunk *chunk; - int rq_idx; - lockdep_assert_held(&qp_grp->lock); - - chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ); - if (IS_ERR_OR_NULL(chunk) || chunk->cnt < 1) { - usnic_err("Failed to get RQ info for qp_grp %u\n", - qp_grp->grp_id); - return -EFAULT; - } - - rq_idx = chunk->res[0]->vnic_idx; - - switch (qp_grp->transport) { - case USNIC_TRANSPORT_ROCE_CUSTOM: - status = usnic_fwd_add_usnic_filter(qp_grp->ufdev, - usnic_vnic_get_index(qp_grp->vf->vnic), - rq_idx, - fwd_filter, - &filter_hndl); - break; - default: - usnic_err("Unable to install filter for qp_grp %u for transport %d", - qp_grp->grp_id, qp_grp->transport); - status = -EINVAL; - } - - if (status) - return status; - - list_add_tail(&filter_hndl->link, &qp_grp->filter_hndls); - return 0; -} - -static int del_all_filters(struct usnic_ib_qp_grp *qp_grp) -{ - int err, status; - struct usnic_fwd_filter_hndl *filter_hndl, *tmp; - - lockdep_assert_held(&qp_grp->lock); - - status = 0; - - list_for_each_entry_safe(filter_hndl, tmp, - &qp_grp->filter_hndls, link) { - list_del(&filter_hndl->link); - err = usnic_fwd_del_filter(filter_hndl); - if (err) { - usnic_err("Failed to delete filter %u of qp_grp %d\n", - filter_hndl->id, qp_grp->grp_id); - } - status |= err; - } - - return status; + /* + * The QP res chunk, used to derive qp indices, + * are just indices of the RQs + */ + return usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ); } static int enable_qp_grp(struct usnic_ib_qp_grp *qp_grp) @@ -149,22 +100,20 @@ static int enable_qp_grp(struct usnic_ib_qp_grp *qp_grp) vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic); - res_chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ); + res_chunk = get_qp_res_chunk(qp_grp); if (IS_ERR_OR_NULL(res_chunk)) { - usnic_err("Unable to get %s with err %ld\n", - usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ), - PTR_ERR(res_chunk)); + usnic_err("Unable to get qp res with err %ld\n", + PTR_ERR(res_chunk)); return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM; } for (i = 0; i < res_chunk->cnt; i++) { res = res_chunk->res[i]; - status = usnic_fwd_enable_rq(qp_grp->ufdev, vnic_idx, + status = usnic_fwd_enable_qp(qp_grp->ufdev, vnic_idx, res->vnic_idx); if (status) { - usnic_err("Failed to enable rq %d of %s:%d\n with err %d\n", - res->vnic_idx, - netdev_name(qp_grp->ufdev->netdev), + usnic_err("Failed to enable qp %d of %s:%d\n with err %d\n", + res->vnic_idx, qp_grp->ufdev->name, vnic_idx, status); goto out_err; } @@ -175,7 +124,7 @@ static int enable_qp_grp(struct usnic_ib_qp_grp *qp_grp) out_err: for (i--; i >= 0; i--) { res = res_chunk->res[i]; - usnic_fwd_disable_rq(qp_grp->ufdev, vnic_idx, + usnic_fwd_disable_qp(qp_grp->ufdev, vnic_idx, res->vnic_idx); } @@ -192,22 +141,21 @@ static int disable_qp_grp(struct usnic_ib_qp_grp *qp_grp) lockdep_assert_held(&qp_grp->lock); vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic); - res_chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ); + res_chunk = get_qp_res_chunk(qp_grp); if (IS_ERR_OR_NULL(res_chunk)) { - usnic_err("Unable to get %s with err %ld\n", - usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ), + usnic_err("Unable to get qp res with err %ld\n", PTR_ERR(res_chunk)); return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM; } for (i = 0; i < res_chunk->cnt; i++) { res = res_chunk->res[i]; - status = usnic_fwd_disable_rq(qp_grp->ufdev, vnic_idx, + status = usnic_fwd_disable_qp(qp_grp->ufdev, vnic_idx, res->vnic_idx); if (status) { usnic_err("Failed to disable rq %d of %s:%d\n with err %d\n", res->vnic_idx, - netdev_name(qp_grp->ufdev->netdev), + qp_grp->ufdev->name, vnic_idx, status); } } @@ -216,17 +164,148 @@ static int disable_qp_grp(struct usnic_ib_qp_grp *qp_grp) } +static int init_filter_action(struct usnic_ib_qp_grp *qp_grp, + struct usnic_filter_action *uaction) +{ + struct usnic_vnic_res_chunk *res_chunk; + + res_chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ); + if (IS_ERR_OR_NULL(res_chunk)) { + usnic_err("Unable to get %s with err %ld\n", + usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ), + PTR_ERR(res_chunk)); + return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM; + } + + uaction->vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic); + uaction->action.type = FILTER_ACTION_RQ_STEERING; + uaction->action.u.rq_idx = res_chunk->res[DFLT_RQ_IDX]->vnic_idx; + + return 0; +} + +static struct usnic_ib_qp_grp_flow* +create_roce_custom_flow(struct usnic_ib_qp_grp *qp_grp, + struct usnic_transport_spec *trans_spec) +{ + uint16_t port_num; + int err; + struct filter filter; + struct usnic_filter_action uaction; + struct usnic_ib_qp_grp_flow *qp_flow; + struct usnic_fwd_flow *flow; + enum usnic_transport_type trans_type; + + trans_type = trans_spec->trans_type; + port_num = trans_spec->usnic_roce.port_num; + + /* Reserve Port */ + port_num = usnic_transport_rsrv_port(trans_type, port_num); + if (port_num == 0) + return ERR_PTR(-EINVAL); + + /* Create Flow */ + usnic_fwd_init_usnic_filter(&filter, port_num); + err = init_filter_action(qp_grp, &uaction); + if (err) + goto out_unreserve_port; + + flow = usnic_fwd_alloc_flow(qp_grp->ufdev, &filter, &uaction); + if (IS_ERR_OR_NULL(flow)) { + usnic_err("Unable to alloc flow failed with err %ld\n", + PTR_ERR(flow)); + err = (flow) ? PTR_ERR(flow) : -EFAULT; + goto out_unreserve_port; + } + + /* Create Flow Handle */ + qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC); + if (IS_ERR_OR_NULL(qp_flow)) { + err = (qp_flow) ? PTR_ERR(qp_flow) : -ENOMEM; + goto out_dealloc_flow; + } + qp_flow->flow = flow; + qp_flow->trans_type = trans_type; + qp_flow->usnic_roce.port_num = port_num; + qp_flow->qp_grp = qp_grp; + return qp_flow; + +out_dealloc_flow: + usnic_fwd_dealloc_flow(flow); +out_unreserve_port: + usnic_transport_unrsrv_port(trans_type, port_num); + return ERR_PTR(err); +} + +static void release_roce_custom_flow(struct usnic_ib_qp_grp_flow *qp_flow) +{ + usnic_fwd_dealloc_flow(qp_flow->flow); + usnic_transport_unrsrv_port(qp_flow->trans_type, + qp_flow->usnic_roce.port_num); + kfree(qp_flow); +} + +static struct usnic_ib_qp_grp_flow* +create_and_add_flow(struct usnic_ib_qp_grp *qp_grp, + struct usnic_transport_spec *trans_spec) +{ + struct usnic_ib_qp_grp_flow *qp_flow; + enum usnic_transport_type trans_type; + + trans_type = trans_spec->trans_type; + switch (trans_type) { + case USNIC_TRANSPORT_ROCE_CUSTOM: + qp_flow = create_roce_custom_flow(qp_grp, trans_spec); + break; + default: + usnic_err("Unsupported transport %u\n", + trans_spec->trans_type); + return ERR_PTR(-EINVAL); + } + + if (!IS_ERR_OR_NULL(qp_flow)) + list_add_tail(&qp_flow->link, &qp_grp->flows_lst); + + + return qp_flow; +} + +static void release_and_remove_flow(struct usnic_ib_qp_grp_flow *qp_flow) +{ + list_del(&qp_flow->link); + + switch (qp_flow->trans_type) { + case USNIC_TRANSPORT_ROCE_CUSTOM: + release_roce_custom_flow(qp_flow); + break; + default: + WARN(1, "Unsupported transport %u\n", + qp_flow->trans_type); + break; + } +} + +static void release_and_remove_all_flows(struct usnic_ib_qp_grp *qp_grp) +{ + struct usnic_ib_qp_grp_flow *qp_flow, *tmp; + list_for_each_entry_safe(qp_flow, tmp, &qp_grp->flows_lst, link) + release_and_remove_flow(qp_flow); +} + int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp, enum ib_qp_state new_state, - struct usnic_fwd_filter *fwd_filter) + void *data) { int status = 0; int vnic_idx; struct ib_event ib_event; enum ib_qp_state old_state; + struct usnic_transport_spec *trans_spec; + struct usnic_ib_qp_grp_flow *qp_flow; old_state = qp_grp->state; vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic); + trans_spec = (struct usnic_transport_spec *) data; spin_lock(&qp_grp->lock); switch (new_state) { @@ -236,13 +315,14 @@ int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp, /* NO-OP */ break; case IB_QPS_INIT: - status = del_all_filters(qp_grp); + release_and_remove_all_flows(qp_grp); + status = 0; break; case IB_QPS_RTR: case IB_QPS_RTS: case IB_QPS_ERR: status = disable_qp_grp(qp_grp); - status &= del_all_filters(qp_grp); + release_and_remove_all_flows(qp_grp); break; default: status = -EINVAL; @@ -251,10 +331,35 @@ int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp, case IB_QPS_INIT: switch (old_state) { case IB_QPS_RESET: - status = add_fwd_filter(qp_grp, fwd_filter); + if (trans_spec) { + qp_flow = create_and_add_flow(qp_grp, + trans_spec); + if (IS_ERR_OR_NULL(qp_flow)) { + status = (qp_flow) ? PTR_ERR(qp_flow) : -EFAULT; + break; + } + } else { + /* + * Optional to specify filters. + */ + status = 0; + } break; case IB_QPS_INIT: - status = add_fwd_filter(qp_grp, fwd_filter); + if (trans_spec) { + qp_flow = create_and_add_flow(qp_grp, + trans_spec); + if (IS_ERR_OR_NULL(qp_flow)) { + status = (qp_flow) ? PTR_ERR(qp_flow) : -EFAULT; + break; + } + } else { + /* + * Doesn't make sense to go into INIT state + * from INIT state w/o adding filters. + */ + status = -EINVAL; + } break; case IB_QPS_RTR: status = disable_qp_grp(qp_grp); @@ -295,14 +400,14 @@ int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp, qp_grp->ibqp.qp_context); break; case IB_QPS_INIT: - status = del_all_filters(qp_grp); + release_and_remove_all_flows(qp_grp); qp_grp->ibqp.event_handler(&ib_event, qp_grp->ibqp.qp_context); break; case IB_QPS_RTR: case IB_QPS_RTS: status = disable_qp_grp(qp_grp); - status &= del_all_filters(qp_grp); + release_and_remove_all_flows(qp_grp); qp_grp->ibqp.event_handler(&ib_event, qp_grp->ibqp.qp_context); break; @@ -435,16 +540,33 @@ static void log_spec(struct usnic_vnic_res_spec *res_spec) usnic_dbg("%s\n", buf); } +static int qp_grp_id_from_flow(struct usnic_ib_qp_grp_flow *qp_flow, + uint32_t *id) +{ + enum usnic_transport_type trans_type = qp_flow->trans_type; + + switch (trans_type) { + case USNIC_TRANSPORT_ROCE_CUSTOM: + *id = qp_flow->usnic_roce.port_num; + break; + default: + usnic_err("Unsupported transport %u\n", trans_type); + return -EINVAL; + } + + return 0; +} + struct usnic_ib_qp_grp * -usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, - struct usnic_ib_vf *vf, +usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf, struct usnic_ib_pd *pd, struct usnic_vnic_res_spec *res_spec, - enum usnic_transport_type transport) + struct usnic_transport_spec *transport_spec) { struct usnic_ib_qp_grp *qp_grp; - u16 port_num; int err; + enum usnic_transport_type transport = transport_spec->trans_type; + struct usnic_ib_qp_grp_flow *qp_flow; lockdep_assert_held(&vf->lock); @@ -457,13 +579,6 @@ usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, return ERR_PTR(err); } - port_num = usnic_transport_rsrv_port(transport, 0); - if (!port_num) { - usnic_err("Unable to allocate port for %s\n", - netdev_name(ufdev->netdev)); - return ERR_PTR(-EINVAL); - } - qp_grp = kzalloc(sizeof(*qp_grp), GFP_ATOMIC); if (!qp_grp) { usnic_err("Unable to alloc qp_grp - Out of memory\n"); @@ -477,53 +592,59 @@ usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, PTR_ERR(qp_grp->res_chunk_list) : -ENOMEM; usnic_err("Unable to alloc res for %d with err %d\n", qp_grp->grp_id, err); - goto out_free_port; + goto out_free_qp_grp; } - INIT_LIST_HEAD(&qp_grp->filter_hndls); - spin_lock_init(&qp_grp->lock); - qp_grp->ufdev = ufdev; - qp_grp->transport = transport; - qp_grp->filters[DFLT_FILTER_IDX].transport = transport; - qp_grp->filters[DFLT_FILTER_IDX].port_num = port_num; - qp_grp->state = IB_QPS_RESET; - qp_grp->owner_pid = current->pid; - - /* qp_num is same as default filter port_num */ - qp_grp->ibqp.qp_num = qp_grp->filters[DFLT_FILTER_IDX].port_num; - qp_grp->grp_id = qp_grp->ibqp.qp_num; - err = qp_grp_and_vf_bind(vf, pd, qp_grp); if (err) - goto out_free_port; + goto out_free_res; + + INIT_LIST_HEAD(&qp_grp->flows_lst); + spin_lock_init(&qp_grp->lock); + qp_grp->ufdev = ufdev; + qp_grp->state = IB_QPS_RESET; + qp_grp->owner_pid = current->pid; + + qp_flow = create_and_add_flow(qp_grp, transport_spec); + if (IS_ERR_OR_NULL(qp_flow)) { + usnic_err("Unable to create and add flow with err %ld\n", + PTR_ERR(qp_flow)); + err = (qp_flow) ? PTR_ERR(qp_flow) : -EFAULT; + goto out_qp_grp_vf_unbind; + } + + err = qp_grp_id_from_flow(qp_flow, &qp_grp->grp_id); + if (err) + goto out_release_flow; + qp_grp->ibqp.qp_num = qp_grp->grp_id; usnic_ib_sysfs_qpn_add(qp_grp); return qp_grp; -out_free_port: +out_release_flow: + release_and_remove_flow(qp_flow); +out_qp_grp_vf_unbind: + qp_grp_and_vf_unbind(qp_grp); +out_free_res: + free_qp_grp_res(qp_grp->res_chunk_list); +out_free_qp_grp: kfree(qp_grp); - usnic_transport_unrsrv_port(transport, port_num); return ERR_PTR(err); } void usnic_ib_qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp) { - u16 default_port_num; - enum usnic_transport_type transport; WARN_ON(qp_grp->state != IB_QPS_RESET); lockdep_assert_held(&qp_grp->vf->lock); - transport = qp_grp->filters[DFLT_FILTER_IDX].transport; - default_port_num = qp_grp->filters[DFLT_FILTER_IDX].port_num; - usnic_ib_sysfs_qpn_remove(qp_grp); qp_grp_and_vf_unbind(qp_grp); + release_and_remove_all_flows(qp_grp); free_qp_grp_res(qp_grp->res_chunk_list); kfree(qp_grp); - usnic_transport_unrsrv_port(transport, default_port_num); } struct usnic_vnic_res_chunk* diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h index 6416a956dc4a..570fea2e2cb9 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h @@ -26,9 +26,6 @@ #include "usnic_fwd.h" #include "usnic_vnic.h" -#define MAX_QP_GRP_FILTERS 10 -#define DFLT_FILTER_IDX 0 - /* * The qp group struct represents all the hw resources needed to present a ib_qp */ @@ -38,11 +35,8 @@ struct usnic_ib_qp_grp { int grp_id; struct usnic_fwd_dev *ufdev; - short unsigned filter_cnt; - struct usnic_fwd_filter filters[MAX_QP_GRP_FILTERS]; - struct list_head filter_hndls; - enum usnic_transport_type transport; struct usnic_ib_ucontext *ctx; + struct list_head flows_lst; struct usnic_vnic_res_chunk **res_chunk_list; @@ -55,6 +49,18 @@ struct usnic_ib_qp_grp { struct kobject kobj; }; +struct usnic_ib_qp_grp_flow { + struct usnic_fwd_flow *flow; + enum usnic_transport_type trans_type; + union { + struct { + uint16_t port_num; + } usnic_roce; + }; + struct usnic_ib_qp_grp *qp_grp; + struct list_head link; +}; + static const struct usnic_vnic_res_spec min_transport_spec[USNIC_TRANSPORT_MAX] = { { /*USNIC_TRANSPORT_UNKNOWN*/ @@ -79,11 +85,11 @@ struct usnic_ib_qp_grp * usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf, struct usnic_ib_pd *pd, struct usnic_vnic_res_spec *res_spec, - enum usnic_transport_type transport); + struct usnic_transport_spec *trans_spec); void usnic_ib_qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp); int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp, enum ib_qp_state new_state, - struct usnic_fwd_filter *fwd_filter); + void *data); struct usnic_vnic_res_chunk *usnic_ib_qp_grp_get_chunk(struct usnic_ib_qp_grp *qp_grp, enum usnic_vnic_res_type type); From 60b215e8b267f911751a043de63181dab1b69706 Mon Sep 17 00:00:00 2001 From: Upinder Malhi Date: Thu, 9 Jan 2014 14:48:10 -0800 Subject: [PATCH 09/61] IB/usnic: Port over sysfs to new usnic_fwd.h This patch ports usnic_ib_sysfs.c to the new interface of usnic_fwd.h. Signed-off-by: Upinder Malhi Signed-off-by: Roland Dreier --- drivers/infiniband/hw/usnic/usnic_ib_sysfs.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c index bad985e9df08..3e5884232342 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c @@ -104,7 +104,7 @@ usnic_ib_show_config(struct device *device, struct device_attribute *attr, PCI_SLOT(us_ibdev->pdev->devfn), PCI_FUNC(us_ibdev->pdev->devfn), netdev_name(us_ibdev->netdev), - us_ibdev->mac, + us_ibdev->ufdev->mac, atomic_read(&us_ibdev->vf_cnt.refcount)); UPDATE_PTR_LEFT(n, ptr, left); @@ -239,21 +239,17 @@ static ssize_t summary_show(struct usnic_ib_qp_grp *qp_grp, char *buf) int left; char *ptr; struct usnic_vnic_res_chunk *res_chunk; - struct usnic_fwd_filter_hndl *default_filter_hndl; struct usnic_vnic_res *vnic_res; left = PAGE_SIZE; ptr = buf; - default_filter_hndl = list_first_entry(&qp_grp->filter_hndls, - struct usnic_fwd_filter_hndl, link); n = scnprintf(ptr, left, - "QPN: %d State: (%s) PID: %u VF Idx: %hu Filter ID: 0x%x ", + "QPN: %d State: (%s) PID: %u VF Idx: %hu ", qp_grp->ibqp.qp_num, usnic_ib_qp_grp_state_to_string(qp_grp->state), qp_grp->owner_pid, - usnic_vnic_get_index(qp_grp->vf->vnic), - default_filter_hndl->id); + usnic_vnic_get_index(qp_grp->vf->vnic)); UPDATE_PTR_LEFT(n, ptr, left); for (i = 0; qp_grp->res_chunk_list[i]; i++) { From b85caf479b577f000067002259539ad4341d4530 Mon Sep 17 00:00:00 2001 From: Upinder Malhi Date: Thu, 9 Jan 2014 14:48:14 -0800 Subject: [PATCH 10/61] IB/usnic: Update ABI and Version file for UDP support Expand the kernel/userspace interface so userspace may push down a socket file descriptor to usNIC. Also, bump up the abi and version numbers. Signed-off-by: Upinder Malhi Signed-off-by: Roland Dreier --- drivers/infiniband/hw/usnic/usnic.h | 4 ++-- drivers/infiniband/hw/usnic/usnic_abi.h | 12 ++++++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic.h b/drivers/infiniband/hw/usnic/usnic.h index d741c76bc4be..5be13d8991bc 100644 --- a/drivers/infiniband/hw/usnic/usnic.h +++ b/drivers/infiniband/hw/usnic/usnic.h @@ -23,7 +23,7 @@ #define PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC 0x00cf /* User space NIC */ -#define DRV_VERSION "1.0.2" -#define DRV_RELDATE "September 09, 2013" +#define DRV_VERSION "1.0.3" +#define DRV_RELDATE "December 19, 2013" #endif /* USNIC_H_ */ diff --git a/drivers/infiniband/hw/usnic/usnic_abi.h b/drivers/infiniband/hw/usnic/usnic_abi.h index dd3e4c6c474f..014bcfcabc0f 100644 --- a/drivers/infiniband/hw/usnic/usnic_abi.h +++ b/drivers/infiniband/hw/usnic/usnic_abi.h @@ -21,7 +21,7 @@ #define USNIC_ABI_H /* ABI between userspace and kernel */ -#define USNIC_UVERBS_ABI_VERSION 2 +#define USNIC_UVERBS_ABI_VERSION 3 #define USNIC_QP_GRP_MAX_WQS 8 #define USNIC_QP_GRP_MAX_RQS 8 @@ -30,7 +30,8 @@ enum usnic_transport_type { USNIC_TRANSPORT_UNKNOWN = 0, USNIC_TRANSPORT_ROCE_CUSTOM = 1, - USNIC_TRANSPORT_MAX = 2, + USNIC_TRANSPORT_IPV4_UDP = 2, + USNIC_TRANSPORT_MAX = 3, }; struct usnic_transport_spec { @@ -39,9 +40,16 @@ struct usnic_transport_spec { struct { uint16_t port_num; } usnic_roce; + struct { + uint32_t sock_fd; + } udp; }; }; +struct usnic_ib_create_qp_cmd { + struct usnic_transport_spec spec; +}; + /*TODO: Future - usnic_modify_qp needs to pass in generic filters */ struct usnic_ib_create_qp_resp { u32 vfid; From 3f92bed3d6c073f41efc0777ecd3442aa1f03d20 Mon Sep 17 00:00:00 2001 From: Upinder Malhi Date: Thu, 9 Jan 2014 14:48:15 -0800 Subject: [PATCH 11/61] IB/usnic: Add UDP support to usnic_fwd.[hc] Add *ip field* to *struct usnic_fwd_dev* as well as new *functions* to manipulate the *ip field.* Furthermore, add new functions for programming UDP flows in the forwarding device. Signed-off-by: Upinder Malhi Signed-off-by: Roland Dreier --- drivers/infiniband/hw/usnic/usnic_fwd.c | 54 +++++++++++++++++++++++++ drivers/infiniband/hw/usnic/usnic_fwd.h | 21 ++++++++++ 2 files changed, 75 insertions(+) diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.c b/drivers/infiniband/hw/usnic/usnic_fwd.c index 33fdd771e599..e3c9bd9d3ba3 100644 --- a/drivers/infiniband/hw/usnic/usnic_fwd.c +++ b/drivers/infiniband/hw/usnic/usnic_fwd.c @@ -95,6 +95,29 @@ void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN]) spin_unlock(&ufdev->lock); } +int usnic_fwd_add_ipaddr(struct usnic_fwd_dev *ufdev, __be32 inaddr) +{ + int status; + + spin_lock(&ufdev->lock); + if (ufdev->inaddr == 0) { + ufdev->inaddr = inaddr; + status = 0; + } else { + status = -EFAULT; + } + spin_unlock(&ufdev->lock); + + return status; +} + +void usnic_fwd_del_ipaddr(struct usnic_fwd_dev *ufdev) +{ + spin_lock(&ufdev->lock); + ufdev->inaddr = 0; + spin_unlock(&ufdev->lock); +} + void usnic_fwd_carrier_up(struct usnic_fwd_dev *ufdev) { spin_lock(&ufdev->lock); @@ -126,6 +149,30 @@ static int usnic_fwd_dev_ready_locked(struct usnic_fwd_dev *ufdev) return 0; } +static int validate_filter_locked(struct usnic_fwd_dev *ufdev, + struct filter *filter) +{ + + lockdep_assert_held(&ufdev->lock); + + if (filter->type == FILTER_IPV4_5TUPLE) { + if (!(filter->u.ipv4.flags & FILTER_FIELD_5TUP_DST_AD)) + return -EACCES; + if (!(filter->u.ipv4.flags & FILTER_FIELD_5TUP_DST_PT)) + return -EBUSY; + else if (ufdev->inaddr == 0) + return -EINVAL; + else if (filter->u.ipv4.dst_port == 0) + return -ERANGE; + else if (ntohl(ufdev->inaddr) != filter->u.ipv4.dst_addr) + return -EFAULT; + else + return 0; + } + + return 0; +} + static void fill_tlv(struct filter_tlv *tlv, struct filter *filter, struct filter_action *action) { @@ -177,6 +224,13 @@ usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter, goto out_free_tlv; } + status = validate_filter_locked(ufdev, filter); + if (status) { + usnic_err("Failed to validate filter with status %d\n", + status); + goto out_free_tlv; + } + /* Issue Devcmd */ a0 = tlv_pa; a1 = tlv_size; diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.h b/drivers/infiniband/hw/usnic/usnic_fwd.h index b146eb97d82c..93713a2230b3 100644 --- a/drivers/infiniband/hw/usnic/usnic_fwd.h +++ b/drivers/infiniband/hw/usnic/usnic_fwd.h @@ -40,6 +40,7 @@ struct usnic_fwd_dev { bool link_up; char mac[ETH_ALEN]; unsigned int mtu; + __be32 inaddr; char name[IFNAMSIZ+1]; }; @@ -58,6 +59,8 @@ struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev); void usnic_fwd_dev_free(struct usnic_fwd_dev *ufdev); void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN]); +int usnic_fwd_add_ipaddr(struct usnic_fwd_dev *ufdev, __be32 inaddr); +void usnic_fwd_del_ipaddr(struct usnic_fwd_dev *ufdev); void usnic_fwd_carrier_up(struct usnic_fwd_dev *ufdev); void usnic_fwd_carrier_down(struct usnic_fwd_dev *ufdev); void usnic_fwd_set_mtu(struct usnic_fwd_dev *ufdev, unsigned int mtu); @@ -89,4 +92,22 @@ static inline void usnic_fwd_init_usnic_filter(struct filter *filter, filter->u.usnic.usnic_id = usnic_id; } +static inline void usnic_fwd_init_udp_filter(struct filter *filter, + uint32_t daddr, uint16_t dport) +{ + filter->type = FILTER_IPV4_5TUPLE; + filter->u.ipv4.flags = FILTER_FIELD_5TUP_PROTO; + filter->u.ipv4.protocol = PROTO_UDP; + + if (daddr) { + filter->u.ipv4.flags |= FILTER_FIELD_5TUP_DST_AD; + filter->u.ipv4.dst_addr = daddr; + } + + if (dport) { + filter->u.ipv4.flags |= FILTER_FIELD_5TUP_DST_PT; + filter->u.ipv4.dst_port = dport; + } +} + #endif /* !USNIC_FWD_H_ */ From 6214105460842759020bdd7f4dbb50afa1be9d17 Mon Sep 17 00:00:00 2001 From: Upinder Malhi Date: Thu, 9 Jan 2014 14:48:16 -0800 Subject: [PATCH 12/61] IB:usnic: Add UDP support to usnic_transport.[hc] This patch provides API for rest of usNIC code to increment or decrement socket's reference count. Auxiliary socket APIs are also provided. Signed-off-by: Upinder Malhi Signed-off-by: Roland Dreier --- drivers/infiniband/hw/usnic/usnic_transport.c | 93 +++++++++++++++++-- drivers/infiniband/hw/usnic/usnic_transport.h | 19 ++++ 2 files changed, 105 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_transport.c b/drivers/infiniband/hw/usnic/usnic_transport.c index 723bd6c3a8f8..73dffc9c535a 100644 --- a/drivers/infiniband/hw/usnic/usnic_transport.c +++ b/drivers/infiniband/hw/usnic/usnic_transport.c @@ -16,8 +16,10 @@ * */ #include +#include #include #include +#include #include "usnic_transport.h" #include "usnic_log.h" @@ -28,13 +30,15 @@ static u16 roce_next_port = 1; #define ROCE_BITMAP_SZ ((1 << (8 /*CHAR_BIT*/ * sizeof(u16)))/8 /*CHAR BIT*/) static DEFINE_SPINLOCK(roce_bitmap_lock); -static const char *transport_to_str(enum usnic_transport_type type) +const char *usnic_transport_to_str(enum usnic_transport_type type) { switch (type) { case USNIC_TRANSPORT_UNKNOWN: return "Unknown"; case USNIC_TRANSPORT_ROCE_CUSTOM: return "roce custom"; + case USNIC_TRANSPORT_IPV4_UDP: + return "IPv4 UDP"; case USNIC_TRANSPORT_MAX: return "Max?"; default: @@ -42,6 +46,24 @@ static const char *transport_to_str(enum usnic_transport_type type) } } +int usnic_transport_sock_to_str(char *buf, int buf_sz, + struct socket *sock) +{ + int err; + uint32_t addr; + uint16_t port; + int proto; + + memset(buf, 0, buf_sz); + err = usnic_transport_sock_get_addr(sock, &proto, &addr, &port); + if (err) + return 0; + + addr = htonl(addr); + return scnprintf(buf, buf_sz, "Proto:%u Addr:%pI4 Port:%hu", + proto, &addr, port); +} + /* * reserve a port number. if "0" specified, we will try to pick one * starting at roce_next_port. roce_next_port will take on the values @@ -60,7 +82,7 @@ u16 usnic_transport_rsrv_port(enum usnic_transport_type type, u16 port_num) roce_next_port = (port_num & 4095) + 1; } else if (test_bit(port_num, roce_bitmap)) { usnic_err("Failed to allocate port for %s\n", - transport_to_str(type)); + usnic_transport_to_str(type)); spin_unlock(&roce_bitmap_lock); goto out_fail; } @@ -68,12 +90,12 @@ u16 usnic_transport_rsrv_port(enum usnic_transport_type type, u16 port_num) spin_unlock(&roce_bitmap_lock); } else { usnic_err("Failed to allocate port - transport %s unsupported\n", - transport_to_str(type)); + usnic_transport_to_str(type)); goto out_fail; } usnic_dbg("Allocating port %hu for %s\n", port_num, - transport_to_str(type)); + usnic_transport_to_str(type)); return port_num; out_fail: @@ -86,19 +108,19 @@ void usnic_transport_unrsrv_port(enum usnic_transport_type type, u16 port_num) spin_lock(&roce_bitmap_lock); if (!port_num) { usnic_err("Unreserved unvalid port num 0 for %s\n", - transport_to_str(type)); + usnic_transport_to_str(type)); goto out_roce_custom; } if (!test_bit(port_num, roce_bitmap)) { usnic_err("Unreserving invalid %hu for %s\n", port_num, - transport_to_str(type)); + usnic_transport_to_str(type)); goto out_roce_custom; } bitmap_clear(roce_bitmap, port_num, 1); usnic_dbg("Freeing port %hu for %s\n", port_num, - transport_to_str(type)); + usnic_transport_to_str(type)); out_roce_custom: spin_unlock(&roce_bitmap_lock); } else { @@ -106,6 +128,63 @@ out_roce_custom: } } +struct socket *usnic_transport_get_socket(int sock_fd) +{ + struct socket *sock; + int err; + char buf[25]; + + /* sockfd_lookup will internally do a fget */ + sock = sockfd_lookup(sock_fd, &err); + if (!sock) { + usnic_err("Unable to lookup socket for fd %d with err %d\n", + sock_fd, err); + return ERR_PTR(-ENOENT); + } + + usnic_transport_sock_to_str(buf, sizeof(buf), sock); + usnic_dbg("Get sock %s\n", buf); + + return sock; +} + +void usnic_transport_put_socket(struct socket *sock) +{ + char buf[100]; + + usnic_transport_sock_to_str(buf, sizeof(buf), sock); + usnic_dbg("Put sock %s\n", buf); + sockfd_put(sock); +} + +int usnic_transport_sock_get_addr(struct socket *sock, int *proto, + uint32_t *addr, uint16_t *port) +{ + int len; + int err; + struct sockaddr_in sock_addr; + + err = sock->ops->getname(sock, + (struct sockaddr *)&sock_addr, + &len, 0); + if (err) + return err; + + if (sock_addr.sin_family != AF_INET) + return -EINVAL; + + if (proto) + *proto = sock->sk->sk_protocol; + if (port) + *port = ntohs(((struct sockaddr_in *)&sock_addr)->sin_port); + if (addr) + *addr = ntohl(((struct sockaddr_in *) + &sock_addr)->sin_addr.s_addr); + + return 0; +} + + int usnic_transport_init(void) { roce_bitmap = kzalloc(ROCE_BITMAP_SZ, GFP_KERNEL); diff --git a/drivers/infiniband/hw/usnic/usnic_transport.h b/drivers/infiniband/hw/usnic/usnic_transport.h index 091fdaf4d25a..21371bb86c71 100644 --- a/drivers/infiniband/hw/usnic/usnic_transport.h +++ b/drivers/infiniband/hw/usnic/usnic_transport.h @@ -21,8 +21,27 @@ #include "usnic_abi.h" +const char *usnic_transport_to_str(enum usnic_transport_type trans_type); +/* + * Returns number of bytes written, excluding null terminator. If + * nothing was written, the function returns 0. + */ +int usnic_transport_sock_to_str(char *buf, int buf_sz, + struct socket *sock); u16 usnic_transport_rsrv_port(enum usnic_transport_type type, u16 port_num); void usnic_transport_unrsrv_port(enum usnic_transport_type type, u16 port_num); +/* + * Do a fget on the socket refered to by sock_fd and returns the socket. + * Socket will not be destroyed before usnic_transport_put_socket has + * been called. + */ +struct socket *usnic_transport_get_socket(int sock_fd); +void usnic_transport_put_socket(struct socket *sock); +/* + * Call usnic_transport_get_socket before calling *_sock_get_addr + */ +int usnic_transport_sock_get_addr(struct socket *sock, int *proto, + uint32_t *addr, uint16_t *port); int usnic_transport_init(void); void usnic_transport_fini(void); #endif /* !USNIC_TRANSPORT_H */ From c7845bcafe4d2ecd5c479fa64d1b425c21dde17c Mon Sep 17 00:00:00 2001 From: Upinder Malhi Date: Thu, 9 Jan 2014 14:48:17 -0800 Subject: [PATCH 13/61] IB/usnic: Add UDP support in u*verbs.c, u*main.c and u*util.h Add supports for: 1) Parsing the socket file descriptor pass down from userspace. 2) IP notifiers 3) Encoding the IP in the GID 4) Other aux. changes to support UDP Signed-off-by: Upinder Malhi Signed-off-by: Roland Dreier --- .../infiniband/hw/usnic/usnic_common_util.h | 17 ++++ drivers/infiniband/hw/usnic/usnic_ib_main.c | 84 ++++++++++++++++++- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 44 ++++++++-- 3 files changed, 136 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_common_util.h b/drivers/infiniband/hw/usnic/usnic_common_util.h index 128550a4f9e2..afd8bfa379ed 100644 --- a/drivers/infiniband/hw/usnic/usnic_common_util.h +++ b/drivers/infiniband/hw/usnic/usnic_common_util.h @@ -35,6 +35,23 @@ usnic_mac_to_gid(const char *const mac, char *raw_gid) raw_gid[15] = mac[5]; } +static inline void +usnic_mac_ip_to_gid(const char *const mac, const uint32_t inaddr, char *raw_gid) +{ + raw_gid[0] = 0xfe; + raw_gid[1] = 0x80; + memset(&raw_gid[2], 0, 2); + memcpy(&raw_gid[4], &inaddr, 4); + raw_gid[8] = mac[0]^2; + raw_gid[9] = mac[1]; + raw_gid[10] = mac[2]; + raw_gid[11] = 0xff; + raw_gid[12] = 0xfe; + raw_gid[13] = mac[3]; + raw_gid[14] = mac[4]; + raw_gid[15] = mac[5]; +} + static inline void usnic_write_gid_if_id_from_mac(char *mac, char *raw_gid) { diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 6ab0b41be9c5..3b7e8bd22df6 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -25,6 +25,7 @@ */ #include +#include #include #include #include @@ -236,13 +237,79 @@ static struct notifier_block usnic_ib_netdevice_notifier = { }; /* End of netdev section */ +/* Start of inet section */ +static int usnic_ib_handle_inet_event(struct usnic_ib_dev *us_ibdev, + unsigned long event, void *ptr) +{ + struct in_ifaddr *ifa = ptr; + struct ib_event ib_event; + + mutex_lock(&us_ibdev->usdev_lock); + + switch (event) { + case NETDEV_DOWN: + usnic_info("%s via ip notifiers", + usnic_ib_netdev_event_to_string(event)); + usnic_fwd_del_ipaddr(us_ibdev->ufdev); + usnic_ib_qp_grp_modify_active_to_err(us_ibdev); + ib_event.event = IB_EVENT_GID_CHANGE; + ib_event.device = &us_ibdev->ib_dev; + ib_event.element.port_num = 1; + ib_dispatch_event(&ib_event); + break; + case NETDEV_UP: + usnic_fwd_add_ipaddr(us_ibdev->ufdev, ifa->ifa_address); + usnic_info("%s via ip notifiers: ip %pI4", + usnic_ib_netdev_event_to_string(event), + &us_ibdev->ufdev->inaddr); + ib_event.event = IB_EVENT_GID_CHANGE; + ib_event.device = &us_ibdev->ib_dev; + ib_event.element.port_num = 1; + ib_dispatch_event(&ib_event); + break; + default: + usnic_info("Ignorning event %s on %s", + usnic_ib_netdev_event_to_string(event), + us_ibdev->ib_dev.name); + } + mutex_unlock(&us_ibdev->usdev_lock); + + return NOTIFY_DONE; +} + +static int usnic_ib_inetaddr_event(struct notifier_block *notifier, + unsigned long event, void *ptr) +{ + struct usnic_ib_dev *us_ibdev; + struct in_ifaddr *ifa = ptr; + struct net_device *netdev = ifa->ifa_dev->dev; + + mutex_lock(&usnic_ib_ibdev_list_lock); + list_for_each_entry(us_ibdev, &usnic_ib_ibdev_list, ib_dev_link) { + if (us_ibdev->netdev == netdev) { + usnic_ib_handle_inet_event(us_ibdev, event, ptr); + break; + } + } + mutex_unlock(&usnic_ib_ibdev_list_lock); + + return NOTIFY_DONE; +} +static struct notifier_block usnic_ib_inetaddr_notifier = { + .notifier_call = usnic_ib_inetaddr_event +}; +/* End of inet section*/ + /* Start of PF discovery section */ static void *usnic_ib_device_add(struct pci_dev *dev) { struct usnic_ib_dev *us_ibdev; union ib_gid gid; + struct in_ifaddr *in; + struct net_device *netdev; usnic_dbg("\n"); + netdev = pci_get_drvdata(dev); us_ibdev = (struct usnic_ib_dev *)ib_alloc_device(sizeof(*us_ibdev)); if (IS_ERR_OR_NULL(us_ibdev)) { @@ -326,6 +393,12 @@ static void *usnic_ib_device_add(struct pci_dev *dev) if (netif_carrier_ok(us_ibdev->netdev)) usnic_fwd_carrier_up(us_ibdev->ufdev); + in = ((struct in_device *)(netdev->ip_ptr))->ifa_list; + if (in != NULL) + usnic_fwd_add_ipaddr(us_ibdev->ufdev, in->ifa_address); + + usnic_mac_ip_to_gid(us_ibdev->netdev->perm_addr, + us_ibdev->ufdev->inaddr, &gid.raw[0]); memcpy(&us_ibdev->ib_dev.node_guid, &gid.global.interface_id, sizeof(gid.global.interface_id)); kref_init(&us_ibdev->vf_cnt); @@ -555,16 +628,24 @@ static int __init usnic_ib_init(void) goto out_pci_unreg; } + err = register_inetaddr_notifier(&usnic_ib_inetaddr_notifier); + if (err) { + usnic_err("Failed to register inet addr notifier\n"); + goto out_unreg_netdev_notifier; + } + err = usnic_transport_init(); if (err) { usnic_err("Failed to initialize transport\n"); - goto out_unreg_netdev_notifier; + goto out_unreg_inetaddr_notifier; } usnic_debugfs_init(); return 0; +out_unreg_inetaddr_notifier: + unregister_inetaddr_notifier(&usnic_ib_inetaddr_notifier); out_unreg_netdev_notifier: unregister_netdevice_notifier(&usnic_ib_netdevice_notifier); out_pci_unreg: @@ -580,6 +661,7 @@ static void __exit usnic_ib_destroy(void) usnic_dbg("\n"); usnic_debugfs_exit(); usnic_transport_fini(); + unregister_inetaddr_notifier(&usnic_ib_inetaddr_notifier); unregister_netdevice_notifier(&usnic_ib_netdevice_notifier); pci_unregister_driver(&usnic_ib_pci_driver); usnic_uiom_fini(); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 2217bc0a6bbe..937113f2eeec 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -230,6 +230,15 @@ static void eth_speed_to_ib_speed(int speed, u8 *active_speed, } } +static int create_qp_validate_user_data(struct usnic_ib_create_qp_cmd cmd) +{ + if (cmd.spec.trans_type <= USNIC_TRANSPORT_UNKNOWN || + cmd.spec.trans_type >= USNIC_TRANSPORT_MAX) + return -EINVAL; + + return 0; +} + /* Start of ib callback functions */ enum rdma_link_layer usnic_ib_port_link_layer(struct ib_device *device, @@ -252,7 +261,8 @@ int usnic_ib_query_device(struct ib_device *ibdev, us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info); us_ibdev->netdev->ethtool_ops->get_settings(us_ibdev->netdev, &cmd); memset(props, 0, sizeof(*props)); - usnic_mac_to_gid(us_ibdev->ufdev->mac, &gid.raw[0]); + usnic_mac_ip_to_gid(us_ibdev->ufdev->mac, us_ibdev->ufdev->inaddr, + &gid.raw[0]); memcpy(&props->sys_image_guid, &gid.global.interface_id, sizeof(gid.global.interface_id)); usnic_ib_fw_string_to_u64(&info.fw_version[0], &props->fw_ver); @@ -310,12 +320,15 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port, props->sm_lid = 0; props->sm_sl = 0; - if (us_ibdev->ufdev->link_up) { - props->state = IB_PORT_ACTIVE; - props->phys_state = 5; - } else { + if (!us_ibdev->ufdev->link_up) { props->state = IB_PORT_DOWN; props->phys_state = 3; + } else if (!us_ibdev->ufdev->inaddr) { + props->state = IB_PORT_INIT; + props->phys_state = 4; + } else { + props->state = IB_PORT_ACTIVE; + props->phys_state = 5; } props->port_cap_flags = 0; @@ -385,7 +398,8 @@ int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index, mutex_lock(&us_ibdev->usdev_lock); memset(&(gid->raw[0]), 0, sizeof(gid->raw)); - usnic_mac_to_gid(us_ibdev->ufdev->mac, &gid->raw[0]); + usnic_mac_ip_to_gid(us_ibdev->ufdev->mac, us_ibdev->ufdev->inaddr, + &gid->raw[0]); mutex_unlock(&us_ibdev->usdev_lock); return 0; @@ -444,6 +458,7 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, struct usnic_ib_ucontext *ucontext; int cq_cnt; struct usnic_vnic_res_spec res_spec; + struct usnic_ib_create_qp_cmd cmd; struct usnic_transport_spec trans_spec; usnic_dbg("\n"); @@ -451,14 +466,27 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, ucontext = to_uucontext(pd->uobject->context); us_ibdev = to_usdev(pd->device); + err = ib_copy_from_udata(&cmd, udata, sizeof(cmd)); + if (err) { + usnic_err("%s: cannot copy udata for create_qp\n", + us_ibdev->ib_dev.name); + return ERR_PTR(-EINVAL); + } + + err = create_qp_validate_user_data(cmd); + if (err) { + usnic_err("%s: Failed to validate user data\n", + us_ibdev->ib_dev.name); + return ERR_PTR(-EINVAL); + } + if (init_attr->qp_type != IB_QPT_UD) { usnic_err("%s asked to make a non-UD QP: %d\n", us_ibdev->ib_dev.name, init_attr->qp_type); return ERR_PTR(-EINVAL); } - memset(&trans_spec, 0, sizeof(trans_spec)); - trans_spec.trans_type = USNIC_TRANSPORT_ROCE_CUSTOM; + trans_spec = cmd.spec; mutex_lock(&us_ibdev->usdev_lock); cq_cnt = (init_attr->send_cq == init_attr->recv_cq) ? 1 : 2; res_spec = min_transport_spec[trans_spec.trans_type]; From e45e614e4015a489d2f8013eaed45d498d884e86 Mon Sep 17 00:00:00 2001 From: Upinder Malhi Date: Thu, 9 Jan 2014 14:48:18 -0800 Subject: [PATCH 14/61] IB/usnic: Add UDP support in usnic_ib_qp_grp.[hc] UDP support for qp_grps/qps. Signed-off-by: Upinder Malhi Signed-off-by: Roland Dreier --- drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c | 88 +++++++++++++++++++ drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h | 11 +++ 2 files changed, 99 insertions(+) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c index 2b7e0a1a07b4..d6667a198d0b 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c @@ -245,6 +245,80 @@ static void release_roce_custom_flow(struct usnic_ib_qp_grp_flow *qp_flow) kfree(qp_flow); } +static struct usnic_ib_qp_grp_flow* +create_udp_flow(struct usnic_ib_qp_grp *qp_grp, + struct usnic_transport_spec *trans_spec) +{ + struct socket *sock; + int sock_fd; + int err; + struct filter filter; + struct usnic_filter_action uaction; + struct usnic_ib_qp_grp_flow *qp_flow; + struct usnic_fwd_flow *flow; + enum usnic_transport_type trans_type; + uint32_t addr; + uint16_t port_num; + int proto; + + trans_type = trans_spec->trans_type; + sock_fd = trans_spec->udp.sock_fd; + + /* Get and check socket */ + sock = usnic_transport_get_socket(sock_fd); + if (IS_ERR_OR_NULL(sock)) + return ERR_CAST(sock); + + err = usnic_transport_sock_get_addr(sock, &proto, &addr, &port_num); + if (err) + goto out_put_sock; + + if (proto != IPPROTO_UDP) { + usnic_err("Protocol for fd %d is not UDP", sock_fd); + err = -EPERM; + goto out_put_sock; + } + + /* Create flow */ + usnic_fwd_init_udp_filter(&filter, addr, port_num); + err = init_filter_action(qp_grp, &uaction); + if (err) + goto out_put_sock; + + flow = usnic_fwd_alloc_flow(qp_grp->ufdev, &filter, &uaction); + if (IS_ERR_OR_NULL(flow)) { + usnic_err("Unable to alloc flow failed with err %ld\n", + PTR_ERR(flow)); + err = (flow) ? PTR_ERR(flow) : -EFAULT; + goto out_put_sock; + } + + /* Create qp_flow */ + qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC); + if (IS_ERR_OR_NULL(qp_flow)) { + err = (qp_flow) ? PTR_ERR(qp_flow) : -ENOMEM; + goto out_dealloc_flow; + } + qp_flow->flow = flow; + qp_flow->trans_type = trans_type; + qp_flow->udp.sock = sock; + qp_flow->qp_grp = qp_grp; + return qp_flow; + +out_dealloc_flow: + usnic_fwd_dealloc_flow(flow); +out_put_sock: + usnic_transport_put_socket(sock); + return ERR_PTR(err); +} + +static void release_udp_flow(struct usnic_ib_qp_grp_flow *qp_flow) +{ + usnic_fwd_dealloc_flow(qp_flow->flow); + usnic_transport_put_socket(qp_flow->udp.sock); + kfree(qp_flow); +} + static struct usnic_ib_qp_grp_flow* create_and_add_flow(struct usnic_ib_qp_grp *qp_grp, struct usnic_transport_spec *trans_spec) @@ -257,6 +331,9 @@ create_and_add_flow(struct usnic_ib_qp_grp *qp_grp, case USNIC_TRANSPORT_ROCE_CUSTOM: qp_flow = create_roce_custom_flow(qp_grp, trans_spec); break; + case USNIC_TRANSPORT_IPV4_UDP: + qp_flow = create_udp_flow(qp_grp, trans_spec); + break; default: usnic_err("Unsupported transport %u\n", trans_spec->trans_type); @@ -278,6 +355,9 @@ static void release_and_remove_flow(struct usnic_ib_qp_grp_flow *qp_flow) case USNIC_TRANSPORT_ROCE_CUSTOM: release_roce_custom_flow(qp_flow); break; + case USNIC_TRANSPORT_IPV4_UDP: + release_udp_flow(qp_flow); + break; default: WARN(1, "Unsupported transport %u\n", qp_flow->trans_type); @@ -544,11 +624,19 @@ static int qp_grp_id_from_flow(struct usnic_ib_qp_grp_flow *qp_flow, uint32_t *id) { enum usnic_transport_type trans_type = qp_flow->trans_type; + int err; switch (trans_type) { case USNIC_TRANSPORT_ROCE_CUSTOM: *id = qp_flow->usnic_roce.port_num; break; + case USNIC_TRANSPORT_IPV4_UDP: + err = usnic_transport_sock_get_addr(qp_flow->udp.sock, + NULL, NULL, + (uint16_t *) id); + if (err) + return err; + break; default: usnic_err("Unsupported transport %u\n", trans_type); return -EINVAL; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h index 570fea2e2cb9..a8ba1b9224d8 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h @@ -56,6 +56,9 @@ struct usnic_ib_qp_grp_flow { struct { uint16_t port_num; } usnic_roce; + struct { + struct socket *sock; + } udp; }; struct usnic_ib_qp_grp *qp_grp; struct list_head link; @@ -76,6 +79,14 @@ usnic_vnic_res_spec min_transport_spec[USNIC_TRANSPORT_MAX] = { {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,}, }, }, + { /*USNIC_TRANSPORT_IPV4_UDP*/ + .resources = { + {.type = USNIC_VNIC_RES_TYPE_WQ, .cnt = 1,}, + {.type = USNIC_VNIC_RES_TYPE_RQ, .cnt = 1,}, + {.type = USNIC_VNIC_RES_TYPE_CQ, .cnt = 1,}, + {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,}, + }, + }, }; const char *usnic_ib_qp_grp_state_to_string(enum ib_qp_state state); From 248567f79304b953ea492fb92ade097b62ed09b2 Mon Sep 17 00:00:00 2001 From: Upinder Malhi Date: Thu, 9 Jan 2014 14:48:19 -0800 Subject: [PATCH 15/61] IB/core: Add RDMA_TRANSPORT_USNIC_UDP Add RDMA_TRANSPORT_USNIC_UDP which will be used by usNIC. Signed-off-by: Upinder Malhi Signed-off-by: Roland Dreier --- drivers/infiniband/core/verbs.c | 3 ++- include/rdma/ib_verbs.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index d4f6ddf72ffa..fae0b08248e4 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -115,7 +115,7 @@ rdma_node_get_transport(enum rdma_node_type node_type) case RDMA_NODE_RNIC: return RDMA_TRANSPORT_IWARP; case RDMA_NODE_USNIC: - return RDMA_TRANSPORT_USNIC; + return RDMA_TRANSPORT_USNIC_UDP; default: BUG(); return 0; @@ -133,6 +133,7 @@ enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_ return IB_LINK_LAYER_INFINIBAND; case RDMA_TRANSPORT_IWARP: case RDMA_TRANSPORT_USNIC: + case RDMA_TRANSPORT_USNIC_UDP: return IB_LINK_LAYER_ETHERNET; default: return IB_LINK_LAYER_UNSPECIFIED; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 61e1935c91b1..b19aa7285ea3 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -74,7 +74,8 @@ enum rdma_node_type { enum rdma_transport_type { RDMA_TRANSPORT_IB, RDMA_TRANSPORT_IWARP, - RDMA_TRANSPORT_USNIC + RDMA_TRANSPORT_USNIC, + RDMA_TRANSPORT_USNIC_UDP }; enum rdma_transport_type From 6a54d9f9a04ed35e6615a47974c1ef02ff3a62cb Mon Sep 17 00:00:00 2001 From: Upinder Malhi Date: Thu, 9 Jan 2014 14:48:36 -0800 Subject: [PATCH 16/61] IB/usnic: Remove superflous parentheses Signed-off-by: Upinder Malhi Signed-off-by: Roland Dreier --- drivers/infiniband/hw/usnic/usnic_ib_main.c | 6 +++--- drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c | 18 +++++++++--------- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 4 ++-- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 3b7e8bd22df6..f7be44543c0a 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -470,7 +470,7 @@ static struct usnic_ib_dev *usnic_ib_discover_pf(struct usnic_vnic *vnic) us_ibdev = usnic_ib_device_add(parent_pci); if (IS_ERR_OR_NULL(us_ibdev)) { - us_ibdev = (us_ibdev) ? us_ibdev : ERR_PTR(-EFAULT); + us_ibdev = us_ibdev ? us_ibdev : ERR_PTR(-EFAULT); goto out; } @@ -526,7 +526,7 @@ static int usnic_ib_pci_probe(struct pci_dev *pdev, vf->vnic = usnic_vnic_alloc(pdev); if (IS_ERR_OR_NULL(vf->vnic)) { - err = (vf->vnic ? PTR_ERR(vf->vnic) : -ENOMEM); + err = vf->vnic ? PTR_ERR(vf->vnic) : -ENOMEM; usnic_err("Failed to alloc vnic for %s with err %d\n", pci_name(pdev), err); goto out_release_regions; @@ -536,7 +536,7 @@ static int usnic_ib_pci_probe(struct pci_dev *pdev, if (IS_ERR_OR_NULL(pf)) { usnic_err("Failed to discover pf of vnic %s with err%ld\n", pci_name(pdev), PTR_ERR(pf)); - err = (pf ? PTR_ERR(pf) : -EFAULT); + err = pf ? PTR_ERR(pf) : -EFAULT; goto out_clean_vnic; } diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c index d6667a198d0b..3e17c7c156c3 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c @@ -214,14 +214,14 @@ create_roce_custom_flow(struct usnic_ib_qp_grp *qp_grp, if (IS_ERR_OR_NULL(flow)) { usnic_err("Unable to alloc flow failed with err %ld\n", PTR_ERR(flow)); - err = (flow) ? PTR_ERR(flow) : -EFAULT; + err = flow ? PTR_ERR(flow) : -EFAULT; goto out_unreserve_port; } /* Create Flow Handle */ qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC); if (IS_ERR_OR_NULL(qp_flow)) { - err = (qp_flow) ? PTR_ERR(qp_flow) : -ENOMEM; + err = qp_flow ? PTR_ERR(qp_flow) : -ENOMEM; goto out_dealloc_flow; } qp_flow->flow = flow; @@ -289,14 +289,14 @@ create_udp_flow(struct usnic_ib_qp_grp *qp_grp, if (IS_ERR_OR_NULL(flow)) { usnic_err("Unable to alloc flow failed with err %ld\n", PTR_ERR(flow)); - err = (flow) ? PTR_ERR(flow) : -EFAULT; + err = flow ? PTR_ERR(flow) : -EFAULT; goto out_put_sock; } /* Create qp_flow */ qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC); if (IS_ERR_OR_NULL(qp_flow)) { - err = (qp_flow) ? PTR_ERR(qp_flow) : -ENOMEM; + err = qp_flow ? PTR_ERR(qp_flow) : -ENOMEM; goto out_dealloc_flow; } qp_flow->flow = flow; @@ -415,7 +415,7 @@ int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp, qp_flow = create_and_add_flow(qp_grp, trans_spec); if (IS_ERR_OR_NULL(qp_flow)) { - status = (qp_flow) ? PTR_ERR(qp_flow) : -EFAULT; + status = qp_flow ? PTR_ERR(qp_flow) : -EFAULT; break; } } else { @@ -430,7 +430,7 @@ int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp, qp_flow = create_and_add_flow(qp_grp, trans_spec); if (IS_ERR_OR_NULL(qp_flow)) { - status = (qp_flow) ? PTR_ERR(qp_flow) : -EFAULT; + status = qp_flow ? PTR_ERR(qp_flow) : -EFAULT; break; } } else { @@ -543,8 +543,8 @@ alloc_res_chunk_list(struct usnic_vnic *vnic, res_chunk_list[i] = usnic_vnic_get_resources(vnic, res_type, res_cnt, owner_obj); if (IS_ERR_OR_NULL(res_chunk_list[i])) { - err = (res_chunk_list[i] ? - PTR_ERR(res_chunk_list[i]) : -ENOMEM); + err = res_chunk_list[i] ? + PTR_ERR(res_chunk_list[i]) : -ENOMEM; usnic_err("Failed to get %s from %s with err %d\n", usnic_vnic_res_type_to_str(res_type), usnic_vnic_pci_name(vnic), @@ -697,7 +697,7 @@ usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf, if (IS_ERR_OR_NULL(qp_flow)) { usnic_err("Unable to create and add flow with err %ld\n", PTR_ERR(qp_flow)); - err = (qp_flow) ? PTR_ERR(qp_flow) : -EFAULT; + err = qp_flow ? PTR_ERR(qp_flow) : -EFAULT; goto out_qp_grp_vf_unbind; } diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 937113f2eeec..d48d2c0a2e3c 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -495,7 +495,7 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, &trans_spec, &res_spec); if (IS_ERR_OR_NULL(qp_grp)) { - err = (qp_grp ? PTR_ERR(qp_grp) : -ENOMEM); + err = qp_grp ? PTR_ERR(qp_grp) : -ENOMEM; goto out_release_mutex; } @@ -605,7 +605,7 @@ struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length, mr->umem = usnic_uiom_reg_get(to_upd(pd)->umem_pd, start, length, access_flags, 0); if (IS_ERR_OR_NULL(mr->umem)) { - err = (mr->umem) ? PTR_ERR(mr->umem) : -EFAULT; + err = mr->umem ? PTR_ERR(mr->umem) : -EFAULT; goto err_free; } From c5f855e08a97edc107c4a3b73809ed629c1dcac1 Mon Sep 17 00:00:00 2001 From: Upinder Malhi Date: Thu, 9 Jan 2014 14:48:45 -0800 Subject: [PATCH 17/61] IB/usnic: Use for_each_sg instead of a for-loop Use for_each_sg() instead of an explicit for-loop to iterate over scatter-gather list. Signed-off-by: Upinder Malhi Signed-off-by: Roland Dreier --- drivers/infiniband/hw/usnic/usnic_uiom.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index c841a752dbd0..ae6934c0d05a 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -80,13 +80,14 @@ static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty) { struct usnic_uiom_chunk *chunk, *tmp; struct page *page; + struct scatterlist *sg; int i; dma_addr_t pa; list_for_each_entry_safe(chunk, tmp, chunk_list, list) { - for (i = 0; i < chunk->nents; i++) { - page = sg_page(&chunk->page_list[i]); - pa = sg_phys(&chunk->page_list[i]); + for_each_sg(chunk->page_list, sg, chunk->nents, i) { + page = sg_page(sg); + pa = sg_phys(sg); if (dirty) set_page_dirty_lock(page); put_page(page); @@ -100,6 +101,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, int dmasync, struct list_head *chunk_list) { struct page **page_list; + struct scatterlist *sg; struct usnic_uiom_chunk *chunk; unsigned long locked; unsigned long lock_limit; @@ -165,11 +167,10 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, chunk->nents = min_t(int, ret, USNIC_UIOM_PAGE_CHUNK); sg_init_table(chunk->page_list, chunk->nents); - for (i = 0; i < chunk->nents; ++i) { - sg_set_page(&chunk->page_list[i], - page_list[i + off], - PAGE_SIZE, 0); - pa = sg_phys(&chunk->page_list[i]); + for_each_sg(chunk->page_list, sg, chunk->nents, i) { + sg_set_page(sg, page_list[i + off], + PAGE_SIZE, 0); + pa = sg_phys(sg); usnic_dbg("va: 0x%lx pa: %pa\n", cur_base + i*PAGE_SIZE, &pa); } From 9f637f7936025aef57f247b11036bad18bb87c06 Mon Sep 17 00:00:00 2001 From: Upinder Malhi Date: Thu, 9 Jan 2014 15:40:58 -0800 Subject: [PATCH 18/61] IB/usnic: Expose flows via debugfs Signed-off-by: Upinder Malhi Signed-off-by: Roland Dreier --- drivers/infiniband/hw/usnic/usnic_debugfs.c | 91 ++++++++++++++++++- drivers/infiniband/hw/usnic/usnic_debugfs.h | 4 + drivers/infiniband/hw/usnic/usnic_ib.h | 7 ++ drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c | 8 +- drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h | 5 + drivers/infiniband/hw/usnic/usnic_ib_sysfs.c | 6 -- 6 files changed, 111 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_debugfs.c b/drivers/infiniband/hw/usnic/usnic_debugfs.c index 91386df025ae..6cb2e7ca7df2 100644 --- a/drivers/infiniband/hw/usnic/usnic_debugfs.c +++ b/drivers/infiniband/hw/usnic/usnic_debugfs.c @@ -22,8 +22,11 @@ #include "usnic.h" #include "usnic_log.h" #include "usnic_debugfs.h" +#include "usnic_ib_qp_grp.h" +#include "usnic_transport.h" static struct dentry *debugfs_root; +static struct dentry *flows_dentry; static ssize_t usnic_debugfs_buildinfo_read(struct file *f, char __user *data, size_t count, loff_t *ppos) @@ -48,17 +51,73 @@ static const struct file_operations usnic_debugfs_buildinfo_ops = { .read = usnic_debugfs_buildinfo_read }; +static ssize_t flowinfo_read(struct file *f, char __user *data, + size_t count, loff_t *ppos) +{ + struct usnic_ib_qp_grp_flow *qp_flow; + int n; + int left; + char *ptr; + char buf[512]; + + qp_flow = f->private_data; + ptr = buf; + left = count; + + if (*ppos > 0) + return 0; + + spin_lock(&qp_flow->qp_grp->lock); + n = scnprintf(ptr, left, + "QP Grp ID: %d Transport: %s ", + qp_flow->qp_grp->grp_id, + usnic_transport_to_str(qp_flow->trans_type)); + UPDATE_PTR_LEFT(n, ptr, left); + if (qp_flow->trans_type == USNIC_TRANSPORT_ROCE_CUSTOM) { + n = scnprintf(ptr, left, "Port_Num:%hu\n", + qp_flow->usnic_roce.port_num); + UPDATE_PTR_LEFT(n, ptr, left); + } else if (qp_flow->trans_type == USNIC_TRANSPORT_IPV4_UDP) { + n = usnic_transport_sock_to_str(ptr, left, + qp_flow->udp.sock); + UPDATE_PTR_LEFT(n, ptr, left); + n = scnprintf(ptr, left, "\n"); + UPDATE_PTR_LEFT(n, ptr, left); + } + spin_unlock(&qp_flow->qp_grp->lock); + + return simple_read_from_buffer(data, count, ppos, buf, ptr - buf); +} + +static const struct file_operations flowinfo_ops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = flowinfo_read, +}; + void usnic_debugfs_init(void) { debugfs_root = debugfs_create_dir(DRV_NAME, NULL); if (IS_ERR(debugfs_root)) { usnic_err("Failed to create debugfs root dir, check if debugfs is enabled in kernel configuration\n"); - debugfs_root = NULL; - return; + goto out_clear_root; + } + + flows_dentry = debugfs_create_dir("flows", debugfs_root); + if (IS_ERR_OR_NULL(flows_dentry)) { + usnic_err("Failed to create debugfs flow dir with err %ld\n", + PTR_ERR(flows_dentry)); + goto out_free_root; } debugfs_create_file("build-info", S_IRUGO, debugfs_root, NULL, &usnic_debugfs_buildinfo_ops); + return; + +out_free_root: + debugfs_remove_recursive(debugfs_root); +out_clear_root: + debugfs_root = NULL; } void usnic_debugfs_exit(void) @@ -69,3 +128,31 @@ void usnic_debugfs_exit(void) debugfs_remove_recursive(debugfs_root); debugfs_root = NULL; } + +void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow) +{ + struct usnic_ib_qp_grp *qp_grp; + + if (IS_ERR_OR_NULL(flows_dentry)) + return; + + qp_grp = qp_flow->qp_grp; + + scnprintf(qp_flow->dentry_name, sizeof(qp_flow->dentry_name), + "%u", qp_flow->flow->flow_id); + qp_flow->dbgfs_dentry = debugfs_create_file(qp_flow->dentry_name, + S_IRUGO, + flows_dentry, + qp_flow, + &flowinfo_ops); + if (IS_ERR_OR_NULL(qp_flow->dbgfs_dentry)) { + usnic_err("Failed to create dbg fs entry for flow %u\n", + qp_flow->flow->flow_id); + } +} + +void usnic_debugfs_flow_remove(struct usnic_ib_qp_grp_flow *qp_flow) +{ + if (!IS_ERR_OR_NULL(qp_flow->dbgfs_dentry)) + debugfs_remove(qp_flow->dbgfs_dentry); +} diff --git a/drivers/infiniband/hw/usnic/usnic_debugfs.h b/drivers/infiniband/hw/usnic/usnic_debugfs.h index 914a330dd5e7..4087d24a88f6 100644 --- a/drivers/infiniband/hw/usnic/usnic_debugfs.h +++ b/drivers/infiniband/hw/usnic/usnic_debugfs.h @@ -18,8 +18,12 @@ #ifndef USNIC_DEBUGFS_H_ #define USNIC_DEBUGFS_H_ +#include "usnic_ib_qp_grp.h" + void usnic_debugfs_init(void); void usnic_debugfs_exit(void); +void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow); +void usnic_debugfs_flow_remove(struct usnic_ib_qp_grp_flow *qp_flow); #endif /*!USNIC_DEBUGFS_H_ */ diff --git a/drivers/infiniband/hw/usnic/usnic_ib.h b/drivers/infiniband/hw/usnic/usnic_ib.h index 92d9d9a60b3b..111a86e680d5 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib.h +++ b/drivers/infiniband/hw/usnic/usnic_ib.h @@ -109,4 +109,11 @@ struct usnic_ib_mr *to_umr(struct ib_mr *ibmr) return container_of(ibmr, struct usnic_ib_mr, ibmr); } void usnic_ib_log_vf(struct usnic_ib_vf *vf); + +#define UPDATE_PTR_LEFT(N, P, L) \ +do { \ + L -= (N); \ + P += (N); \ +} while (0) + #endif /* USNIC_IB_H_ */ diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c index 3e17c7c156c3..7ecc6061f1f4 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c @@ -24,6 +24,7 @@ #include "usnic_vnic.h" #include "usnic_fwd.h" #include "usnic_uiom.h" +#include "usnic_debugfs.h" #include "usnic_ib_qp_grp.h" #include "usnic_ib_sysfs.h" #include "usnic_transport.h" @@ -340,8 +341,10 @@ create_and_add_flow(struct usnic_ib_qp_grp *qp_grp, return ERR_PTR(-EINVAL); } - if (!IS_ERR_OR_NULL(qp_flow)) + if (!IS_ERR_OR_NULL(qp_flow)) { list_add_tail(&qp_flow->link, &qp_grp->flows_lst); + usnic_debugfs_flow_add(qp_flow); + } return qp_flow; @@ -349,6 +352,7 @@ create_and_add_flow(struct usnic_ib_qp_grp *qp_grp, static void release_and_remove_flow(struct usnic_ib_qp_grp_flow *qp_flow) { + usnic_debugfs_flow_remove(qp_flow); list_del(&qp_flow->link); switch (qp_flow->trans_type) { @@ -728,9 +732,9 @@ void usnic_ib_qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp) WARN_ON(qp_grp->state != IB_QPS_RESET); lockdep_assert_held(&qp_grp->vf->lock); + release_and_remove_all_flows(qp_grp); usnic_ib_sysfs_qpn_remove(qp_grp); qp_grp_and_vf_unbind(qp_grp); - release_and_remove_all_flows(qp_grp); free_qp_grp_res(qp_grp->res_chunk_list); kfree(qp_grp); } diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h index a8ba1b9224d8..b0aafe8db0c3 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h @@ -19,6 +19,7 @@ #ifndef USNIC_IB_QP_GRP_H_ #define USNIC_IB_QP_GRP_H_ +#include #include #include "usnic_ib.h" @@ -62,6 +63,10 @@ struct usnic_ib_qp_grp_flow { }; struct usnic_ib_qp_grp *qp_grp; struct list_head link; + + /* Debug FS */ + struct dentry *dbgfs_dentry; + char dentry_name[32]; }; static const struct diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c index 3e5884232342..27dc67c1689f 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c @@ -30,12 +30,6 @@ #include "usnic_ib_verbs.h" #include "usnic_log.h" -#define UPDATE_PTR_LEFT(N, P, L) \ -do { \ - L -= (N); \ - P += (N); \ -} while (0) - static ssize_t usnic_ib_show_fw_ver(struct device *device, struct device_attribute *attr, char *buf) From c30392ab5bb536fef268c22804dafded15170d14 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 13 Jan 2014 13:12:48 -0800 Subject: [PATCH 19/61] IB/usnic: Fix typo "Ignorning" -> "Ignoring" Signed-off-by: Roland Dreier --- drivers/infiniband/hw/usnic/usnic_ib_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index f7be44543c0a..7d2efd4d80d5 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -169,7 +169,7 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev, ib_event.element.port_num = 1; ib_dispatch_event(&ib_event); } else { - usnic_dbg("Ignorning %s on %s\n", + usnic_dbg("Ignoring %s on %s\n", usnic_ib_netdev_event_to_string(event), us_ibdev->ib_dev.name); } @@ -177,7 +177,7 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev, case NETDEV_CHANGEADDR: if (!memcmp(us_ibdev->ufdev->mac, netdev->dev_addr, sizeof(us_ibdev->ufdev->mac))) { - usnic_dbg("Ignorning addr change on %s\n", + usnic_dbg("Ignoring addr change on %s\n", us_ibdev->ib_dev.name); } else { usnic_info(" %s old mac: %pM new mac: %pM\n", @@ -206,7 +206,7 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev, } break; default: - usnic_dbg("Ignorning event %s on %s", + usnic_dbg("Ignoring event %s on %s", usnic_ib_netdev_event_to_string(event), us_ibdev->ib_dev.name); } @@ -268,7 +268,7 @@ static int usnic_ib_handle_inet_event(struct usnic_ib_dev *us_ibdev, ib_dispatch_event(&ib_event); break; default: - usnic_info("Ignorning event %s on %s", + usnic_info("Ignoring event %s on %s", usnic_ib_netdev_event_to_string(event), us_ibdev->ib_dev.name); } From 3108bccb3d9afbd32931d775f5dd5ee157eaa5a9 Mon Sep 17 00:00:00 2001 From: Upinder Malhi Date: Fri, 20 Dec 2013 21:41:23 +0000 Subject: [PATCH 20/61] IB/usnic: Append documentation to usnic_transport.h and cleanup Add comment describing usnic_transport_rsrv port and remove extraneous space from usnic_transport.c. Signed-off-by: Upinder Malhi Signed-off-by: Roland Dreier --- drivers/infiniband/hw/usnic/usnic_transport.c | 1 - drivers/infiniband/hw/usnic/usnic_transport.h | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/usnic/usnic_transport.c b/drivers/infiniband/hw/usnic/usnic_transport.c index 73dffc9c535a..9182cc8f8b93 100644 --- a/drivers/infiniband/hw/usnic/usnic_transport.c +++ b/drivers/infiniband/hw/usnic/usnic_transport.c @@ -184,7 +184,6 @@ int usnic_transport_sock_get_addr(struct socket *sock, int *proto, return 0; } - int usnic_transport_init(void) { roce_bitmap = kzalloc(ROCE_BITMAP_SZ, GFP_KERNEL); diff --git a/drivers/infiniband/hw/usnic/usnic_transport.h b/drivers/infiniband/hw/usnic/usnic_transport.h index 21371bb86c71..7e5dc6d9f462 100644 --- a/drivers/infiniband/hw/usnic/usnic_transport.h +++ b/drivers/infiniband/hw/usnic/usnic_transport.h @@ -28,6 +28,10 @@ const char *usnic_transport_to_str(enum usnic_transport_type trans_type); */ int usnic_transport_sock_to_str(char *buf, int buf_sz, struct socket *sock); +/* + * Reserve a port. If "port_num" is set, then the function will try + * to reserve that particular port. + */ u16 usnic_transport_rsrv_port(enum usnic_transport_type type, u16 port_num); void usnic_transport_unrsrv_port(enum usnic_transport_type type, u16 port_num); /* From d9fe40916387bab884e458c99399c149b033506c Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 14 Jan 2014 17:45:10 +0200 Subject: [PATCH 21/61] IB/mlx5: Remove unused code in mr.c The variable start in struct mlx5_ib_mr is never used. Remove it. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 - drivers/infiniband/hw/mlx5/mr.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 4c134d93d4fc..5acef30a4998 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -264,7 +264,6 @@ struct mlx5_ib_mr { enum ib_wc_status status; struct mlx5_ib_dev *dev; struct mlx5_create_mkey_mbox_out out; - unsigned long start; }; struct mlx5_ib_fast_reg_page_list { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 039c3e40fcb4..7c95ca1f0c25 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -146,7 +146,6 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) spin_lock_irq(&ent->lock); ent->pending++; spin_unlock_irq(&ent->lock); - mr->start = jiffies; err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in), reg_mr_callback, mr, &mr->out); From 24e42754f676d34e5c26d6b7b30f36df8004ec08 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 14 Jan 2014 17:45:11 +0200 Subject: [PATCH 22/61] mlx5_core: Remove dead code Remove leftover of debug code. Signed-off-by: Dan Carpenter Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 37b6ad1f9a1b..c35c4320225a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -192,10 +192,8 @@ static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr) struct fw_page *fp; unsigned n; - if (list_empty(&dev->priv.free_list)) { + if (list_empty(&dev->priv.free_list)) return -ENOMEM; - mlx5_core_warn(dev, "\n"); - } fp = list_entry(dev->priv.free_list.next, struct fw_page, list); n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask)); From c1be5232d21de68f46637e617225b9b7c586451a Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 14 Jan 2014 17:45:12 +0200 Subject: [PATCH 23/61] IB/mlx5: Fix micro UAR allocator The micro UAR (uuar) allocator had a bug which resulted from the fact that in each UAR we only have two micro UARs avaialable, those at index 0 and 1. This patch defines iterators to aid in traversing the list of available micro UARs when allocating a uuar. In addition, change the logic in create_user_qp() so that if high class allocation fails (high class means lower latency), we revert to medium class and not to the low class. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/main.c | 13 ++++-- drivers/infiniband/hw/mlx5/qp.c | 77 +++++++++++++++++++++++-------- include/linux/mlx5/device.h | 7 +-- 3 files changed, 70 insertions(+), 27 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 306534109627..9660d093f8cf 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -541,6 +541,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, struct mlx5_ib_ucontext *context; struct mlx5_uuar_info *uuari; struct mlx5_uar *uars; + int gross_uuars; int num_uars; int uuarn; int err; @@ -559,11 +560,13 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, if (req.total_num_uuars == 0) return ERR_PTR(-EINVAL); - req.total_num_uuars = ALIGN(req.total_num_uuars, MLX5_BF_REGS_PER_PAGE); + req.total_num_uuars = ALIGN(req.total_num_uuars, + MLX5_NON_FP_BF_REGS_PER_PAGE); if (req.num_low_latency_uuars > req.total_num_uuars - 1) return ERR_PTR(-EINVAL); - num_uars = req.total_num_uuars / MLX5_BF_REGS_PER_PAGE; + num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE; + gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE; resp.qp_tab_size = 1 << dev->mdev.caps.log_max_qp; resp.bf_reg_size = dev->mdev.caps.bf_reg_size; resp.cache_line_size = L1_CACHE_BYTES; @@ -585,7 +588,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, goto out_ctx; } - uuari->bitmap = kcalloc(BITS_TO_LONGS(req.total_num_uuars), + uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars), sizeof(*uuari->bitmap), GFP_KERNEL); if (!uuari->bitmap) { @@ -595,13 +598,13 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, /* * clear all fast path uuars */ - for (i = 0; i < req.total_num_uuars; i++) { + for (i = 0; i < gross_uuars; i++) { uuarn = i & 3; if (uuarn == 2 || uuarn == 3) set_bit(i, uuari->bitmap); } - uuari->count = kcalloc(req.total_num_uuars, sizeof(*uuari->count), GFP_KERNEL); + uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL); if (!uuari->count) { err = -ENOMEM; goto out_bitmap; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 7c6b4ba49bec..a056c243ddcd 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -340,14 +340,57 @@ static int qp_has_rq(struct ib_qp_init_attr *attr) return 1; } +static int first_med_uuar(void) +{ + return 1; +} + +static int next_uuar(int n) +{ + n++; + + while (((n % 4) & 2)) + n++; + + return n; +} + +static int num_med_uuar(struct mlx5_uuar_info *uuari) +{ + int n; + + n = uuari->num_uars * MLX5_NON_FP_BF_REGS_PER_PAGE - + uuari->num_low_latency_uuars - 1; + + return n >= 0 ? n : 0; +} + +static int max_uuari(struct mlx5_uuar_info *uuari) +{ + return uuari->num_uars * 4; +} + +static int first_hi_uuar(struct mlx5_uuar_info *uuari) +{ + int med; + int i; + int t; + + med = num_med_uuar(uuari); + for (t = 0, i = first_med_uuar();; i = next_uuar(i)) { + t++; + if (t == med) + return next_uuar(i); + } + + return 0; +} + static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari) { - int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE; - int start_uuar; int i; - start_uuar = nuuars - uuari->num_low_latency_uuars; - for (i = start_uuar; i < nuuars; i++) { + for (i = first_hi_uuar(uuari); i < max_uuari(uuari); i = next_uuar(i)) { if (!test_bit(i, uuari->bitmap)) { set_bit(i, uuari->bitmap); uuari->count[i]++; @@ -360,19 +403,10 @@ static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari) static int alloc_med_class_uuar(struct mlx5_uuar_info *uuari) { - int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE; - int minidx = 1; - int uuarn; - int end; + int minidx = first_med_uuar(); int i; - end = nuuars - uuari->num_low_latency_uuars; - - for (i = 1; i < end; i++) { - uuarn = i & 3; - if (uuarn == 2 || uuarn == 3) - continue; - + for (i = first_med_uuar(); i < first_hi_uuar(uuari); i = next_uuar(i)) { if (uuari->count[i] < uuari->count[minidx]) minidx = i; } @@ -510,11 +544,16 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH); if (uuarn < 0) { mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n"); - mlx5_ib_dbg(dev, "reverting to high latency\n"); - uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW); + mlx5_ib_dbg(dev, "reverting to medium latency\n"); + uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM); if (uuarn < 0) { - mlx5_ib_dbg(dev, "uuar allocation failed\n"); - return uuarn; + mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n"); + mlx5_ib_dbg(dev, "reverting to high latency\n"); + uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW); + if (uuarn < 0) { + mlx5_ib_warn(dev, "uuar allocation failed\n"); + return uuarn; + } } } diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index da78875807fc..2c1c62e339ca 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -104,9 +104,10 @@ enum { }; enum { - MLX5_BF_REGS_PER_PAGE = 4, - MLX5_MAX_UAR_PAGES = 1 << 8, - MLX5_MAX_UUARS = MLX5_MAX_UAR_PAGES * MLX5_BF_REGS_PER_PAGE, + MLX5_BF_REGS_PER_PAGE = 4, + MLX5_MAX_UAR_PAGES = 1 << 8, + MLX5_NON_FP_BF_REGS_PER_PAGE = 2, + MLX5_MAX_UUARS = MLX5_MAX_UAR_PAGES * MLX5_NON_FP_BF_REGS_PER_PAGE, }; enum { From 90f1d1b41b70474bf73d07d4300196901cd81718 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 7 Nov 2013 15:25:12 +0200 Subject: [PATCH 24/61] IB/core: Add flow steering support for IPoIB UD traffic When creating an IPoIB UD QP, provide a hint to the low level driver that the QP should support flow-steering. This means that privileged user space applications can steer TCP/IP IPoIB traffic from the network stack, in a similar manner done with Ethernet RAW_PACKET QPs. The hint is provided through new QP creation flag called NETIF_QP. Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 3 +++ include/rdma/ib_verbs.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 049a997caff3..c56d5d44c53b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -192,6 +192,9 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK) init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; + if (priv->hca_caps & IB_DEVICE_MANAGED_FLOW_STEERING) + init_attr.create_flags |= IB_QP_CREATE_NETIF_QP; + if (dev->features & NETIF_F_SG) init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 61e1935c91b1..d4d16394455f 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -633,6 +633,7 @@ enum ib_qp_type { enum ib_qp_create_flags { IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0, IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1, + IB_QP_CREATE_NETIF_QP = 1 << 5, /* reserve bits 26-31 for low level drivers' internal use */ IB_QP_CREATE_RESERVED_START = 1 << 26, IB_QP_CREATE_RESERVED_END = 1 << 31, From 240ae00e4d834e387f4f09e236130f520e357a70 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 7 Nov 2013 15:25:13 +0200 Subject: [PATCH 25/61] IB/core: Add support for IB L2 device-managed steering This patch adds preliminary support for IB L2 device-managed steering, currently exposed only in the kernel. This flow spec can be used by low-level drivers that need to indicate the link layer type when creating device-managed flow rules. Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- include/rdma/ib_verbs.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index d4d16394455f..aa24760f4bc2 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1100,13 +1100,14 @@ enum ib_flow_attr_type { enum ib_flow_spec_type { /* L2 headers*/ IB_FLOW_SPEC_ETH = 0x20, + IB_FLOW_SPEC_IB = 0x22, /* L3 header*/ IB_FLOW_SPEC_IPV4 = 0x30, /* L4 headers*/ IB_FLOW_SPEC_TCP = 0x40, IB_FLOW_SPEC_UDP = 0x41 }; - +#define IB_FLOW_SPEC_LAYER_MASK 0xF0 #define IB_FLOW_SPEC_SUPPORT_LAYERS 4 /* Flow steering rule priority is set according to it's domain. @@ -1134,6 +1135,18 @@ struct ib_flow_spec_eth { struct ib_flow_eth_filter mask; }; +struct ib_flow_ib_filter { + __be16 dlid; + __u8 sl; +}; + +struct ib_flow_spec_ib { + enum ib_flow_spec_type type; + u16 size; + struct ib_flow_ib_filter val; + struct ib_flow_ib_filter mask; +}; + struct ib_flow_ipv4_filter { __be32 src_ip; __be32 dst_ip; @@ -1164,6 +1177,7 @@ union ib_flow_spec { u16 size; }; struct ib_flow_spec_eth eth; + struct ib_flow_spec_ib ib; struct ib_flow_spec_ipv4 ipv4; struct ib_flow_spec_tcp_udp tcp_udp; }; From 4de6580360867d44adecb2d05febed1c8d186c82 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 7 Nov 2013 15:25:14 +0200 Subject: [PATCH 26/61] mlx4_core: Add support for steerable IB UD QPs This patch adds support for allocating IB UD QPs that we can steer traffic from. We introduce a new firmware command FLOW_STEERING_IB_UC_QP_RANGE and a capability bit. This command isn't supported for VFs. Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 9 +++++++++ drivers/net/ethernet/mellanox/mlx4/fw.c | 10 ++++++++++ drivers/net/ethernet/mellanox/mlx4/mcg.c | 17 +++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 5 +++++ .../ethernet/mellanox/mlx4/resource_tracker.c | 10 ++++++++++ include/linux/mlx4/cmd.h | 1 + include/linux/mlx4/device.h | 6 +++++- 7 files changed, 57 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 1e9970d2f0f3..0d02fba94536 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1371,6 +1371,15 @@ static struct mlx4_cmd_info cmd_info[] = { .verify = NULL, .wrapper = mlx4_QP_FLOW_STEERING_DETACH_wrapper }, + { + .opcode = MLX4_FLOW_STEERING_IB_UC_QP_RANGE, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_FLOW_STEERING_IB_UC_QP_RANGE_wrapper + }, }; static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 194928214606..4bd2d80d065e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -513,6 +513,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_MAX_XRC_OFFSET 0x67 #define QUERY_DEV_CAP_MAX_COUNTERS_OFFSET 0x68 #define QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET 0x70 +#define QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET 0x74 #define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET 0x76 #define QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET 0x77 #define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET 0x80 @@ -603,6 +604,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) if (field & 0x80) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FS_EN; dev_cap->fs_log_max_ucast_qp_range_size = field & 0x1f; + MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET); + if (field & 0x80) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DMFS_IPOIB; MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET); dev_cap->fs_max_num_qp_per_entry = field; MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET); @@ -860,6 +864,12 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET); } + + /* turn off ipoib managed steering for guests */ + MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET); + field &= ~0x80; + MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index acf9d5f1f922..34dffcf61bff 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -895,6 +895,23 @@ int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id) } EXPORT_SYMBOL_GPL(mlx4_flow_detach); +int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn, + u32 max_range_qpn) +{ + int err; + u64 in_param; + + in_param = ((u64) min_range_qpn) << 32; + in_param |= ((u64) max_range_qpn) & 0xFFFFFFFF; + + err = mlx4_cmd(dev, in_param, 0, 0, + MLX4_FLOW_STEERING_IB_UC_QP_RANGE, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + + return err; +} +EXPORT_SYMBOL_GPL(mlx4_FLOW_STEERING_IB_UC_QP_RANGE); + int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], int block_mcast_loopback, enum mlx4_protocol prot, enum mlx4_steer_type steer) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index e582a41a802b..59f67f9086dc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1236,6 +1236,11 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); +int mlx4_FLOW_STEERING_IB_UC_QP_RANGE_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); int mlx4_get_mgm_entry_size(struct mlx4_dev *dev); int mlx4_get_qp_per_mgm(struct mlx4_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 2f3f2bc7f283..663510325c22 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -3844,6 +3844,16 @@ int mlx4_QUERY_IF_STAT_wrapper(struct mlx4_dev *dev, int slave, return err; } +int mlx4_FLOW_STEERING_IB_UC_QP_RANGE_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + return -EPERM; +} + + static void detach_qp(struct mlx4_dev *dev, int slave, struct res_qp *rqp) { struct res_gid *rgid; diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 8df61bc5da00..ff36620f88a7 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -157,6 +157,7 @@ enum { /* register/delete flow steering network rules */ MLX4_QP_FLOW_STEERING_ATTACH = 0x65, MLX4_QP_FLOW_STEERING_DETACH = 0x66, + MLX4_FLOW_STEERING_IB_UC_QP_RANGE = 0x64, }; enum { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 7d3a523160ba..7de9fde3a9dd 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -160,7 +160,8 @@ enum { MLX4_DEV_CAP_FLAG2_TS = 1LL << 5, MLX4_DEV_CAP_FLAG2_VLAN_CONTROL = 1LL << 6, MLX4_DEV_CAP_FLAG2_FSM = 1LL << 7, - MLX4_DEV_CAP_FLAG2_UPDATE_QP = 1LL << 8 + MLX4_DEV_CAP_FLAG2_UPDATE_QP = 1LL << 8, + MLX4_DEV_CAP_FLAG2_DMFS_IPOIB = 1LL << 9 }; enum { @@ -1144,6 +1145,9 @@ int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, int void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid); __be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave); +int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn, + u32 max_range_qpn); + cycle_t mlx4_read_clock(struct mlx4_dev *dev); #endif /* MLX4_DEVICE_H */ From 0a9b7d59d5a8e2b97406a29a8a807bbc5ce7092e Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 7 Nov 2013 15:25:15 +0200 Subject: [PATCH 27/61] IB/mlx4: Enable device-managed steering support for IB ports too Up until now, flow steering wasn't supported when using IB ports. This patch enables support for flow steering if all hardware ports support that, for example the new MLX4_DEV_CAP_FLAG2_DMFS_IPOIB mlx4 device capability. Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 29 +++++++++++++++++----------- drivers/infiniband/hw/mlx4/mlx4_ib.h | 1 + 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 1958c5ca792a..e8d0c5592fd3 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -92,21 +92,27 @@ static union ib_gid zgid; static int check_flow_steering_support(struct mlx4_dev *dev) { + int eth_num_ports = 0; int ib_num_ports = 0; - int i; - mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) - ib_num_ports++; + int dmfs = dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED; - if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { - if (ib_num_ports || mlx4_is_mfunc(dev)) { - pr_warn("Device managed flow steering is unavailable " - "for IB ports or in multifunction env.\n"); - return 0; + if (dmfs) { + int i; + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) + eth_num_ports++; + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) + ib_num_ports++; + dmfs &= (!ib_num_ports || + (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB)) && + (!eth_num_ports || + (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN)); + if (ib_num_ports && mlx4_is_mfunc(dev)) { + pr_warn("Device managed flow steering is unavailable for IB port in multifunction env.\n"); + dmfs = 0; } - return 1; } - return 0; + return dmfs; } static int mlx4_ib_query_device(struct ib_device *ibdev, @@ -165,7 +171,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B; else props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A; - if (check_flow_steering_support(dev->dev)) + if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING; } @@ -1682,6 +1688,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) } if (check_flow_steering_support(dev)) { + ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED; ibdev->ib_dev.create_flow = mlx4_ib_create_flow; ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 036b663dd26e..e9fb39603ee9 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -494,6 +494,7 @@ struct mlx4_ib_dev { struct kobject *dev_ports_parent[MLX4_MFUNC_MAX]; struct mlx4_ib_iov_port iov_ports[MLX4_MAX_PORTS]; struct pkey_mgt pkeys; + int steering_support; }; struct ib_event_work { From a37a1a428431d3e7e9f53530b5c56ff7867bd487 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 7 Nov 2013 15:25:16 +0200 Subject: [PATCH 28/61] IB/mlx4: Add mechanism to support flow steering over IB links The mlx4 device requires adding IB flow spec to rules that apply over infiniband link layer. This patch adds a mechanism to add such a rule. If higher levels e.g. IP/UDP/TCP flow specs are provided, the device requires us to add an empty wild-carded IB rule. Furthermore, the device requires the QPN to be put in the rule. Add here specific parsing support for IB empty rules and the ability to self-generate missing specs based on existing ones. Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 135 +++++++++++++++++++++++++++++- 1 file changed, 134 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index e8d0c5592fd3..6b7f227ca9e4 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -55,6 +55,7 @@ #define DRV_RELDATE "April 4, 2008" #define MLX4_IB_FLOW_MAX_PRIO 0xFFF +#define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver"); @@ -825,6 +826,7 @@ struct mlx4_ib_steering { }; static int parse_flow_attr(struct mlx4_dev *dev, + u32 qp_num, union ib_flow_spec *ib_spec, struct _rule_hw *mlx4_spec) { @@ -840,6 +842,14 @@ static int parse_flow_attr(struct mlx4_dev *dev, mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag; mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag; break; + case IB_FLOW_SPEC_IB: + type = MLX4_NET_TRANS_RULE_ID_IB; + mlx4_spec->ib.l3_qpn = + cpu_to_be32(qp_num); + mlx4_spec->ib.qpn_mask = + cpu_to_be32(MLX4_IB_FLOW_QPN_MASK); + break; + case IB_FLOW_SPEC_IPV4: type = MLX4_NET_TRANS_RULE_ID_IPV4; @@ -871,6 +881,115 @@ static int parse_flow_attr(struct mlx4_dev *dev, return mlx4_hw_rule_sz(dev, type); } +struct default_rules { + __u32 mandatory_fields[IB_FLOW_SPEC_SUPPORT_LAYERS]; + __u32 mandatory_not_fields[IB_FLOW_SPEC_SUPPORT_LAYERS]; + __u32 rules_create_list[IB_FLOW_SPEC_SUPPORT_LAYERS]; + __u8 link_layer; +}; +static const struct default_rules default_table[] = { + { + .mandatory_fields = {IB_FLOW_SPEC_IPV4}, + .mandatory_not_fields = {IB_FLOW_SPEC_ETH}, + .rules_create_list = {IB_FLOW_SPEC_IB}, + .link_layer = IB_LINK_LAYER_INFINIBAND + } +}; + +static int __mlx4_ib_default_rules_match(struct ib_qp *qp, + struct ib_flow_attr *flow_attr) +{ + int i, j, k; + void *ib_flow; + const struct default_rules *pdefault_rules = default_table; + u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port); + + for (i = 0; i < sizeof(default_table)/sizeof(default_table[0]); i++, + pdefault_rules++) { + __u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS]; + memset(&field_types, 0, sizeof(field_types)); + + if (link_layer != pdefault_rules->link_layer) + continue; + + ib_flow = flow_attr + 1; + /* we assume the specs are sorted */ + for (j = 0, k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS && + j < flow_attr->num_of_specs; k++) { + union ib_flow_spec *current_flow = + (union ib_flow_spec *)ib_flow; + + /* same layer but different type */ + if (((current_flow->type & IB_FLOW_SPEC_LAYER_MASK) == + (pdefault_rules->mandatory_fields[k] & + IB_FLOW_SPEC_LAYER_MASK)) && + (current_flow->type != + pdefault_rules->mandatory_fields[k])) + goto out; + + /* same layer, try match next one */ + if (current_flow->type == + pdefault_rules->mandatory_fields[k]) { + j++; + ib_flow += + ((union ib_flow_spec *)ib_flow)->size; + } + } + + ib_flow = flow_attr + 1; + for (j = 0; j < flow_attr->num_of_specs; + j++, ib_flow += ((union ib_flow_spec *)ib_flow)->size) + for (k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS; k++) + /* same layer and same type */ + if (((union ib_flow_spec *)ib_flow)->type == + pdefault_rules->mandatory_not_fields[k]) + goto out; + + return i; + } +out: + return -1; +} + +static int __mlx4_ib_create_default_rules( + struct mlx4_ib_dev *mdev, + struct ib_qp *qp, + const struct default_rules *pdefault_rules, + struct _rule_hw *mlx4_spec) { + int size = 0; + int i; + + for (i = 0; i < sizeof(pdefault_rules->rules_create_list)/ + sizeof(pdefault_rules->rules_create_list[0]); i++) { + int ret; + union ib_flow_spec ib_spec; + switch (pdefault_rules->rules_create_list[i]) { + case 0: + /* no rule */ + continue; + case IB_FLOW_SPEC_IB: + ib_spec.type = IB_FLOW_SPEC_IB; + ib_spec.size = sizeof(struct ib_flow_spec_ib); + + break; + default: + /* invalid rule */ + return -EINVAL; + } + /* We must put empty rule, qpn is being ignored */ + ret = parse_flow_attr(mdev->dev, 0, &ib_spec, + mlx4_spec); + if (ret < 0) { + pr_info("invalid parsing\n"); + return -EINVAL; + } + + mlx4_spec = (void *)mlx4_spec + ret; + size += ret; + } + return size; +} + static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, int domain, enum mlx4_net_trans_promisc_mode flow_type, @@ -882,6 +1001,7 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att struct mlx4_ib_dev *mdev = to_mdev(qp->device); struct mlx4_cmd_mailbox *mailbox; struct mlx4_net_trans_rule_hw_ctrl *ctrl; + int default_flow; static const u16 __mlx4_domain[] = { [IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS, @@ -916,8 +1036,21 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att ib_flow = flow_attr + 1; size += sizeof(struct mlx4_net_trans_rule_hw_ctrl); + /* Add default flows */ + default_flow = __mlx4_ib_default_rules_match(qp, flow_attr); + if (default_flow >= 0) { + ret = __mlx4_ib_create_default_rules( + mdev, qp, default_table + default_flow, + mailbox->buf + size); + if (ret < 0) { + mlx4_free_cmd_mailbox(mdev->dev, mailbox); + return -EINVAL; + } + size += ret; + } for (i = 0; i < flow_attr->num_of_specs; i++) { - ret = parse_flow_attr(mdev->dev, ib_flow, mailbox->buf + size); + ret = parse_flow_attr(mdev->dev, qp->qp_num, ib_flow, + mailbox->buf + size); if (ret < 0) { mlx4_free_cmd_mailbox(mdev->dev, mailbox); return -EINVAL; From c1c98501121eefa0888a42566ec7233a1626f678 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 7 Nov 2013 15:25:17 +0200 Subject: [PATCH 29/61] IB/mlx4: Add support for steerable IB UD QPs This patch adds support for steerable (NETIF) QP creation. When we create the device, we allocate a range of steerable QPs. Afterward when a QP is created with the NETIF flag, it's allocated from this range. Allocation is managed by bitmap allocator. Internal steering rules for those QPs is automatically generated on their creation. Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 106 ++++++++++++++++++++++++++- drivers/infiniband/hw/mlx4/mlx4_ib.h | 11 +++ drivers/infiniband/hw/mlx4/qp.c | 53 ++++++++++++-- 3 files changed, 163 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 6b7f227ca9e4..ea5844e89b2a 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1850,8 +1850,35 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) spin_lock_init(&ibdev->sm_lock); mutex_init(&ibdev->cap_mask_mutex); + if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) { + ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS; + err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count, + MLX4_IB_UC_STEER_QPN_ALIGN, + &ibdev->steer_qpn_base); + if (err) + goto err_counter; + + ibdev->ib_uc_qpns_bitmap = + kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) * + sizeof(long), + GFP_KERNEL); + if (!ibdev->ib_uc_qpns_bitmap) { + dev_err(&dev->pdev->dev, "bit map alloc failed\n"); + goto err_steer_qp_release; + } + + bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count); + + err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE( + dev, ibdev->steer_qpn_base, + ibdev->steer_qpn_base + + ibdev->steer_qpn_count - 1); + if (err) + goto err_steer_free_bitmap; + } + if (ib_register_device(&ibdev->ib_dev, NULL)) - goto err_counter; + goto err_steer_free_bitmap; if (mlx4_ib_mad_init(ibdev)) goto err_reg; @@ -1902,6 +1929,13 @@ err_mad: err_reg: ib_unregister_device(&ibdev->ib_dev); +err_steer_free_bitmap: + kfree(ibdev->ib_uc_qpns_bitmap); + +err_steer_qp_release: + if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) + mlx4_qp_release_range(dev, ibdev->steer_qpn_base, + ibdev->steer_qpn_count); err_counter: for (; i; --i) if (ibdev->counters[i - 1] != -1) @@ -1922,6 +1956,69 @@ err_dealloc: return NULL; } +int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn) +{ + int offset; + + WARN_ON(!dev->ib_uc_qpns_bitmap); + + offset = bitmap_find_free_region(dev->ib_uc_qpns_bitmap, + dev->steer_qpn_count, + get_count_order(count)); + if (offset < 0) + return offset; + + *qpn = dev->steer_qpn_base + offset; + return 0; +} + +void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count) +{ + if (!qpn || + dev->steering_support != MLX4_STEERING_MODE_DEVICE_MANAGED) + return; + + BUG_ON(qpn < dev->steer_qpn_base); + + bitmap_release_region(dev->ib_uc_qpns_bitmap, + qpn - dev->steer_qpn_base, + get_count_order(count)); +} + +int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, + int is_attach) +{ + int err; + size_t flow_size; + struct ib_flow_attr *flow = NULL; + struct ib_flow_spec_ib *ib_spec; + + if (is_attach) { + flow_size = sizeof(struct ib_flow_attr) + + sizeof(struct ib_flow_spec_ib); + flow = kzalloc(flow_size, GFP_KERNEL); + if (!flow) + return -ENOMEM; + flow->port = mqp->port; + flow->num_of_specs = 1; + flow->size = flow_size; + ib_spec = (struct ib_flow_spec_ib *)(flow + 1); + ib_spec->type = IB_FLOW_SPEC_IB; + ib_spec->size = sizeof(struct ib_flow_spec_ib); + /* Add an empty rule for IB L2 */ + memset(&ib_spec->mask, 0, sizeof(ib_spec->mask)); + + err = __mlx4_ib_create_flow(&mqp->ibqp, flow, + IB_FLOW_DOMAIN_NIC, + MLX4_FS_REGULAR, + &mqp->reg_id); + } else { + err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id); + } + kfree(flow); + return err; +} + static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) { struct mlx4_ib_dev *ibdev = ibdev_ptr; @@ -1935,6 +2032,13 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) pr_warn("failure unregistering notifier\n"); ibdev->iboe.nb.notifier_call = NULL; } + + if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) { + mlx4_qp_release_range(dev, ibdev->steer_qpn_base, + ibdev->steer_qpn_count); + kfree(ibdev->ib_uc_qpns_bitmap); + } + iounmap(ibdev->uar_map); for (p = 0; p < ibdev->num_ports; ++p) if (ibdev->counters[p] != -1) diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index e9fb39603ee9..837f9aa3d2a2 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -68,6 +68,8 @@ enum { /*module param to indicate if SM assigns the alias_GUID*/ extern int mlx4_ib_sm_guid_assign; +#define MLX4_IB_UC_STEER_QPN_ALIGN 1 +#define MLX4_IB_UC_MAX_NUM_QPS 256 struct mlx4_ib_ucontext { struct ib_ucontext ibucontext; struct mlx4_uar uar; @@ -153,6 +155,7 @@ struct mlx4_ib_wq { enum mlx4_ib_qp_flags { MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO, MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, + MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP, MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30, MLX4_IB_SRIOV_SQP = 1 << 31, }; @@ -270,6 +273,7 @@ struct mlx4_ib_qp { struct list_head gid_list; struct list_head steering_rules; struct mlx4_ib_buf *sqp_proxy_rcv; + u64 reg_id; }; @@ -494,6 +498,9 @@ struct mlx4_ib_dev { struct kobject *dev_ports_parent[MLX4_MFUNC_MAX]; struct mlx4_ib_iov_port iov_ports[MLX4_MAX_PORTS]; struct pkey_mgt pkeys; + unsigned long *ib_uc_qpns_bitmap; + int steer_qpn_count; + int steer_qpn_base; int steering_support; }; @@ -753,5 +760,9 @@ void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device); __be64 mlx4_ib_gen_node_guid(void); +int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn); +void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count); +int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, + int is_attach); #endif /* MLX4_IB_H */ diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 4f10af2905b5..387fbf274151 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -716,6 +716,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) qp->flags |= MLX4_IB_QP_LSO; + if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) { + if (dev->steering_support == + MLX4_STEERING_MODE_DEVICE_MANAGED) + qp->flags |= MLX4_IB_QP_NETIF; + else + goto err; + } + err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp); if (err) goto err; @@ -765,7 +773,11 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (init_attr->qp_type == IB_QPT_RAW_PACKET) err = mlx4_qp_reserve_range(dev->dev, 1, 1 << 8, &qpn); else - err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn); + if (qp->flags & MLX4_IB_QP_NETIF) + err = mlx4_ib_steer_qp_alloc(dev, 1, &qpn); + else + err = mlx4_qp_reserve_range(dev->dev, 1, 1, + &qpn); if (err) goto err_proxy; } @@ -790,8 +802,12 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, return 0; err_qpn: - if (!sqpn) - mlx4_qp_release_range(dev->dev, qpn, 1); + if (!sqpn) { + if (qp->flags & MLX4_IB_QP_NETIF) + mlx4_ib_steer_qp_free(dev, qpn, 1); + else + mlx4_qp_release_range(dev->dev, qpn, 1); + } err_proxy: if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI) free_proxy_bufs(pd->device, qp); @@ -932,8 +948,12 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, mlx4_qp_free(dev->dev, &qp->mqp); - if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp)) - mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1); + if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp)) { + if (qp->flags & MLX4_IB_QP_NETIF) + mlx4_ib_steer_qp_free(dev, qp->mqp.qpn, 1); + else + mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1); + } mlx4_mtt_cleanup(dev->dev, &qp->mtt); @@ -987,9 +1007,16 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, */ if (init_attr->create_flags & ~(MLX4_IB_QP_LSO | MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK | - MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP)) + MLX4_IB_SRIOV_TUNNEL_QP | + MLX4_IB_SRIOV_SQP | + MLX4_IB_QP_NETIF)) return ERR_PTR(-EINVAL); + if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) { + if (init_attr->qp_type != IB_QPT_UD) + return ERR_PTR(-EINVAL); + } + if (init_attr->create_flags && (udata || ((init_attr->create_flags & ~MLX4_IB_SRIOV_SQP) && @@ -1235,6 +1262,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, struct mlx4_qp_context *context; enum mlx4_qp_optpar optpar = 0; int sqd_event; + int steer_qp = 0; int err = -EINVAL; context = kzalloc(sizeof *context, GFP_KERNEL); @@ -1319,6 +1347,11 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX; } else context->pri_path.counter_index = 0xff; + + if (qp->flags & MLX4_IB_QP_NETIF) { + mlx4_ib_steer_qp_reg(dev, qp, 1); + steer_qp = 1; + } } if (attr_mask & IB_QP_PKEY_INDEX) { @@ -1547,9 +1580,14 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, qp->sq_next_wqe = 0; if (qp->rq.wqe_cnt) *qp->db.db = 0; + + if (qp->flags & MLX4_IB_QP_NETIF) + mlx4_ib_steer_qp_reg(dev, qp, 0); } out: + if (err && steer_qp) + mlx4_ib_steer_qp_reg(dev, qp, 0); kfree(context); return err; } @@ -2762,6 +2800,9 @@ done: if (qp->flags & MLX4_IB_QP_LSO) qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO; + if (qp->flags & MLX4_IB_QP_NETIF) + qp_init_attr->create_flags |= IB_QP_CREATE_NETIF_QP; + qp_init_attr->sq_sig_type = qp->sq_signal_bits == cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; From 4942c0b4b64478ff45c3bbf4d40aebd66de0bcc5 Mon Sep 17 00:00:00 2001 From: Upinder Malhi Date: Wed, 15 Jan 2014 17:02:20 -0800 Subject: [PATCH 30/61] IB/usnic: Fix endianness-related warnings Fix sparse endianness related warnings. Signed-off-by: Upinder Malhi Signed-off-by: Roland Dreier --- drivers/infiniband/hw/usnic/usnic_common_util.h | 2 +- drivers/infiniband/hw/usnic/usnic_transport.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_common_util.h b/drivers/infiniband/hw/usnic/usnic_common_util.h index afd8bfa379ed..9d737ed5e55d 100644 --- a/drivers/infiniband/hw/usnic/usnic_common_util.h +++ b/drivers/infiniband/hw/usnic/usnic_common_util.h @@ -36,7 +36,7 @@ usnic_mac_to_gid(const char *const mac, char *raw_gid) } static inline void -usnic_mac_ip_to_gid(const char *const mac, const uint32_t inaddr, char *raw_gid) +usnic_mac_ip_to_gid(const char *const mac, const __be32 inaddr, char *raw_gid) { raw_gid[0] = 0xfe; raw_gid[1] = 0x80; diff --git a/drivers/infiniband/hw/usnic/usnic_transport.c b/drivers/infiniband/hw/usnic/usnic_transport.c index 9182cc8f8b93..ddef6f77a78c 100644 --- a/drivers/infiniband/hw/usnic/usnic_transport.c +++ b/drivers/infiniband/hw/usnic/usnic_transport.c @@ -59,8 +59,7 @@ int usnic_transport_sock_to_str(char *buf, int buf_sz, if (err) return 0; - addr = htonl(addr); - return scnprintf(buf, buf_sz, "Proto:%u Addr:%pI4 Port:%hu", + return scnprintf(buf, buf_sz, "Proto:%u Addr:%pI4h Port:%hu", proto, &addr, port); } From 2d97436f5b06217beb6c91a0cd0ae0f0d79b61cc Mon Sep 17 00:00:00 2001 From: Upinder Malhi Date: Wed, 15 Jan 2014 17:02:21 -0800 Subject: [PATCH 31/61] IB/usnic: Add dependency on CONFIG_INET usNIC needs inet notifiers to function correctly, so add a Kconfig dependency on CONFIG_INET. Signed-off-by: Upinder Malhi Signed-off-by: Roland Dreier --- drivers/infiniband/hw/usnic/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/usnic/Kconfig b/drivers/infiniband/hw/usnic/Kconfig index 2cc8ba00b34b..29ab11c34f3f 100644 --- a/drivers/infiniband/hw/usnic/Kconfig +++ b/drivers/infiniband/hw/usnic/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_USNIC tristate "Verbs support for Cisco VIC" - depends on NETDEVICES && ETHERNET && PCI && INTEL_IOMMU + depends on NETDEVICES && ETHERNET && INET && PCI && INTEL_IOMMU select ENIC select NET_VENDOR_CISCO select PCI_IOV From 5db5765e255de4072eb0e35facfeafce53af001b Mon Sep 17 00:00:00 2001 From: Upinder Malhi Date: Wed, 15 Jan 2014 17:02:36 -0800 Subject: [PATCH 32/61] IB/core: Add support for RDMA_NODE_USNIC_UDP Add the complementary RDMA_NODE_USNIC_UDP for RDMA_TRANSPORT_USNIC_UDP. Signed-off-by: Upinder Malhi Signed-off-by: Roland Dreier --- drivers/infiniband/core/sysfs.c | 1 + drivers/infiniband/core/verbs.c | 2 ++ include/rdma/ib_verbs.h | 1 + 3 files changed, 4 insertions(+) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index faad2caf22b1..7d3292c7b4b4 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -613,6 +613,7 @@ static ssize_t show_node_type(struct device *device, case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type); case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type); case RDMA_NODE_USNIC: return sprintf(buf, "%d: usNIC\n", dev->node_type); + case RDMA_NODE_USNIC_UDP: return sprintf(buf, "%d: usNIC UDP\n", dev->node_type); case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type); case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type); default: return sprintf(buf, "%d: \n", dev->node_type); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index fae0b08248e4..88590e3da8c0 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -115,6 +115,8 @@ rdma_node_get_transport(enum rdma_node_type node_type) case RDMA_NODE_RNIC: return RDMA_TRANSPORT_IWARP; case RDMA_NODE_USNIC: + return RDMA_TRANSPORT_USNIC; + case RDMA_NODE_USNIC_UDP: return RDMA_TRANSPORT_USNIC_UDP; default: BUG(); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index b19aa7285ea3..688511ed52f5 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -69,6 +69,7 @@ enum rdma_node_type { RDMA_NODE_IB_ROUTER, RDMA_NODE_RNIC, RDMA_NODE_USNIC, + RDMA_NODE_USNIC_UDP, }; enum rdma_transport_type { From 61f78268936e781a104b4ac06b7e47d760800c40 Mon Sep 17 00:00:00 2001 From: Upinder Malhi Date: Wed, 15 Jan 2014 17:02:37 -0800 Subject: [PATCH 33/61] IB/usnic: Advertise usNIC devices as RDMA_NODE_USNIC_UDP usNIC default transport is UDP. Hence, advertise RDMA_NODE_USNIC_UDP by default for usNIC devices. Signed-off-by: Upinder Malhi Signed-off-by: Roland Dreier --- drivers/infiniband/hw/usnic/usnic_ib_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 7d2efd4d80d5..fb6d026f92cd 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -332,7 +332,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev) us_ibdev->pdev = dev; us_ibdev->netdev = pci_get_drvdata(dev); us_ibdev->ib_dev.owner = THIS_MODULE; - us_ibdev->ib_dev.node_type = RDMA_NODE_USNIC; + us_ibdev->ib_dev.node_type = RDMA_NODE_USNIC_UDP; us_ibdev->ib_dev.phys_port_cnt = USNIC_IB_PORT_CNT; us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS; us_ibdev->ib_dev.dma_device = &dev->dev; From 6dcebe614c667fca73aaf0cfbd1e70bc9179538e Mon Sep 17 00:00:00 2001 From: Upinder Malhi Date: Wed, 15 Jan 2014 17:02:43 -0800 Subject: [PATCH 34/61] IB/usnic: Set userspace/kernel ABI ver to 4 usNIC userspace/kernel ABI should be set to 4 instead of 3. Signed-off-by: Upinder Malhi Signed-off-by: Roland Dreier --- drivers/infiniband/hw/usnic/usnic_abi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/usnic/usnic_abi.h b/drivers/infiniband/hw/usnic/usnic_abi.h index 014bcfcabc0f..04a66229584e 100644 --- a/drivers/infiniband/hw/usnic/usnic_abi.h +++ b/drivers/infiniband/hw/usnic/usnic_abi.h @@ -21,7 +21,7 @@ #define USNIC_ABI_H /* ABI between userspace and kernel */ -#define USNIC_UVERBS_ABI_VERSION 3 +#define USNIC_UVERBS_ABI_VERSION 4 #define USNIC_QP_GRP_MAX_WQS 8 #define USNIC_QP_GRP_MAX_RQS 8 From d1db47c5eed89a1c8b60e780aeadd870bb0d3894 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 15 Jan 2014 13:52:55 +0800 Subject: [PATCH 35/61] IB/usnic: Remove unused variable in usnic_debugfs_exit() The variable qp_grp is initialized but never used otherwise, so remove the unused variable. Signed-off-by: Wei Yongjun Signed-off-by: Roland Dreier --- drivers/infiniband/hw/usnic/usnic_debugfs.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_debugfs.c b/drivers/infiniband/hw/usnic/usnic_debugfs.c index 6cb2e7ca7df2..5d13860161a4 100644 --- a/drivers/infiniband/hw/usnic/usnic_debugfs.c +++ b/drivers/infiniband/hw/usnic/usnic_debugfs.c @@ -131,13 +131,9 @@ void usnic_debugfs_exit(void) void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow) { - struct usnic_ib_qp_grp *qp_grp; - if (IS_ERR_OR_NULL(flows_dentry)) return; - qp_grp = qp_flow->qp_grp; - scnprintf(qp_flow->dentry_name, sizeof(qp_flow->dentry_name), "%u", qp_flow->flow->flow_id); qp_flow->dbgfs_dentry = debugfs_create_file(qp_flow->dentry_name, From af2e2e35a23e4aeecfe4332a7140c81e0f09b7e3 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 29 Dec 2013 23:47:20 +0100 Subject: [PATCH 36/61] IB/mlx4: Fix error return code Set the return variable to an error code as done elsewhere in the function. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Julia Lawall Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/sysfs.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index 97516eb363b7..db2ea31df832 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -582,8 +582,10 @@ static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave) p->pkey_group.attrs = alloc_group_attrs(show_port_pkey, store_port_pkey, dev->dev->caps.pkey_table_len[port_num]); - if (!p->pkey_group.attrs) + if (!p->pkey_group.attrs) { + ret = -ENOMEM; goto err_alloc; + } ret = sysfs_create_group(&p->kobj, &p->pkey_group); if (ret) @@ -591,8 +593,10 @@ static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave) p->gid_group.name = "gid_idx"; p->gid_group.attrs = alloc_group_attrs(show_port_gid_idx, NULL, 1); - if (!p->gid_group.attrs) + if (!p->gid_group.attrs) { + ret = -ENOMEM; goto err_free_pkey; + } ret = sysfs_create_group(&p->kobj, &p->gid_group); if (ret) From e6b45d49b41110e0d795d17666c5850240829e0f Mon Sep 17 00:00:00 2001 From: David Dillow Date: Fri, 3 Jan 2014 16:16:10 -0500 Subject: [PATCH 37/61] MAINTAINERS: Pass the torch of SRP submaintainership Today was my last day at ORNL, and my future endeavors will leave even less time to maintain the SRP initiator. My thanks especially go to Bart, for keeping the pressure to improve alive, and for driving so many of those improvements. [ Add Bart as new submaintainer. - Roland ] Signed-off-by: Roland Dreier --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 31a046213274..dc338d830136 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7468,7 +7468,7 @@ S: Maintained F: drivers/scsi/sr* SCSI RDMA PROTOCOL (SRP) INITIATOR -M: David Dillow +M: Bart Van Assche L: linux-rdma@vger.kernel.org S: Supported W: http://www.openfabrics.org From 18cc4e02508e3f1fadf81f697837567431ee5a9c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Dec 2013 17:05:22 +0100 Subject: [PATCH 38/61] scsi_transport_srp: Block rport upon TL error even with fast_io_fail_tmo = off The current behavior of the SRP transport layer when a transport layer error is encountered is to block SCSI command processing only if fast_io_fail_tmo != off. The current behavior of the FC transport layer when a transport layer error is encountered is to block SCSI command processing no matter which value fast_io_fail_tmo has been set to. Make the behavior of the SRP transport layer consistent with that of the FC transport layer to avoid confusion. Signed-off-by: Bart Van Assche Signed-off-by: Roland Dreier --- drivers/scsi/scsi_transport_srp.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index 2700a5a09bd4..8b9cb22be963 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -67,7 +67,8 @@ static inline struct Scsi_Host *rport_to_shost(struct srp_rport *r) * * The combination of the timeout parameters must be such that SCSI commands * are finished in a reasonable time. Hence do not allow the fast I/O fail - * timeout to exceed SCSI_DEVICE_BLOCK_MAX_TIMEOUT. Furthermore, these + * timeout to exceed SCSI_DEVICE_BLOCK_MAX_TIMEOUT nor allow dev_loss_tmo to + * exceed that limit if failing I/O fast has been disabled. Furthermore, these * parameters must be such that multipath can detect failed paths timely. * Hence do not allow all three parameters to be disabled simultaneously. */ @@ -79,6 +80,9 @@ int srp_tmo_valid(int reconnect_delay, int fast_io_fail_tmo, int dev_loss_tmo) return -EINVAL; if (fast_io_fail_tmo > SCSI_DEVICE_BLOCK_MAX_TIMEOUT) return -EINVAL; + if (fast_io_fail_tmo < 0 && + dev_loss_tmo > SCSI_DEVICE_BLOCK_MAX_TIMEOUT) + return -EINVAL; if (dev_loss_tmo >= LONG_MAX / HZ) return -EINVAL; if (fast_io_fail_tmo >= 0 && dev_loss_tmo >= 0 && @@ -463,20 +467,20 @@ static void __srp_start_tl_fail_timers(struct srp_rport *rport) queue_delayed_work(system_long_wq, &rport->reconnect_work, 1UL * delay * HZ); - if (fast_io_fail_tmo >= 0 && - srp_rport_set_state(rport, SRP_RPORT_BLOCKED) == 0) { + if (srp_rport_set_state(rport, SRP_RPORT_BLOCKED) == 0) { pr_debug("%s new state: %d\n", dev_name(&shost->shost_gendev), rport->state); scsi_target_block(&shost->shost_gendev); - queue_delayed_work(system_long_wq, - &rport->fast_io_fail_work, - 1UL * fast_io_fail_tmo * HZ); + if (fast_io_fail_tmo >= 0) + queue_delayed_work(system_long_wq, + &rport->fast_io_fail_work, + 1UL * fast_io_fail_tmo * HZ); + if (dev_loss_tmo >= 0) + queue_delayed_work(system_long_wq, + &rport->dev_loss_work, + 1UL * dev_loss_tmo * HZ); } - if (dev_loss_tmo >= 0) - queue_delayed_work(system_long_wq, - &rport->dev_loss_work, - 1UL * dev_loss_tmo * HZ); } else { pr_debug("%s has already been deleted\n", dev_name(&shost->shost_gendev)); @@ -578,9 +582,9 @@ int srp_reconnect_rport(struct srp_rport *rport) spin_unlock_irq(shost->host_lock); } else if (rport->state == SRP_RPORT_RUNNING) { /* - * srp_reconnect_rport() was invoked with fast_io_fail - * off. Mark the port as failed and start the TL failure - * timers if these had not yet been started. + * srp_reconnect_rport() has been invoked with fast_io_fail + * and dev_loss off. Mark the port as failed and start the TL + * failure timers if these had not yet been started. */ __rport_fail_io_fast(rport); scsi_target_unblock(&shost->shost_gendev, From 93079162bf0ed2934c7b0c3ee93ba894df8fb3cd Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Dec 2013 17:06:14 +0100 Subject: [PATCH 39/61] scsi_transport_srp: Fix a race condition The rport timers must be stopped before the SRP initiator destroys the resources associated with the SCSI host. This is necessary because otherwise the callback functions invoked from the SRP transport layer could trigger a use-after-free. Stopping the rport timers before invoking scsi_remove_host() can trigger long delays in the SCSI error handler if a transport layer failure occurs while scsi_remove_host() is in progress. Hence move the code for stopping the rport timers from srp_rport_release() into a new function and invoke that function after scsi_remove_host() has finished. This patch fixes the following sporadic kernel crash: kernel BUG at include/asm-generic/dma-mapping-common.h:64! invalid opcode: 0000 [#1] SMP RIP: 0010:[] [] srp_unmap_data+0x121/0x130 [ib_srp] Call Trace: [] srp_free_req+0x3c/0x80 [ib_srp] [] srp_finish_req+0x48/0x70 [ib_srp] [] srp_terminate_io+0x4b/0x60 [ib_srp] [] __rport_fail_io_fast+0x75/0x80 [scsi_transport_srp] [] rport_fast_io_fail_timedout+0x88/0xc0 [scsi_transport_srp] [] worker_thread+0x170/0x2a0 [] kthread+0x96/0xa0 [] child_rip+0xa/0x20 Signed-off-by: Bart Van Assche Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 1 + drivers/scsi/scsi_transport_srp.c | 83 +++++++++++++++-------------- include/scsi/scsi_transport_srp.h | 4 +- 3 files changed, 46 insertions(+), 42 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index a88631918e85..529b6bcdca7a 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -660,6 +660,7 @@ static void srp_remove_target(struct srp_target_port *target) srp_rport_get(target->rport); srp_remove_host(target->scsi_host); scsi_remove_host(target->scsi_host); + srp_stop_rport_timers(target->rport); srp_disconnect_target(target); ib_destroy_cm_id(target->cm_id); srp_free_target_ib(target); diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index 8b9cb22be963..a349d44c4c36 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -456,37 +456,29 @@ static void __srp_start_tl_fail_timers(struct srp_rport *rport) lockdep_assert_held(&rport->mutex); - if (!rport->deleted) { - delay = rport->reconnect_delay; - fast_io_fail_tmo = rport->fast_io_fail_tmo; - dev_loss_tmo = rport->dev_loss_tmo; - pr_debug("%s current state: %d\n", - dev_name(&shost->shost_gendev), rport->state); + delay = rport->reconnect_delay; + fast_io_fail_tmo = rport->fast_io_fail_tmo; + dev_loss_tmo = rport->dev_loss_tmo; + pr_debug("%s current state: %d\n", dev_name(&shost->shost_gendev), + rport->state); - if (delay > 0) + if (rport->state == SRP_RPORT_LOST) + return; + if (delay > 0) + queue_delayed_work(system_long_wq, &rport->reconnect_work, + 1UL * delay * HZ); + if (srp_rport_set_state(rport, SRP_RPORT_BLOCKED) == 0) { + pr_debug("%s new state: %d\n", dev_name(&shost->shost_gendev), + rport->state); + scsi_target_block(&shost->shost_gendev); + if (fast_io_fail_tmo >= 0) queue_delayed_work(system_long_wq, - &rport->reconnect_work, - 1UL * delay * HZ); - if (srp_rport_set_state(rport, SRP_RPORT_BLOCKED) == 0) { - pr_debug("%s new state: %d\n", - dev_name(&shost->shost_gendev), - rport->state); - scsi_target_block(&shost->shost_gendev); - if (fast_io_fail_tmo >= 0) - queue_delayed_work(system_long_wq, - &rport->fast_io_fail_work, - 1UL * fast_io_fail_tmo * HZ); - if (dev_loss_tmo >= 0) - queue_delayed_work(system_long_wq, - &rport->dev_loss_work, - 1UL * dev_loss_tmo * HZ); - } - } else { - pr_debug("%s has already been deleted\n", - dev_name(&shost->shost_gendev)); - srp_rport_set_state(rport, SRP_RPORT_FAIL_FAST); - scsi_target_unblock(&shost->shost_gendev, - SDEV_TRANSPORT_OFFLINE); + &rport->fast_io_fail_work, + 1UL * fast_io_fail_tmo * HZ); + if (dev_loss_tmo >= 0) + queue_delayed_work(system_long_wq, + &rport->dev_loss_work, + 1UL * dev_loss_tmo * HZ); } } @@ -560,7 +552,7 @@ int srp_reconnect_rport(struct srp_rport *rport) scsi_target_block(&shost->shost_gendev); while (scsi_request_fn_active(shost)) msleep(20); - res = i->f->reconnect(rport); + res = rport->state != SRP_RPORT_LOST ? i->f->reconnect(rport) : -ENODEV; pr_debug("%s (state %d): transport.reconnect() returned %d\n", dev_name(&shost->shost_gendev), rport->state, res); if (res == 0) { @@ -626,10 +618,6 @@ static void srp_rport_release(struct device *dev) { struct srp_rport *rport = dev_to_rport(dev); - cancel_delayed_work_sync(&rport->reconnect_work); - cancel_delayed_work_sync(&rport->fast_io_fail_work); - cancel_delayed_work_sync(&rport->dev_loss_work); - put_device(dev->parent); kfree(rport); } @@ -784,12 +772,6 @@ void srp_rport_del(struct srp_rport *rport) device_del(dev); transport_destroy_device(dev); - mutex_lock(&rport->mutex); - if (rport->state == SRP_RPORT_BLOCKED) - __rport_fail_io_fast(rport); - rport->deleted = true; - mutex_unlock(&rport->mutex); - put_device(dev); } EXPORT_SYMBOL_GPL(srp_rport_del); @@ -814,6 +796,27 @@ void srp_remove_host(struct Scsi_Host *shost) } EXPORT_SYMBOL_GPL(srp_remove_host); +/** + * srp_stop_rport_timers - stop the transport layer recovery timers + * + * Must be called after srp_remove_host() and scsi_remove_host(). The caller + * must hold a reference on the rport (rport->dev) and on the SCSI host + * (rport->dev.parent). + */ +void srp_stop_rport_timers(struct srp_rport *rport) +{ + mutex_lock(&rport->mutex); + if (rport->state == SRP_RPORT_BLOCKED) + __rport_fail_io_fast(rport); + srp_rport_set_state(rport, SRP_RPORT_LOST); + mutex_unlock(&rport->mutex); + + cancel_delayed_work_sync(&rport->reconnect_work); + cancel_delayed_work_sync(&rport->fast_io_fail_work); + cancel_delayed_work_sync(&rport->dev_loss_work); +} +EXPORT_SYMBOL_GPL(srp_stop_rport_timers); + static int srp_tsk_mgmt_response(struct Scsi_Host *shost, u64 nexus, u64 tm_id, int result) { diff --git a/include/scsi/scsi_transport_srp.h b/include/scsi/scsi_transport_srp.h index 4ebf6913b7b2..f24e763fa430 100644 --- a/include/scsi/scsi_transport_srp.h +++ b/include/scsi/scsi_transport_srp.h @@ -19,7 +19,7 @@ struct srp_rport_identifiers { * @SRP_RPORT_BLOCKED: Transport layer not operational; fast I/O fail timer * is running and I/O has been blocked. * @SRP_RPORT_FAIL_FAST: Fast I/O fail timer has expired; fail I/O fast. - * @SRP_RPORT_LOST: Device loss timer has expired; port is being removed. + * @SRP_RPORT_LOST: Port is being removed. */ enum srp_rport_state { SRP_RPORT_RUNNING, @@ -48,7 +48,6 @@ struct srp_rport { struct mutex mutex; enum srp_rport_state state; - bool deleted; int reconnect_delay; int failed_reconnects; struct delayed_work reconnect_work; @@ -101,6 +100,7 @@ extern int srp_tmo_valid(int reconnect_delay, int fast_io_fail_tmo, extern int srp_reconnect_rport(struct srp_rport *rport); extern void srp_start_tl_fail_timers(struct srp_rport *rport); extern void srp_remove_host(struct Scsi_Host *); +extern void srp_stop_rport_timers(struct srp_rport *rport); /** * srp_chkready() - evaluate the transport layer state before I/O From 31b90347dc9fb7649a8dc3ffefa52771ac30e10b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Dec 2013 17:08:43 +0100 Subject: [PATCH 40/61] scsi_transport_srp: Add rport state diagram Add a diagram in Documentation/scsi/scsi_transport_srp that illustrates the rport state transitions. Signed-off-by: Bart Van Assche Signed-off-by: Roland Dreier --- .../scsi/scsi_transport_srp/Makefile | 7 +++++ .../rport_state_diagram.dot | 26 +++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 Documentation/scsi/scsi_transport_srp/Makefile create mode 100644 Documentation/scsi/scsi_transport_srp/rport_state_diagram.dot diff --git a/Documentation/scsi/scsi_transport_srp/Makefile b/Documentation/scsi/scsi_transport_srp/Makefile new file mode 100644 index 000000000000..5f6b567e955c --- /dev/null +++ b/Documentation/scsi/scsi_transport_srp/Makefile @@ -0,0 +1,7 @@ +all: rport_state_diagram.svg rport_state_diagram.png + +rport_state_diagram.svg: rport_state_diagram.dot + dot -Tsvg -o $@ $< + +rport_state_diagram.png: rport_state_diagram.dot + dot -Tpng -o $@ $< diff --git a/Documentation/scsi/scsi_transport_srp/rport_state_diagram.dot b/Documentation/scsi/scsi_transport_srp/rport_state_diagram.dot new file mode 100644 index 000000000000..75d610d6411a --- /dev/null +++ b/Documentation/scsi/scsi_transport_srp/rport_state_diagram.dot @@ -0,0 +1,26 @@ +digraph srp_initiator { + node [shape = doublecircle]; running lost; + node [shape = circle]; + + { + rank = min; + running_rta [ label = "running;\nreconnect\ntimer\nactive" ]; + }; + running [ label = "running;\nreconnect\ntimer\nstopped" ]; + blocked; + failfast [ label = "fail I/O\nfast" ]; + lost; + + running -> running_rta [ label = "fast_io_fail_tmo = off and\ndev_loss_tmo = off;\nsrp_start_tl_fail_timers()" ]; + running_rta -> running [ label = "fast_io_fail_tmo = off and\ndev_loss_tmo = off;\nreconnecting succeeded" ]; + running -> blocked [ label = "fast_io_fail_tmo >= 0 or\ndev_loss_tmo >= 0;\nsrp_start_tl_fail_timers()" ]; + running -> failfast [ label = "fast_io_fail_tmo = off and\ndev_loss_tmo = off;\nreconnecting failed\n" ]; + blocked -> failfast [ label = "fast_io_fail_tmo\nexpired or\nreconnecting\nfailed" ]; + blocked -> lost [ label = "dev_loss_tmo\nexpired or\nsrp_stop_rport_timers()" ]; + failfast -> lost [ label = "dev_loss_tmo\nexpired or\nsrp_stop_rport_timers()" ]; + blocked -> running [ label = "reconnecting\nsucceeded" ]; + failfast -> failfast [ label = "reconnecting\nfailed" ]; + failfast -> running [ label = "reconnecting\nsucceeded" ]; + running -> lost [ label = "srp_stop_rport_timers()" ]; + running_rta -> lost [ label = "srp_stop_rport_timers()" ]; +} From 0c7f82189d2249153ee302fe4d02de15f165d220 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 13 Jan 2014 12:39:35 +0100 Subject: [PATCH 41/61] scsi_transport_srp: Fix kernel-doc warnings The following command has been used to verify that the kernel-doc tool no longer complains about undocumented fields: scripts/kernel-doc -html drivers/scsi/scsi_transport_srp.c \ include/scsi/scsi_transport_srp.h >srp-transport-doc.html Signed-off-by: Bart Van Assche Acked-by: Sebastian Riemer Acked-by: Randy Dunlap Signed-off-by: Roland Dreier --- drivers/scsi/scsi_transport_srp.c | 12 ++++++++++++ include/scsi/scsi_transport_srp.h | 32 +++++++++++++++++++++++++++---- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index a349d44c4c36..d47ffc8d3e43 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -64,6 +64,9 @@ static inline struct Scsi_Host *rport_to_shost(struct srp_rport *r) /** * srp_tmo_valid() - check timeout combination validity + * @reconnect_delay: Reconnect delay in seconds. + * @fast_io_fail_tmo: Fast I/O fail timeout in seconds. + * @dev_loss_tmo: Device loss timeout in seconds. * * The combination of the timeout parameters must be such that SCSI commands * are finished in a reasonable time. Hence do not allow the fast I/O fail @@ -372,6 +375,7 @@ invalid: /** * srp_reconnect_work() - reconnect and schedule a new attempt if necessary + * @work: Work structure used for scheduling this operation. */ static void srp_reconnect_work(struct work_struct *work) { @@ -412,6 +416,7 @@ static void __rport_fail_io_fast(struct srp_rport *rport) /** * rport_fast_io_fail_timedout() - fast I/O failure timeout handler + * @work: Work structure used for scheduling this operation. */ static void rport_fast_io_fail_timedout(struct work_struct *work) { @@ -430,6 +435,7 @@ static void rport_fast_io_fail_timedout(struct work_struct *work) /** * rport_dev_loss_timedout() - device loss timeout handler + * @work: Work structure used for scheduling this operation. */ static void rport_dev_loss_timedout(struct work_struct *work) { @@ -484,6 +490,7 @@ static void __srp_start_tl_fail_timers(struct srp_rport *rport) /** * srp_start_tl_fail_timers() - start the transport layer failure timers + * @rport: SRP target port. * * Start the transport layer fast I/O failure and device loss timers. Do not * modify a timer that was already started. @@ -498,6 +505,7 @@ EXPORT_SYMBOL(srp_start_tl_fail_timers); /** * scsi_request_fn_active() - number of kernel threads inside scsi_request_fn() + * @shost: SCSI host for which to count the number of scsi_request_fn() callers. */ static int scsi_request_fn_active(struct Scsi_Host *shost) { @@ -518,6 +526,7 @@ static int scsi_request_fn_active(struct Scsi_Host *shost) /** * srp_reconnect_rport() - reconnect to an SRP target port + * @rport: SRP target port. * * Blocks SCSI command queueing before invoking reconnect() such that * queuecommand() won't be invoked concurrently with reconnect() from outside @@ -595,6 +604,7 @@ EXPORT_SYMBOL(srp_reconnect_rport); /** * srp_timed_out() - SRP transport intercept of the SCSI timeout EH + * @scmd: SCSI command. * * If a timeout occurs while an rport is in the blocked state, ask the SCSI * EH to continue waiting (BLK_EH_RESET_TIMER). Otherwise let the SCSI core @@ -666,6 +676,7 @@ static int srp_host_match(struct attribute_container *cont, struct device *dev) /** * srp_rport_get() - increment rport reference count + * @rport: SRP target port. */ void srp_rport_get(struct srp_rport *rport) { @@ -675,6 +686,7 @@ EXPORT_SYMBOL(srp_rport_get); /** * srp_rport_put() - decrement rport reference count + * @rport: SRP target port. */ void srp_rport_put(struct srp_rport *rport) { diff --git a/include/scsi/scsi_transport_srp.h b/include/scsi/scsi_transport_srp.h index f24e763fa430..b11da5c1331e 100644 --- a/include/scsi/scsi_transport_srp.h +++ b/include/scsi/scsi_transport_srp.h @@ -29,10 +29,26 @@ enum srp_rport_state { }; /** - * struct srp_rport - * @lld_data: LLD private data. - * @mutex: Protects against concurrent rport reconnect / fast_io_fail / - * dev_loss_tmo activity. + * struct srp_rport - SRP initiator or target port + * + * Fields that are relevant for SRP initiator and SRP target drivers: + * @dev: Device associated with this rport. + * @port_id: 16-byte port identifier. + * @roles: Role of this port - initiator or target. + * + * Fields that are only relevant for SRP initiator drivers: + * @lld_data: LLD private data. + * @mutex: Protects against concurrent rport reconnect / + * fast_io_fail / dev_loss_tmo activity. + * @state: rport state. + * @deleted: Whether or not srp_rport_del() has already been invoked. + * @reconnect_delay: Reconnect delay in seconds. + * @failed_reconnects: Number of failed reconnect attempts. + * @reconnect_work: Work structure used for scheduling reconnect attempts. + * @fast_io_fail_tmo: Fast I/O fail timeout in seconds. + * @dev_loss_tmo: Device loss timeout in seconds. + * @fast_io_fail_work: Work structure used for scheduling fast I/O fail work. + * @dev_loss_work: Work structure used for scheduling device loss work. */ struct srp_rport { /* for initiator and target drivers */ @@ -59,6 +75,8 @@ struct srp_rport { /** * struct srp_function_template + * + * Fields that are only relevant for SRP initiator drivers: * @has_rport_state: Whether or not to create the state, fast_io_fail_tmo and * dev_loss_tmo sysfs attribute for an rport. * @reset_timer_if_blocked: Whether or srp_timed_out() should reset the command @@ -70,6 +88,11 @@ struct srp_rport { * srp_reconnect_rport(). * @terminate_rport_io: Callback function for terminating all outstanding I/O * requests for an rport. + * @rport_delete: Callback function that deletes an rport. + * + * Fields that are only relevant for SRP target drivers: + * @tsk_mgmt_response: Callback function for sending a task management response. + * @it_nexus_response: Callback function for processing an IT nexus response. */ struct srp_function_template { /* for initiator drivers */ @@ -104,6 +127,7 @@ extern void srp_stop_rport_timers(struct srp_rport *rport); /** * srp_chkready() - evaluate the transport layer state before I/O + * @rport: SRP target port pointer. * * Returns a SCSI result code that can be returned by the LLD queuecommand() * implementation. The role of this function is similar to that of From 8ce96afa8239f13bdf5ab35839bac46c103bbedc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 20 Jan 2014 13:32:49 +0300 Subject: [PATCH 42/61] IB/usnic: Use GFP_ATOMIC under spinlock This is called from qp_grp_and_vf_bind() and we are holding the vf->lock so the allocation can't sleep. Fixes: e3cf00d0a87f ('IB/usnic: Add Cisco VIC low-level hardware driver') Signed-off-by: Dan Carpenter Signed-off-by: Roland Dreier --- drivers/infiniband/hw/usnic/usnic_uiom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index ae6934c0d05a..16755cdab2c0 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -498,7 +498,7 @@ int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev) struct usnic_uiom_dev *uiom_dev; int err; - uiom_dev = kzalloc(sizeof(*uiom_dev), GFP_KERNEL); + uiom_dev = kzalloc(sizeof(*uiom_dev), GFP_ATOMIC); if (!uiom_dev) return -ENOMEM; uiom_dev->dev = dev; From 05633102d85b50f35325dfbedafcedd6c5b3264c Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 3 Nov 2013 10:20:43 +0200 Subject: [PATCH 43/61] IB/core: Fix unused variable warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the below "make W=1" build warning: drivers/infiniband/core/iwcm.c: In function ‘destroy_cm_id’: drivers/infiniband/core/iwcm.c:330: warning: variable ‘ret’ set but not used Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/core/iwcm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 0717940ec3b5..3d2e489ab732 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -334,7 +334,6 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; unsigned long flags; - int ret; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); /* @@ -350,7 +349,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) cm_id_priv->state = IW_CM_STATE_DESTROYING; spin_unlock_irqrestore(&cm_id_priv->lock, flags); /* destroy the listening endpoint */ - ret = cm_id->device->iwcm->destroy_listen(cm_id); + cm_id->device->iwcm->destroy_listen(cm_id); spin_lock_irqsave(&cm_id_priv->lock, flags); break; case IW_CM_STATE_ESTABLISHED: From 437708c44395a11e474fb33b4fd7f29483118e51 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 17 Jan 2014 19:47:25 +0100 Subject: [PATCH 44/61] IPoIB: Report operstate consistently when brought up without a link After booting without a working link, "ip link" shows: 5: mlx4_ib1: mtu 2044 qdisc pfifo_fast state DOWN qlen 256 ... 7: mlx4_ib1.8003@mlx4_ib1: mtu 2044 qdisc pfifo_fast state DOWN qlen 256 ... Then after connecting and disconnecting the link, which should result in exactly the same state as before, it shows: 5: mlx4_ib1: mtu 2044 qdisc pfifo_fast state DOWN qlen 256 ... 7: mlx4_ib1.8003@mlx4_ib1: mtu 2044 qdisc pfifo_fast state LOWERLAYERDOWN qlen 256 ... Notice the (now correct) LOWERLAYERDOWN operstate shown for the mlx4_ib1.8003 interface. Ideally the identical state would be shown right after boot. The problem is related to the calling of netif_carrier_off() in network drivers. For a long time it was known that doing netif_carrier_off() before registering the netdevice would result in the interface's operstate being shown as UNKNOWN if the device was brought up without a working link. This problem was fixed in commit 8f4cccbbd92 ('net: Set device operstate at registration time'), but still there remains the minor inconsistency demonstrated above. This patch fixes it by moving ipoib's call to netif_carrier_off() into the .ndo_open method, which is where network drivers ordinarily do it. With the patch when doing the same test as above, the operstate of mlx4_ib1.8003 is shown as LOWERLAYERDOWN right after boot. Signed-off-by: Michal Schmidt Acked-by: Erez Shitrit Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index d64ed05fb082..5786a78ff8bc 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -104,6 +104,8 @@ int ipoib_open(struct net_device *dev) ipoib_dbg(priv, "bringing up interface\n"); + netif_carrier_off(dev); + set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); if (ipoib_pkey_dev_delay_open(dev)) @@ -1366,8 +1368,6 @@ void ipoib_setup(struct net_device *dev) memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN); - netif_carrier_off(dev); - priv->dev = dev; spin_lock_init(&priv->lock); From d9d5713ca628dc211d8b4a1da5fb9e0cfe592b92 Mon Sep 17 00:00:00 2001 From: Svetlana Mavrina Date: Sun, 12 Jan 2014 11:56:09 +0000 Subject: [PATCH 45/61] RDMA/amso1100: Add check if cache memory was allocated before freeing it There is a path in handle_vq() where kmem_cache_free() can be called with pointer to a local variable. It can happen if vq_repbuf_alloc() failed to allocate memory from cache and req is NULL. The patch adds check if cache memory was allocated before freeing it. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Svetlana Mavrina Reviewed-by: Alexey Khoroshilov Signed-off-by: Roland Dreier --- drivers/infiniband/hw/amso1100/c2_intr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/amso1100/c2_intr.c b/drivers/infiniband/hw/amso1100/c2_intr.c index 8951db4ae29d..3a17d9b36dba 100644 --- a/drivers/infiniband/hw/amso1100/c2_intr.c +++ b/drivers/infiniband/hw/amso1100/c2_intr.c @@ -169,7 +169,8 @@ static void handle_vq(struct c2_dev *c2dev, u32 mq_index) * We should never get here, as the adapter should * never send us a reply that we're not expecting. */ - vq_repbuf_free(c2dev, host_msg); + if (reply_msg != NULL) + vq_repbuf_free(c2dev, host_msg); pr_debug("handle_vq: UNEXPECTEDLY got NULL req\n"); return; } From a384b20e417ae0f5f1f359600b4bdcc34265b256 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 21 Dec 2013 17:25:35 +0800 Subject: [PATCH 46/61] IB/usnic: Remove unused includes of Remove including that don't need it. Signed-off-by: Wei Yongjun Signed-off-by: Roland Dreier --- drivers/infiniband/hw/usnic/usnic_ib.h | 1 - drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c | 1 - drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h | 1 - 3 files changed, 3 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_ib.h b/drivers/infiniband/hw/usnic/usnic_ib.h index 111a86e680d5..e5a9297dd1bd 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib.h +++ b/drivers/infiniband/hw/usnic/usnic_ib.h @@ -21,7 +21,6 @@ #include #include -#include #include diff --git a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c index 7e1dafccb11e..d135ad90d914 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c @@ -2,7 +2,6 @@ #include #include #include -#include #include #include "usnic_uiom_interval_tree.h" diff --git a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h index 030ba6e68a52..d4f752e258fd 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h +++ b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h @@ -19,7 +19,6 @@ #ifndef USNIC_UIOM_INTERVAL_TREE_H_ #define USNIC_UIOM_INTERVAL_TREE_H_ -#include #include struct usnic_uiom_interval_node { From 298589b1cb626adf4beba6dd8e3cd4b64e8799be Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Thu, 9 Jan 2014 11:53:27 +0100 Subject: [PATCH 47/61] RDMA/cxgb4: Fix gcc warning on 32-bit arch Building mem.o for 32 bits x86 triggers a GCC warning: drivers/infiniband/hw/cxgb4/mem.c: In function '_c4iw_write_mem_dma_aligned': drivers/infiniband/hw/cxgb4/mem.c:79:25: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] Silence that warning by casting "&wr_wait" to unsigned long before casting it to __be64. That's what _c4iw_write_mem_inline() already does. Signed-off-by: Paul Bolle Acked-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 84e45006451c..41b11951a30a 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -76,7 +76,7 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr, INIT_ULPTX_WR(req, wr_len, 0, 0); req->wr.wr_hi = cpu_to_be32(FW_WR_OP(FW_ULPTX_WR) | (wait ? FW_WR_COMPL(1) : 0)); - req->wr.wr_lo = wait ? (__force __be64)&wr_wait : 0; + req->wr.wr_lo = wait ? (__force __be64)(unsigned long) &wr_wait : 0L; req->wr.wr_mid = cpu_to_be32(FW_WR_LEN16(DIV_ROUND_UP(wr_len, 16))); req->cmd = cpu_to_be32(ULPTX_CMD(ULP_TX_MEM_WRITE)); req->cmd |= cpu_to_be32(V_T5_ULP_MEMIO_ORDER(1)); From 6e0ea9e6cbcead7fa8c76e3e3b9de4a50c5131c5 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 18 Dec 2013 08:41:37 -0800 Subject: [PATCH 48/61] IB/qib: Fix QP check when looping back to/from QP1 The GSI QP type is compatible with and should be allowed to send data to/from any UD QP. This was found when testing ibacm on the same node as an SA. Cc: Reviewed-by: Mike Marciniszyn Signed-off-by: Ira Weiny Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_ud.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index d6c7fe7f88d5..3ad651c3356c 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -57,13 +57,20 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe) struct qib_sge *sge; struct ib_wc wc; u32 length; + enum ib_qp_type sqptype, dqptype; qp = qib_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn); if (!qp) { ibp->n_pkt_drops++; return; } - if (qp->ibqp.qp_type != sqp->ibqp.qp_type || + + sqptype = sqp->ibqp.qp_type == IB_QPT_GSI ? + IB_QPT_UD : sqp->ibqp.qp_type; + dqptype = qp->ibqp.qp_type == IB_QPT_GSI ? + IB_QPT_UD : qp->ibqp.qp_type; + + if (dqptype != sqptype || !(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) { ibp->n_pkt_drops++; goto drop; From 79adc5321e4dfe4df8ad19a805ed81c3b35c16dd Mon Sep 17 00:00:00 2001 From: Ding Tianhong Date: Wed, 8 Jan 2014 10:53:39 +0800 Subject: [PATCH 49/61] RDMA/nes: Slight optimization of Ethernet address compare Use the possibly more efficient ether_addr_equal() instead of memcmp(). Signed-off-by: Wang Weidong Signed-off-by: Ding Tianhong Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_cm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 6b29249aa85a..9c9f2f57e960 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -1354,8 +1354,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi neigh->ha, ntohl(rt->rt_gateway)); if (arpindex >= 0) { - if (!memcmp(nesadapter->arp_table[arpindex].mac_addr, - neigh->ha, ETH_ALEN)) { + if (ether_addr_equal(nesadapter->arp_table[arpindex].mac_addr, neigh->ha)) { /* Mac address same as in nes_arp_table */ goto out; } From e08a8761d89b7625144c3fbf0ff9643159135c96 Mon Sep 17 00:00:00 2001 From: Haggai Eran Date: Tue, 14 Jan 2014 17:45:13 +0200 Subject: [PATCH 50/61] mlx5_core: Fix out arg size in access_register command The output size should be the sum of the core access reg output struct plus the size of the specific register data provided by the caller. Signed-off-by: Haggai Eran Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx5/core/port.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index f6afe7b5a675..8c9ac870ecb1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -57,7 +57,7 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, in->arg = cpu_to_be32(arg); in->register_id = cpu_to_be16(reg_num); err = mlx5_cmd_exec(dev, in, sizeof(*in) + size_in, out, - sizeof(out) + size_out); + sizeof(*out) + size_out); if (err) goto ex2; From 0b6e81b91070bdbe0defb9101384ebb26835e401 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 14 Jan 2014 17:45:14 +0200 Subject: [PATCH 51/61] IB/mlx5: Clear out struct before create QP command Output structs are expected by firmware to be cleared when a command is called. Clear the "out" struct instead of "dout" which is used only later. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx5/core/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index 54faf8bfcaf4..e7941843a72d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -74,7 +74,7 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev, struct mlx5_destroy_qp_mbox_out dout; int err; - memset(&dout, 0, sizeof(dout)); + memset(&out, 0, sizeof(out)); in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_QP); err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); From 042b9adae899e1b497282d92205d3fef42d5ca8d Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 14 Jan 2014 17:45:15 +0200 Subject: [PATCH 52/61] mlx5_core: Use mlx5 core style warning Use mlx5_core_warn(), which is the standard warning emitter function, instead of pr_warn(). Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx5/core/qp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index e7941843a72d..510576213dd0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -84,7 +84,8 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev, } if (out.hdr.status) { - pr_warn("current num of QPs 0x%x\n", atomic_read(&dev->num_qps)); + mlx5_core_warn(dev, "current num of QPs 0x%x\n", + atomic_read(&dev->num_qps)); return mlx5_cmd_status_to_err(&out.hdr); } From ada388f7afad1e2e87acbfe30600fdaff9bd6327 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 14 Jan 2014 17:45:16 +0200 Subject: [PATCH 53/61] IB/mlx5: Make sure doorbell record is visible before doorbell Put a wmb() to make sure the doorbell record is visible to the HCA before we hit doorbell. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/qp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index a056c243ddcd..87b7fb176f22 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2251,6 +2251,10 @@ out: qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post); + /* Make sure doorbell record is visible to the HCA before + * we hit doorbell */ + wmb(); + if (bf->need_lock) spin_lock(&bf->lock); From 3bdb31f688276505ede23280885948e934304674 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 14 Jan 2014 17:45:17 +0200 Subject: [PATCH 54/61] IB/mlx5: Implement modify CQ Modify CQ is used by ULPs like IPoIB to change moderation parameters. This patch adds support in mlx5. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/cq.c | 26 +++++++++++++++++++- drivers/net/ethernet/mellanox/mlx5/core/cq.c | 17 +++++++++++-- include/linux/mlx5/cq.h | 8 +++--- include/linux/mlx5/device.h | 15 +++++++++++ 4 files changed, 59 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index b72627429745..b4c122eab484 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -818,7 +818,31 @@ void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq) int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) { - return -ENOSYS; + struct mlx5_modify_cq_mbox_in *in; + struct mlx5_ib_dev *dev = to_mdev(cq->device); + struct mlx5_ib_cq *mcq = to_mcq(cq); + int err; + u32 fsel; + + if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_CQ_MODER)) + return -ENOSYS; + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + in->cqn = cpu_to_be32(mcq->mcq.cqn); + fsel = (MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT); + in->ctx.cq_period = cpu_to_be16(cq_period); + in->ctx.cq_max_count = cpu_to_be16(cq_count); + in->field_select = cpu_to_be32(fsel); + err = mlx5_core_modify_cq(&dev->mdev, &mcq->mcq, in); + kfree(in); + + if (err) + mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn); + + return err; } int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index c2d660be6f76..e6fedcf94182 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -201,10 +201,23 @@ EXPORT_SYMBOL(mlx5_core_query_cq); int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, - int type, struct mlx5_cq_modify_params *params) + struct mlx5_modify_cq_mbox_in *in) { - return -ENOSYS; + struct mlx5_modify_cq_mbox_out out; + int err; + + memset(&out, 0, sizeof(out)); + in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MODIFY_CQ); + err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + return mlx5_cmd_status_to_err(&out.hdr); + + return 0; } +EXPORT_SYMBOL(mlx5_core_modify_cq); int mlx5_init_cq_table(struct mlx5_core_dev *dev) { diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 3db67f73d96d..c3cf5a46abce 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -85,9 +85,9 @@ enum { }; enum { - MLX5_CQ_MODIFY_RESEIZE = 0, - MLX5_CQ_MODIFY_MODER = 1, - MLX5_CQ_MODIFY_MAPPING = 2, + MLX5_CQ_MODIFY_PERIOD = 1 << 0, + MLX5_CQ_MODIFY_COUNT = 1 << 1, + MLX5_CQ_MODIFY_OVERRUN = 1 << 2, }; struct mlx5_cq_modify_params { @@ -158,7 +158,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, struct mlx5_query_cq_mbox_out *out); int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, - int type, struct mlx5_cq_modify_params *params); + struct mlx5_modify_cq_mbox_in *in); int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 2c1c62e339ca..dbb03caa8aed 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -177,6 +177,7 @@ enum { MLX5_DEV_CAP_FLAG_APM = 1LL << 17, MLX5_DEV_CAP_FLAG_ATOMIC = 1LL << 18, MLX5_DEV_CAP_FLAG_ON_DMND_PG = 1LL << 24, + MLX5_DEV_CAP_FLAG_CQ_MODER = 1LL << 29, MLX5_DEV_CAP_FLAG_RESIZE_SRQ = 1LL << 32, MLX5_DEV_CAP_FLAG_REMOTE_FENCE = 1LL << 38, MLX5_DEV_CAP_FLAG_TLP_HINTS = 1LL << 39, @@ -698,6 +699,19 @@ struct mlx5_query_cq_mbox_out { __be64 pas[0]; }; +struct mlx5_modify_cq_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 cqn; + __be32 field_select; + struct mlx5_cq_context ctx; + u8 rsvd[192]; + __be64 pas[0]; +}; + +struct mlx5_modify_cq_mbox_out { + struct mlx5_outbox_hdr hdr; +}; + struct mlx5_enable_hca_mbox_in { struct mlx5_inbox_hdr hdr; u8 rsvd[8]; @@ -872,6 +886,7 @@ struct mlx5_modify_mkey_mbox_in { struct mlx5_modify_mkey_mbox_out { struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; }; struct mlx5_dump_mkey_mbox_in { From bde51583f49bd87e452e9504d489926638046b11 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 14 Jan 2014 17:45:18 +0200 Subject: [PATCH 55/61] IB/mlx5: Add support for resize CQ Implement resize CQ which is a mandatory verb in mlx5. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/cq.c | 282 +++++++++++++++++-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 +- drivers/infiniband/hw/mlx5/user.h | 3 + drivers/net/ethernet/mellanox/mlx5/core/cq.c | 4 +- include/linux/mlx5/cq.h | 12 +- include/linux/mlx5/device.h | 2 + 6 files changed, 284 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index b4c122eab484..50b03a8067e5 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -73,14 +73,24 @@ static void *get_cqe(struct mlx5_ib_cq *cq, int n) return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz); } +static u8 sw_ownership_bit(int n, int nent) +{ + return (n & nent) ? 1 : 0; +} + static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n) { void *cqe = get_cqe(cq, n & cq->ibcq.cqe); struct mlx5_cqe64 *cqe64; cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64; - return ((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ - !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe; + + if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) && + !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) { + return cqe; + } else { + return NULL; + } } static void *next_cqe_sw(struct mlx5_ib_cq *cq) @@ -351,6 +361,11 @@ static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, qp->sq.last_poll = tail; } +static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) +{ + mlx5_buf_free(&dev->mdev, &buf->buf); +} + static int mlx5_poll_one(struct mlx5_ib_cq *cq, struct mlx5_ib_qp **cur_qp, struct ib_wc *wc) @@ -366,6 +381,7 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq, void *cqe; int idx; +repoll: cqe = next_cqe_sw(cq); if (!cqe) return -EAGAIN; @@ -379,7 +395,18 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq, */ rmb(); - /* TBD: resize CQ */ + opcode = cqe64->op_own >> 4; + if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) { + if (likely(cq->resize_buf)) { + free_cq_buf(dev, &cq->buf); + cq->buf = *cq->resize_buf; + kfree(cq->resize_buf); + cq->resize_buf = NULL; + goto repoll; + } else { + mlx5_ib_warn(dev, "unexpected resize cqe\n"); + } + } qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff; if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) { @@ -398,7 +425,6 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq, } wc->qp = &(*cur_qp)->ibqp; - opcode = cqe64->op_own >> 4; switch (opcode) { case MLX5_CQE_REQ: wq = &(*cur_qp)->sq; @@ -503,15 +529,11 @@ static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf, return err; buf->cqe_size = cqe_size; + buf->nent = nent; return 0; } -static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) -{ - mlx5_buf_free(&dev->mdev, &buf->buf); -} - static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, struct ib_ucontext *context, struct mlx5_ib_cq *cq, int entries, struct mlx5_create_cq_mbox_in **cqb, @@ -576,16 +598,16 @@ static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context) ib_umem_release(cq->buf.umem); } -static void init_cq_buf(struct mlx5_ib_cq *cq, int nent) +static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf) { int i; void *cqe; struct mlx5_cqe64 *cqe64; - for (i = 0; i < nent; i++) { - cqe = get_cqe(cq, i); - cqe64 = (cq->buf.cqe_size == 64) ? cqe : cqe + 64; - cqe64->op_own = 0xf1; + for (i = 0; i < buf->nent; i++) { + cqe = get_cqe_from_buf(buf, i, buf->cqe_size); + cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64; + cqe64->op_own = MLX5_CQE_INVALID << 4; } } @@ -610,7 +632,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, if (err) goto err_db; - init_cq_buf(cq, entries); + init_cq_buf(cq, &cq->buf); *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages; *cqb = mlx5_vzalloc(*inlen); @@ -836,7 +858,7 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) in->ctx.cq_period = cpu_to_be16(cq_period); in->ctx.cq_max_count = cpu_to_be16(cq_count); in->field_select = cpu_to_be32(fsel); - err = mlx5_core_modify_cq(&dev->mdev, &mcq->mcq, in); + err = mlx5_core_modify_cq(&dev->mdev, &mcq->mcq, in, sizeof(*in)); kfree(in); if (err) @@ -845,9 +867,235 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) return err; } +static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, + int entries, struct ib_udata *udata, int *npas, + int *page_shift, int *cqe_size) +{ + struct mlx5_ib_resize_cq ucmd; + struct ib_umem *umem; + int err; + int npages; + struct ib_ucontext *context = cq->buf.umem->context; + + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) + return -EFAULT; + + umem = ib_umem_get(context, ucmd.buf_addr, entries * ucmd.cqe_size, + IB_ACCESS_LOCAL_WRITE, 1); + if (IS_ERR(umem)) { + err = PTR_ERR(umem); + return err; + } + + mlx5_ib_cont_pages(umem, ucmd.buf_addr, &npages, page_shift, + npas, NULL); + + cq->resize_umem = umem; + *cqe_size = ucmd.cqe_size; + + return 0; +} + +static void un_resize_user(struct mlx5_ib_cq *cq) +{ + ib_umem_release(cq->resize_umem); +} + +static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, + int entries, int cqe_size) +{ + int err; + + cq->resize_buf = kzalloc(sizeof(*cq->resize_buf), GFP_KERNEL); + if (!cq->resize_buf) + return -ENOMEM; + + err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size); + if (err) + goto ex; + + init_cq_buf(cq, cq->resize_buf); + + return 0; + +ex: + kfree(cq->resize_buf); + return err; +} + +static void un_resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq) +{ + free_cq_buf(dev, cq->resize_buf); + cq->resize_buf = NULL; +} + +static int copy_resize_cqes(struct mlx5_ib_cq *cq) +{ + struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); + struct mlx5_cqe64 *scqe64; + struct mlx5_cqe64 *dcqe64; + void *start_cqe; + void *scqe; + void *dcqe; + int ssize; + int dsize; + int i; + u8 sw_own; + + ssize = cq->buf.cqe_size; + dsize = cq->resize_buf->cqe_size; + if (ssize != dsize) { + mlx5_ib_warn(dev, "resize from different cqe size is not supported\n"); + return -EINVAL; + } + + i = cq->mcq.cons_index; + scqe = get_sw_cqe(cq, i); + scqe64 = ssize == 64 ? scqe : scqe + 64; + start_cqe = scqe; + if (!scqe) { + mlx5_ib_warn(dev, "expected cqe in sw ownership\n"); + return -EINVAL; + } + + while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) { + dcqe = get_cqe_from_buf(cq->resize_buf, + (i + 1) & (cq->resize_buf->nent), + dsize); + dcqe64 = dsize == 64 ? dcqe : dcqe + 64; + sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent); + memcpy(dcqe, scqe, dsize); + dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own; + + ++i; + scqe = get_sw_cqe(cq, i); + scqe64 = ssize == 64 ? scqe : scqe + 64; + if (!scqe) { + mlx5_ib_warn(dev, "expected cqe in sw ownership\n"); + return -EINVAL; + } + + if (scqe == start_cqe) { + pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n", + cq->mcq.cqn); + return -ENOMEM; + } + } + ++cq->mcq.cons_index; + return 0; +} + int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) { - return -ENOSYS; + struct mlx5_ib_dev *dev = to_mdev(ibcq->device); + struct mlx5_ib_cq *cq = to_mcq(ibcq); + struct mlx5_modify_cq_mbox_in *in; + int err; + int npas; + int page_shift; + int inlen; + int uninitialized_var(cqe_size); + unsigned long flags; + + if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) { + pr_info("Firmware does not support resize CQ\n"); + return -ENOSYS; + } + + if (entries < 1) + return -EINVAL; + + entries = roundup_pow_of_two(entries + 1); + if (entries > dev->mdev.caps.max_cqes + 1) + return -EINVAL; + + if (entries == ibcq->cqe + 1) + return 0; + + mutex_lock(&cq->resize_mutex); + if (udata) { + err = resize_user(dev, cq, entries, udata, &npas, &page_shift, + &cqe_size); + } else { + cqe_size = 64; + err = resize_kernel(dev, cq, entries, cqe_size); + if (!err) { + npas = cq->resize_buf->buf.npages; + page_shift = cq->resize_buf->buf.page_shift; + } + } + + if (err) + goto ex; + + inlen = sizeof(*in) + npas * sizeof(in->pas[0]); + in = mlx5_vzalloc(inlen); + if (!in) { + err = -ENOMEM; + goto ex_resize; + } + + if (udata) + mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift, + in->pas, 0); + else + mlx5_fill_page_array(&cq->resize_buf->buf, in->pas); + + in->field_select = cpu_to_be32(MLX5_MODIFY_CQ_MASK_LOG_SIZE | + MLX5_MODIFY_CQ_MASK_PG_OFFSET | + MLX5_MODIFY_CQ_MASK_PG_SIZE); + in->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; + in->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5; + in->ctx.page_offset = 0; + in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(entries) << 24); + in->hdr.opmod = cpu_to_be16(MLX5_CQ_OPMOD_RESIZE); + in->cqn = cpu_to_be32(cq->mcq.cqn); + + err = mlx5_core_modify_cq(&dev->mdev, &cq->mcq, in, inlen); + if (err) + goto ex_alloc; + + if (udata) { + cq->ibcq.cqe = entries - 1; + ib_umem_release(cq->buf.umem); + cq->buf.umem = cq->resize_umem; + cq->resize_umem = NULL; + } else { + struct mlx5_ib_cq_buf tbuf; + int resized = 0; + + spin_lock_irqsave(&cq->lock, flags); + if (cq->resize_buf) { + err = copy_resize_cqes(cq); + if (!err) { + tbuf = cq->buf; + cq->buf = *cq->resize_buf; + kfree(cq->resize_buf); + cq->resize_buf = NULL; + resized = 1; + } + } + cq->ibcq.cqe = entries - 1; + spin_unlock_irqrestore(&cq->lock, flags); + if (resized) + free_cq_buf(dev, &tbuf); + } + mutex_unlock(&cq->resize_mutex); + + mlx5_vfree(in); + return 0; + +ex_alloc: + mlx5_vfree(in); + +ex_resize: + if (udata) + un_resize_user(cq); + else + un_resize_kernel(dev, cq); +ex: + mutex_unlock(&cq->resize_mutex); + return err; } int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 5acef30a4998..389e31965773 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -195,6 +195,7 @@ struct mlx5_ib_cq_buf { struct mlx5_buf buf; struct ib_umem *umem; int cqe_size; + int nent; }; enum mlx5_ib_qp_flags { @@ -220,7 +221,7 @@ struct mlx5_ib_cq { /* protect resize cq */ struct mutex resize_mutex; - struct mlx5_ib_cq_resize *resize_buf; + struct mlx5_ib_cq_buf *resize_buf; struct ib_umem *resize_umem; int cqe_size; }; diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h index a886de3e593c..32a2a5dfc523 100644 --- a/drivers/infiniband/hw/mlx5/user.h +++ b/drivers/infiniband/hw/mlx5/user.h @@ -93,6 +93,9 @@ struct mlx5_ib_create_cq_resp { struct mlx5_ib_resize_cq { __u64 buf_addr; + __u16 cqe_size; + __u16 reserved0; + __u32 reserved1; }; struct mlx5_ib_create_srq { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index e6fedcf94182..43c5f4809526 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -201,14 +201,14 @@ EXPORT_SYMBOL(mlx5_core_query_cq); int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, - struct mlx5_modify_cq_mbox_in *in) + struct mlx5_modify_cq_mbox_in *in, int in_sz) { struct mlx5_modify_cq_mbox_out out; int err; memset(&out, 0, sizeof(out)); in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MODIFY_CQ); - err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, in_sz, &out, sizeof(out)); if (err) return err; diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index c3cf5a46abce..2202c7f72b75 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -79,9 +79,10 @@ enum { MLX5_CQE_RESP_SEND = 2, MLX5_CQE_RESP_SEND_IMM = 3, MLX5_CQE_RESP_SEND_INV = 4, - MLX5_CQE_RESIZE_CQ = 0xff, /* TBD */ + MLX5_CQE_RESIZE_CQ = 5, MLX5_CQE_REQ_ERR = 13, MLX5_CQE_RESP_ERR = 14, + MLX5_CQE_INVALID = 15, }; enum { @@ -90,6 +91,13 @@ enum { MLX5_CQ_MODIFY_OVERRUN = 1 << 2, }; +enum { + MLX5_CQ_OPMOD_RESIZE = 1, + MLX5_MODIFY_CQ_MASK_LOG_SIZE = 1 << 0, + MLX5_MODIFY_CQ_MASK_PG_OFFSET = 1 << 1, + MLX5_MODIFY_CQ_MASK_PG_SIZE = 1 << 2, +}; + struct mlx5_cq_modify_params { int type; union { @@ -158,7 +166,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, struct mlx5_query_cq_mbox_out *out); int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, - struct mlx5_modify_cq_mbox_in *in); + struct mlx5_modify_cq_mbox_in *in, int in_sz); int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index dbb03caa8aed..87e23717df70 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -178,6 +178,7 @@ enum { MLX5_DEV_CAP_FLAG_ATOMIC = 1LL << 18, MLX5_DEV_CAP_FLAG_ON_DMND_PG = 1LL << 24, MLX5_DEV_CAP_FLAG_CQ_MODER = 1LL << 29, + MLX5_DEV_CAP_FLAG_RESIZE_CQ = 1LL << 30, MLX5_DEV_CAP_FLAG_RESIZE_SRQ = 1LL << 32, MLX5_DEV_CAP_FLAG_REMOTE_FENCE = 1LL << 38, MLX5_DEV_CAP_FLAG_TLP_HINTS = 1LL << 39, @@ -710,6 +711,7 @@ struct mlx5_modify_cq_mbox_in { struct mlx5_modify_cq_mbox_out { struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; }; struct mlx5_enable_hca_mbox_in { From db81a5c374b5bd650c5e6ae85d026709751db103 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 14 Jan 2014 17:45:19 +0200 Subject: [PATCH 56/61] mlx5_core: Improve debugfs readability Use strings to display transport service or state of QPs. Use numeric value for MTU of a QP. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- .../net/ethernet/mellanox/mlx5/core/debugfs.c | 39 +++++++++++++--- include/linux/mlx5/qp.h | 45 +++++++++++++++++++ 2 files changed, 78 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index 80f6d127257a..10e1f1a18255 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -275,7 +275,7 @@ void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev) } static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, - int index) + int index, int *is_str) { struct mlx5_query_qp_mbox_out *out; struct mlx5_qp_context *ctx; @@ -293,19 +293,40 @@ static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, goto out; } + *is_str = 0; ctx = &out->ctx; switch (index) { case QP_PID: param = qp->pid; break; case QP_STATE: - param = be32_to_cpu(ctx->flags) >> 28; + param = (u64)mlx5_qp_state_str(be32_to_cpu(ctx->flags) >> 28); + *is_str = 1; break; case QP_XPORT: - param = (be32_to_cpu(ctx->flags) >> 16) & 0xff; + param = (u64)mlx5_qp_type_str((be32_to_cpu(ctx->flags) >> 16) & 0xff); + *is_str = 1; break; case QP_MTU: - param = ctx->mtu_msgmax >> 5; + switch (ctx->mtu_msgmax >> 5) { + case IB_MTU_256: + param = 256; + break; + case IB_MTU_512: + param = 512; + break; + case IB_MTU_1024: + param = 1024; + break; + case IB_MTU_2048: + param = 2048; + break; + case IB_MTU_4096: + param = 4096; + break; + default: + param = 0; + } break; case QP_N_RECV: param = 1 << ((ctx->rq_size_stride >> 3) & 0xf); @@ -414,6 +435,7 @@ static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count, struct mlx5_field_desc *desc; struct mlx5_rsc_debug *d; char tbuf[18]; + int is_str = 0; u64 field; int ret; @@ -424,7 +446,7 @@ static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count, d = (void *)(desc - desc->i) - sizeof(*d); switch (d->type) { case MLX5_DBG_RSC_QP: - field = qp_read_field(d->dev, d->object, desc->i); + field = qp_read_field(d->dev, d->object, desc->i, &is_str); break; case MLX5_DBG_RSC_EQ: @@ -440,7 +462,12 @@ static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count, return -EINVAL; } - ret = snprintf(tbuf, sizeof(tbuf), "0x%llx\n", field); + + if (is_str) + ret = snprintf(tbuf, sizeof(tbuf), "%s\n", (const char *)field); + else + ret = snprintf(tbuf, sizeof(tbuf), "0x%llx\n", field); + if (ret > 0) { if (copy_to_user(buf, tbuf, ret)) return -EFAULT; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index d9e3eacb3a7f..d51eff713549 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -464,4 +464,49 @@ void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev); int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); +static inline const char *mlx5_qp_type_str(int type) +{ + switch (type) { + case MLX5_QP_ST_RC: return "RC"; + case MLX5_QP_ST_UC: return "C"; + case MLX5_QP_ST_UD: return "UD"; + case MLX5_QP_ST_XRC: return "XRC"; + case MLX5_QP_ST_MLX: return "MLX"; + case MLX5_QP_ST_QP0: return "QP0"; + case MLX5_QP_ST_QP1: return "QP1"; + case MLX5_QP_ST_RAW_ETHERTYPE: return "RAW_ETHERTYPE"; + case MLX5_QP_ST_RAW_IPV6: return "RAW_IPV6"; + case MLX5_QP_ST_SNIFFER: return "SNIFFER"; + case MLX5_QP_ST_SYNC_UMR: return "SYNC_UMR"; + case MLX5_QP_ST_PTP_1588: return "PTP_1588"; + case MLX5_QP_ST_REG_UMR: return "REG_UMR"; + default: return "Invalid transport type"; + } +} + +static inline const char *mlx5_qp_state_str(int state) +{ + switch (state) { + case MLX5_QP_STATE_RST: + return "RST"; + case MLX5_QP_STATE_INIT: + return "INIT"; + case MLX5_QP_STATE_RTR: + return "RTR"; + case MLX5_QP_STATE_RTS: + return "RTS"; + case MLX5_QP_STATE_SQER: + return "SQER"; + case MLX5_QP_STATE_SQD: + return "SQD"; + case MLX5_QP_STATE_ERR: + return "ERR"; + case MLX5_QP_STATE_SQ_DRAINING: + return "SQ_DRAINING"; + case MLX5_QP_STATE_SUSPENDED: + return "SUSPENDED"; + default: return "Invalid QP state"; + } +} + #endif /* MLX5_QP_H */ From 05bdb2ab6b09f2306f0afe0f60f4b9abffa7aba4 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 14 Jan 2014 17:45:20 +0200 Subject: [PATCH 57/61] mlx5_core: Fix PowerPC support 1. Fix derivation of sub-page index from the dma address in free_4k. 2. Fix the DMA address passed to dma_unmap_page by masking it properly. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 9 +++++---- include/linux/mlx5/device.h | 3 ++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index c35c4320225a..d59790a82bc3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -99,7 +99,7 @@ enum { enum { MLX5_MAX_RECLAIM_TIME_MILI = 5000, - MLX5_NUM_4K_IN_PAGE = PAGE_SIZE / 4096, + MLX5_NUM_4K_IN_PAGE = PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE, }; static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id) @@ -206,7 +206,7 @@ static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr) if (!fp->free_count) list_del(&fp->list); - *addr = fp->addr + n * 4096; + *addr = fp->addr + n * MLX5_ADAPTER_PAGE_SIZE; return 0; } @@ -222,14 +222,15 @@ static void free_4k(struct mlx5_core_dev *dev, u64 addr) return; } - n = (addr & ~PAGE_MASK) % 4096; + n = (addr & ~PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT; fwp->free_count++; set_bit(n, &fwp->bitmask); if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) { rb_erase(&fwp->rb_node, &dev->priv.page_root); if (fwp->free_count != 1) list_del(&fwp->list); - dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_unmap_page(&dev->pdev->dev, addr & PAGE_MASK, PAGE_SIZE, + DMA_BIDIRECTIONAL); __free_page(fwp->page); kfree(fwp); } else if (fwp->free_count == 1) { diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 87e23717df70..1d059099226c 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -234,7 +234,8 @@ enum { }; enum { - MLX5_ADAPTER_PAGE_SHIFT = 12 + MLX5_ADAPTER_PAGE_SHIFT = 12, + MLX5_ADAPTER_PAGE_SIZE = 1 << MLX5_ADAPTER_PAGE_SHIFT, }; enum { From 9e9c47d07d447e09a66ee528c3ebad9ba359af6a Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 14 Jan 2014 17:45:21 +0200 Subject: [PATCH 58/61] IB/mlx5: Allow creation of QPs with zero-length work queues The current code attmepts to call ib_umem_get() even if the length is zero, which causes a failure. Since the spec allows zero length work queues, change the code so we don't call ib_umem_get() in those cases. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/qp.c | 49 +++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 87b7fb176f22..70dd77085db6 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -523,12 +523,12 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, { struct mlx5_ib_ucontext *context; struct mlx5_ib_create_qp ucmd; - int page_shift; + int page_shift = 0; int uar_index; int npages; - u32 offset; + u32 offset = 0; int uuarn; - int ncont; + int ncont = 0; int err; err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); @@ -564,23 +564,29 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (err) goto err_uuar; - qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, - qp->buf_size, 0, 0); - if (IS_ERR(qp->umem)) { - mlx5_ib_dbg(dev, "umem_get failed\n"); - err = PTR_ERR(qp->umem); - goto err_uuar; + if (ucmd.buf_addr && qp->buf_size) { + qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, + qp->buf_size, 0, 0); + if (IS_ERR(qp->umem)) { + mlx5_ib_dbg(dev, "umem_get failed\n"); + err = PTR_ERR(qp->umem); + goto err_uuar; + } + } else { + qp->umem = NULL; } - mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift, - &ncont, NULL); - err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset); - if (err) { - mlx5_ib_warn(dev, "bad offset\n"); - goto err_umem; + if (qp->umem) { + mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift, + &ncont, NULL); + err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset); + if (err) { + mlx5_ib_warn(dev, "bad offset\n"); + goto err_umem; + } + mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n", + ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset); } - mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n", - ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset); *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont; *in = mlx5_vzalloc(*inlen); @@ -588,7 +594,8 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, err = -ENOMEM; goto err_umem; } - mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0); + if (qp->umem) + mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0); (*in)->ctx.log_pg_sz_remote_qpn = cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24); (*in)->ctx.params2 = cpu_to_be32(offset << 6); @@ -619,7 +626,8 @@ err_free: mlx5_vfree(*in); err_umem: - ib_umem_release(qp->umem); + if (qp->umem) + ib_umem_release(qp->umem); err_uuar: free_uuar(&context->uuari, uuarn); @@ -632,7 +640,8 @@ static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp) context = to_mucontext(pd->uobject->context); mlx5_ib_db_unmap_user(context, &qp->db); - ib_umem_release(qp->umem); + if (qp->umem) + ib_umem_release(qp->umem); free_uuar(&context->uuari, qp->uuarn); } From 1bde6e301cf6217da9238086c958f532b16e504d Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 14 Jan 2014 17:45:22 +0200 Subject: [PATCH 59/61] IB/mlx5: Abort driver cleanup if teardown hca fails Do not continue with cleanup flow. If this ever happens we can check which resources remained open. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 40a9f5ed814d..a064f06e0cb8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -460,7 +460,10 @@ disable_msix: err_stop_poll: mlx5_stop_health_poll(dev); - mlx5_cmd_teardown_hca(dev); + if (mlx5_cmd_teardown_hca(dev)) { + dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n"); + return err; + } err_pagealloc_stop: mlx5_pagealloc_stop(dev); @@ -503,7 +506,10 @@ void mlx5_dev_cleanup(struct mlx5_core_dev *dev) mlx5_eq_cleanup(dev); mlx5_disable_msix(dev); mlx5_stop_health_poll(dev); - mlx5_cmd_teardown_hca(dev); + if (mlx5_cmd_teardown_hca(dev)) { + dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n"); + return; + } mlx5_pagealloc_stop(dev); mlx5_reclaim_startup_pages(dev); mlx5_core_disable_hca(dev); From 8c8a49148b95c4d7c5f58a6866a30ea02485d7a3 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 14 Jan 2014 17:45:23 +0200 Subject: [PATCH 60/61] IB/mlx5: Remove old field for create mkey mailbox Match firmware specification. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- include/linux/mlx5/device.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 1d059099226c..817a6fae6d2c 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -849,8 +849,8 @@ struct mlx5_create_mkey_mbox_in { struct mlx5_mkey_seg seg; u8 rsvd1[16]; __be32 xlat_oct_act_size; - __be32 bsf_coto_act_size; - u8 rsvd2[168]; + __be32 rsvd2; + u8 rsvd3[168]; __be64 pas[0]; }; From 57761d8df8efc7cc1227f9bc22e0dda01b0dd91b Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 15 Jan 2014 14:56:44 +0200 Subject: [PATCH 61/61] IB/mlx5: Verify reserved fields are cleared Verify that reserved fields in struct mlx5_ib_resize_cq are cleared before continuing execution of the verb. This is required to allow making use of this area in future revisions. Signed-off-by: Yann Droneaud Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/cq.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 50b03a8067e5..b1705ce6eb88 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -877,8 +877,12 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, int npages; struct ib_ucontext *context = cq->buf.umem->context; - if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) - return -EFAULT; + err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); + if (err) + return err; + + if (ucmd.reserved0 || ucmd.reserved1) + return -EINVAL; umem = ib_umem_get(context, ucmd.buf_addr, entries * ucmd.cqe_size, IB_ACCESS_LOCAL_WRITE, 1);