From 44a7b6759000ac51b92715579a7bba9e3f9245c2 Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Fri, 6 Dec 2019 09:24:26 +0800 Subject: [PATCH 1/8] RDMA/cma: add missed unregister_pernet_subsys in init failure The driver forgets to call unregister_pernet_subsys() in the error path of cma_init(). Add the missed call to fix it. Fixes: 4be74b42a6d0 ("IB/cma: Separate port allocation to network namespaces") Signed-off-by: Chuhong Yuan Reviewed-by: Parav Pandit Link: https://lore.kernel.org/r/20191206012426.12744-1-hslester96@gmail.com Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 25f2b70fd8ef..43a6f07e0afe 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4763,6 +4763,7 @@ err_ib: err: unregister_netdevice_notifier(&cma_nb); ib_sa_unregister_client(&sa_client); + unregister_pernet_subsys(&cma_pernet_operations); err_wq: destroy_workqueue(cma_wq); return ret; From 71bbac6e2f23e946875475ce2fb9f6752900d463 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Mon, 2 Dec 2019 20:03:18 -0600 Subject: [PATCH 2/8] Update mailmap info for Steve Wise Signed-off-by: Steve Wise Link: https://lore.kernel.org/r/20191203020319.15036-1-larrystevenwise@gmail.com Signed-off-by: Doug Ledford --- .mailmap | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.mailmap b/.mailmap index c24773db04a7..00581c1f0983 100644 --- a/.mailmap +++ b/.mailmap @@ -276,3 +276,5 @@ Gustavo Padovan Gustavo Padovan Changbin Du Changbin Du +Steve Wise +Steve Wise From 2030abddec6884aaf5892f5724c48fc340e6826f Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Mon, 2 Dec 2019 20:03:20 -0600 Subject: [PATCH 3/8] rxe: correctly calculate iCRC for unaligned payloads If RoCE PDUs being sent or received contain pad bytes, then the iCRC is miscalculated, resulting in PDUs being emitted by RXE with an incorrect iCRC, as well as ingress PDUs being dropped due to erroneously detecting a bad iCRC in the PDU. The fix is to include the pad bytes, if any, in iCRC computations. Note: This bug has caused broken on-the-wire compatibility with actual hardware RoCE devices since the soft-RoCE driver was first put into the mainstream kernel. Fixing it will create an incompatibility with the original soft-RoCE devices, but is necessary to be compatible with real hardware devices. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Steve Wise Link: https://lore.kernel.org/r/20191203020319.15036-2-larrystevenwise@gmail.com Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_recv.c | 2 +- drivers/infiniband/sw/rxe/rxe_req.c | 6 ++++++ drivers/infiniband/sw/rxe/rxe_resp.c | 7 +++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index f9a492ed900b..831ad578a7b2 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -389,7 +389,7 @@ void rxe_rcv(struct sk_buff *skb) calc_icrc = rxe_icrc_hdr(pkt, skb); calc_icrc = rxe_crc32(rxe, calc_icrc, (u8 *)payload_addr(pkt), - payload_size(pkt)); + payload_size(pkt) + bth_pad(pkt)); calc_icrc = (__force u32)cpu_to_be32(~calc_icrc); if (unlikely(calc_icrc != pack_icrc)) { if (skb->protocol == htons(ETH_P_IPV6)) diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index c5d9b558fa90..e5031172c019 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -500,6 +500,12 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe, if (err) return err; } + if (bth_pad(pkt)) { + u8 *pad = payload_addr(pkt) + paylen; + + memset(pad, 0, bth_pad(pkt)); + crc = rxe_crc32(rxe, crc, pad, bth_pad(pkt)); + } } p = payload_addr(pkt) + paylen + bth_pad(pkt); diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 1cbfbd98eb22..c4a8195bf670 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -732,6 +732,13 @@ static enum resp_states read_reply(struct rxe_qp *qp, if (err) pr_err("Failed copying memory\n"); + if (bth_pad(&ack_pkt)) { + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + u8 *pad = payload_addr(&ack_pkt) + payload; + + memset(pad, 0, bth_pad(&ack_pkt)); + icrc = rxe_crc32(rxe, icrc, pad, bth_pad(&ack_pkt)); + } p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt); *p = ~icrc; From 33df2f1929df4a1cb13303e344fbf8a75f0dc41f Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Thu, 12 Dec 2019 11:12:12 +0200 Subject: [PATCH 4/8] RDMA/counter: Prevent auto-binding a QP which are not tracked with res Some QPs (e.g. XRC QP) are not tracked in kernel, in this case they have an invalid res and should not be bound to any dynamically-allocated counter in auto mode. This fixes below call trace: BUG: kernel NULL pointer dereference, address: 0000000000000390 PGD 80000001a7233067 P4D 80000001a7233067 PUD 1a7215067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 2 PID: 24822 Comm: ibv_xsrq_pingpo Not tainted 5.4.0-rc5+ #21 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-2.fc27 04/01/2014 RIP: 0010:rdma_counter_bind_qp_auto+0x142/0x270 [ib_core] Code: e1 48 85 c0 48 89 c2 0f 84 bc 00 00 00 49 8b 06 48 39 42 48 75 d6 40 3a aa 90 00 00 00 75 cd 49 8b 86 00 01 00 00 48 8b 4a 28 <8b> 80 90 03 00 00 39 81 90 03 00 00 75 b4 85 c0 74 b0 48 8b 04 24 RSP: 0018:ffffc900003f39c0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000 RDX: ffff88820020ec00 RSI: 0000000000000004 RDI: ffffffffffffffc0 RBP: 0000000000000001 R08: ffff888224149ff0 R09: ffffc900003f3968 R10: ffffffffffffffff R11: ffff8882249c5848 R12: ffffffffffffffff R13: ffff88821d5aca50 R14: ffff8881f7690800 R15: ffff8881ff890000 FS: 00007fe53a3e1740(0000) GS:ffff888237b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000390 CR3: 00000001a7292006 CR4: 00000000003606a0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: _ib_modify_qp+0x3a4/0x3f0 [ib_core] ? lookup_get_idr_uobject.part.8+0x23/0x40 [ib_uverbs] modify_qp+0x322/0x3e0 [ib_uverbs] ib_uverbs_modify_qp+0x43/0x70 [ib_uverbs] ib_uverbs_handler_UVERBS_METHOD_INVOKE_WRITE+0xb1/0xf0 [ib_uverbs] ib_uverbs_run_method+0x6be/0x760 [ib_uverbs] ? uverbs_disassociate_api+0xd0/0xd0 [ib_uverbs] ib_uverbs_cmd_verbs+0x18d/0x3a0 [ib_uverbs] ? get_acl+0x1a/0x120 ? __alloc_pages_nodemask+0x15d/0x2c0 ib_uverbs_ioctl+0xa7/0x110 [ib_uverbs] do_vfs_ioctl+0xa5/0x610 ksys_ioctl+0x60/0x90 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x48/0x110 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 99fa331dc862 ("RDMA/counter: Add "auto" configuration mode support") Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Reviewed-by: Ido Kalir Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20191212091214.315005-2-leon@kernel.org Signed-off-by: Doug Ledford --- drivers/infiniband/core/counters.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index 8434ec082c3a..2257d7f7810f 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -286,6 +286,9 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port) struct rdma_counter *counter; int ret; + if (!qp->res.valid) + return 0; + if (!rdma_is_port_valid(dev, port)) return -EINVAL; From 89f988d93c62384758b19323c886db917a80c371 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 12 Dec 2019 11:12:13 +0200 Subject: [PATCH 5/8] IB/mlx4: Follow mirror sequence of device add during device removal Current code device add sequence is: ib_register_device() ib_mad_init() init_sriov_init() register_netdev_notifier() Therefore, the remove sequence should be, unregister_netdev_notifier() close_sriov() mad_cleanup() ib_unregister_device() However it is not above. Hence, make do above remove sequence. Fixes: fa417f7b520ee ("IB/mlx4: Add support for IBoE") Signed-off-by: Parav Pandit Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20191212091214.315005-3-leon@kernel.org Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 0b5dc1d5928f..34055cbab38c 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -3018,16 +3018,17 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) ibdev->ib_active = false; flush_workqueue(wq); - mlx4_ib_close_sriov(ibdev); - mlx4_ib_mad_cleanup(ibdev); - ib_unregister_device(&ibdev->ib_dev); - mlx4_ib_diag_cleanup(ibdev); if (ibdev->iboe.nb.notifier_call) { if (unregister_netdevice_notifier(&ibdev->iboe.nb)) pr_warn("failure unregistering notifier\n"); ibdev->iboe.nb.notifier_call = NULL; } + mlx4_ib_close_sriov(ibdev); + mlx4_ib_mad_cleanup(ibdev); + ib_unregister_device(&ibdev->ib_dev); + mlx4_ib_diag_cleanup(ibdev); + mlx4_qp_release_range(dev, ibdev->steer_qpn_base, ibdev->steer_qpn_count); kfree(ibdev->ib_uc_qpns_bitmap); From ed9085fed9d95d5921582e3c8474f3736c5d2782 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 12 Dec 2019 11:12:14 +0200 Subject: [PATCH 6/8] IB/mlx5: Fix steering rule of drop and count There are two flow rule destinations: QP and packet. While users are setting DROP packet rule, the QP should not be set as a destination. Fixes: 3b3233fbf02e ("IB/mlx5: Add flow counters binding support") Signed-off-by: Maor Gottlieb Reviewed-by: Raed Salem Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20191212091214.315005-4-leon@kernel.org Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 51100350b688..2f5a159cbe1c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3544,10 +3544,6 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, } INIT_LIST_HEAD(&handler->list); - if (dst) { - memcpy(&dest_arr[0], dst, sizeof(*dst)); - dest_num++; - } for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { err = parse_flow_attr(dev->mdev, spec, @@ -3560,6 +3556,11 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, ib_flow += ((union ib_flow_spec *)ib_flow)->size; } + if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) { + memcpy(&dest_arr[0], dst, sizeof(*dst)); + dest_num++; + } + if (!flow_is_multicast_only(flow_attr)) set_underlay_qp(dev, spec, underlay_qpn); @@ -3600,10 +3601,8 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, } if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) { - if (!(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT)) { + if (!dest_num) rule_dst = NULL; - dest_num = 0; - } } else { if (is_egress) flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; From 7a763d18ff2a75cfd1014800327f4120840bef09 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 12 Dec 2019 12:02:36 +0200 Subject: [PATCH 7/8] IB/core: Introduce rdma_user_mmap_entry_insert_range() API Introduce rdma_user_mmap_entry_insert_range() API to be used once the required key for the given entry should be in a given range. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20191212100237.330654-2-leon@kernel.org Signed-off-by: Doug Ledford --- drivers/infiniband/core/ib_core_uverbs.c | 48 +++++++++++++++++++----- include/rdma/ib_verbs.h | 5 +++ 2 files changed, 44 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/core/ib_core_uverbs.c b/drivers/infiniband/core/ib_core_uverbs.c index f509c478b469..b7cb59844ece 100644 --- a/drivers/infiniband/core/ib_core_uverbs.c +++ b/drivers/infiniband/core/ib_core_uverbs.c @@ -238,28 +238,32 @@ void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry) EXPORT_SYMBOL(rdma_user_mmap_entry_remove); /** - * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa + * rdma_user_mmap_entry_insert_range() - Insert an entry to the mmap_xa + * in a given range. * * @ucontext: associated user context. * @entry: the entry to insert into the mmap_xa * @length: length of the address that will be mmapped + * @min_pgoff: minimum pgoff to be returned + * @max_pgoff: maximum pgoff to be returned * * This function should be called by drivers that use the rdma_user_mmap * interface for implementing their mmap syscall A database of mmap offsets is * handled in the core and helper functions are provided to insert entries * into the database and extract entries when the user calls mmap with the - * given offset. The function allocates a unique page offset that should be - * provided to user, the user will use the offset to retrieve information such - * as address to be mapped and how. + * given offset. The function allocates a unique page offset in a given range + * that should be provided to user, the user will use the offset to retrieve + * information such as address to be mapped and how. * * Return: 0 on success and -ENOMEM on failure */ -int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, - struct rdma_user_mmap_entry *entry, - size_t length) +int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext, + struct rdma_user_mmap_entry *entry, + size_t length, u32 min_pgoff, + u32 max_pgoff) { struct ib_uverbs_file *ufile = ucontext->ufile; - XA_STATE(xas, &ucontext->mmap_xa, 0); + XA_STATE(xas, &ucontext->mmap_xa, min_pgoff); u32 xa_first, xa_last, npages; int err; u32 i; @@ -285,7 +289,7 @@ int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, entry->npages = npages; while (true) { /* First find an empty index */ - xas_find_marked(&xas, U32_MAX, XA_FREE_MARK); + xas_find_marked(&xas, max_pgoff, XA_FREE_MARK); if (xas.xa_node == XAS_RESTART) goto err_unlock; @@ -332,4 +336,30 @@ err_unlock: mutex_unlock(&ufile->umap_lock); return -ENOMEM; } +EXPORT_SYMBOL(rdma_user_mmap_entry_insert_range); + +/** + * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa. + * + * @ucontext: associated user context. + * @entry: the entry to insert into the mmap_xa + * @length: length of the address that will be mmapped + * + * This function should be called by drivers that use the rdma_user_mmap + * interface for handling user mmapped addresses. The database is handled in + * the core and helper functions are provided to insert entries into the + * database and extract entries when the user calls mmap with the given offset. + * The function allocates a unique page offset that should be provided to user, + * the user will use the offset to retrieve information such as address to + * be mapped and how. + * + * Return: 0 on success and -ENOMEM on failure + */ +int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, + struct rdma_user_mmap_entry *entry, + size_t length) +{ + return rdma_user_mmap_entry_insert_range(ucontext, entry, length, 0, + U32_MAX); +} EXPORT_SYMBOL(rdma_user_mmap_entry_insert); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index cacb48faf670..5608e14e3aad 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2832,6 +2832,11 @@ int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, struct rdma_user_mmap_entry *entry, size_t length); +int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext, + struct rdma_user_mmap_entry *entry, + size_t length, u32 min_pgoff, + u32 max_pgoff); + struct rdma_user_mmap_entry * rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext, unsigned long pgoff); From dc2316eba73ff03da6dde082a372c6b5209304c5 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 12 Dec 2019 12:02:37 +0200 Subject: [PATCH 8/8] IB/mlx5: Fix device memory flows Fix device memory flows so that only once there will be no live mmaped VA to a given allocation the matching object will be destroyed. This prevents a potential scenario that existing VA that was mmaped by one process might still be used post its deallocation despite that it's owned now by other process. The above is achieved by integrating with IB core APIs to manage mmap/munmap. Only once the refcount will become 0 the DM object and its underlay area will be freed. Fixes: 3b113a1ec3d4 ("IB/mlx5: Support device memory type attribute") Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20191212100237.330654-3-leon@kernel.org Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cmd.c | 16 ++-- drivers/infiniband/hw/mlx5/cmd.h | 2 +- drivers/infiniband/hw/mlx5/main.c | 126 +++++++++++++++++---------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 19 +++- 4 files changed, 108 insertions(+), 55 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index 4937947400cd..4c26492ab8a3 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -157,7 +157,7 @@ int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr, return -ENOMEM; } -int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length) +void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length) { struct mlx5_core_dev *dev = dm->dev; u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr); @@ -175,15 +175,13 @@ int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length) MLX5_SET(dealloc_memic_in, in, memic_size, length); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + return; - if (!err) { - spin_lock(&dm->lock); - bitmap_clear(dm->memic_alloc_pages, - start_page_idx, num_pages); - spin_unlock(&dm->lock); - } - - return err; + spin_lock(&dm->lock); + bitmap_clear(dm->memic_alloc_pages, + start_page_idx, num_pages); + spin_unlock(&dm->lock); } int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out) diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index 169cab4915e3..945ebce73613 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -46,7 +46,7 @@ int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev, void *in, int in_size); int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr, u64 length, u32 alignment); -int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length); +void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length); void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid); void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid); void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 2f5a159cbe1c..997cbfe4b90c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2074,6 +2074,24 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev, virt_to_page(dev->mdev->clock_info)); } +static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry) +{ + struct mlx5_user_mmap_entry *mentry = to_mmmap(entry); + struct mlx5_ib_dev *dev = to_mdev(entry->ucontext->device); + struct mlx5_ib_dm *mdm; + + switch (mentry->mmap_flag) { + case MLX5_IB_MMAP_TYPE_MEMIC: + mdm = container_of(mentry, struct mlx5_ib_dm, mentry); + mlx5_cmd_dealloc_memic(&dev->dm, mdm->dev_addr, + mdm->size); + kfree(mdm); + break; + default: + WARN_ON(true); + } +} + static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, struct vm_area_struct *vma, struct mlx5_ib_ucontext *context) @@ -2186,26 +2204,55 @@ free_bfreg: return err; } -static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +static int add_dm_mmap_entry(struct ib_ucontext *context, + struct mlx5_ib_dm *mdm, + u64 address) { - struct mlx5_ib_ucontext *mctx = to_mucontext(context); - struct mlx5_ib_dev *dev = to_mdev(context->device); - u16 page_idx = get_extended_index(vma->vm_pgoff); - size_t map_size = vma->vm_end - vma->vm_start; - u32 npages = map_size >> PAGE_SHIFT; - phys_addr_t pfn; + mdm->mentry.mmap_flag = MLX5_IB_MMAP_TYPE_MEMIC; + mdm->mentry.address = address; + return rdma_user_mmap_entry_insert_range( + context, &mdm->mentry.rdma_entry, + mdm->size, + MLX5_IB_MMAP_DEVICE_MEM << 16, + (MLX5_IB_MMAP_DEVICE_MEM << 16) + (1UL << 16) - 1); +} - if (find_next_zero_bit(mctx->dm_pages, page_idx + npages, page_idx) != - page_idx + npages) +static unsigned long mlx5_vma_to_pgoff(struct vm_area_struct *vma) +{ + unsigned long idx; + u8 command; + + command = get_command(vma->vm_pgoff); + idx = get_extended_index(vma->vm_pgoff); + + return (command << 16 | idx); +} + +static int mlx5_ib_mmap_offset(struct mlx5_ib_dev *dev, + struct vm_area_struct *vma, + struct ib_ucontext *ucontext) +{ + struct mlx5_user_mmap_entry *mentry; + struct rdma_user_mmap_entry *entry; + unsigned long pgoff; + pgprot_t prot; + phys_addr_t pfn; + int ret; + + pgoff = mlx5_vma_to_pgoff(vma); + entry = rdma_user_mmap_entry_get_pgoff(ucontext, pgoff); + if (!entry) return -EINVAL; - pfn = ((dev->mdev->bar_addr + - MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >> - PAGE_SHIFT) + - page_idx; - return rdma_user_mmap_io(context, vma, pfn, map_size, - pgprot_writecombine(vma->vm_page_prot), - NULL); + mentry = to_mmmap(entry); + pfn = (mentry->address >> PAGE_SHIFT); + prot = pgprot_writecombine(vma->vm_page_prot); + ret = rdma_user_mmap_io(ucontext, vma, pfn, + entry->npages * PAGE_SIZE, + prot, + entry); + rdma_user_mmap_entry_put(&mentry->rdma_entry); + return ret; } static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) @@ -2248,11 +2295,8 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm case MLX5_IB_MMAP_CLOCK_INFO: return mlx5_ib_mmap_clock_info_page(dev, vma, context); - case MLX5_IB_MMAP_DEVICE_MEM: - return dm_mmap(ibcontext, vma); - default: - return -EINVAL; + return mlx5_ib_mmap_offset(dev, vma, ibcontext); } return 0; @@ -2288,8 +2332,9 @@ static int handle_alloc_dm_memic(struct ib_ucontext *ctx, { struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm; u64 start_offset; - u32 page_idx; + u16 page_idx; int err; + u64 address; dm->size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE); @@ -2298,28 +2343,30 @@ static int handle_alloc_dm_memic(struct ib_ucontext *ctx, if (err) return err; - page_idx = (dm->dev_addr - pci_resource_start(dm_db->dev->pdev, 0) - - MLX5_CAP64_DEV_MEM(dm_db->dev, memic_bar_start_addr)) >> - PAGE_SHIFT; - - err = uverbs_copy_to(attrs, - MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, - &page_idx, sizeof(page_idx)); + address = dm->dev_addr & PAGE_MASK; + err = add_dm_mmap_entry(ctx, dm, address); if (err) goto err_dealloc; + page_idx = dm->mentry.rdma_entry.start_pgoff & 0xFFFF; + err = uverbs_copy_to(attrs, + MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, + &page_idx, + sizeof(page_idx)); + if (err) + goto err_copy; + start_offset = dm->dev_addr & ~PAGE_MASK; err = uverbs_copy_to(attrs, MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, &start_offset, sizeof(start_offset)); if (err) - goto err_dealloc; - - bitmap_set(to_mucontext(ctx)->dm_pages, page_idx, - DIV_ROUND_UP(dm->size, PAGE_SIZE)); + goto err_copy; return 0; +err_copy: + rdma_user_mmap_entry_remove(&dm->mentry.rdma_entry); err_dealloc: mlx5_cmd_dealloc_memic(dm_db, dm->dev_addr, dm->size); @@ -2423,23 +2470,13 @@ int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs) struct mlx5_ib_ucontext *ctx = rdma_udata_to_drv_context( &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext); struct mlx5_core_dev *dev = to_mdev(ibdm->device)->mdev; - struct mlx5_dm *dm_db = &to_mdev(ibdm->device)->dm; struct mlx5_ib_dm *dm = to_mdm(ibdm); - u32 page_idx; int ret; switch (dm->type) { case MLX5_IB_UAPI_DM_TYPE_MEMIC: - ret = mlx5_cmd_dealloc_memic(dm_db, dm->dev_addr, dm->size); - if (ret) - return ret; - - page_idx = (dm->dev_addr - pci_resource_start(dev->pdev, 0) - - MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr)) >> - PAGE_SHIFT; - bitmap_clear(ctx->dm_pages, page_idx, - DIV_ROUND_UP(dm->size, PAGE_SIZE)); - break; + rdma_user_mmap_entry_remove(&dm->mentry.rdma_entry); + return 0; case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: ret = mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_STEERING, dm->size, ctx->devx_uid, dm->dev_addr, @@ -6235,6 +6272,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .map_mr_sg = mlx5_ib_map_mr_sg, .map_mr_sg_pi = mlx5_ib_map_mr_sg_pi, .mmap = mlx5_ib_mmap, + .mmap_free = mlx5_ib_mmap_free, .modify_cq = mlx5_ib_modify_cq, .modify_device = mlx5_ib_modify_device, .modify_port = mlx5_ib_modify_port, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 5986953ec2fa..b06f32ff5748 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -118,6 +118,10 @@ enum { MLX5_MEMIC_BASE_SIZE = 1 << MLX5_MEMIC_BASE_ALIGN, }; +enum mlx5_ib_mmap_type { + MLX5_IB_MMAP_TYPE_MEMIC = 1, +}; + #define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) \ (MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity)) #define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)) @@ -135,7 +139,6 @@ struct mlx5_ib_ucontext { u32 tdn; u64 lib_caps; - DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES); u16 devx_uid; /* For RoCE LAG TX affinity */ atomic_t tx_port_affinity; @@ -556,6 +559,12 @@ enum mlx5_ib_mtt_access_flags { MLX5_IB_MTT_WRITE = (1 << 1), }; +struct mlx5_user_mmap_entry { + struct rdma_user_mmap_entry rdma_entry; + u8 mmap_flag; + u64 address; +}; + struct mlx5_ib_dm { struct ib_dm ibdm; phys_addr_t dev_addr; @@ -567,6 +576,7 @@ struct mlx5_ib_dm { } icm_dm; /* other dm types specific params should be added here */ }; + struct mlx5_user_mmap_entry mentry; }; #define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE) @@ -1101,6 +1111,13 @@ to_mflow_act(struct ib_flow_action *ibact) return container_of(ibact, struct mlx5_ib_flow_action, ib_action); } +static inline struct mlx5_user_mmap_entry * +to_mmmap(struct rdma_user_mmap_entry *rdma_entry) +{ + return container_of(rdma_entry, + struct mlx5_user_mmap_entry, rdma_entry); +} + int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, struct ib_udata *udata, unsigned long virt, struct mlx5_db *db);