From cc3456226176385aed8aa6ebb021ebb1380a0183 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Thu, 7 Jan 2021 17:34:13 +0200 Subject: [PATCH 01/22] nvmet: Use nvmet_is_port_enabled helper for pi_enable Remove code duplication. Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/target/configfs.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index c61ffd767062..b2021bf6cee5 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -266,10 +266,8 @@ static ssize_t nvmet_param_pi_enable_store(struct config_item *item, if (strtobool(page, &val)) return -EINVAL; - if (port->enabled) { - pr_err("Disable port before setting pi_enable value.\n"); + if (nvmet_is_port_enabled(port, __func__)) return -EACCES; - } port->pi_enable = val; return count; From 36ca03c830e41769c62d2ca15be8351059f86c45 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Thu, 7 Jan 2021 17:34:14 +0200 Subject: [PATCH 02/22] nvmet: Fix nvmet_is_port_enabled indentation Remove extra tab. Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/target/configfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index b2021bf6cee5..635a7cb45d0b 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -45,7 +45,7 @@ static bool nvmet_is_port_enabled(struct nvmet_port *p, const char *caller) { if (p->enabled) pr_err("Disable port '%u' before changing attribute in %s\n", - le16_to_cpu(p->disc_addr.portid), caller); + le16_to_cpu(p->disc_addr.portid), caller); return p->enabled; } From 4e2f02bf77dac7b8c841f93ae5a71556d733cb04 Mon Sep 17 00:00:00 2001 From: Leonid Ravich Date: Sun, 3 Jan 2021 20:12:54 +0200 Subject: [PATCH 03/22] nvmet-fc: use RCU protection for assoc_list searching Searching assoc_list is now protected by rcu_read_lock(), provided the list is not changed inline, in accordance with the RCU list rules. The queue array embedded in struct nvmet_fc_tgt_assoc is likewise protected by rcu_read_lock(), following the RCU dereference/assign rules. Queue and association objects are freed after a grace period via call_rcu. The tgtport lock is still taken for changing assoc_list.
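For readers unfamiliar with the idiom, the read side now follows the standard RCU list-search pattern, roughly as in this sketch (hypothetical struct and field names, not the driver's exact code):

	struct foo *find_foo(struct list_head *head, u64 id)
	{
		struct foo *f, *found = NULL;

		rcu_read_lock();
		list_for_each_entry_rcu(f, head, list) {
			/* take a reference before leaving the RCU read side */
			if (f->id == id && kref_get_unless_zero(&f->ref)) {
				found = f;
				break;
			}
		}
		rcu_read_unlock();
		return found;
	}

Writers still serialize list changes with a lock and use list_add_tail_rcu()/list_del_rcu(), and objects are only freed after a grace period.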
Reviewed-by: Eldad Zinger Reviewed-by: Elad Grupi Reviewed-by: James Smart Signed-off-by: Leonid Ravich Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 81 +++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 43 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index cd4e73aa9807..c14c60bfdf85 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -145,6 +145,7 @@ struct nvmet_fc_tgt_queue { struct list_head avail_defer_list; struct workqueue_struct *work_q; struct kref ref; + struct rcu_head rcu; struct nvmet_fc_fcp_iod fod[]; /* array of fcp_iods */ } __aligned(sizeof(unsigned long long)); @@ -167,6 +168,7 @@ struct nvmet_fc_tgt_assoc { struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1]; struct kref ref; struct work_struct del_work; + struct rcu_head rcu; }; @@ -790,7 +792,6 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, u16 qid, u16 sqsize) { struct nvmet_fc_tgt_queue *queue; - unsigned long flags; int ret; if (qid > NVMET_NR_QUEUES) @@ -829,9 +830,7 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, goto out_fail_iodlist; WARN_ON(assoc->queues[qid]); - spin_lock_irqsave(&assoc->tgtport->lock, flags); - assoc->queues[qid] = queue; - spin_unlock_irqrestore(&assoc->tgtport->lock, flags); + rcu_assign_pointer(assoc->queues[qid], queue); return queue; @@ -851,11 +850,8 @@ nvmet_fc_tgt_queue_free(struct kref *ref) { struct nvmet_fc_tgt_queue *queue = container_of(ref, struct nvmet_fc_tgt_queue, ref); - unsigned long flags; - spin_lock_irqsave(&queue->assoc->tgtport->lock, flags); - queue->assoc->queues[queue->qid] = NULL; - spin_unlock_irqrestore(&queue->assoc->tgtport->lock, flags); + rcu_assign_pointer(queue->assoc->queues[queue->qid], NULL); nvmet_fc_destroy_fcp_iodlist(queue->assoc->tgtport, queue); @@ -863,7 +859,7 @@ nvmet_fc_tgt_queue_free(struct kref *ref) destroy_workqueue(queue->work_q); - kfree(queue); + kfree_rcu(queue, rcu); } static void @@ -965,24 +961,23 @@ nvmet_fc_find_target_queue(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_tgt_queue *queue; u64 association_id = nvmet_fc_getassociationid(connection_id); u16 qid = nvmet_fc_getqueueid(connection_id); - unsigned long flags; if (qid > NVMET_NR_QUEUES) return NULL; - spin_lock_irqsave(&tgtport->lock, flags); - list_for_each_entry(assoc, &tgtport->assoc_list, a_list) { + rcu_read_lock(); + list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { if (association_id == assoc->association_id) { - queue = assoc->queues[qid]; + queue = rcu_dereference(assoc->queues[qid]); if (queue && (!atomic_read(&queue->connected) || !nvmet_fc_tgt_q_get(queue))) queue = NULL; - spin_unlock_irqrestore(&tgtport->lock, flags); + rcu_read_unlock(); return queue; } } - spin_unlock_irqrestore(&tgtport->lock, flags); + rcu_read_unlock(); return NULL; } @@ -1137,7 +1132,7 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) } if (!needrandom) { assoc->association_id = ran; - list_add_tail(&assoc->a_list, &tgtport->assoc_list); + list_add_tail_rcu(&assoc->a_list, &tgtport->assoc_list); } spin_unlock_irqrestore(&tgtport->lock, flags); } @@ -1167,7 +1162,7 @@ nvmet_fc_target_assoc_free(struct kref *ref) nvmet_fc_free_hostport(assoc->hostport); spin_lock_irqsave(&tgtport->lock, flags); - list_del(&assoc->a_list); + list_del_rcu(&assoc->a_list); oldls = assoc->rcv_disconn; spin_unlock_irqrestore(&tgtport->lock, flags); /* if pending Rcv Disconnect Association LS, send rsp now */ @@ -1177,7 +1172,7 @@ 
nvmet_fc_target_assoc_free(struct kref *ref) dev_info(tgtport->dev, "{%d:%d} Association freed\n", tgtport->fc_target_port.port_num, assoc->a_id); - kfree(assoc); + kfree_rcu(assoc, rcu); nvmet_fc_tgtport_put(tgtport); } @@ -1198,7 +1193,6 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc) { struct nvmet_fc_tgtport *tgtport = assoc->tgtport; struct nvmet_fc_tgt_queue *queue; - unsigned long flags; int i, terminating; terminating = atomic_xchg(&assoc->terminating, 1); @@ -1207,19 +1201,23 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc) if (terminating) return; - spin_lock_irqsave(&tgtport->lock, flags); + for (i = NVMET_NR_QUEUES; i >= 0; i--) { - queue = assoc->queues[i]; - if (queue) { - if (!nvmet_fc_tgt_q_get(queue)) - continue; - spin_unlock_irqrestore(&tgtport->lock, flags); - nvmet_fc_delete_target_queue(queue); - nvmet_fc_tgt_q_put(queue); - spin_lock_irqsave(&tgtport->lock, flags); + rcu_read_lock(); + queue = rcu_dereference(assoc->queues[i]); + if (!queue) { + rcu_read_unlock(); + continue; } + + if (!nvmet_fc_tgt_q_get(queue)) { + rcu_read_unlock(); + continue; + } + rcu_read_unlock(); + nvmet_fc_delete_target_queue(queue); + nvmet_fc_tgt_q_put(queue); } - spin_unlock_irqrestore(&tgtport->lock, flags); dev_info(tgtport->dev, "{%d:%d} Association deleted\n", @@ -1234,10 +1232,9 @@ nvmet_fc_find_target_assoc(struct nvmet_fc_tgtport *tgtport, { struct nvmet_fc_tgt_assoc *assoc; struct nvmet_fc_tgt_assoc *ret = NULL; - unsigned long flags; - spin_lock_irqsave(&tgtport->lock, flags); - list_for_each_entry(assoc, &tgtport->assoc_list, a_list) { + rcu_read_lock(); + list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { if (association_id == assoc->association_id) { ret = assoc; if (!nvmet_fc_tgt_a_get(assoc)) @@ -1245,7 +1242,7 @@ nvmet_fc_find_target_assoc(struct nvmet_fc_tgtport *tgtport, break; } } - spin_unlock_irqrestore(&tgtport->lock, flags); + rcu_read_unlock(); return ret; } @@ -1473,19 +1470,17 @@ nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport) static void __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport) { - struct nvmet_fc_tgt_assoc *assoc, *next; - unsigned long flags; + struct nvmet_fc_tgt_assoc *assoc; - spin_lock_irqsave(&tgtport->lock, flags); - list_for_each_entry_safe(assoc, next, - &tgtport->assoc_list, a_list) { + rcu_read_lock(); + list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { if (!nvmet_fc_tgt_a_get(assoc)) continue; if (!schedule_work(&assoc->del_work)) /* already deleting - release local reference */ nvmet_fc_tgt_a_put(assoc); } - spin_unlock_irqrestore(&tgtport->lock, flags); + rcu_read_unlock(); } /** @@ -1568,16 +1563,16 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) continue; spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); - spin_lock_irqsave(&tgtport->lock, flags); - list_for_each_entry(assoc, &tgtport->assoc_list, a_list) { - queue = assoc->queues[0]; + rcu_read_lock(); + list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { + queue = rcu_dereference(assoc->queues[0]); if (queue && queue->nvme_sq.ctrl == ctrl) { if (nvmet_fc_tgt_a_get(assoc)) found_ctrl = true; break; } } - spin_unlock_irqrestore(&tgtport->lock, flags); + rcu_read_unlock(); nvmet_fc_tgtport_put(tgtport); From 60b152a50820a125336ecae26da489059fc61ce1 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Sat, 9 Jan 2021 00:41:47 +0100 Subject: [PATCH 04/22] nvme: constify static attribute_group structs The only usage of these is to put their addresses in arrays of pointers to const attribute_groups. 
Make them const to allow the compiler to put them in read-only memory. Signed-off-by: Rikard Falkeborn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 4 ++-- drivers/nvme/host/fc.c | 2 +- drivers/nvme/target/fcloop.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ba5df80881ea..ff0f42652abb 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2859,7 +2859,7 @@ static struct attribute *nvme_subsys_attrs[] = { NULL, }; -static struct attribute_group nvme_subsys_attrs_group = { +static const struct attribute_group nvme_subsys_attrs_group = { .attrs = nvme_subsys_attrs, }; @@ -3694,7 +3694,7 @@ static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj, return a->mode; } -static struct attribute_group nvme_dev_attrs_group = { +static const struct attribute_group nvme_dev_attrs_group = { .attrs = nvme_dev_attrs, .is_visible = nvme_dev_attrs_are_visible, }; diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 5f36cfa8136c..20dadd86e981 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3789,7 +3789,7 @@ static struct attribute *nvme_fc_attrs[] = { NULL }; -static struct attribute_group nvme_fc_attr_group = { +static const struct attribute_group nvme_fc_attr_group = { .attrs = nvme_fc_attrs, }; diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 68213f0a052b..54606f1872b4 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -1545,7 +1545,7 @@ static struct attribute *fcloop_dev_attrs[] = { NULL }; -static struct attribute_group fclopp_dev_attrs_group = { +static const struct attribute_group fclopp_dev_attrs_group = { .attrs = fcloop_dev_attrs, }; From f9063a53274d25a878310db3fb645bfa9e49c917 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Fri, 8 Jan 2021 23:46:57 +0900 Subject: [PATCH 05/22] nvme: support command retry delay for admin command The controller can request a delay before retrying a failed command by setting the Command Retry Delay (CRD) field in the Completion Queue Entry. Currently this feature is only applied to commands on the I/O queue, but not to commands on the admin queue. Retrieve the nvme_ctrl from the request so that no namespace is required and apply the feature to all commands. Signed-off-by: Minwoo Im Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ff0f42652abb..636a88c93194 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -279,14 +279,13 @@ static blk_status_t nvme_error_status(u16 status) static void nvme_retry_req(struct request *req) { - struct nvme_ns *ns = req->q->queuedata; unsigned long delay = 0; u16 crd; /* The mask and shift result must be <= 3 */ crd = (nvme_req(req)->status & NVME_SC_CRD) >> 11; - if (ns && crd) - delay = ns->ctrl->crdt[crd - 1] * 100; + if (crd) + delay = nvme_req(req)->ctrl->crdt[crd - 1] * 100; nvme_req(req)->retries++; blk_mq_requeue_request(req, false); From cb9b870fba3eba57cf3bcd7c6c4d4aa88bc5fe70 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 14 Jan 2021 13:15:24 -0800 Subject: [PATCH 06/22] nvme-tcp: fix wrong setting of request iov_iter We might set the iov_iter direction wrong, which is harmless for this use case, but get it right. Also, this makes the code slightly cleaner.
Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 881d28eb15e9..4367923d03e4 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -983,7 +983,6 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req) req->state = NVME_TCP_SEND_DATA; if (queue->data_digest) crypto_ahash_init(queue->snd_hash); - nvme_tcp_init_iter(req, WRITE); } else { nvme_tcp_done_send_req(queue); } @@ -1016,8 +1015,6 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req) req->state = NVME_TCP_SEND_DATA; if (queue->data_digest) crypto_ahash_init(queue->snd_hash); - if (!req->data_sent) - nvme_tcp_init_iter(req, WRITE); return 1; } req->offset += ret; @@ -2268,12 +2265,12 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns, req->data_len = blk_rq_nr_phys_segments(rq) ? blk_rq_payload_bytes(rq) : 0; req->curr_bio = rq->bio; + if (req->curr_bio) + nvme_tcp_init_iter(req, rq_data_dir(rq)); if (rq_data_dir(rq) == WRITE && req->data_len <= nvme_tcp_inline_data_size(queue)) req->pdu_len = req->data_len; - else if (req->curr_bio) - nvme_tcp_init_iter(req, READ); pdu->hdr.type = nvme_tcp_cmd; pdu->hdr.flags = 0; From 60141aa08c08a43f3d22626b3a2532106a90a191 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 14 Jan 2021 13:15:25 -0800 Subject: [PATCH 07/22] nvme-tcp: get rid of unused helper function Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 4367923d03e4..f2f3471faed3 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -206,11 +206,6 @@ static inline size_t nvme_tcp_req_cur_length(struct nvme_tcp_request *req) req->pdu_len - req->pdu_sent); } -static inline size_t nvme_tcp_req_offset(struct nvme_tcp_request *req) -{ - return req->iter.iov_offset; -} - static inline size_t nvme_tcp_pdu_data_left(struct nvme_tcp_request *req) { return rq_data_dir(blk_mq_rq_from_pdu(req)) == WRITE ? From 0dc9edaf80ea3c48231d94cd482355699d453888 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 14 Jan 2021 13:15:26 -0800 Subject: [PATCH 08/22] nvme-tcp: pass multipage bvec to request iov_iter iov_iter uses the right helpers so we should be able to pass in a multipage bvec. Right now the iov_iter is initialized with more segments than it needs, which doesn't fail because the iov_iter is capped by byte count, but it is better to use a full multipage bvec iter.
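For reference, counting full (possibly multipage) bvecs instead of single-page segments is just a bio_for_each_bvec() walk, roughly as in this sketch (assuming a populated struct bio *bio):

	struct bvec_iter iter;
	struct bio_vec bv;
	int nr_bvec = 0;

	bio_for_each_bvec(bv, bio, iter)	/* iterates multipage bvecs */
		nr_bvec++;

whereas bio_segments() counts single-page segments and can therefore report a larger number than the iov_iter actually needs.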
Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index f2f3471faed3..4c13c7110dbe 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -224,24 +224,29 @@ static void nvme_tcp_init_iter(struct nvme_tcp_request *req, struct request *rq = blk_mq_rq_from_pdu(req); struct bio_vec *vec; unsigned int size; - int nsegs; + int nr_bvec; size_t offset; if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) { vec = &rq->special_vec; - nsegs = 1; + nr_bvec = 1; size = blk_rq_payload_bytes(rq); offset = 0; } else { struct bio *bio = req->curr_bio; + struct bvec_iter bi; + struct bio_vec bv; vec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); - nsegs = bio_segments(bio); + nr_bvec = 0; + bio_for_each_bvec(bv, bio, bi) { + nr_bvec++; + } size = bio->bi_iter.bi_size; offset = bio->bi_iter.bi_bvec_done; } - iov_iter_bvec(&req->iter, dir, vec, nsegs, size); + iov_iter_bvec(&req->iter, dir, vec, nr_bvec, size); req->iter.iov_offset = offset; } From fc97e942d90c2103755f2fcd9a068a4ee7dfc1bf Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Wed, 13 Jan 2021 23:36:27 +0900 Subject: [PATCH 09/22] nvme: refactor ns->ctrl by request Just for current code in nvme_cleanup_cmd(), we don't have to get namespace instance, but we need controller instance. Controller instance can be retrieved by namespace instance, but it can be directly accessed by nvme_request instance from request. ctrl = nvme_req(req)->ctrl; We don't have to go around namespace instance from request instance through gendisk. Signed-off-by: Minwoo Im Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 636a88c93194..009830d247f8 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -841,11 +841,11 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, void nvme_cleanup_cmd(struct request *req) { if (req->rq_flags & RQF_SPECIAL_PAYLOAD) { - struct nvme_ns *ns = req->rq_disk->private_data; + struct nvme_ctrl *ctrl = nvme_req(req)->ctrl; struct page *page = req->special_vec.bv_page; - if (page == ns->ctrl->discard_page) - clear_bit_unlock(0, &ns->ctrl->discard_page_busy); + if (page == ctrl->discard_page) + clear_bit_unlock(0, &ctrl->discard_page_busy); else kfree(page_address(page) + req->special_vec.bv_offset); } From 624e67fdf9a657fe437d84dd9f28b35e594183dd Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 13 Jan 2021 17:33:52 -0800 Subject: [PATCH 10/22] nvmet: remove extra variable in smart log nsid We remove the extra local variable struct nvmet_ns in nvmet_get_smart_log_nsid() since req already has ns member that can be reused, this also eliminates the explicit call to nvmet_put_namespace() which is already present in the request completion path. 
Signed-off-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index dc1ea468b182..de804d9762dd 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -74,11 +74,11 @@ static void nvmet_execute_get_log_page_error(struct nvmet_req *req) static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req, struct nvme_smart_log *slog) { - struct nvmet_ns *ns; u64 host_reads, host_writes, data_units_read, data_units_written; - ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->get_log_page.nsid); - if (!ns) { + req->ns = nvmet_find_namespace(req->sq->ctrl, + req->cmd->get_log_page.nsid); + if (!req->ns) { pr_err("Could not find namespace id : %d\n", le32_to_cpu(req->cmd->get_log_page.nsid)); req->error_loc = offsetof(struct nvme_rw_command, nsid); @@ -86,22 +86,20 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req, } /* we don't have the right data for file backed ns */ - if (!ns->bdev) - goto out; + if (!req->ns->bdev) + return NVME_SC_SUCCESS; - host_reads = part_stat_read(ns->bdev, ios[READ]); + host_reads = part_stat_read(req->ns->bdev, ios[READ]); data_units_read = - DIV_ROUND_UP(part_stat_read(ns->bdev, sectors[READ]), 1000); - host_writes = part_stat_read(ns->bdev, ios[WRITE]); + DIV_ROUND_UP(part_stat_read(req->ns->bdev, sectors[READ]), 1000); + host_writes = part_stat_read(req->ns->bdev, ios[WRITE]); data_units_written = - DIV_ROUND_UP(part_stat_read(ns->bdev, sectors[WRITE]), 1000); + DIV_ROUND_UP(part_stat_read(req->ns->bdev, sectors[WRITE]), 1000); put_unaligned_le64(host_reads, &slog->host_reads[0]); put_unaligned_le64(data_units_read, &slog->data_units_read[0]); put_unaligned_le64(host_writes, &slog->host_writes[0]); put_unaligned_le64(data_units_written, &slog->data_units_written[0]); -out: - nvmet_put_namespace(ns); return NVME_SC_SUCCESS; } From 3631c7f4a24165b9431942b85b502454edb0c33b Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 13 Jan 2021 17:33:53 -0800 Subject: [PATCH 11/22] nvmet: remove extra variable in id-desclist We remove the extra local variable struct nvmet_ns in nvmet_execute_identify_desclist() since req already has ns member that can be reused, this also eliminates the explicit call to nvmet_put_namespace() which is already present in the request completion path. 
Signed-off-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index de804d9762dd..1cc61ca42a7d 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -605,37 +605,35 @@ static u16 nvmet_copy_ns_identifier(struct nvmet_req *req, u8 type, u8 len, static void nvmet_execute_identify_desclist(struct nvmet_req *req) { - struct nvmet_ns *ns; u16 status = 0; off_t off = 0; - ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->identify.nsid); - if (!ns) { + req->ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->identify.nsid); + if (!req->ns) { req->error_loc = offsetof(struct nvme_identify, nsid); status = NVME_SC_INVALID_NS | NVME_SC_DNR; goto out; } - if (memchr_inv(&ns->uuid, 0, sizeof(ns->uuid))) { + if (memchr_inv(&req->ns->uuid, 0, sizeof(req->ns->uuid))) { status = nvmet_copy_ns_identifier(req, NVME_NIDT_UUID, NVME_NIDT_UUID_LEN, - &ns->uuid, &off); + &req->ns->uuid, &off); if (status) - goto out_put_ns; + goto out; } - if (memchr_inv(ns->nguid, 0, sizeof(ns->nguid))) { + if (memchr_inv(req->ns->nguid, 0, sizeof(req->ns->nguid))) { status = nvmet_copy_ns_identifier(req, NVME_NIDT_NGUID, NVME_NIDT_NGUID_LEN, - &ns->nguid, &off); + &req->ns->nguid, &off); if (status) - goto out_put_ns; + goto out; } if (sg_zero_buffer(req->sg, req->sg_cnt, NVME_IDENTIFY_DATA_SIZE - off, off) != NVME_IDENTIFY_DATA_SIZE - off) status = NVME_SC_INTERNAL | NVME_SC_DNR; -out_put_ns: - nvmet_put_namespace(ns); + out: nvmet_req_complete(req, status); } From 3c7b224f1956ed232b24ed2eb2c54e4476c6acb2 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 13 Jan 2021 17:33:54 -0800 Subject: [PATCH 12/22] nvmet: remove extra variable in identify ns We remove the extra local variable struct nvmet_ns in nvmet_execute_identify_ns() since req already has ns member that can be reused, this also eliminates the explicit call to nvmet_put_namespace() which is already present in the request completion path. Signed-off-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 1cc61ca42a7d..613a4d8feac1 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -467,7 +467,6 @@ out: static void nvmet_execute_identify_ns(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; - struct nvmet_ns *ns; struct nvme_id_ns *id; u16 status = 0; @@ -484,20 +483,21 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) } /* return an all zeroed buffer if we can't find an active namespace */ - ns = nvmet_find_namespace(ctrl, req->cmd->identify.nsid); - if (!ns) { + req->ns = nvmet_find_namespace(ctrl, req->cmd->identify.nsid); + if (!req->ns) { status = NVME_SC_INVALID_NS; goto done; } - nvmet_ns_revalidate(ns); + nvmet_ns_revalidate(req->ns); /* * nuse = ncap = nsze isn't always true, but we have no way to find * that out from the underlying device. 
*/ - id->ncap = id->nsze = cpu_to_le64(ns->size >> ns->blksize_shift); - switch (req->port->ana_state[ns->anagrpid]) { + id->ncap = id->nsze = + cpu_to_le64(req->ns->size >> req->ns->blksize_shift); + switch (req->port->ana_state[req->ns->anagrpid]) { case NVME_ANA_INACCESSIBLE: case NVME_ANA_PERSISTENT_LOSS: break; @@ -506,8 +506,8 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) break; } - if (ns->bdev) - nvmet_bdev_set_limits(ns->bdev, id); + if (req->ns->bdev) + nvmet_bdev_set_limits(req->ns->bdev, id); /* * We just provide a single LBA format that matches what the @@ -521,25 +521,24 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) * controllers, but also with any other user of the block device. */ id->nmic = (1 << 0); - id->anagrpid = cpu_to_le32(ns->anagrpid); + id->anagrpid = cpu_to_le32(req->ns->anagrpid); - memcpy(&id->nguid, &ns->nguid, sizeof(id->nguid)); + memcpy(&id->nguid, &req->ns->nguid, sizeof(id->nguid)); - id->lbaf[0].ds = ns->blksize_shift; + id->lbaf[0].ds = req->ns->blksize_shift; - if (ctrl->pi_support && nvmet_ns_has_pi(ns)) { + if (ctrl->pi_support && nvmet_ns_has_pi(req->ns)) { id->dpc = NVME_NS_DPC_PI_FIRST | NVME_NS_DPC_PI_LAST | NVME_NS_DPC_PI_TYPE1 | NVME_NS_DPC_PI_TYPE2 | NVME_NS_DPC_PI_TYPE3; id->mc = NVME_MC_EXTENDED_LBA; - id->dps = ns->pi_type; + id->dps = req->ns->pi_type; id->flbas = NVME_NS_FLBAS_META_EXT; - id->lbaf[0].ms = cpu_to_le16(ns->metadata_size); + id->lbaf[0].ms = cpu_to_le16(req->ns->metadata_size); } - if (ns->readonly) + if (req->ns->readonly) id->nsattr |= (1 << 0); - nvmet_put_namespace(ns); done: if (!status) status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id)); From 193fcf371f9e3705c14a0bf1d4bfc44af0f7c124 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 11 Jan 2021 20:26:16 -0800 Subject: [PATCH 13/22] nvmet: add lba to sect conversion helpers In this preparation patch, we add helpers to convert lbas to sectors & sectors to lba. This is needed to eliminate code duplication in the ZBD backend. Use these helpers in the block device backend. 
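As a quick illustration of the conversion (example values only): for a namespace with a 4096-byte block size, ns->blksize_shift is 12 and SECTOR_SHIFT is 9, so LBA 1 corresponds to 512-byte sector 8 and vice versa:

	sector_t sect = 1 << (12 - 9);	/* nvmet_lba_to_sect(): LBA 1 -> sector 8 */
	u64 lba = sect >> (12 - 9);	/* nvmet_sect_to_lba(): sector 8 -> LBA 1 */

(ignoring the le64 conversions the real helpers perform).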
Signed-off-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Signed-off-by: Christoph Hellwig --- drivers/nvme/target/io-cmd-bdev.c | 8 +++----- drivers/nvme/target/nvmet.h | 10 ++++++++++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 125dde3f410e..23095bdfce06 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -256,8 +256,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) if (is_pci_p2pdma_page(sg_page(req->sg))) op |= REQ_NOMERGE; - sector = le64_to_cpu(req->cmd->rw.slba); - sector <<= (req->ns->blksize_shift - 9); + sector = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba); if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) { bio = &req->b.inline_bio; @@ -345,7 +344,7 @@ static u16 nvmet_bdev_discard_range(struct nvmet_req *req, int ret; ret = __blkdev_issue_discard(ns->bdev, - le64_to_cpu(range->slba) << (ns->blksize_shift - 9), + nvmet_lba_to_sect(ns, range->slba), le32_to_cpu(range->nlb) << (ns->blksize_shift - 9), GFP_KERNEL, 0, bio); if (ret && ret != -EOPNOTSUPP) { @@ -414,8 +413,7 @@ static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req) if (!nvmet_check_transfer_len(req, 0)) return; - sector = le64_to_cpu(write_zeroes->slba) << - (req->ns->blksize_shift - 9); + sector = nvmet_lba_to_sect(req->ns, write_zeroes->slba); nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) << (req->ns->blksize_shift - 9)); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 592763732065..8776dd1a0490 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -603,4 +603,14 @@ static inline bool nvmet_ns_has_pi(struct nvmet_ns *ns) return ns->pi_type && ns->metadata_size == sizeof(struct t10_pi_tuple); } +static inline __le64 nvmet_sect_to_lba(struct nvmet_ns *ns, sector_t sect) +{ + return cpu_to_le64(sect >> (ns->blksize_shift - SECTOR_SHIFT)); +} + +static inline sector_t nvmet_lba_to_sect(struct nvmet_ns *ns, __le64 lba) +{ + return le64_to_cpu(lba) << (ns->blksize_shift - SECTOR_SHIFT); +} + #endif /* _NVMET_H */ From 3254899e0b52f10b9a3e7db4d10f081f60705ba9 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 21 Jan 2021 09:09:47 +0000 Subject: [PATCH 14/22] nvme: update enumerations for status codes All the updates are mentioned in the ratified NVMe 1.4 spec. 
Reviewed-by: Hannes Reinecke Signed-off-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index bfed36e342cc..458719544253 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1473,20 +1473,29 @@ enum { NVME_SC_SGL_INVALID_DATA = 0xf, NVME_SC_SGL_INVALID_METADATA = 0x10, NVME_SC_SGL_INVALID_TYPE = 0x11, - + NVME_SC_CMB_INVALID_USE = 0x12, + NVME_SC_PRP_INVALID_OFFSET = 0x13, + NVME_SC_ATOMIC_WU_EXCEEDED = 0x14, + NVME_SC_OP_DENIED = 0x15, NVME_SC_SGL_INVALID_OFFSET = 0x16, - NVME_SC_SGL_INVALID_SUBTYPE = 0x17, - + NVME_SC_RESERVED = 0x17, + NVME_SC_HOST_ID_INCONSIST = 0x18, + NVME_SC_KA_TIMEOUT_EXPIRED = 0x19, + NVME_SC_KA_TIMEOUT_INVALID = 0x1A, + NVME_SC_ABORTED_PREEMPT_ABORT = 0x1B, NVME_SC_SANITIZE_FAILED = 0x1C, NVME_SC_SANITIZE_IN_PROGRESS = 0x1D, - + NVME_SC_SGL_INVALID_GRANULARITY = 0x1E, + NVME_SC_CMD_NOT_SUP_CMB_QUEUE = 0x1F, NVME_SC_NS_WRITE_PROTECTED = 0x20, NVME_SC_CMD_INTERRUPTED = 0x21, + NVME_SC_TRANSIENT_TR_ERR = 0x22, NVME_SC_LBA_RANGE = 0x80, NVME_SC_CAP_EXCEEDED = 0x81, NVME_SC_NS_NOT_READY = 0x82, NVME_SC_RESERVATION_CONFLICT = 0x83, + NVME_SC_FORMAT_IN_PROGRESS = 0x84, /* * Command Specific Status: @@ -1519,8 +1528,15 @@ enum { NVME_SC_NS_NOT_ATTACHED = 0x11a, NVME_SC_THIN_PROV_NOT_SUPP = 0x11b, NVME_SC_CTRL_LIST_INVALID = 0x11c, + NVME_SC_SELT_TEST_IN_PROGRESS = 0x11d, NVME_SC_BP_WRITE_PROHIBITED = 0x11e, + NVME_SC_CTRL_ID_INVALID = 0x11f, + NVME_SC_SEC_CTRL_STATE_INVALID = 0x120, + NVME_SC_CTRL_RES_NUM_INVALID = 0x121, + NVME_SC_RES_ID_INVALID = 0x122, NVME_SC_PMR_SAN_PROHIBITED = 0x123, + NVME_SC_ANA_GROUP_ID_INVALID = 0x124, + NVME_SC_ANA_ATTACH_FAILED = 0x125, /* * I/O Command Set Specific - NVM commands: From 3a98c51a24825173455c479822aa2f89fecbe6af Mon Sep 17 00:00:00 2001 From: Michal Krakowiak Date: Mon, 4 Jan 2021 16:53:43 +0100 Subject: [PATCH 15/22] nvme: parse format nvm command details when tracing Add detailed parsing of format nvm admin command to make the trace log more consistent and human-readable. 
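The decoding below follows the Format NVM CDW10 layout (LBAF bits 3:0, MSET bit 4, PI bits 7:5, PIL bit 8, SES bits 11:9). As a made-up example, cdw10 = 0x231 would be rendered in the trace as:

	lbaf=1, mset=1, pi=1, pil=0, ses=1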
Signed-off-by: Michal Krakowiak Acked-by: Dan Williams Reviewed-by: Minwoo Im Signed-off-by: Christoph Hellwig --- drivers/nvme/host/trace.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c index 5c3cb6928f3c..e0400de713b5 100644 --- a/drivers/nvme/host/trace.c +++ b/drivers/nvme/host/trace.c @@ -102,6 +102,23 @@ static const char *nvme_trace_get_lba_status(struct trace_seq *p, return ret; } +static const char *nvme_trace_admin_format_nvm(struct trace_seq *p, u8 *cdw10) +{ + const char *ret = trace_seq_buffer_ptr(p); + u8 lbaf = cdw10[0] & 0xF; + u8 mset = (cdw10[0] >> 4) & 0x1; + u8 pi = (cdw10[0] >> 5) & 0x7; + u8 pil = cdw10[1] & 0x1; + u8 ses = (cdw10[1] >> 1) & 0x7; + + trace_seq_printf(p, "lbaf=%u, mset=%u, pi=%u, pil=%u, ses=%u", + lbaf, mset, pi, pil, ses); + + trace_seq_putc(p, 0); + + return ret; +} + static const char *nvme_trace_read_write(struct trace_seq *p, u8 *cdw10) { const char *ret = trace_seq_buffer_ptr(p); @@ -159,6 +176,8 @@ const char *nvme_trace_parse_admin_cmd(struct trace_seq *p, return nvme_trace_admin_get_features(p, cdw10); case nvme_admin_get_lba_status: return nvme_trace_get_lba_status(p, cdw10); + case nvme_admin_format_nvm: + return nvme_trace_admin_format_nvm(p, cdw10); default: return nvme_trace_common(p, cdw10); } From 4a407d5ebc7ac1ea8c6e2692bd79320459dc60f6 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 27 Jan 2021 02:50:00 +0900 Subject: [PATCH 16/22] nvme: add tracing of zns commands When support for the NVMe ZNS commands was merged, tracing of these has been omitted. Add nvme_cmd_zone_mgmt_send, nvme_cmd_zone_mgmt_recv as well as nvme_cmd_zone_append to the nvme driver's tracing facility. Signed-off-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/trace.c | 34 ++++++++++++++++++++++++++++++++++ include/linux/nvme.h | 6 +++++- 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c index e0400de713b5..6543015b6121 100644 --- a/drivers/nvme/host/trace.c +++ b/drivers/nvme/host/trace.c @@ -148,6 +148,35 @@ static const char *nvme_trace_dsm(struct trace_seq *p, u8 *cdw10) return ret; } +static const char *nvme_trace_zone_mgmt_send(struct trace_seq *p, u8 *cdw10) +{ + const char *ret = trace_seq_buffer_ptr(p); + u64 slba = get_unaligned_le64(cdw10); + u8 zsa = cdw10[12]; + u8 all = cdw10[13]; + + trace_seq_printf(p, "slba=%llu, zsa=%u, all=%u", slba, zsa, all); + trace_seq_putc(p, 0); + + return ret; +} + +static const char *nvme_trace_zone_mgmt_recv(struct trace_seq *p, u8 *cdw10) +{ + const char *ret = trace_seq_buffer_ptr(p); + u64 slba = get_unaligned_le64(cdw10); + u32 numd = get_unaligned_le32(cdw10 + 8); + u8 zra = cdw10[12]; + u8 zrasf = cdw10[13]; + u8 pr = cdw10[14]; + + trace_seq_printf(p, "slba=%llu, numd=%u, zra=%u, zrasf=%u, pr=%u", + slba, numd, zra, zrasf, pr); + trace_seq_putc(p, 0); + + return ret; +} + static const char *nvme_trace_common(struct trace_seq *p, u8 *cdw10) { const char *ret = trace_seq_buffer_ptr(p); @@ -190,9 +219,14 @@ const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p, case nvme_cmd_read: case nvme_cmd_write: case nvme_cmd_write_zeroes: + case nvme_cmd_zone_append: return nvme_trace_read_write(p, cdw10); case nvme_cmd_dsm: return nvme_trace_dsm(p, cdw10); + case nvme_cmd_zone_mgmt_send: + return nvme_trace_zone_mgmt_send(p, cdw10); + case nvme_cmd_zone_mgmt_recv: + return nvme_trace_zone_mgmt_recv(p, cdw10); default: return 
nvme_trace_common(p, cdw10); } diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 458719544253..b08787cd0881 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -697,7 +697,11 @@ enum nvme_opcode { nvme_opcode_name(nvme_cmd_resv_register), \ nvme_opcode_name(nvme_cmd_resv_report), \ nvme_opcode_name(nvme_cmd_resv_acquire), \ - nvme_opcode_name(nvme_cmd_resv_release)) + nvme_opcode_name(nvme_cmd_resv_release), \ + nvme_opcode_name(nvme_cmd_zone_mgmt_send), \ + nvme_opcode_name(nvme_cmd_zone_mgmt_recv), \ + nvme_opcode_name(nvme_cmd_zone_append)) + /* From 8f8ea928fd77db60dc22276e3acdb9ca41cbf8dd Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 26 Jan 2021 11:47:52 -0800 Subject: [PATCH 17/22] nvme-core: get rid of the extra space Remove the extra space in the nvme_free_cels() when calling xa_for_each loop which is not a common practice (except drivers/infiniband/core/ not sure why). Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 009830d247f8..168601d96f48 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4448,7 +4448,7 @@ static void nvme_free_cels(struct nvme_ctrl *ctrl) struct nvme_effects_log *cel; unsigned long i; - xa_for_each (&ctrl->cels, i, cel) { + xa_for_each(&ctrl->cels, i, cel) { xa_erase(&ctrl->cels, i); kfree(cel); } From 2547906982e2e6a0d42f8957f55af5bb51a7e55f Mon Sep 17 00:00:00 2001 From: Chao Leng Date: Thu, 21 Jan 2021 11:32:36 +0800 Subject: [PATCH 18/22] nvme-core: add cancel tagset helpers Add nvme_cancel_tagset and nvme_cancel_admin_tagset for tear down and reconnection error handling. Signed-off-by: Chao Leng Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 20 ++++++++++++++++++++ drivers/nvme/host/nvme.h | 2 ++ 2 files changed, 22 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 168601d96f48..4e8e310033c9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -370,6 +370,26 @@ bool nvme_cancel_request(struct request *req, void *data, bool reserved) } EXPORT_SYMBOL_GPL(nvme_cancel_request); +void nvme_cancel_tagset(struct nvme_ctrl *ctrl) +{ + if (ctrl->tagset) { + blk_mq_tagset_busy_iter(ctrl->tagset, + nvme_cancel_request, ctrl); + blk_mq_tagset_wait_completed_request(ctrl->tagset); + } +} +EXPORT_SYMBOL_GPL(nvme_cancel_tagset); + +void nvme_cancel_admin_tagset(struct nvme_ctrl *ctrl) +{ + if (ctrl->admin_tagset) { + blk_mq_tagset_busy_iter(ctrl->admin_tagset, + nvme_cancel_request, ctrl); + blk_mq_tagset_wait_completed_request(ctrl->admin_tagset); + } +} +EXPORT_SYMBOL_GPL(nvme_cancel_admin_tagset); + bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, enum nvme_ctrl_state new_state) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 88a6b97247f5..a72f07181091 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -576,6 +576,8 @@ static inline bool nvme_is_aen_req(u16 qid, __u16 command_id) void nvme_complete_rq(struct request *req); bool nvme_cancel_request(struct request *req, void *data, bool reserved); +void nvme_cancel_tagset(struct nvme_ctrl *ctrl); +void nvme_cancel_admin_tagset(struct nvme_ctrl *ctrl); bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, enum nvme_ctrl_state new_state); bool nvme_wait_reset(struct nvme_ctrl *ctrl); From 958dc1d32c80566f58d18f05ef1f05bd32d172c1 Mon Sep 17 00:00:00 2001 From: Chao Leng Date: 
Thu, 21 Jan 2021 11:32:37 +0800 Subject: [PATCH 19/22] nvme-rdma: add clean action for failed reconnection A crash happens when injecting a reconnection failure. If the reconnect fails after the I/O queues have been started, the queues are unquiesced and new requests continue to be delivered. The reconnection error handling path then frees the queues directly without canceling the suspended requests; those requests eventually time out and the driver crashes by using a queue after it has been freed. Add queue syncing and cancel the suspended requests in the reconnection error handling path. Signed-off-by: Chao Leng Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index f5ef3edeb2fd..d92132cbcbbe 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -919,12 +919,16 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, error = nvme_init_identify(&ctrl->ctrl); if (error) - goto out_stop_queue; + goto out_quiesce_queue; return 0; +out_quiesce_queue: + blk_mq_quiesce_queue(ctrl->ctrl.admin_q); + blk_sync_queue(ctrl->ctrl.admin_q); out_stop_queue: nvme_rdma_stop_queue(&ctrl->queues[0]); + nvme_cancel_admin_tagset(&ctrl->ctrl); out_cleanup_queue: if (new) blk_cleanup_queue(ctrl->ctrl.admin_q); @@ -1001,8 +1005,10 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) out_wait_freeze_timed_out: nvme_stop_queues(&ctrl->ctrl); + nvme_sync_io_queues(&ctrl->ctrl); nvme_rdma_stop_io_queues(ctrl); out_cleanup_connect_q: + nvme_cancel_tagset(&ctrl->ctrl); if (new) blk_cleanup_queue(ctrl->ctrl.connect_q); out_free_tag_set: @@ -1144,10 +1150,18 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new) return 0; destroy_io: - if (ctrl->ctrl.queue_count > 1) + if (ctrl->ctrl.queue_count > 1) { + nvme_stop_queues(&ctrl->ctrl); + nvme_sync_io_queues(&ctrl->ctrl); + nvme_rdma_stop_io_queues(ctrl); + nvme_cancel_tagset(&ctrl->ctrl); nvme_rdma_destroy_io_queues(ctrl, new); + } destroy_admin: + blk_mq_quiesce_queue(ctrl->ctrl.admin_q); + blk_sync_queue(ctrl->ctrl.admin_q); nvme_rdma_stop_queue(&ctrl->queues[0]); + nvme_cancel_admin_tagset(&ctrl->ctrl); nvme_rdma_destroy_admin_queue(ctrl, new); return ret; } From 70a99574a79f1cd4dc7ad56ea37be40844bfb97b Mon Sep 17 00:00:00 2001 From: Chao Leng Date: Thu, 21 Jan 2021 11:32:38 +0800 Subject: [PATCH 20/22] nvme-tcp: add clean action for failed reconnection If the reconnect fails after the I/O queues have been started, the queues are unquiesced and new requests continue to be delivered. The reconnection error handling path then frees the queues directly without canceling the suspended requests; those requests eventually time out and the driver crashes by using a queue after it has been freed. Add queue syncing and cancel the suspended requests in the reconnection error handling path.
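The resulting error path follows the usual teardown order, sketched here for the admin queue (the exact hunks are in the diff below):

	blk_mq_quiesce_queue(ctrl->admin_q);	/* stop accepting new requests */
	blk_sync_queue(ctrl->admin_q);		/* wait for running timeout handlers */
	nvme_tcp_stop_queue(ctrl, 0);		/* tear down the transport queue */
	nvme_cancel_admin_tagset(ctrl);		/* fail the suspended requests */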
Signed-off-by: Chao Leng Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 4c13c7110dbe..8c256adb8c41 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1812,8 +1812,10 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) out_wait_freeze_timed_out: nvme_stop_queues(ctrl); + nvme_sync_io_queues(ctrl); nvme_tcp_stop_io_queues(ctrl); out_cleanup_connect_q: + nvme_cancel_tagset(ctrl); if (new) blk_cleanup_queue(ctrl->connect_q); out_free_tag_set: @@ -1875,12 +1877,16 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new) error = nvme_init_identify(ctrl); if (error) - goto out_stop_queue; + goto out_quiesce_queue; return 0; +out_quiesce_queue: + blk_mq_quiesce_queue(ctrl->admin_q); + blk_sync_queue(ctrl->admin_q); out_stop_queue: nvme_tcp_stop_queue(ctrl, 0); + nvme_cancel_admin_tagset(ctrl); out_cleanup_queue: if (new) blk_cleanup_queue(ctrl->admin_q); @@ -2000,10 +2006,18 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new) return 0; destroy_io: - if (ctrl->queue_count > 1) + if (ctrl->queue_count > 1) { + nvme_stop_queues(ctrl); + nvme_sync_io_queues(ctrl); + nvme_tcp_stop_io_queues(ctrl); + nvme_cancel_tagset(ctrl); nvme_tcp_destroy_io_queues(ctrl, new); + } destroy_admin: + blk_mq_quiesce_queue(ctrl->admin_q); + blk_sync_queue(ctrl->admin_q); nvme_tcp_stop_queue(ctrl, 0); + nvme_cancel_admin_tagset(ctrl); nvme_tcp_destroy_admin_queue(ctrl, new); return ret; } From c4189d680e12f0a41eea94a1f466142b2bf02c3d Mon Sep 17 00:00:00 2001 From: Chao Leng Date: Thu, 21 Jan 2021 11:32:39 +0800 Subject: [PATCH 21/22] nvme-rdma: use cancel tagset helper for tear down Use nvme_cancel_tagset and nvme_cancel_admin_tagset to clean code for tear down process. Signed-off-by: Chao Leng Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index d92132cbcbbe..6700d8bab68a 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1025,11 +1025,7 @@ static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl, blk_mq_quiesce_queue(ctrl->ctrl.admin_q); blk_sync_queue(ctrl->ctrl.admin_q); nvme_rdma_stop_queue(&ctrl->queues[0]); - if (ctrl->ctrl.admin_tagset) { - blk_mq_tagset_busy_iter(ctrl->ctrl.admin_tagset, - nvme_cancel_request, &ctrl->ctrl); - blk_mq_tagset_wait_completed_request(ctrl->ctrl.admin_tagset); - } + nvme_cancel_admin_tagset(&ctrl->ctrl); if (remove) blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); nvme_rdma_destroy_admin_queue(ctrl, remove); @@ -1043,11 +1039,7 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, nvme_stop_queues(&ctrl->ctrl); nvme_sync_io_queues(&ctrl->ctrl); nvme_rdma_stop_io_queues(ctrl); - if (ctrl->ctrl.tagset) { - blk_mq_tagset_busy_iter(ctrl->ctrl.tagset, - nvme_cancel_request, &ctrl->ctrl); - blk_mq_tagset_wait_completed_request(ctrl->ctrl.tagset); - } + nvme_cancel_tagset(&ctrl->ctrl); if (remove) nvme_start_queues(&ctrl->ctrl); nvme_rdma_destroy_io_queues(ctrl, remove); From 563c81586d0ab2841487a61fb34d6e9cd5efded7 Mon Sep 17 00:00:00 2001 From: Chao Leng Date: Thu, 21 Jan 2021 11:32:40 +0800 Subject: [PATCH 22/22] nvme-tcp: use cancel tagset helper for tear down Use nvme_cancel_tagset and nvme_cancel_admin_tagset to clean code for tear down process. 
Signed-off-by: Chao Leng Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 8c256adb8c41..619b0d8f6e38 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1907,11 +1907,7 @@ static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl, blk_mq_quiesce_queue(ctrl->admin_q); blk_sync_queue(ctrl->admin_q); nvme_tcp_stop_queue(ctrl, 0); - if (ctrl->admin_tagset) { - blk_mq_tagset_busy_iter(ctrl->admin_tagset, - nvme_cancel_request, ctrl); - blk_mq_tagset_wait_completed_request(ctrl->admin_tagset); - } + nvme_cancel_admin_tagset(ctrl); if (remove) blk_mq_unquiesce_queue(ctrl->admin_q); nvme_tcp_destroy_admin_queue(ctrl, remove); @@ -1927,11 +1923,7 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, nvme_stop_queues(ctrl); nvme_sync_io_queues(ctrl); nvme_tcp_stop_io_queues(ctrl); - if (ctrl->tagset) { - blk_mq_tagset_busy_iter(ctrl->tagset, - nvme_cancel_request, ctrl); - blk_mq_tagset_wait_completed_request(ctrl->tagset); - } + nvme_cancel_tagset(ctrl); if (remove) nvme_start_queues(ctrl); nvme_tcp_destroy_io_queues(ctrl, remove);