block-5.8-2020-06-26
-----BEGIN PGP SIGNATURE-----

iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAl72TjIQHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgpvxcD/9i6yjjq7Qzx9pUIaowcCah0PTUfqNuXoL/
muA01DbynjcP3uxP5XxG416z5rPmLDRLtof6QoV/8tbwxQUsg2RaZW9i/OADYrq0
qnISlNfQ0+rlLkV1v7S+WWM2npYGoh2j+WizmeNcHYMFFo8ueds7gUM9usFkx+dw
3RXUGxColF18uXizjRYMlLgxqddNmC1H7B/Z7Y3kooRuqYcd56QXrh/gDLQuzo0e
SnBybTYyIiUSsMakyoRBcYleSJu6mLQQ/BT665tkdWgQpwFaWQ7nwYtKMwXee/Ul
uRyKnTK4tGnp66PCt6nDGu5Ud3IQkWqlXJvqmN/5Cggs3pWklzO+HZkxFusJOTtS
NqDIs7vkVMcgi5LxXUIb5+uRqMSYXLmfhv3iyJ11/fZlT8mv6SaQJfWHo2jDJvz9
CuLEr4+auRPrcXRau8FNySnssJ3NZ4iuEH4CI0r+Zgzdm7C3kmEj9w16t8/CNuCW
s3/EyyCBvwPnPGYJEukYirVoVPKQL1Pn5hHqtStyWfFH0lUlhl/GUXc0S8Qhl9YU
cOBRGxjR1aIv65kK9zWeSpNq9lZCLCWeACFbA/4nIdhURtxdiH8nVW38qdVcGM3/
nr+KKTBCdOeK9iTt64XIuqIRX2J3p2NjGzExAugmlBQzeAqGbcZgCvX/WYK5Roay
hel6eOLY4A==
=pQH7
-----END PGP SIGNATURE-----

Merge tag 'block-5.8-2020-06-26' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

 - NVMe pull request from Christoph:
     - multipath deadlock fixes (Anton)
     - NUMA fixes (Max)
     - RDMA completion vector fix (Max)
     - IO deadlock fix (Sagi)
     - multipath reference fix (Sagi)
     - NS mutation fix (Sagi)

 - Use right allocator when freeing bip in error path (Chengguang)

* tag 'block-5.8-2020-06-26' of git://git.kernel.dk/linux-block:
  nvme-multipath: fix bogus request queue reference put
  nvme-multipath: fix deadlock due to head->lock
  nvme: don't protect ns mutation with ns->head->lock
  nvme-multipath: fix deadlock between ana_work and scan_work
  nvme: fix possible deadlock when I/O is blocked
  nvme-rdma: assign completion vector correctly
  nvme-loop: initialize tagset numa value to the value of the ctrl
  nvme-tcp: initialize tagset numa value to the value of the ctrl
  nvme-pci: initialize tagset numa value to the value of the ctrl
  nvme-pci: override the value of the controller's numa node
  nvme: set initial value for controller's numa node
  block: release bip in a right way in error path
Commit 9b8d020796
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
@@ -24,6 +24,18 @@ void blk_flush_integrity(void)
 	flush_workqueue(kintegrityd_wq);
 }
 
+void __bio_integrity_free(struct bio_set *bs, struct bio_integrity_payload *bip)
+{
+	if (bs && mempool_initialized(&bs->bio_integrity_pool)) {
+		if (bip->bip_vec)
+			bvec_free(&bs->bvec_integrity_pool, bip->bip_vec,
+				  bip->bip_slab);
+		mempool_free(bip, &bs->bio_integrity_pool);
+	} else {
+		kfree(bip);
+	}
+}
+
 /**
  * bio_integrity_alloc - Allocate integrity payload and attach it to bio
  * @bio:	bio to attach integrity metadata to
@@ -78,7 +90,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
 
 	return bip;
 err:
-	mempool_free(bip, &bs->bio_integrity_pool);
+	__bio_integrity_free(bs, bip);
 	return ERR_PTR(-ENOMEM);
 }
 EXPORT_SYMBOL(bio_integrity_alloc);
@@ -99,14 +111,7 @@ void bio_integrity_free(struct bio *bio)
 	kfree(page_address(bip->bip_vec->bv_page) +
 	      bip->bip_vec->bv_offset);
 
-	if (bs && mempool_initialized(&bs->bio_integrity_pool)) {
-		bvec_free(&bs->bvec_integrity_pool, bip->bip_vec, bip->bip_slab);
-
-		mempool_free(bip, &bs->bio_integrity_pool);
-	} else {
-		kfree(bip);
-	}
-
+	__bio_integrity_free(bs, bip);
 	bio->bi_integrity = NULL;
 	bio->bi_opf &= ~REQ_INTEGRITY;
 }
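Note on the bio-integrity hunks above: bio_integrity_alloc() obtains the bip either from the bio_set's integrity mempool or, when no pool is initialized, from kmalloc(). The old error path always called mempool_free(), so a kmalloc()ed bip could be handed to the wrong allocator; the new __bio_integrity_free() helper keeps the free decision next to the allocation decision. Below is a userspace analogue of that pairing rule — a minimal sketch only: tiny_pool and the obj_* helpers are hypothetical stand-ins, not kernel APIs, and a real mempool is more involved than one recycled slot.

/*
 * Userspace analogue of the rule __bio_integrity_free() enforces:
 * memory must go back to the allocator it came from.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct tiny_pool {
	void *slot;		/* one preallocated object to recycle */
	bool initialized;
};

static void *obj_alloc(struct tiny_pool *pool, size_t size)
{
	if (pool && pool->initialized && pool->slot) {
		void *p = pool->slot;

		pool->slot = NULL;	/* hand out the pooled object */
		return p;
	}
	return malloc(size);		/* no usable pool: plain heap */
}

/* Counterpart of __bio_integrity_free(): the pairing lives in one place. */
static void obj_free(struct tiny_pool *pool, void *p)
{
	if (pool && pool->initialized)
		pool->slot = p;		/* recycle into the pool */
	else
		free(p);		/* heap allocation: use free() */
}

int main(void)
{
	struct tiny_pool pool = { .slot = malloc(64), .initialized = true };

	/* Pool-backed case: obj_free() recycles into the pool. */
	void *a = obj_alloc(&pool, 64);
	obj_free(&pool, a);
	assert(pool.slot == a);

	/*
	 * No pool at all: obj_free() must fall back to free(). The old
	 * error path got exactly this case wrong by always calling the
	 * pool-specific free.
	 */
	void *b = obj_alloc(NULL, 64);
	obj_free(NULL, b);

	free(pool.slot);
	puts("alloc/free pairing held on both paths");
	return 0;
}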
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
@@ -1974,7 +1974,6 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
 	if (ns->head->disk) {
 		nvme_update_disk_info(ns->head->disk, ns, id);
 		blk_queue_stack_limits(ns->head->disk->queue, ns->queue);
-		revalidate_disk(ns->head->disk);
 	}
 #endif
 	return 0;
@@ -4174,6 +4173,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 	ctrl->dev = dev;
 	ctrl->ops = ops;
 	ctrl->quirks = quirks;
+	ctrl->numa_node = NUMA_NO_NODE;
 	INIT_WORK(&ctrl->scan_work, nvme_scan_work);
 	INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
 	INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
@@ -409,15 +409,14 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
 {
 	struct nvme_ns_head *head = ns->head;
 
-	lockdep_assert_held(&ns->head->lock);
-
 	if (!head->disk)
 		return;
 
-	if (!(head->disk->flags & GENHD_FL_UP))
+	if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags))
 		device_add_disk(&head->subsys->dev, head->disk,
 				nvme_ns_id_attr_groups);
 
+	mutex_lock(&head->lock);
 	if (nvme_path_is_optimized(ns)) {
 		int node, srcu_idx;
 
@@ -426,9 +425,10 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
 		__nvme_find_path(head, node);
 		srcu_read_unlock(&head->srcu, srcu_idx);
 	}
+	mutex_unlock(&head->lock);
 
-	synchronize_srcu(&ns->head->srcu);
-	kblockd_schedule_work(&ns->head->requeue_work);
+	synchronize_srcu(&head->srcu);
+	kblockd_schedule_work(&head->requeue_work);
 }
 
 static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data,
@@ -483,14 +483,12 @@ static inline bool nvme_state_is_live(enum nvme_ana_state state)
 static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
 		struct nvme_ns *ns)
 {
-	mutex_lock(&ns->head->lock);
 	ns->ana_grpid = le32_to_cpu(desc->grpid);
 	ns->ana_state = desc->state;
 	clear_bit(NVME_NS_ANA_PENDING, &ns->flags);
 
 	if (nvme_state_is_live(ns->ana_state))
 		nvme_mpath_set_live(ns);
-	mutex_unlock(&ns->head->lock);
 }
 
 static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
@@ -640,31 +638,37 @@ static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr,
 }
 DEVICE_ATTR_RO(ana_state);
 
-static int nvme_set_ns_ana_state(struct nvme_ctrl *ctrl,
+static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl,
 		struct nvme_ana_group_desc *desc, void *data)
 {
-	struct nvme_ns *ns = data;
+	struct nvme_ana_group_desc *dst = data;
 
-	if (ns->ana_grpid == le32_to_cpu(desc->grpid)) {
-		nvme_update_ns_ana_state(desc, ns);
-		return -ENXIO; /* just break out of the loop */
-	}
+	if (desc->grpid != dst->grpid)
+		return 0;
 
-	return 0;
+	*dst = *desc;
+	return -ENXIO; /* just break out of the loop */
 }
 
 void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
 {
 	if (nvme_ctrl_use_ana(ns->ctrl)) {
+		struct nvme_ana_group_desc desc = {
+			.grpid = id->anagrpid,
+			.state = 0,
+		};
+
 		mutex_lock(&ns->ctrl->ana_lock);
 		ns->ana_grpid = le32_to_cpu(id->anagrpid);
-		nvme_parse_ana_log(ns->ctrl, ns, nvme_set_ns_ana_state);
+		nvme_parse_ana_log(ns->ctrl, &desc, nvme_lookup_ana_group_desc);
 		mutex_unlock(&ns->ctrl->ana_lock);
+		if (desc.state) {
+			/* found the group desc: update */
+			nvme_update_ns_ana_state(&desc, ns);
+		}
 	} else {
-		mutex_lock(&ns->head->lock);
 		ns->ana_state = NVME_ANA_OPTIMIZED;
 		nvme_mpath_set_live(ns);
-		mutex_unlock(&ns->head->lock);
 	}
 
 	if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) {
@@ -686,6 +690,14 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
 	kblockd_schedule_work(&head->requeue_work);
 	flush_work(&head->requeue_work);
 	blk_cleanup_queue(head->disk->queue);
+	if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
+		/*
+		 * if device_add_disk wasn't called, prevent
+		 * disk release to put a bogus reference on the
+		 * request queue
+		 */
+		head->disk->queue = NULL;
+	}
 	put_disk(head->disk);
 }
 
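Note on the multipath hunks above: device_add_disk() used to be gated on GENHD_FL_UP with head->lock held across the whole of nvme_mpath_set_live(), which is what let ana_work and scan_work deadlock against each other. The fix tracks registration with a dedicated NVME_NSHEAD_DISK_LIVE bit, so "register the head disk exactly once" becomes a lock-free test-and-set and head->lock only covers the current-path update. A userspace sketch of the once-only idiom follows; atomic_flag stands in for the kernel's test_and_set_bit(), and register_head_disk() is a hypothetical stand-in for device_add_disk().

/*
 * Once-only registration: whoever wins the test-and-set registers the
 * disk; every later caller sees the flag already set and skips it.
 */
#include <stdatomic.h>
#include <stdio.h>

static atomic_flag disk_live = ATOMIC_FLAG_INIT;

static void register_head_disk(void)
{
	puts("device_add_disk() analogue: head disk registered");
}

static void mpath_set_live(void)
{
	/*
	 * First caller flips the flag and registers; later callers,
	 * e.g. concurrent ana_work and scan_work, are no-ops. No mutex
	 * is needed for the decision itself.
	 */
	if (!atomic_flag_test_and_set(&disk_live))
		register_head_disk();
}

int main(void)
{
	mpath_set_live();	/* registers the disk */
	mpath_set_live();	/* no-op: already live */
	return 0;
}

The same bit is what nvme_mpath_remove_disk() checks above: if it was never set, device_add_disk() never ran, and clearing head->disk->queue prevents disk release from dropping a reference the queue never took.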
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
@@ -364,6 +364,8 @@ struct nvme_ns_head {
 	spinlock_t		requeue_lock;
 	struct work_struct	requeue_work;
 	struct mutex		lock;
+	unsigned long		flags;
+#define NVME_NSHEAD_DISK_LIVE	0
 	struct nvme_ns __rcu	*current_path[];
 #endif
 };
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
@@ -1593,7 +1593,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
 
 		dev->admin_tagset.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
 		dev->admin_tagset.timeout = ADMIN_TIMEOUT;
-		dev->admin_tagset.numa_node = dev_to_node(dev->dev);
+		dev->admin_tagset.numa_node = dev->ctrl.numa_node;
 		dev->admin_tagset.cmd_size = sizeof(struct nvme_iod);
 		dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED;
 		dev->admin_tagset.driver_data = dev;
@@ -1669,6 +1669,8 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
 	if (result)
 		return result;
 
+	dev->ctrl.numa_node = dev_to_node(dev->dev);
+
 	nvmeq = &dev->queues[0];
 	aqa = nvmeq->q_depth - 1;
 	aqa |= aqa << 16;
@@ -2257,7 +2259,7 @@ static void nvme_dev_add(struct nvme_dev *dev)
 		if (dev->io_queues[HCTX_TYPE_POLL])
 			dev->tagset.nr_maps++;
 		dev->tagset.timeout = NVME_IO_TIMEOUT;
-		dev->tagset.numa_node = dev_to_node(dev->dev);
+		dev->tagset.numa_node = dev->ctrl.numa_node;
 		dev->tagset.queue_depth =
 					min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1;
 		dev->tagset.cmd_size = sizeof(struct nvme_iod);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
@@ -470,7 +470,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 	 * Spread I/O queues completion vectors according their queue index.
 	 * Admin queues can always go on completion vector 0.
 	 */
-	comp_vector = idx == 0 ? idx : idx - 1;
+	comp_vector = (idx == 0 ? idx : idx - 1) % ibdev->num_comp_vectors;
 
 	/* Polling queues need direct cq polling context */
 	if (nvme_rdma_poll_queue(queue))
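Note on the rdma.c hunk above: with more queues than the RDMA device exposes completion vectors, the old expression could produce a comp_vector beyond the device's range, so CQ creation would fail; the modulo wraps the assignment back into range. A worked example of the fixed mapping — comp_vector() mirrors the patched expression, and the vector count of 2 is an assumed example device, not anything from the patch:

/* Worked example of the fixed completion-vector assignment. */
#include <stdio.h>

static int comp_vector(int idx, int num_comp_vectors)
{
	/*
	 * Queue 0 is the admin queue and shares vector 0 with the first
	 * I/O queue; the modulo keeps the result in
	 * [0, num_comp_vectors).
	 */
	return (idx == 0 ? idx : idx - 1) % num_comp_vectors;
}

int main(void)
{
	int num_comp_vectors = 2;	/* device with only two vectors */

	for (int idx = 0; idx <= 4; idx++)
		printf("queue %d -> completion vector %d\n",
		       idx, comp_vector(idx, num_comp_vectors));
	/*
	 * Prints 0, 0, 1, 0, 1. Without the modulo, queue 3 would ask
	 * for vector 2, which this device does not have.
	 */
	return 0;
}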
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
@@ -1532,7 +1532,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
 		set->ops = &nvme_tcp_admin_mq_ops;
 		set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
 		set->reserved_tags = 2; /* connect + keep-alive */
-		set->numa_node = NUMA_NO_NODE;
+		set->numa_node = nctrl->numa_node;
 		set->flags = BLK_MQ_F_BLOCKING;
 		set->cmd_size = sizeof(struct nvme_tcp_request);
 		set->driver_data = ctrl;
@@ -1544,7 +1544,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
 		set->ops = &nvme_tcp_mq_ops;
 		set->queue_depth = nctrl->sqsize + 1;
 		set->reserved_tags = 1; /* fabric connect */
-		set->numa_node = NUMA_NO_NODE;
+		set->numa_node = nctrl->numa_node;
 		set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
 		set->cmd_size = sizeof(struct nvme_tcp_request);
 		set->driver_data = ctrl;
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
@@ -340,7 +340,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
 	ctrl->admin_tag_set.ops = &nvme_loop_admin_mq_ops;
 	ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
 	ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */
-	ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
+	ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node;
 	ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
 		NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
 	ctrl->admin_tag_set.driver_data = ctrl;
@@ -512,7 +512,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
 	ctrl->tag_set.ops = &nvme_loop_mq_ops;
 	ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
 	ctrl->tag_set.reserved_tags = 1; /* fabric connect */
-	ctrl->tag_set.numa_node = NUMA_NO_NODE;
+	ctrl->tag_set.numa_node = ctrl->ctrl.numa_node;
 	ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
 	ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
 		NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
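Note on the NUMA hunks across core.c, pci.c, tcp.c, and loop.c: they implement one pattern. nvme_init_ctrl() seeds ctrl->numa_node with the NUMA_NO_NODE sentinel, a transport that can resolve the device's node (PCI, via dev_to_node()) overrides it once the admin queue is configured, and every tag set then consumes the controller value instead of recomputing it or hardcoding the sentinel. A minimal sketch of that flow — struct ctrl, struct tag_set, and the node value 1 are illustrative stand-ins, not the kernel structures:

/* Sketch of the sentinel-then-override NUMA initialization pattern. */
#include <stdio.h>

#define NUMA_NO_NODE (-1)	/* same sentinel the kernel uses */

struct ctrl { int numa_node; };
struct tag_set { int numa_node; };

/* nvme_init_ctrl() analogue: default to "no affinity known". */
static void ctrl_init(struct ctrl *c)
{
	c->numa_node = NUMA_NO_NODE;
}

/* nvme_pci_configure_admin_queue() analogue: PCI can resolve the node. */
static void pci_override_node(struct ctrl *c)
{
	c->numa_node = 1;	/* stand-in for dev_to_node(dev->dev) */
}

/* Tag set setup analogue: always read the controller's value. */
static void alloc_tagset(struct tag_set *set, const struct ctrl *c)
{
	set->numa_node = c->numa_node;
}

int main(void)
{
	struct ctrl pci_ctrl, loop_ctrl;
	struct tag_set pci_set, loop_set;

	ctrl_init(&pci_ctrl);
	pci_override_node(&pci_ctrl);	/* PCI knows the device's node */
	alloc_tagset(&pci_set, &pci_ctrl);

	ctrl_init(&loop_ctrl);		/* loop/tcp keep the sentinel */
	alloc_tagset(&loop_set, &loop_ctrl);

	printf("pci tagset node=%d, loop tagset node=%d\n",
	       pci_set.numa_node, loop_set.numa_node);
	return 0;
}

The design point is that the node decision is made once per controller rather than once per tag set, so transports without a meaningful node fall back to NUMA_NO_NODE consistently instead of each call site choosing its own default.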