io_uring-5.9-2020-08-23

-----BEGIN PGP SIGNATURE-----

iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAl9CwtMQHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgpsehEAC4ReB53LLbZxqcmoA2RNs9yz1I4DM2PU6z
C+NSGGEnAFHQAhLbfCAzxbtQa6x/m64zoLd+8zHZNAeanJXarszcgSuqhXQFlEfX
7Jz/vdXGdu7Q4zgkLuO3FxleDoPoUC5qOSFHWYtMu6KvHLOkmc9DvdSUsFMDSThX
6RsoaQY2gDOD/pwtm8Cqmy89nLZdFoyxadXyk/lzxLodjeRZOwoVc+YM8YWmrXZ0
mKEEuO4uBWxUUmoyAwUABNqWWAkwTDEhrYCiiG81DkAa1Cu0mRXodN0xycr72cLZ
Ik2OlnTLCE6B0UXsBu2c0+qXGArWsvDyhEEkwF+O+Ump4IBIr72EmgZb+o2nnkXo
Uu4X/r0qeQ6XD+vBTHcE6oPUjJhV6uEXXon5aesE+vh277ILmHgMyjJKaSiJcY/E
efM5SuPRq2kuROKWLKiLJnpuJ/9ZTU/4nk4k1pOlWWOVGLHien0sWBBzQ+iWr6mm
eRl5EkI3JoahqIrNFz0+qF3DwKPVfu+B02/EzA8OXoYHIRV9KMS5eWX5hK12aZ3i
4AT3xuAanfcNs4qBAScOfHQxQu9U5Z7Mu4JQJ58xdsJd+UWBnbznUmSLob9KKk+c
X8AvAcYhb684F87VCmaCzDlIPMb46OYxLBgI6sz7L0xdc7i8TCeeEDbQCN1HixZ3
SNtKzalNXA==
=fAwK
-----END PGP SIGNATURE-----

Merge tag 'io_uring-5.9-2020-08-23' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

 - NVMe pull request from Sagi:
     - nvme completion rework from Christoph and Chao that mostly came
       from a bit of divergence of how we classify errors related to
       pathing/retry etc.
     - nvmet passthru fixes from Chaitanya
     - minor nvmet fixes from Amit and I
     - mpath round-robin path selection fix from Martin
     - ignore noiob for zoned devices from Keith
     - minor nvme-fc fix from Tianjia

 - BFQ cgroup leak fix (Dmitry)

 - block layer MAINTAINERS addition (Geert)

 - fix null_blk FUA checking (Hou)

 - get_max_io_size() size fix (Keith)

 - fix block page_is_mergeable() for compound pages (Matthew)

 - discard granularity fixes (Ming)

 - IO scheduler ordering fix (Ming)

 - misc fixes

* tag 'io_uring-5.9-2020-08-23' of git://git.kernel.dk/linux-block: (31 commits)
  null_blk: fix passing of REQ_FUA flag in null_handle_rq
  nvmet: Disable keep-alive timer when kato is cleared to 0h
  nvme: redirect commands on dying queue
  nvme: just check the status code type in nvme_is_path_error
  nvme: refactor command completion
  nvme: rename and document nvme_end_request
  nvme: skip noiob for zoned devices
  nvme-pci: fix PRP pool size
  nvme-pci: Use u32 for nvme_dev.q_depth and nvme_queue.q_depth
  nvme: Use spin_lock_irq() when taking the ctrl->lock
  nvmet: call blk_mq_free_request() directly
  nvmet: fix oops in pt cmd execution
  nvmet: add ns tear down label for pt-cmd handling
  nvme: multipath: round-robin: eliminate "fallback" variable
  nvme: multipath: round-robin: fix single non-optimized path case
  nvme-fc: Fix wrong return value in __nvme_fc_init_request()
  nvmet-passthru: Reject commands with non-sgl flags set
  nvmet: fix a memory leak
  blkcg: fix memleak for iolatency
  MAINTAINERS: Add missing header files to BLOCK LAYER section
  ...
Commit c41c3ec4a2
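The NVMe completion rework pulled in here keys off the Status Code Type (SCT) field of the status value; the new nvme_is_path_error() helper in the diff below treats SCT 0x3 ("path related status") as a candidate for failover. A minimal standalone sketch of that check, assuming the driver-internal status layout (CQE status shifted right by one, so bits 10:8 carry the SCT):

#include <stdbool.h>
#include <stdint.h>

static bool status_is_path_error(uint16_t status)
{
	/* bits 10:8 of the driver-internal status hold the Status Code Type */
	return (status & 0x700) == 0x300;
}

int main(void)
{
	/* 0x370 is NVME_SC_HOST_PATH_ERROR, a path-related status */
	return status_is_path_error(0x370) ? 0 : 1;
}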
@@ -3,7 +3,7 @@ NVMe Fault Injection
 Linux's fault injection framework provides a systematic way to support
 error injection via debugfs in the /sys/kernel/debug directory. When
 enabled, the default NVME_SC_INVALID_OPCODE with no retry will be
-injected into the nvme_end_request. Users can change the default status
+injected into the nvme_try_complete_req. Users can change the default status
 code and no retry flag via the debugfs. The list of Generic Command
 Status can be found in include/linux/nvme.h
 
@@ -3205,6 +3205,7 @@ S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
 F:	block/
 F:	drivers/block/
+F:	include/linux/blk*
 F:	kernel/trace/blktrace.c
 F:	lib/sbitmap.c
 
@@ -332,7 +332,7 @@ static void bfqg_put(struct bfq_group *bfqg)
 	kfree(bfqg);
 }
 
-void bfqg_and_blkg_get(struct bfq_group *bfqg)
+static void bfqg_and_blkg_get(struct bfq_group *bfqg)
 {
 	/* see comments in bfq_bic_update_cgroup for why refcounting bfqg */
 	bfqg_get(bfqg);
@@ -986,7 +986,6 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
 struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
 struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
 struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node);
-void bfqg_and_blkg_get(struct bfq_group *bfqg);
 void bfqg_and_blkg_put(struct bfq_group *bfqg);
 
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
@@ -533,9 +533,7 @@ static void bfq_get_entity(struct bfq_entity *entity)
 		bfqq->ref++;
 		bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d",
 			     bfqq, bfqq->ref);
-	} else
-		bfqg_and_blkg_get(container_of(entity, struct bfq_group,
-					       entity));
+	}
 }
 
 /**
@@ -649,14 +647,8 @@ static void bfq_forget_entity(struct bfq_service_tree *st,
 
 	entity->on_st_or_in_serv = false;
 	st->wsum -= entity->weight;
-	if (is_in_service)
-		return;
-
-	if (bfqq)
+	if (bfqq && !is_in_service)
 		bfq_put_queue(bfqq);
-	else
-		bfqg_and_blkg_put(container_of(entity, struct bfq_group,
-					       entity));
 }
 
 /**
@@ -740,8 +740,8 @@ static inline bool page_is_mergeable(const struct bio_vec *bv,
 		struct page *page, unsigned int len, unsigned int off,
 		bool *same_page)
 {
-	phys_addr_t vec_end_addr = page_to_phys(bv->bv_page) +
-		bv->bv_offset + bv->bv_len - 1;
+	size_t bv_end = bv->bv_offset + bv->bv_len;
+	phys_addr_t vec_end_addr = page_to_phys(bv->bv_page) + bv_end - 1;
 	phys_addr_t page_addr = page_to_phys(page);
 
 	if (vec_end_addr + 1 != page_addr + off)
@@ -750,9 +750,9 @@ static inline bool page_is_mergeable(const struct bio_vec *bv,
 		return false;
 
 	*same_page = ((vec_end_addr & PAGE_MASK) == page_addr);
-	if (!*same_page && pfn_to_page(PFN_DOWN(vec_end_addr)) + 1 != page)
-		return false;
-	return true;
+	if (*same_page)
+		return true;
+	return (bv->bv_page + bv_end / PAGE_SIZE) == (page + off / PAGE_SIZE);
 }
 
 /*
@@ -1152,13 +1152,15 @@ int blkcg_init_queue(struct request_queue *q)
 	if (preloaded)
 		radix_tree_preload_end();
 
-	ret = blk_iolatency_init(q);
-	if (ret)
-		goto err_destroy_all;
-
 	ret = blk_throtl_init(q);
 	if (ret)
 		goto err_destroy_all;
 
+	ret = blk_iolatency_init(q);
+	if (ret) {
+		blk_throtl_exit(q);
+		goto err_destroy_all;
+	}
+
 	return 0;
 
 err_destroy_all:
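The blkcg_init_queue() reordering above is the usual stacked-init pattern: once a later step can fail, every earlier step needs an unwind on that error path. A generic sketch of the pattern, with illustrative names rather than the block-layer API:

#include <errno.h>

static int init_throttling(void)  { return 0; }
static void exit_throttling(void) { }
static int init_iolatency(void)   { return -ENOMEM; }	/* pretend it fails */

static int init_queue_policies(void)
{
	int ret;

	ret = init_throttling();
	if (ret)
		return ret;

	ret = init_iolatency();
	if (ret) {
		exit_throttling();	/* unwind the earlier step */
		return ret;
	}
	return 0;
}

int main(void)
{
	return init_queue_policies() ? 1 : 0;
}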
@@ -154,7 +154,7 @@ static inline unsigned get_max_io_size(struct request_queue *q,
 	if (max_sectors > start_offset)
 		return max_sectors - start_offset;
 
-	return sectors & (lbs - 1);
+	return sectors & ~(lbs - 1);
 }
 
 static inline unsigned get_max_segment_size(const struct request_queue *q,
@@ -533,10 +533,17 @@ int __blk_rq_map_sg(struct request_queue *q, struct request *rq,
 }
 EXPORT_SYMBOL(__blk_rq_map_sg);
 
+static inline unsigned int blk_rq_get_max_segments(struct request *rq)
+{
+	if (req_op(rq) == REQ_OP_DISCARD)
+		return queue_max_discard_segments(rq->q);
+	return queue_max_segments(rq->q);
+}
+
 static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
 		unsigned int nr_phys_segs)
 {
-	if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(req->q))
+	if (req->nr_phys_segments + nr_phys_segs > blk_rq_get_max_segments(req))
 		goto no_merge;
 
 	if (blk_integrity_merge_bio(req->q, req, bio) == false)
@@ -624,7 +631,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
 		return 0;
 
 	total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
-	if (total_phys_segments > queue_max_segments(q))
+	if (total_phys_segments > blk_rq_get_max_segments(req))
 		return 0;
 
 	if (blk_integrity_merge_rq(q, req, next) == false)
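For the one-character get_max_io_size() fix above: with a power-of-two logical-block-to-sector ratio, masking with ~(lbs - 1) rounds the sector count down to an aligned multiple, while the old mask with (lbs - 1) returned only the misaligned remainder. A small self-contained check with made-up values:

#include <assert.h>

int main(void)
{
	unsigned int sectors = 255;	/* arbitrary example count */
	unsigned int lbs = 8;		/* e.g. 4096-byte logical blocks, 512-byte sectors */

	assert((sectors & (lbs - 1)) == 7);	/* old code: just the remainder */
	assert((sectors & ~(lbs - 1)) == 248);	/* fixed code: aligned I/O size */
	return 0;
}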
@@ -78,6 +78,15 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
 		return;
 	clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
 
+	/*
+	 * Order clearing SCHED_RESTART and list_empty_careful(&hctx->dispatch)
+	 * in blk_mq_run_hw_queue(). Its pair is the barrier in
+	 * blk_mq_dispatch_rq_list(). So dispatch code won't see SCHED_RESTART,
+	 * meantime new request added to hctx->dispatch is missed to check in
+	 * blk_mq_run_hw_queue().
+	 */
+	smp_mb();
+
 	blk_mq_run_hw_queue(hctx, true);
 }
 
@@ -1437,6 +1437,15 @@ out:
 		list_splice_tail_init(list, &hctx->dispatch);
 		spin_unlock(&hctx->lock);
 
+		/*
+		 * Order adding requests to hctx->dispatch and checking
+		 * SCHED_RESTART flag. The pair of this smp_mb() is the one
+		 * in blk_mq_sched_restart(). Avoid restart code path to
+		 * miss the new added requests to hctx->dispatch, meantime
+		 * SCHED_RESTART is observed here.
+		 */
+		smp_mb();
+
 		/*
 		 * If SCHED_RESTART was set by the caller of this function and
 		 * it is no longer set that means that it was cleared by another
@@ -1834,6 +1843,7 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 /**
  * blk_mq_request_bypass_insert - Insert a request at dispatch list.
  * @rq: Pointer to request to be inserted.
+ * @at_head: true if the request should be inserted at the head of the list.
  * @run_queue: If we should run the hardware queue after inserting the request.
  *
  * Should only be used carefully, when the caller knows we want to
@@ -2016,7 +2026,8 @@ insert:
 	if (bypass_insert)
 		return BLK_STS_RESOURCE;
 
-	blk_mq_request_bypass_insert(rq, false, run_queue);
+	blk_mq_sched_insert_request(rq, false, run_queue, false);
+
 	return BLK_STS_OK;
 }
 
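The two smp_mb() comments above describe a classic store/load pairing: one side publishes a request and then checks SCHED_RESTART, the other clears SCHED_RESTART and then checks the dispatch list, and the full barriers guarantee at least one side sees the other's store. A userspace model of that pairing (C11 atomics, illustrative only, not kernel code):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool dispatch_nonempty;
static atomic_bool sched_restart;

static bool dispatch_side(void)
{
	/* models blk_mq_dispatch_rq_list(): publish work, then check the flag */
	atomic_store(&dispatch_nonempty, true);
	atomic_thread_fence(memory_order_seq_cst);	/* the added smp_mb() */
	return !atomic_load(&sched_restart);		/* true: run the queue here */
}

static bool restart_side(void)
{
	/* models blk_mq_sched_restart(): clear the flag, then check for work */
	atomic_store(&sched_restart, false);
	atomic_thread_fence(memory_order_seq_cst);	/* the paired smp_mb() */
	return atomic_load(&dispatch_nonempty);		/* true: run the queue here */
}

int main(void)
{
	atomic_store(&sched_restart, true);
	/* in any interleaving of the two sides, at least one returns true,
	 * so a newly added request is never left unserved */
	printf("%d %d\n", dispatch_side(), restart_side());
	return 0;
}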
@@ -378,7 +378,7 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name,
 	bset->timeout_fn = timeout;
 
 	set = &bset->tag_set;
-	set->ops = &bsg_mq_ops,
+	set->ops = &bsg_mq_ops;
 	set->nr_hw_queues = 1;
 	set->queue_depth = 128;
 	set->numa_node = NUMA_NO_NODE;
@@ -878,6 +878,7 @@ static void loop_config_discard(struct loop_device *lo)
 	struct file *file = lo->lo_backing_file;
 	struct inode *inode = file->f_mapping->host;
 	struct request_queue *q = lo->lo_queue;
+	u32 granularity, max_discard_sectors;
 
 	/*
 	 * If the backing device is a block device, mirror its zeroing
@@ -890,11 +891,10 @@ static void loop_config_discard(struct loop_device *lo)
 		struct request_queue *backingq;
 
 		backingq = bdev_get_queue(inode->i_bdev);
-		blk_queue_max_discard_sectors(q,
-			backingq->limits.max_write_zeroes_sectors);
 
-		blk_queue_max_write_zeroes_sectors(q,
-			backingq->limits.max_write_zeroes_sectors);
+		max_discard_sectors = backingq->limits.max_write_zeroes_sectors;
+		granularity = backingq->limits.discard_granularity ?:
+			queue_physical_block_size(backingq);
 
 	/*
 	 * We use punch hole to reclaim the free space used by the
@@ -903,24 +903,27 @@ static void loop_config_discard(struct loop_device *lo)
 	 * useful information.
 	 */
 	} else if (!file->f_op->fallocate || lo->lo_encrypt_key_size) {
-		q->limits.discard_granularity = 0;
-		q->limits.discard_alignment = 0;
-		blk_queue_max_discard_sectors(q, 0);
-		blk_queue_max_write_zeroes_sectors(q, 0);
+		max_discard_sectors = 0;
+		granularity = 0;
 
 	} else {
-		q->limits.discard_granularity = inode->i_sb->s_blocksize;
-		q->limits.discard_alignment = 0;
-
-		blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
-		blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
+		max_discard_sectors = UINT_MAX >> 9;
+		granularity = inode->i_sb->s_blocksize;
 	}
 
-	if (q->limits.max_write_zeroes_sectors)
+	if (max_discard_sectors) {
+		q->limits.discard_granularity = granularity;
+		blk_queue_max_discard_sectors(q, max_discard_sectors);
+		blk_queue_max_write_zeroes_sectors(q, max_discard_sectors);
 		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
-	else
+	} else {
+		q->limits.discard_granularity = 0;
+		blk_queue_max_discard_sectors(q, 0);
+		blk_queue_max_write_zeroes_sectors(q, 0);
 		blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
+	}
+	q->limits.discard_alignment = 0;
 }
 
 static void loop_unprepare_queue(struct loop_device *lo)
 {
@@ -1147,7 +1147,7 @@ static int null_handle_rq(struct nullb_cmd *cmd)
 		len = bvec.bv_len;
 		err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
 				     op_is_write(req_op(rq)), sector,
-				     req_op(rq) & REQ_FUA);
+				     rq->cmd_flags & REQ_FUA);
 		if (err) {
 			spin_unlock_irq(&nullb->lock);
 			return err;
@@ -148,7 +148,8 @@ static int process_rdma(struct rtrs_srv *sess,
 	/* Generate bio with pages pointing to the rdma buffer */
 	bio = rnbd_bio_map_kern(data, sess_dev->rnbd_dev->ibd_bio_set, datalen, GFP_KERNEL);
 	if (IS_ERR(bio)) {
-		rnbd_srv_err(sess_dev, "Failed to generate bio, err: %ld\n", PTR_ERR(bio));
+		err = PTR_ERR(bio);
+		rnbd_srv_err(sess_dev, "Failed to generate bio, err: %d\n", err);
 		goto sess_dev_put;
 	}
 
@@ -126,6 +126,18 @@ static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap)
 	if (!range)
 		return -ENOMEM;
 
+	/*
+	 * Single max discard segment means multi-range discard isn't
+	 * supported, and block layer only runs contiguity merge like
+	 * normal RW request. So we can't reply on bio for retrieving
+	 * each range info.
+	 */
+	if (queue_max_discard_segments(req->q) == 1) {
+		range[0].flags = cpu_to_le32(flags);
+		range[0].num_sectors = cpu_to_le32(blk_rq_sectors(req));
+		range[0].sector = cpu_to_le64(blk_rq_pos(req));
+		n = 1;
+	} else {
 	__rq_for_each_bio(bio, req) {
 		u64 sector = bio->bi_iter.bi_sector;
 		u32 num_sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT;
@@ -135,6 +147,9 @@ static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap)
 		range[n].sector = cpu_to_le64(sector);
 		n++;
 	}
+	}
+
+	WARN_ON_ONCE(n != segments);
 
 	req->special_vec.bv_page = virt_to_page(range);
 	req->special_vec.bv_offset = offset_in_page(range);
@@ -241,17 +241,6 @@ static blk_status_t nvme_error_status(u16 status)
 	}
 }
 
-static inline bool nvme_req_needs_retry(struct request *req)
-{
-	if (blk_noretry_request(req))
-		return false;
-	if (nvme_req(req)->status & NVME_SC_DNR)
-		return false;
-	if (nvme_req(req)->retries >= nvme_max_retries)
-		return false;
-	return true;
-}
-
 static void nvme_retry_req(struct request *req)
 {
 	struct nvme_ns *ns = req->q->queuedata;
@@ -268,33 +257,66 @@ static void nvme_retry_req(struct request *req)
 	blk_mq_delay_kick_requeue_list(req->q, delay);
 }
 
-void nvme_complete_rq(struct request *req)
-{
-	blk_status_t status = nvme_error_status(nvme_req(req)->status);
-
-	trace_nvme_complete_rq(req);
-
-	nvme_cleanup_cmd(req);
-
-	if (nvme_req(req)->ctrl->kas)
-		nvme_req(req)->ctrl->comp_seen = true;
-
-	if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) {
-		if ((req->cmd_flags & REQ_NVME_MPATH) && nvme_failover_req(req))
-			return;
-
-		if (!blk_queue_dying(req->q)) {
-			nvme_retry_req(req);
-			return;
-		}
-	} else if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
-		   req_op(req) == REQ_OP_ZONE_APPEND) {
-		req->__sector = nvme_lba_to_sect(req->q->queuedata,
-			le64_to_cpu(nvme_req(req)->result.u64));
-	}
-
-	nvme_trace_bio_complete(req, status);
-	blk_mq_end_request(req, status);
-}
+enum nvme_disposition {
+	COMPLETE,
+	RETRY,
+	FAILOVER,
+};
+
+static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
+{
+	if (likely(nvme_req(req)->status == 0))
+		return COMPLETE;
+
+	if (blk_noretry_request(req) ||
+	    (nvme_req(req)->status & NVME_SC_DNR) ||
+	    nvme_req(req)->retries >= nvme_max_retries)
+		return COMPLETE;
+
+	if (req->cmd_flags & REQ_NVME_MPATH) {
+		if (nvme_is_path_error(nvme_req(req)->status) ||
+		    blk_queue_dying(req->q))
+			return FAILOVER;
+	} else {
+		if (blk_queue_dying(req->q))
+			return COMPLETE;
+	}
+
+	return RETRY;
+}
+
+static inline void nvme_end_req(struct request *req)
+{
+	blk_status_t status = nvme_error_status(nvme_req(req)->status);
+
+	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
+	    req_op(req) == REQ_OP_ZONE_APPEND)
+		req->__sector = nvme_lba_to_sect(req->q->queuedata,
+			le64_to_cpu(nvme_req(req)->result.u64));
+
+	nvme_trace_bio_complete(req, status);
+	blk_mq_end_request(req, status);
+}
+
+void nvme_complete_rq(struct request *req)
+{
+	trace_nvme_complete_rq(req);
+	nvme_cleanup_cmd(req);
+
+	if (nvme_req(req)->ctrl->kas)
+		nvme_req(req)->ctrl->comp_seen = true;
+
+	switch (nvme_decide_disposition(req)) {
+	case COMPLETE:
+		nvme_end_req(req);
+		return;
+	case RETRY:
+		nvme_retry_req(req);
+		return;
+	case FAILOVER:
+		nvme_failover_req(req);
+		return;
+	}
+}
 EXPORT_SYMBOL_GPL(nvme_complete_rq);
 
@@ -2075,7 +2097,7 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
 		}
 	}
 
-	if (iob)
+	if (iob && !blk_queue_is_zoned(ns->queue))
 		blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(iob));
 	nvme_update_disk_info(disk, ns, id);
 #ifdef CONFIG_NVME_MULTIPATH
@@ -2965,14 +2987,14 @@ static struct nvme_cel *nvme_find_cel(struct nvme_ctrl *ctrl, u8 csi)
 {
 	struct nvme_cel *cel, *ret = NULL;
 
-	spin_lock(&ctrl->lock);
+	spin_lock_irq(&ctrl->lock);
 	list_for_each_entry(cel, &ctrl->cels, entry) {
 		if (cel->csi == csi) {
 			ret = cel;
 			break;
 		}
 	}
-	spin_unlock(&ctrl->lock);
+	spin_unlock_irq(&ctrl->lock);
 
 	return ret;
 }
@@ -2999,9 +3021,9 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi,
 
 	cel->csi = csi;
 
-	spin_lock(&ctrl->lock);
+	spin_lock_irq(&ctrl->lock);
 	list_add_tail(&cel->entry, &ctrl->cels);
-	spin_unlock(&ctrl->lock);
+	spin_unlock_irq(&ctrl->lock);
 out:
 	*log = &cel->log;
 	return 0;
@@ -2035,7 +2035,7 @@ done:
 	}
 
 	__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
-	if (!nvme_end_request(rq, status, result))
+	if (!nvme_try_complete_req(rq, status, result))
 		nvme_fc_complete_rq(rq);
 
 check_error:
@@ -2078,7 +2078,7 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl,
 	if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.cmddma)) {
 		dev_err(ctrl->dev,
 			"FCP Op failed - cmdiu dma mapping failed.\n");
-		ret = EFAULT;
+		ret = -EFAULT;
 		goto out_on_error;
 	}
 
@@ -2088,7 +2088,7 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl,
 	if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.rspdma)) {
 		dev_err(ctrl->dev,
 			"FCP Op failed - rspiu dma mapping failed.\n");
-		ret = EFAULT;
+		ret = -EFAULT;
 	}
 
 	atomic_set(&op->state, FCPOP_STATE_IDLE);
@@ -65,51 +65,30 @@ void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
 	}
 }
 
-bool nvme_failover_req(struct request *req)
+void nvme_failover_req(struct request *req)
 {
 	struct nvme_ns *ns = req->q->queuedata;
-	u16 status = nvme_req(req)->status;
+	u16 status = nvme_req(req)->status & 0x7ff;
 	unsigned long flags;
 
-	switch (status & 0x7ff) {
-	case NVME_SC_ANA_TRANSITION:
-	case NVME_SC_ANA_INACCESSIBLE:
-	case NVME_SC_ANA_PERSISTENT_LOSS:
-		/*
-		 * If we got back an ANA error we know the controller is alive,
-		 * but not ready to serve this namespaces.  The spec suggests
-		 * we should update our general state here, but due to the fact
-		 * that the admin and I/O queues are not serialized that is
-		 * fundamentally racy.  So instead just clear the current path,
-		 * mark the the path as pending and kick of a re-read of the ANA
-		 * log page ASAP.
-		 */
-		nvme_mpath_clear_current_path(ns);
-		if (ns->ctrl->ana_log_buf) {
-			set_bit(NVME_NS_ANA_PENDING, &ns->flags);
-			queue_work(nvme_wq, &ns->ctrl->ana_work);
-		}
-		break;
-	case NVME_SC_HOST_PATH_ERROR:
-	case NVME_SC_HOST_ABORTED_CMD:
-		/*
-		 * Temporary transport disruption in talking to the controller.
-		 * Try to send on a new path.
-		 */
-		nvme_mpath_clear_current_path(ns);
-		break;
-	default:
-		/* This was a non-ANA error so follow the normal error path. */
-		return false;
+	nvme_mpath_clear_current_path(ns);
+
+	/*
+	 * If we got back an ANA error, we know the controller is alive but not
+	 * ready to serve this namespace.  Kick of a re-read of the ANA
+	 * information page, and just try any other available path for now.
+	 */
+	if (nvme_is_ana_error(status) && ns->ctrl->ana_log_buf) {
+		set_bit(NVME_NS_ANA_PENDING, &ns->flags);
+		queue_work(nvme_wq, &ns->ctrl->ana_work);
 	}
 
 	spin_lock_irqsave(&ns->head->requeue_lock, flags);
 	blk_steal_bios(&ns->head->requeue_list, req);
 	spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
-	blk_mq_end_request(req, 0);
 
+	blk_mq_end_request(req, 0);
 	kblockd_schedule_work(&ns->head->requeue_work);
-	return true;
 }
 
 void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
@@ -233,7 +212,7 @@ static struct nvme_ns *nvme_next_ns(struct nvme_ns_head *head,
 static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head,
 		int node, struct nvme_ns *old)
 {
-	struct nvme_ns *ns, *found, *fallback = NULL;
+	struct nvme_ns *ns, *found = NULL;
 
 	if (list_is_singular(&head->list)) {
 		if (nvme_path_is_disabled(old))
@@ -252,18 +231,22 @@ static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head,
 			goto out;
 		}
 		if (ns->ana_state == NVME_ANA_NONOPTIMIZED)
-			fallback = ns;
+			found = ns;
 	}
 
-	/* No optimized path found, re-check the current path */
+	/*
+	 * The loop above skips the current path for round-robin semantics.
+	 * Fall back to the current path if either:
+	 *  - no other optimized path found and current is optimized,
+	 *  - no other usable path found and current is usable.
+	 */
 	if (!nvme_path_is_disabled(old) &&
-	    old->ana_state == NVME_ANA_OPTIMIZED) {
-		found = old;
-		goto out;
-	}
-	if (!fallback)
+	    (old->ana_state == NVME_ANA_OPTIMIZED ||
+	     (!found && old->ana_state == NVME_ANA_NONOPTIMIZED)))
+		return old;
+
+	if (!found)
 		return NULL;
-	found = fallback;
 out:
 	rcu_assign_pointer(head->current_path[node], found);
 	return found;
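The comment added to nvme_round_robin_path() above states the fallback rule for the current path. Restated as a standalone boolean helper (simplified, illustrative names, not the kernel API; found_other says whether the loop found any usable alternative):

#include <stdbool.h>
#include <stdio.h>

enum path_state { PATH_OPTIMIZED, PATH_NONOPTIMIZED, PATH_DISABLED };

static bool fall_back_to_current(enum path_state cur, bool found_other)
{
	/* keep the current path if it is optimized, or if it is merely
	 * usable (non-optimized) and nothing else usable was found */
	if (cur == PATH_DISABLED)
		return false;
	return cur == PATH_OPTIMIZED ||
	       (!found_other && cur == PATH_NONOPTIMIZED);
}

int main(void)
{
	/* single non-optimized path and no alternatives: keep using it */
	printf("%d\n", fall_back_to_current(PATH_NONOPTIMIZED, false));
	return 0;
}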
@@ -523,7 +523,31 @@ static inline u32 nvme_bytes_to_numd(size_t len)
 	return (len >> 2) - 1;
 }
 
-static inline bool nvme_end_request(struct request *req, __le16 status,
+static inline bool nvme_is_ana_error(u16 status)
+{
+	switch (status & 0x7ff) {
+	case NVME_SC_ANA_TRANSITION:
+	case NVME_SC_ANA_INACCESSIBLE:
+	case NVME_SC_ANA_PERSISTENT_LOSS:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static inline bool nvme_is_path_error(u16 status)
+{
+	/* check for a status code type of 'path related status' */
+	return (status & 0x700) == 0x300;
+}
+
+/*
+ * Fill in the status and result information from the CQE, and then figure out
+ * if blk-mq will need to use IPI magic to complete the request, and if yes do
+ * so.  If not let the caller complete the request without an indirect function
+ * call.
+ */
+static inline bool nvme_try_complete_req(struct request *req, __le16 status,
 		union nvme_result result)
 {
 	struct nvme_request *rq = nvme_req(req);
@@ -629,7 +653,7 @@ void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys);
 void nvme_mpath_start_freeze(struct nvme_subsystem *subsys);
 void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
 			struct nvme_ctrl *ctrl, int *flags);
-bool nvme_failover_req(struct request *req);
+void nvme_failover_req(struct request *req);
 void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
 int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
 void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id);
@@ -688,9 +712,8 @@ static inline void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
 	sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
 }
 
-static inline bool nvme_failover_req(struct request *req)
+static inline void nvme_failover_req(struct request *req)
 {
-	return false;
 }
 static inline void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
 {
@@ -120,7 +120,7 @@ struct nvme_dev {
 	unsigned max_qid;
 	unsigned io_queues[HCTX_MAX_TYPES];
 	unsigned int num_vecs;
-	u16 q_depth;
+	u32 q_depth;
 	int io_sqes;
 	u32 db_stride;
 	void __iomem *bar;
@@ -157,13 +157,13 @@ struct nvme_dev {
 static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
 {
 	int ret;
-	u16 n;
+	u32 n;
 
-	ret = kstrtou16(val, 10, &n);
+	ret = kstrtou32(val, 10, &n);
 	if (ret != 0 || n < 2)
 		return -EINVAL;
 
-	return param_set_ushort(val, kp);
+	return param_set_uint(val, kp);
 }
 
 static inline unsigned int sq_idx(unsigned int qid, u32 stride)
@@ -195,7 +195,7 @@ struct nvme_queue {
 	dma_addr_t sq_dma_addr;
 	dma_addr_t cq_dma_addr;
 	u32 __iomem *q_db;
-	u16 q_depth;
+	u32 q_depth;
 	u16 cq_vector;
 	u16 sq_tail;
 	u16 cq_head;
@@ -961,7 +961,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
 
 	req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), cqe->command_id);
 	trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail);
-	if (!nvme_end_request(req, cqe->status, cqe->result))
+	if (!nvme_try_complete_req(req, cqe->status, cqe->result))
 		nvme_pci_complete_rq(req);
 }
 
@@ -2320,7 +2320,7 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 
 	dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
 
-	dev->q_depth = min_t(u16, NVME_CAP_MQES(dev->ctrl.cap) + 1,
+	dev->q_depth = min_t(u32, NVME_CAP_MQES(dev->ctrl.cap) + 1,
 				io_queue_depth);
 	dev->ctrl.sqsize = dev->q_depth - 1; /* 0's based queue depth */
 	dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap);
@@ -2460,7 +2460,8 @@ static int nvme_disable_prepare_reset(struct nvme_dev *dev, bool shutdown)
 static int nvme_setup_prp_pools(struct nvme_dev *dev)
 {
 	dev->prp_page_pool = dma_pool_create("prp list page", dev->dev,
-						PAGE_SIZE, PAGE_SIZE, 0);
+						NVME_CTRL_PAGE_SIZE,
+						NVME_CTRL_PAGE_SIZE, 0);
 	if (!dev->prp_page_pool)
 		return -ENOMEM;
 
@@ -1189,7 +1189,7 @@ static void nvme_rdma_end_request(struct nvme_rdma_request *req)
 
 	if (!refcount_dec_and_test(&req->ref))
 		return;
-	if (!nvme_end_request(rq, req->status, req->result))
+	if (!nvme_try_complete_req(rq, req->status, req->result))
 		nvme_rdma_complete_rq(rq);
 }
 
@@ -481,7 +481,7 @@ static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
 		return -EINVAL;
 	}
 
-	if (!nvme_end_request(rq, cqe->status, cqe->result))
+	if (!nvme_try_complete_req(rq, cqe->status, cqe->result))
 		nvme_complete_rq(rq);
 	queue->nr_cqe++;
 
@@ -672,7 +672,7 @@ static inline void nvme_tcp_end_request(struct request *rq, u16 status)
 {
 	union nvme_result res = {};
 
-	if (!nvme_end_request(rq, cpu_to_le16(status << 1), res))
+	if (!nvme_try_complete_req(rq, cpu_to_le16(status << 1), res))
 		nvme_complete_rq(rq);
 }
 
@@ -1136,6 +1136,7 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
 	up_write(&nvmet_config_sem);
 
 	kfree_rcu(new_model, rcuhead);
+	kfree(new_model_number);
 
 	return count;
 }
@@ -397,6 +397,9 @@ static void nvmet_keep_alive_timer(struct work_struct *work)
 
 static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
 {
+	if (unlikely(ctrl->kato == 0))
+		return;
+
 	pr_debug("ctrl %d start keep-alive timer for %d secs\n",
 		ctrl->cntlid, ctrl->kato);
 
@@ -406,6 +409,9 @@ static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
 
 static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
 {
+	if (unlikely(ctrl->kato == 0))
+		return;
+
 	pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);
 
 	cancel_delayed_work_sync(&ctrl->ka_work);
@@ -115,7 +115,7 @@ static void nvme_loop_queue_response(struct nvmet_req *req)
 			return;
 		}
 
-		if (!nvme_end_request(rq, cqe->status, cqe->result))
+		if (!nvme_try_complete_req(rq, cqe->status, cqe->result))
 			nvme_loop_complete_rq(rq);
 	}
 }
@@ -165,7 +165,7 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w)
 
 	req->cqe->result = nvme_req(rq)->result;
 	nvmet_req_complete(req, status);
-	blk_put_request(rq);
+	blk_mq_free_request(rq);
 }
 
 static void nvmet_passthru_req_done(struct request *rq,
@@ -175,7 +175,7 @@ static void nvmet_passthru_req_done(struct request *rq,
 
 	req->cqe->result = nvme_req(rq)->result;
 	nvmet_req_complete(req, nvme_req(rq)->status);
-	blk_put_request(rq);
+	blk_mq_free_request(rq);
 }
 
 static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
@@ -230,7 +230,7 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
 	if (unlikely(!ns)) {
 		pr_err("failed to get passthru ns nsid:%u\n", nsid);
 		status = NVME_SC_INVALID_NS | NVME_SC_DNR;
-		goto fail_out;
+		goto out;
 	}
 
 	q = ns->queue;
@@ -238,16 +238,15 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
 
 	rq = nvme_alloc_request(q, req->cmd, BLK_MQ_REQ_NOWAIT, NVME_QID_ANY);
 	if (IS_ERR(rq)) {
-		rq = NULL;
 		status = NVME_SC_INTERNAL;
-		goto fail_out;
+		goto out_put_ns;
 	}
 
 	if (req->sg_cnt) {
 		ret = nvmet_passthru_map_sg(req, rq);
 		if (unlikely(ret)) {
 			status = NVME_SC_INTERNAL;
-			goto fail_out;
+			goto out_put_req;
 		}
 	}
 
@@ -274,11 +273,13 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
 
 	return;
 
-fail_out:
+out_put_req:
+	blk_mq_free_request(rq);
+out_put_ns:
 	if (ns)
 		nvme_put_ns(ns);
+out:
 	nvmet_req_complete(req, status);
-	blk_put_request(rq);
 }
 
 /*
@@ -326,6 +327,10 @@ static u16 nvmet_setup_passthru_command(struct nvmet_req *req)
 
 u16 nvmet_parse_passthru_io_cmd(struct nvmet_req *req)
 {
+	/* Reject any commands with non-sgl flags set (ie. fused commands) */
+	if (req->cmd->common.flags & ~NVME_CMD_SGL_ALL)
+		return NVME_SC_INVALID_FIELD;
+
 	switch (req->cmd->common.opcode) {
 	case nvme_cmd_resv_register:
 	case nvme_cmd_resv_report:
@@ -396,6 +401,10 @@ static u16 nvmet_passthru_get_set_features(struct nvmet_req *req)
 
 u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req)
 {
+	/* Reject any commands with non-sgl flags set (ie. fused commands) */
+	if (req->cmd->common.flags & ~NVME_CMD_SGL_ALL)
+		return NVME_SC_INVALID_FIELD;
+
 	/*
 	 * Passthru all vendor specific commands
 	 */