for-linus-20190524
-----BEGIN PGP SIGNATURE-----

iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAlzobRYQHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgptwcD/99hOkZWNqX0FKjkrofywXBjX//UqBb2OQS
/7vBoWgSMN+SXDI08YdePCjreviDs4VjbP1V1EgBTbb0HpEApbAuTqx7fszbsJLi
Ld6pMkDpRp6RKttmaDW6iT39gZC3w9wOYusbC8pfrVbvhXm9CRLum78Q8h2rdl0c
HzIMopvGvvJazTYj/ZD8L/83Z6oqHPWojnXPIK1CNw6PQ4+A1frD85WitW4Fragp
T5lx0ZBPLHe+1VPoIQg3Rq2ZZcQW2Kfm5mytw9sDG6KbG5/Vj7+jtF6X36QvuFhZ
fU2zWAN7zFVE0FvXxS/ze5lFI8/efkwIAa2xYvkkFWJ+FNBkOrNrhN1JgNyMQgTe
2r4dLPp3XGcfvCCndTnQdwNAGuc878X+bGwlxb1wjTRcElJRpflE1wBx2kzzdnjl
zD2dmUgxURJvY8clKbq/bpgoxLKtqGCsJy7mHOyCUTpflP7YrpvJnUcc14PARnDt
V2JlnTVNO2r9oZ7IBHPWtNLmFjZhba5BaQDD1EtUUgO3fId4wL1rJ52j5K9/2eg7
yC4qdKGZLQoHGTnn8qBY+BS8/bMeMxu6Lx4RqtgVa8r+dkKFhblIdOmYZnyevxSf
B5rtt8CJUU7d3edxZHp9jFiYVbmrc6CjIhRLYZyrLfQGCL3F6qFzozYd0Lwiwxhz
gx2TTsDfFg==
=lGyw
-----END PGP SIGNATURE-----

Merge tag 'for-linus-20190524' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

 - NVMe pull request from Keith, with fixes from a few folks.

 - bio and sbitmap before atomic barrier fixes (Andrea)

 - Hang fix for blk-mq freeze and unfreeze (Bob)

 - Single segment count regression fix (Christoph)

 - AoE now has a new maintainer

 - tools/io_uring/ Makefile fix, and sync with liburing (me)

* tag 'for-linus-20190524' of git://git.kernel.dk/linux-block: (23 commits)
  tools/io_uring: sync with liburing
  tools/io_uring: fix Makefile for pthread library link
  blk-mq: fix hang caused by freeze/unfreeze sequence
  block: remove the bi_seg_{front,back}_size fields in struct bio
  block: remove the segment size check in bio_will_gap
  block: force an unlimited segment size on queues with a virt boundary
  block: don't decrement nr_phys_segments for physically contigous segments
  sbitmap: fix improper use of smp_mb__before_atomic()
  bio: fix improper use of smp_mb__before_atomic()
  aoe: list new maintainer for aoe driver
  nvme-pci: use blk-mq mapping for unmanaged irqs
  nvme: update MAINTAINERS
  nvme: copy MTFA field from identify controller
  nvme: fix memory leak for power latency tolerance
  nvme: release namespace SRCU protection before performing controller ioctls
  nvme: merge nvme_ns_ioctl into nvme_ioctl
  nvme: remove the ifdef around nvme_nvm_ioctl
  nvme: fix srcu locking on error return in nvme_get_ns_from_disk
  nvme: Fix known effects
  nvme-pci: Sync queues on reset
  ...
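The liburing sync in this pull renames io_uring_get_completion()/io_uring_wait_completion() to io_uring_peek_cqe()/io_uring_wait_cqe() and adds the io_uring_cqe_seen() and io_uring_prep_nop() helpers (see the tools/io_uring hunks below). The following is a minimal, illustrative sketch of the updated calling convention; it assumes a standalone test program built against the in-tree tools/io_uring copy of liburing, and is not part of the merge itself.

/* Illustrative sketch only: submit one NOP and reap it with the renamed API. */
#include <stdio.h>
#include "liburing.h"

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe = NULL;
	int ret;

	ret = io_uring_queue_init(8, &ring, 0);		/* 8 SQ entries, no flags */
	if (ret < 0)
		return 1;

	sqe = io_uring_get_sqe(&ring);			/* may return NULL if the SQ is full */
	if (sqe) {
		io_uring_prep_nop(sqe);			/* helper added by this sync */
		io_uring_submit(&ring);

		ret = io_uring_wait_cqe(&ring, &cqe);	/* was io_uring_wait_completion() */
		if (!ret && cqe)
			printf("nop completed, res=%d\n", cqe->res);
		io_uring_cqe_seen(&ring, cqe);		/* advance the CQ head */
	}

	io_uring_queue_exit(&ring);
	return 0;
}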
This commit is contained in:
Commit 7fbc78e315
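The tools/io_uring Makefile change further down is purely a link-order fix: with a traditional linker, libraries in $(LDLIBS) are only scanned for symbols that are still undefined at that point, so they must come after the object files that reference them (here, -lpthread after io_uring-bench.o). A minimal sketch of the pattern, with made-up target and file names (recipe lines must begin with a hard tab):

CC     := cc
CFLAGS := -Wall -O2
LDLIBS := -lpthread

# Broken: the library is scanned before bench.o introduces its undefined
# pthread_* references, so those references can stay unresolved at link time.
bench-broken: bench.o
	$(CC) $(CFLAGS) $(LDLIBS) -o $@ $^

# Fixed, matching the tools/io_uring/Makefile change: libraries go last.
bench: bench.o
	$(CC) $(CFLAGS) -o $@ $^ $(LDLIBS)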
diff --git a/MAINTAINERS b/MAINTAINERS
@@ -2627,7 +2627,7 @@ F:	Documentation/devicetree/bindings/eeprom/at24.txt
 F:	drivers/misc/eeprom/at24.c
 
 ATA OVER ETHERNET (AOE) DRIVER
-M:	"Ed L. Cashin" <ed.cashin@acm.org>
+M:	"Justin Sanders" <justin@coraid.com>
 W:	http://www.openaoe.org/
 S:	Supported
 F:	Documentation/aoe/
@@ -11226,7 +11226,7 @@ F:	drivers/video/fbdev/riva/
 F:	drivers/video/fbdev/nvidia/
 
 NVM EXPRESS DRIVER
-M:	Keith Busch <keith.busch@intel.com>
+M:	Keith Busch <kbusch@kernel.org>
 M:	Jens Axboe <axboe@fb.com>
 M:	Christoph Hellwig <hch@lst.de>
 M:	Sagi Grimberg <sagi@grimberg.me>
diff --git a/block/blk-core.c b/block/blk-core.c
@@ -413,7 +413,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
 		smp_rmb();
 
 		wait_event(q->mq_freeze_wq,
-			   (atomic_read(&q->mq_freeze_depth) == 0 &&
+			   (!q->mq_freeze_depth &&
 			    (pm || (blk_pm_request_resume(q),
 				    !blk_queue_pm_only(q)))) ||
 			   blk_queue_dying(q));
@@ -503,6 +503,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	spin_lock_init(&q->queue_lock);
 
 	init_waitqueue_head(&q->mq_freeze_wq);
+	mutex_init(&q->mq_freeze_lock);
 
 	/*
 	 * Init percpu_ref in atomic mode so that it's faster to shutdown.
diff --git a/block/blk-merge.c b/block/blk-merge.c
@@ -12,23 +12,6 @@
 
 #include "blk.h"
 
-/*
- * Check if the two bvecs from two bios can be merged to one segment. If yes,
- * no need to check gap between the two bios since the 1st bio and the 1st bvec
- * in the 2nd bio can be handled in one segment.
- */
-static inline bool bios_segs_mergeable(struct request_queue *q,
-		struct bio *prev, struct bio_vec *prev_last_bv,
-		struct bio_vec *next_first_bv)
-{
-	if (!biovec_phys_mergeable(q, prev_last_bv, next_first_bv))
-		return false;
-	if (prev->bi_seg_back_size + next_first_bv->bv_len >
-			queue_max_segment_size(q))
-		return false;
-	return true;
-}
-
 static inline bool bio_will_gap(struct request_queue *q,
 		struct request *prev_rq, struct bio *prev, struct bio *next)
 {
@@ -60,7 +43,7 @@ static inline bool bio_will_gap(struct request_queue *q,
 	 */
 	bio_get_last_bvec(prev, &pb);
 	bio_get_first_bvec(next, &nb);
-	if (bios_segs_mergeable(q, prev, &pb, &nb))
+	if (biovec_phys_mergeable(q, &pb, &nb))
 		return false;
 	return __bvec_gap_to_prev(q, &pb, nb.bv_offset);
 }
@@ -179,8 +162,7 @@ static unsigned get_max_segment_size(struct request_queue *q,
  * variables.
  */
 static bool bvec_split_segs(struct request_queue *q, struct bio_vec *bv,
-		unsigned *nsegs, unsigned *last_seg_size,
-		unsigned *front_seg_size, unsigned *sectors, unsigned max_segs)
+		unsigned *nsegs, unsigned *sectors, unsigned max_segs)
 {
 	unsigned len = bv->bv_len;
 	unsigned total_len = 0;
@@ -202,28 +184,12 @@ static bool bvec_split_segs(struct request_queue *q, struct bio_vec *bv,
 			break;
 	}
 
-	if (!new_nsegs)
-		return !!len;
-
-	/* update front segment size */
-	if (!*nsegs) {
-		unsigned first_seg_size;
-
-		if (new_nsegs == 1)
-			first_seg_size = get_max_segment_size(q, bv->bv_offset);
-		else
-			first_seg_size = queue_max_segment_size(q);
-
-		if (*front_seg_size < first_seg_size)
-			*front_seg_size = first_seg_size;
+	if (new_nsegs) {
+		*nsegs += new_nsegs;
+		if (sectors)
+			*sectors += total_len >> 9;
 	}
 
-	/* update other varibles */
-	*last_seg_size = seg_size;
-	*nsegs += new_nsegs;
-	if (sectors)
-		*sectors += total_len >> 9;
-
 	/* split in the middle of the bvec if len != 0 */
 	return !!len;
 }
@@ -235,8 +201,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 {
 	struct bio_vec bv, bvprv, *bvprvp = NULL;
 	struct bvec_iter iter;
-	unsigned seg_size = 0, nsegs = 0, sectors = 0;
-	unsigned front_seg_size = bio->bi_seg_front_size;
+	unsigned nsegs = 0, sectors = 0;
 	bool do_split = true;
 	struct bio *new = NULL;
 	const unsigned max_sectors = get_max_io_size(q, bio);
@@ -260,8 +225,6 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 			/* split in the middle of bvec */
 			bv.bv_len = (max_sectors - sectors) << 9;
 			bvec_split_segs(q, &bv, &nsegs,
-					&seg_size,
-					&front_seg_size,
 					&sectors, max_segs);
 		}
 		goto split;
@@ -275,12 +238,9 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 
 		if (bv.bv_offset + bv.bv_len <= PAGE_SIZE) {
 			nsegs++;
-			seg_size = bv.bv_len;
 			sectors += bv.bv_len >> 9;
-			if (nsegs == 1 && seg_size > front_seg_size)
-				front_seg_size = seg_size;
-		} else if (bvec_split_segs(q, &bv, &nsegs, &seg_size,
-				&front_seg_size, &sectors, max_segs)) {
+		} else if (bvec_split_segs(q, &bv, &nsegs, &sectors,
+				max_segs)) {
 			goto split;
 		}
 	}
@@ -295,10 +255,6 @@ split:
 		bio = new;
 	}
 
-	bio->bi_seg_front_size = front_seg_size;
-	if (seg_size > bio->bi_seg_back_size)
-		bio->bi_seg_back_size = seg_size;
-
 	return do_split ? new : NULL;
 }
 
@@ -353,18 +309,13 @@ EXPORT_SYMBOL(blk_queue_split);
 static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
 		struct bio *bio)
 {
-	struct bio_vec uninitialized_var(bv), bvprv = { NULL };
-	unsigned int seg_size, nr_phys_segs;
-	unsigned front_seg_size;
-	struct bio *fbio, *bbio;
+	unsigned int nr_phys_segs = 0;
 	struct bvec_iter iter;
-	bool new_bio = false;
+	struct bio_vec bv;
 
 	if (!bio)
 		return 0;
 
-	front_seg_size = bio->bi_seg_front_size;
-
 	switch (bio_op(bio)) {
 	case REQ_OP_DISCARD:
 	case REQ_OP_SECURE_ERASE:
@@ -374,42 +325,11 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
 		return 1;
 	}
 
-	fbio = bio;
-	seg_size = 0;
-	nr_phys_segs = 0;
 	for_each_bio(bio) {
-		bio_for_each_bvec(bv, bio, iter) {
-			if (new_bio) {
-				if (seg_size + bv.bv_len
-				    > queue_max_segment_size(q))
-					goto new_segment;
-				if (!biovec_phys_mergeable(q, &bvprv, &bv))
-					goto new_segment;
-
-				seg_size += bv.bv_len;
-
-				if (nr_phys_segs == 1 && seg_size >
-						front_seg_size)
-					front_seg_size = seg_size;
-
-				continue;
-			}
-new_segment:
-			bvec_split_segs(q, &bv, &nr_phys_segs, &seg_size,
-					&front_seg_size, NULL, UINT_MAX);
-			new_bio = false;
-		}
-		bbio = bio;
-		if (likely(bio->bi_iter.bi_size)) {
-			bvprv = bv;
-			new_bio = true;
-		}
+		bio_for_each_bvec(bv, bio, iter)
+			bvec_split_segs(q, &bv, &nr_phys_segs, NULL, UINT_MAX);
 	}
 
-	fbio->bi_seg_front_size = front_seg_size;
-	if (seg_size > bbio->bi_seg_back_size)
-		bbio->bi_seg_back_size = seg_size;
-
 	return nr_phys_segs;
 }
 
@@ -429,24 +349,6 @@ void blk_recount_segments(struct request_queue *q, struct bio *bio)
 	bio_set_flag(bio, BIO_SEG_VALID);
 }
 
-static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
-		struct bio *nxt)
-{
-	struct bio_vec end_bv = { NULL }, nxt_bv;
-
-	if (bio->bi_seg_back_size + nxt->bi_seg_front_size >
-	    queue_max_segment_size(q))
-		return 0;
-
-	if (!bio_has_data(bio))
-		return 1;
-
-	bio_get_last_bvec(bio, &end_bv);
-	bio_get_first_bvec(nxt, &nxt_bv);
-
-	return biovec_phys_mergeable(q, &end_bv, &nxt_bv);
-}
-
 static inline struct scatterlist *blk_next_sg(struct scatterlist **sg,
 		struct scatterlist *sglist)
 {
@@ -706,8 +608,6 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
 				struct request *next)
 {
 	int total_phys_segments;
-	unsigned int seg_size =
-		req->biotail->bi_seg_back_size + next->bio->bi_seg_front_size;
 
 	if (req_gap_back_merge(req, next->bio))
 		return 0;
@@ -720,14 +620,6 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
 		return 0;
 
 	total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
-	if (blk_phys_contig_segment(q, req->biotail, next->bio)) {
-		if (req->nr_phys_segments == 1)
-			req->bio->bi_seg_front_size = seg_size;
-		if (next->nr_phys_segments == 1)
-			next->biotail->bi_seg_back_size = seg_size;
-		total_phys_segments--;
-	}
-
 	if (total_phys_segments > queue_max_segments(q))
 		return 0;
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
@@ -144,13 +144,14 @@ void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part,
 
 void blk_freeze_queue_start(struct request_queue *q)
 {
-	int freeze_depth;
-
-	freeze_depth = atomic_inc_return(&q->mq_freeze_depth);
-	if (freeze_depth == 1) {
+	mutex_lock(&q->mq_freeze_lock);
+	if (++q->mq_freeze_depth == 1) {
 		percpu_ref_kill(&q->q_usage_counter);
+		mutex_unlock(&q->mq_freeze_lock);
 		if (queue_is_mq(q))
 			blk_mq_run_hw_queues(q, false);
+	} else {
+		mutex_unlock(&q->mq_freeze_lock);
 	}
 }
 EXPORT_SYMBOL_GPL(blk_freeze_queue_start);
@@ -199,14 +200,14 @@ EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);
 
 void blk_mq_unfreeze_queue(struct request_queue *q)
 {
-	int freeze_depth;
-
-	freeze_depth = atomic_dec_return(&q->mq_freeze_depth);
-	WARN_ON_ONCE(freeze_depth < 0);
-	if (!freeze_depth) {
+	mutex_lock(&q->mq_freeze_lock);
+	q->mq_freeze_depth--;
+	WARN_ON_ONCE(q->mq_freeze_depth < 0);
+	if (!q->mq_freeze_depth) {
 		percpu_ref_resurrect(&q->q_usage_counter);
 		wake_up_all(&q->mq_freeze_wq);
 	}
+	mutex_unlock(&q->mq_freeze_lock);
 }
 EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
diff --git a/block/blk-settings.c b/block/blk-settings.c
@@ -310,6 +310,9 @@ void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size)
 		       __func__, max_size);
 	}
 
+	/* see blk_queue_virt_boundary() for the explanation */
+	WARN_ON_ONCE(q->limits.virt_boundary_mask);
+
 	q->limits.max_segment_size = max_size;
 }
 EXPORT_SYMBOL(blk_queue_max_segment_size);
@@ -742,6 +745,14 @@ EXPORT_SYMBOL(blk_queue_segment_boundary);
 void blk_queue_virt_boundary(struct request_queue *q, unsigned long mask)
 {
 	q->limits.virt_boundary_mask = mask;
+
+	/*
+	 * Devices that require a virtual boundary do not support scatter/gather
+	 * I/O natively, but instead require a descriptor list entry for each
+	 * page (which might not be idential to the Linux PAGE_SIZE).  Because
+	 * of that they are not limited by our notion of "segment size".
+	 */
+	q->limits.max_segment_size = UINT_MAX;
 }
 EXPORT_SYMBOL(blk_queue_virt_boundary);
 
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
@@ -1257,9 +1257,9 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 		return 0;
 	}
 
-	effects |= nvme_known_admin_effects(opcode);
 	if (ctrl->effects)
 		effects = le32_to_cpu(ctrl->effects->acs[opcode]);
+	effects |= nvme_known_admin_effects(opcode);
 
 	/*
 	 * For simplicity, IO to all namespaces is quiesced even if the command
@@ -1361,9 +1361,14 @@ static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk,
 {
 #ifdef CONFIG_NVME_MULTIPATH
 	if (disk->fops == &nvme_ns_head_ops) {
+		struct nvme_ns *ns;
+
 		*head = disk->private_data;
 		*srcu_idx = srcu_read_lock(&(*head)->srcu);
-		return nvme_find_path(*head);
+		ns = nvme_find_path(*head);
+		if (!ns)
+			srcu_read_unlock(&(*head)->srcu, *srcu_idx);
+		return ns;
 	}
 #endif
 	*head = NULL;
@@ -1377,42 +1382,56 @@ static void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx)
 	srcu_read_unlock(&head->srcu, idx);
 }
 
-static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned cmd, unsigned long arg)
-{
-	switch (cmd) {
-	case NVME_IOCTL_ID:
-		force_successful_syscall_return();
-		return ns->head->ns_id;
-	case NVME_IOCTL_ADMIN_CMD:
-		return nvme_user_cmd(ns->ctrl, NULL, (void __user *)arg);
-	case NVME_IOCTL_IO_CMD:
-		return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
-	case NVME_IOCTL_SUBMIT_IO:
-		return nvme_submit_io(ns, (void __user *)arg);
-	default:
-#ifdef CONFIG_NVM
-		if (ns->ndev)
-			return nvme_nvm_ioctl(ns, cmd, arg);
-#endif
-		if (is_sed_ioctl(cmd))
-			return sed_ioctl(ns->ctrl->opal_dev, cmd,
-					 (void __user *) arg);
-		return -ENOTTY;
-	}
-}
-
 static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
 		unsigned int cmd, unsigned long arg)
 {
 	struct nvme_ns_head *head = NULL;
+	void __user *argp = (void __user *)arg;
 	struct nvme_ns *ns;
 	int srcu_idx, ret;
 
 	ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx);
 	if (unlikely(!ns))
-		ret = -EWOULDBLOCK;
-	else
-		ret = nvme_ns_ioctl(ns, cmd, arg);
+		return -EWOULDBLOCK;
+
+	/*
+	 * Handle ioctls that apply to the controller instead of the namespace
+	 * seperately and drop the ns SRCU reference early.  This avoids a
+	 * deadlock when deleting namespaces using the passthrough interface.
+	 */
+	if (cmd == NVME_IOCTL_ADMIN_CMD || is_sed_ioctl(cmd)) {
+		struct nvme_ctrl *ctrl = ns->ctrl;
+
+		nvme_get_ctrl(ns->ctrl);
+		nvme_put_ns_from_disk(head, srcu_idx);
+
+		if (cmd == NVME_IOCTL_ADMIN_CMD)
+			ret = nvme_user_cmd(ctrl, NULL, argp);
+		else
+			ret = sed_ioctl(ctrl->opal_dev, cmd, argp);
+
+		nvme_put_ctrl(ctrl);
+		return ret;
+	}
+
+	switch (cmd) {
+	case NVME_IOCTL_ID:
+		force_successful_syscall_return();
+		ret = ns->head->ns_id;
+		break;
+	case NVME_IOCTL_IO_CMD:
+		ret = nvme_user_cmd(ns->ctrl, ns, argp);
+		break;
+	case NVME_IOCTL_SUBMIT_IO:
+		ret = nvme_submit_io(ns, argp);
+		break;
+	default:
+		if (ns->ndev)
+			ret = nvme_nvm_ioctl(ns, cmd, arg);
+		else
+			ret = -ENOTTY;
+	}
+
 	nvme_put_ns_from_disk(head, srcu_idx);
 	return ret;
 }
@@ -2557,6 +2576,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 
 	ctrl->oacs = le16_to_cpu(id->oacs);
 	ctrl->oncs = le16_to_cpu(id->oncs);
+	ctrl->mtfa = le16_to_cpu(id->mtfa);
 	ctrl->oaes = le32_to_cpu(id->oaes);
 	atomic_set(&ctrl->abort_limit, id->acl + 1);
 	ctrl->vwc = id->vwc;
@@ -3681,6 +3701,7 @@ EXPORT_SYMBOL_GPL(nvme_start_ctrl);
 
 void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
 {
+	dev_pm_qos_hide_latency_tolerance(ctrl->device);
 	cdev_device_del(&ctrl->cdev, ctrl->device);
 }
 EXPORT_SYMBOL_GPL(nvme_uninit_ctrl);
@@ -3880,6 +3901,18 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_start_queues);
 
+
+void nvme_sync_queues(struct nvme_ctrl *ctrl)
+{
+	struct nvme_ns *ns;
+
+	down_read(&ctrl->namespaces_rwsem);
+	list_for_each_entry(ns, &ctrl->namespaces, list)
+		blk_sync_queue(ns->queue);
+	up_read(&ctrl->namespaces_rwsem);
+}
+EXPORT_SYMBOL_GPL(nvme_sync_queues);
+
 /*
  * Check we didn't inadvertently grow the command structure sizes:
  */
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
@@ -441,6 +441,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 void nvme_stop_queues(struct nvme_ctrl *ctrl);
 void nvme_start_queues(struct nvme_ctrl *ctrl);
 void nvme_kill_queues(struct nvme_ctrl *ctrl);
+void nvme_sync_queues(struct nvme_ctrl *ctrl);
 void nvme_unfreeze(struct nvme_ctrl *ctrl);
 void nvme_wait_freeze(struct nvme_ctrl *ctrl);
 void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
@@ -464,7 +464,7 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
 		 * affinity), so use the regular blk-mq cpu mapping
 		 */
 		map->queue_offset = qoff;
-		if (i != HCTX_TYPE_POLL)
+		if (i != HCTX_TYPE_POLL && offset)
 			blk_mq_pci_map_queues(map, to_pci_dev(dev->dev), offset);
 		else
 			blk_mq_map_queues(map);
@@ -1257,7 +1257,6 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	struct nvme_dev *dev = nvmeq->dev;
 	struct request *abort_req;
 	struct nvme_command cmd;
-	bool shutdown = false;
 	u32 csts = readl(dev->bar + NVME_REG_CSTS);
 
 	/* If PCI error recovery process is happening, we cannot reset or
@@ -1294,17 +1293,18 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	 * shutdown, so we return BLK_EH_DONE.
 	 */
 	switch (dev->ctrl.state) {
-	case NVME_CTRL_DELETING:
-		shutdown = true;
-		/* fall through */
 	case NVME_CTRL_CONNECTING:
-	case NVME_CTRL_RESETTING:
+		nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
+		/* fall through */
+	case NVME_CTRL_DELETING:
 		dev_warn_ratelimited(dev->ctrl.device,
 			 "I/O %d QID %d timeout, disable controller\n",
 			 req->tag, nvmeq->qid);
-		nvme_dev_disable(dev, shutdown);
+		nvme_dev_disable(dev, true);
 		nvme_req(req)->flags |= NVME_REQ_CANCELLED;
 		return BLK_EH_DONE;
+	case NVME_CTRL_RESETTING:
+		return BLK_EH_RESET_TIMER;
 	default:
 		break;
 	}
@@ -2376,7 +2376,7 @@ static void nvme_pci_disable(struct nvme_dev *dev)
 
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 {
-	bool dead = true;
+	bool dead = true, freeze = false;
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
 	mutex_lock(&dev->shutdown_lock);
@@ -2384,8 +2384,10 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 		u32 csts = readl(dev->bar + NVME_REG_CSTS);
 
 		if (dev->ctrl.state == NVME_CTRL_LIVE ||
-		    dev->ctrl.state == NVME_CTRL_RESETTING)
+		    dev->ctrl.state == NVME_CTRL_RESETTING) {
+			freeze = true;
 			nvme_start_freeze(&dev->ctrl);
+		}
 		dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) ||
 			pdev->error_state != pci_channel_io_normal);
 	}
@@ -2394,10 +2396,8 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 	 * Give the controller a chance to complete all entered requests if
 	 * doing a safe shutdown.
 	 */
-	if (!dead) {
-		if (shutdown)
-			nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
-	}
+	if (!dead && shutdown && freeze)
+		nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
 
 	nvme_stop_queues(&dev->ctrl);
 
@@ -2492,6 +2492,7 @@ static void nvme_reset_work(struct work_struct *work)
 	 */
 	if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
 		nvme_dev_disable(dev, false);
+	nvme_sync_queues(&dev->ctrl);
 
 	mutex_lock(&dev->shutdown_lock);
 	result = nvme_pci_enable(dev);
diff --git a/include/linux/bio.h b/include/linux/bio.h
@@ -210,7 +210,7 @@ static inline void bio_cnt_set(struct bio *bio, unsigned int count)
 {
 	if (count != 1) {
 		bio->bi_flags |= (1 << BIO_REFFED);
-		smp_mb__before_atomic();
+		smp_mb();
 	}
 	atomic_set(&bio->__bi_cnt, count);
 }
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
@@ -159,13 +159,6 @@ struct bio {
 	 */
 	unsigned int		bi_phys_segments;
 
-	/*
-	 * To keep track of the max segment size, we account for the
-	 * sizes of the first and last mergeable segments in this bio.
-	 */
-	unsigned int		bi_seg_front_size;
-	unsigned int		bi_seg_back_size;
-
 	struct bvec_iter	bi_iter;
 
 	atomic_t		__bi_remaining;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
@@ -542,7 +542,7 @@ struct request_queue {
 	struct list_head	unused_hctx_list;
 	spinlock_t		unused_hctx_lock;
 
-	atomic_t		mq_freeze_depth;
+	int			mq_freeze_depth;
 
 #if defined(CONFIG_BLK_DEV_BSG)
 	struct bsg_class_device bsg_dev;
@@ -554,6 +554,11 @@ struct request_queue {
 #endif
 	struct rcu_head		rcu_head;
 	wait_queue_head_t	mq_freeze_wq;
+	/*
+	 * Protect concurrent access to q_usage_counter by
+	 * percpu_ref_kill() and percpu_ref_reinit().
+	 */
+	struct mutex		mq_freeze_lock;
 	struct percpu_ref	q_usage_counter;
 
 	struct blk_mq_tag_set	*tag_set;
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
@@ -435,7 +435,7 @@ static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
 		 * to ensure that the batch size is updated before the wait
 		 * counts.
 		 */
-		smp_mb__before_atomic();
+		smp_mb();
 		for (i = 0; i < SBQ_WAIT_QUEUES; i++)
 			atomic_set(&sbq->ws[i].wait_cnt, 1);
 	}
diff --git a/tools/io_uring/Makefile b/tools/io_uring/Makefile
@@ -8,7 +8,7 @@ all: io_uring-cp io_uring-bench
 	$(CC) $(CFLAGS) -o $@ $^
 
 io_uring-bench: syscall.o io_uring-bench.o
-	$(CC) $(CFLAGS) $(LDLIBS) -o $@ $^
+	$(CC) $(CFLAGS) -o $@ $^ $(LDLIBS)
 
 io_uring-cp: setup.o syscall.o queue.o
 
diff --git a/tools/io_uring/io_uring-cp.c b/tools/io_uring/io_uring-cp.c
@@ -13,6 +13,7 @@
 #include <assert.h>
 #include <errno.h>
 #include <inttypes.h>
+#include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/ioctl.h>
 
@@ -85,11 +86,16 @@ static int queue_read(struct io_uring *ring, off_t size, off_t offset)
 	struct io_uring_sqe *sqe;
 	struct io_data *data;
 
-	sqe = io_uring_get_sqe(ring);
-	if (!sqe)
+	data = malloc(size + sizeof(*data));
+	if (!data)
 		return 1;
 
-	data = malloc(size + sizeof(*data));
+	sqe = io_uring_get_sqe(ring);
+	if (!sqe) {
+		free(data);
+		return 1;
+	}
+
 	data->read = 1;
 	data->offset = data->first_offset = offset;
 
@@ -166,22 +172,23 @@ static int copy_file(struct io_uring *ring, off_t insize)
 		struct io_data *data;
 
 		if (!got_comp) {
-			ret = io_uring_wait_completion(ring, &cqe);
+			ret = io_uring_wait_cqe(ring, &cqe);
 			got_comp = 1;
 		} else
-			ret = io_uring_get_completion(ring, &cqe);
+			ret = io_uring_peek_cqe(ring, &cqe);
 		if (ret < 0) {
-			fprintf(stderr, "io_uring_get_completion: %s\n",
+			fprintf(stderr, "io_uring_peek_cqe: %s\n",
 					strerror(-ret));
 			return 1;
 		}
 		if (!cqe)
 			break;
 
-		data = (struct io_data *) (uintptr_t) cqe->user_data;
+		data = io_uring_cqe_get_data(cqe);
 		if (cqe->res < 0) {
 			if (cqe->res == -EAGAIN) {
 				queue_prepped(ring, data);
+				io_uring_cqe_seen(ring, cqe);
 				continue;
 			}
 			fprintf(stderr, "cqe failed: %s\n",
@@ -193,6 +200,7 @@ static int copy_file(struct io_uring *ring, off_t insize)
 			data->iov.iov_len -= cqe->res;
 			data->offset += cqe->res;
 			queue_prepped(ring, data);
+			io_uring_cqe_seen(ring, cqe);
 			continue;
 		}
 
@@ -209,6 +217,7 @@ static int copy_file(struct io_uring *ring, off_t insize)
 			free(data);
 			writes--;
 		}
+		io_uring_cqe_seen(ring, cqe);
 	}
 }
 
diff --git a/tools/io_uring/liburing.h b/tools/io_uring/liburing.h
@@ -1,10 +1,16 @@
 #ifndef LIB_URING_H
 #define LIB_URING_H
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #include <sys/uio.h>
 #include <signal.h>
 #include <string.h>
 #include "../../include/uapi/linux/io_uring.h"
+#include <inttypes.h>
+#include "barrier.h"
 
 /*
  * Library interface to io_uring
@@ -46,7 +52,7 @@ struct io_uring {
  * System calls
  */
 extern int io_uring_setup(unsigned entries, struct io_uring_params *p);
-extern int io_uring_enter(unsigned fd, unsigned to_submit,
+extern int io_uring_enter(int fd, unsigned to_submit,
 	unsigned min_complete, unsigned flags, sigset_t *sig);
 extern int io_uring_register(int fd, unsigned int opcode, void *arg,
 	unsigned int nr_args);
@@ -59,13 +65,32 @@ extern int io_uring_queue_init(unsigned entries, struct io_uring *ring,
 extern int io_uring_queue_mmap(int fd, struct io_uring_params *p,
 	struct io_uring *ring);
 extern void io_uring_queue_exit(struct io_uring *ring);
-extern int io_uring_get_completion(struct io_uring *ring,
+extern int io_uring_peek_cqe(struct io_uring *ring,
 	struct io_uring_cqe **cqe_ptr);
-extern int io_uring_wait_completion(struct io_uring *ring,
+extern int io_uring_wait_cqe(struct io_uring *ring,
 	struct io_uring_cqe **cqe_ptr);
 extern int io_uring_submit(struct io_uring *ring);
 extern struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring);
 
+/*
+ * Must be called after io_uring_{peek,wait}_cqe() after the cqe has
+ * been processed by the application.
+ */
+static inline void io_uring_cqe_seen(struct io_uring *ring,
+				     struct io_uring_cqe *cqe)
+{
+	if (cqe) {
+		struct io_uring_cq *cq = &ring->cq;
+
+		(*cq->khead)++;
+		/*
+		 * Ensure that the kernel sees our new head, the kernel has
+		 * the matching read barrier.
+		 */
+		write_barrier();
+	}
+}
+
 /*
  * Command prep helpers
  */
@@ -74,8 +99,14 @@ static inline void io_uring_sqe_set_data(struct io_uring_sqe *sqe, void *data)
 	sqe->user_data = (unsigned long) data;
 }
 
+static inline void *io_uring_cqe_get_data(struct io_uring_cqe *cqe)
+{
+	return (void *) (uintptr_t) cqe->user_data;
+}
+
 static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd,
-				    void *addr, unsigned len, off_t offset)
+				    const void *addr, unsigned len,
+				    off_t offset)
 {
 	memset(sqe, 0, sizeof(*sqe));
 	sqe->opcode = op;
@@ -86,8 +117,8 @@ static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd,
 }
 
 static inline void io_uring_prep_readv(struct io_uring_sqe *sqe, int fd,
-				       struct iovec *iovecs, unsigned nr_vecs,
-				       off_t offset)
+				       const struct iovec *iovecs,
+				       unsigned nr_vecs, off_t offset)
 {
 	io_uring_prep_rw(IORING_OP_READV, sqe, fd, iovecs, nr_vecs, offset);
 }
@@ -100,14 +131,14 @@ static inline void io_uring_prep_read_fixed(struct io_uring_sqe *sqe, int fd,
 }
 
 static inline void io_uring_prep_writev(struct io_uring_sqe *sqe, int fd,
-					struct iovec *iovecs, unsigned nr_vecs,
-					off_t offset)
+					const struct iovec *iovecs,
+					unsigned nr_vecs, off_t offset)
 {
 	io_uring_prep_rw(IORING_OP_WRITEV, sqe, fd, iovecs, nr_vecs, offset);
 }
 
 static inline void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd,
-					     void *buf, unsigned nbytes,
+					     const void *buf, unsigned nbytes,
 					     off_t offset)
 {
 	io_uring_prep_rw(IORING_OP_WRITE_FIXED, sqe, fd, buf, nbytes, offset);
@@ -131,13 +162,22 @@ static inline void io_uring_prep_poll_remove(struct io_uring_sqe *sqe,
 }
 
 static inline void io_uring_prep_fsync(struct io_uring_sqe *sqe, int fd,
-				       int datasync)
+				       unsigned fsync_flags)
 {
 	memset(sqe, 0, sizeof(*sqe));
 	sqe->opcode = IORING_OP_FSYNC;
 	sqe->fd = fd;
-	if (datasync)
-		sqe->fsync_flags = IORING_FSYNC_DATASYNC;
+	sqe->fsync_flags = fsync_flags;
 }
 
+static inline void io_uring_prep_nop(struct io_uring_sqe *sqe)
+{
+	memset(sqe, 0, sizeof(*sqe));
+	sqe->opcode = IORING_OP_NOP;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
 #endif
diff --git a/tools/io_uring/queue.c b/tools/io_uring/queue.c
@@ -8,8 +8,8 @@
 #include "liburing.h"
 #include "barrier.h"
 
-static int __io_uring_get_completion(struct io_uring *ring,
+static int __io_uring_get_cqe(struct io_uring *ring,
 		struct io_uring_cqe **cqe_ptr, int wait)
 {
 	struct io_uring_cq *cq = &ring->cq;
 	const unsigned mask = *cq->kring_mask;
@@ -39,34 +39,25 @@ static int __io_uring_get_completion(struct io_uring *ring,
 			return -errno;
 	} while (1);
 
-	if (*cqe_ptr) {
-		*cq->khead = head + 1;
-		/*
-		 * Ensure that the kernel sees our new head, the kernel has
-		 * the matching read barrier.
-		 */
-		write_barrier();
-	}
-
 	return 0;
 }
 
 /*
- * Return an IO completion, if one is readily available
+ * Return an IO completion, if one is readily available. Returns 0 with
+ * cqe_ptr filled in on success, -errno on failure.
  */
-int io_uring_get_completion(struct io_uring *ring,
-		struct io_uring_cqe **cqe_ptr)
+int io_uring_peek_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr)
 {
-	return __io_uring_get_completion(ring, cqe_ptr, 0);
+	return __io_uring_get_cqe(ring, cqe_ptr, 0);
 }
 
 /*
- * Return an IO completion, waiting for it if necessary
+ * Return an IO completion, waiting for it if necessary. Returns 0 with
+ * cqe_ptr filled in on success, -errno on failure.
  */
-int io_uring_wait_completion(struct io_uring *ring,
-		struct io_uring_cqe **cqe_ptr)
+int io_uring_wait_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr)
 {
-	return __io_uring_get_completion(ring, cqe_ptr, 1);
+	return __io_uring_get_cqe(ring, cqe_ptr, 1);
 }
 
 /*
@@ -78,7 +69,7 @@ int io_uring_submit(struct io_uring *ring)
 {
 	struct io_uring_sq *sq = &ring->sq;
 	const unsigned mask = *sq->kring_mask;
-	unsigned ktail, ktail_next, submitted;
+	unsigned ktail, ktail_next, submitted, to_submit;
 	int ret;
 
 	/*
@@ -100,7 +91,8 @@ int io_uring_submit(struct io_uring *ring)
 	 */
 	submitted = 0;
 	ktail = ktail_next = *sq->ktail;
-	while (sq->sqe_head < sq->sqe_tail) {
+	to_submit = sq->sqe_tail - sq->sqe_head;
+	while (to_submit--) {
 		ktail_next++;
 		read_barrier();
 
@@ -136,7 +128,7 @@ submit:
 	if (ret < 0)
 		return -errno;
 
-	return 0;
+	return ret;
 }
 
 /*
diff --git a/tools/io_uring/setup.c b/tools/io_uring/setup.c
@@ -27,7 +27,7 @@ static int io_uring_mmap(int fd, struct io_uring_params *p,
 	sq->kdropped = ptr + p->sq_off.dropped;
 	sq->array = ptr + p->sq_off.array;
 
-	size = p->sq_entries * sizeof(struct io_uring_sqe),
+	size = p->sq_entries * sizeof(struct io_uring_sqe);
 	sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE,
 				MAP_SHARED | MAP_POPULATE, fd,
 				IORING_OFF_SQES);
@@ -79,7 +79,7 @@ int io_uring_queue_mmap(int fd, struct io_uring_params *p, struct io_uring *ring
 int io_uring_queue_init(unsigned entries, struct io_uring *ring, unsigned flags)
 {
 	struct io_uring_params p;
-	int fd;
+	int fd, ret;
 
 	memset(&p, 0, sizeof(p));
 	p.flags = flags;
@@ -88,7 +88,11 @@ int io_uring_queue_init(unsigned entries, struct io_uring *ring, unsigned flags)
 	if (fd < 0)
 		return fd;
 
-	return io_uring_queue_mmap(fd, &p, ring);
+	ret = io_uring_queue_mmap(fd, &p, ring);
+	if (ret)
+		close(fd);
+
+	return ret;
 }
 
 void io_uring_queue_exit(struct io_uring *ring)
diff --git a/tools/io_uring/syscall.c b/tools/io_uring/syscall.c
@@ -7,34 +7,46 @@
 #include <signal.h>
 #include "liburing.h"
 
-#if defined(__x86_64) || defined(__i386__)
-#ifndef __NR_sys_io_uring_setup
-#define __NR_sys_io_uring_setup	425
-#endif
-#ifndef __NR_sys_io_uring_enter
-#define __NR_sys_io_uring_enter	426
-#endif
-#ifndef __NR_sys_io_uring_register
-#define __NR_sys_io_uring_register	427
-#endif
-#else
-#error "Arch not supported yet"
+#ifdef __alpha__
+/*
+ * alpha is the only exception, all other architectures
+ * have common numbers for new system calls.
+ */
+# ifndef __NR_io_uring_setup
+#  define __NR_io_uring_setup		535
+# endif
+# ifndef __NR_io_uring_enter
+#  define __NR_io_uring_enter		536
+# endif
+# ifndef __NR_io_uring_register
+#  define __NR_io_uring_register	537
+# endif
+#else /* !__alpha__ */
+# ifndef __NR_io_uring_setup
+#  define __NR_io_uring_setup		425
+# endif
+# ifndef __NR_io_uring_enter
+#  define __NR_io_uring_enter		426
+# endif
+# ifndef __NR_io_uring_register
+#  define __NR_io_uring_register	427
+# endif
 #endif
 
 int io_uring_register(int fd, unsigned int opcode, void *arg,
 		      unsigned int nr_args)
 {
-	return syscall(__NR_sys_io_uring_register, fd, opcode, arg, nr_args);
+	return syscall(__NR_io_uring_register, fd, opcode, arg, nr_args);
 }
 
-int io_uring_setup(unsigned entries, struct io_uring_params *p)
+int io_uring_setup(unsigned int entries, struct io_uring_params *p)
 {
-	return syscall(__NR_sys_io_uring_setup, entries, p);
+	return syscall(__NR_io_uring_setup, entries, p);
 }
 
-int io_uring_enter(unsigned fd, unsigned to_submit, unsigned min_complete,
-		   unsigned flags, sigset_t *sig)
+int io_uring_enter(int fd, unsigned int to_submit, unsigned int min_complete,
+		   unsigned int flags, sigset_t *sig)
 {
-	return syscall(__NR_sys_io_uring_enter, fd, to_submit, min_complete,
+	return syscall(__NR_io_uring_enter, fd, to_submit, min_complete,
 			flags, sig, _NSIG / 8);
 }