io_uring: explicitly keep a CQE in io_kiocb

We already have req->{result,user_data,cflags}, which mimic struct
io_uring_cqe and are intended to store CQE data. Combine them into a
struct io_uring_cqe field.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/e1efe65d5005cd6a9ec3440767eb15a9fa9351cf.1649771823.git.asml.silence@gmail.com
[axboe: add mirror cqe to cater to fd union]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Pavel Begunkov 2022-04-12 15:09:43 +01:00 коммит произвёл Jens Axboe
Родитель 8b3171bdf5
Коммит cef216fc32
1 изменённых файлов: 69 добавлений и 65 удалений

Просмотреть файл

@ -866,6 +866,16 @@ enum {
IORING_RSRC_BUFFER = 1,
};
struct io_cqe {
__u64 user_data;
__s32 res;
/* fd initially, then cflags for completion */
union {
__u32 flags;
int fd;
};
};
/*
* NOTE! Each of the iocb union members has the file pointer
* as the first entry in their struct definition. So you can
@ -909,13 +919,7 @@ struct io_kiocb {
u16 buf_index;
unsigned int flags;
u64 user_data;
u32 result;
/* fd initially, then cflags for completion */
union {
u32 cflags;
int fd;
};
struct io_cqe cqe;
struct io_ring_ctx *ctx;
struct task_struct *task;
@ -1501,7 +1505,7 @@ static inline void req_set_fail(struct io_kiocb *req)
static inline void req_fail_link_node(struct io_kiocb *req, int res)
{
req_set_fail(req);
req->result = res;
req->cqe.res = res;
}
static __cold void io_ring_ctx_ref_free(struct percpu_ref *ref)
@ -1733,7 +1737,7 @@ static void io_queue_async_work(struct io_kiocb *req, bool *dont_use)
if (WARN_ON_ONCE(!same_thread_group(req->task, current)))
req->work.flags |= IO_WQ_WORK_CANCEL;
trace_io_uring_queue_async_work(ctx, req, req->user_data, req->opcode, req->flags,
trace_io_uring_queue_async_work(ctx, req, req->cqe.user_data, req->opcode, req->flags,
&req->work, io_wq_is_hashed(&req->work));
io_wq_enqueue(tctx->io_wq, &req->work);
if (link)
@ -2073,8 +2077,8 @@ static inline bool __io_fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
static inline bool __io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
{
trace_io_uring_complete(req->ctx, req, req->user_data, res, cflags);
return __io_fill_cqe(req->ctx, req->user_data, res, cflags);
trace_io_uring_complete(req->ctx, req, req->cqe.user_data, res, cflags);
return __io_fill_cqe(req->ctx, req->cqe.user_data, res, cflags);
}
static noinline void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
@ -2140,8 +2144,8 @@ static void io_req_complete_post(struct io_kiocb *req, s32 res,
static inline void io_req_complete_state(struct io_kiocb *req, s32 res,
u32 cflags)
{
req->result = res;
req->cflags = cflags;
req->cqe.res = res;
req->cqe.flags = cflags;
req->flags |= REQ_F_COMPLETE_INLINE;
}
@ -2173,7 +2177,7 @@ static void io_req_complete_fail_submit(struct io_kiocb *req)
*/
req->flags &= ~REQ_F_HARDLINK;
req->flags |= REQ_F_LINK;
io_req_complete_failed(req, req->result);
io_req_complete_failed(req, req->cqe.res);
}
/*
@ -2186,7 +2190,7 @@ static void io_preinit_req(struct io_kiocb *req, struct io_ring_ctx *ctx)
req->link = NULL;
req->async_data = NULL;
/* not necessary, but safer to zero */
req->result = 0;
req->cqe.res = 0;
}
static void io_flush_cached_locked_reqs(struct io_ring_ctx *ctx,
@ -2340,12 +2344,12 @@ static void io_fail_links(struct io_kiocb *req)
long res = -ECANCELED;
if (link->flags & REQ_F_FAIL)
res = link->result;
res = link->cqe.res;
nxt = link->link;
link->link = NULL;
trace_io_uring_fail_link(req->ctx, req, req->user_data,
trace_io_uring_fail_link(req->ctx, req, req->cqe.user_data,
req->opcode, link);
if (!ignore_cqes) {
@ -2465,7 +2469,7 @@ static void handle_prev_tw_list(struct io_wq_work_node *node,
if (likely(*uring_locked))
req->io_task_work.func(req, uring_locked);
else
__io_req_complete_post(req, req->result,
__io_req_complete_post(req, req->cqe.res,
io_put_kbuf_comp(req));
node = next;
} while (node);
@ -2595,7 +2599,7 @@ static void io_req_task_cancel(struct io_kiocb *req, bool *locked)
/* not needed for normal modes, but SQPOLL depends on it */
io_tw_lock(ctx, locked);
io_req_complete_failed(req, req->result);
io_req_complete_failed(req, req->cqe.res);
}
static void io_req_task_submit(struct io_kiocb *req, bool *locked)
@ -2612,7 +2616,7 @@ static void io_req_task_submit(struct io_kiocb *req, bool *locked)
static void io_req_task_queue_fail(struct io_kiocb *req, int ret)
{
req->result = ret;
req->cqe.res = ret;
req->io_task_work.func = io_req_task_cancel;
io_req_task_work_add(req, false);
}
@ -2712,7 +2716,7 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
comp_list);
if (!(req->flags & REQ_F_CQE_SKIP))
__io_fill_cqe_req(req, req->result, req->cflags);
__io_fill_cqe_req(req, req->cqe.res, req->cqe.flags);
}
io_commit_cqring(ctx);
@ -2837,7 +2841,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
nr_events++;
if (unlikely(req->flags & REQ_F_CQE_SKIP))
continue;
__io_fill_cqe_req(req, req->result, io_put_kbuf(req, 0));
__io_fill_cqe_req(req, req->cqe.res, io_put_kbuf(req, 0));
}
if (unlikely(!nr_events))
@ -2995,21 +2999,21 @@ static bool __io_complete_rw_common(struct io_kiocb *req, long res)
} else {
fsnotify_access(req->file);
}
if (unlikely(res != req->result)) {
if (unlikely(res != req->cqe.res)) {
if ((res == -EAGAIN || res == -EOPNOTSUPP) &&
io_rw_should_reissue(req)) {
req->flags |= REQ_F_REISSUE;
return true;
}
req_set_fail(req);
req->result = res;
req->cqe.res = res;
}
return false;
}
static inline void io_req_task_complete(struct io_kiocb *req, bool *locked)
{
int res = req->result;
int res = req->cqe.res;
if (*locked) {
io_req_complete_state(req, res, io_put_kbuf(req, 0));
@ -3025,7 +3029,7 @@ static void __io_complete_rw(struct io_kiocb *req, long res,
{
if (__io_complete_rw_common(req, res))
return;
__io_req_complete(req, issue_flags, req->result,
__io_req_complete(req, issue_flags, req->cqe.res,
io_put_kbuf(req, issue_flags));
}
@ -3035,7 +3039,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res)
if (__io_complete_rw_common(req, res))
return;
req->result = res;
req->cqe.res = res;
req->io_task_work.func = io_req_task_complete;
io_req_task_work_add(req, !!(req->ctx->flags & IORING_SETUP_SQPOLL));
}
@ -3046,12 +3050,12 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
if (kiocb->ki_flags & IOCB_WRITE)
kiocb_end_write(req);
if (unlikely(res != req->result)) {
if (unlikely(res != req->cqe.res)) {
if (res == -EAGAIN && io_rw_should_reissue(req)) {
req->flags |= REQ_F_REISSUE;
return;
}
req->result = res;
req->cqe.res = res;
}
/* order with io_iopoll_complete() checking ->iopoll_completed */
@ -3844,7 +3848,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
kfree(iovec);
return ret;
}
req->result = iov_iter_count(&s->iter);
req->cqe.res = iov_iter_count(&s->iter);
if (force_nonblock) {
/* If the file doesn't support async, just async punt */
@ -3860,7 +3864,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
ppos = io_kiocb_update_pos(req);
ret = rw_verify_area(READ, req->file, ppos, req->result);
ret = rw_verify_area(READ, req->file, ppos, req->cqe.res);
if (unlikely(ret)) {
kfree(iovec);
return ret;
@ -3882,7 +3886,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
ret = 0;
} else if (ret == -EIOCBQUEUED) {
goto out_free;
} else if (ret == req->result || ret <= 0 || !force_nonblock ||
} else if (ret == req->cqe.res || ret <= 0 || !force_nonblock ||
(req->flags & REQ_F_NOWAIT) || !need_read_all(req)) {
/* read all, failed, already did sync or don't want to retry */
goto done;
@ -3972,7 +3976,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
kfree(iovec);
return ret;
}
req->result = iov_iter_count(&s->iter);
req->cqe.res = iov_iter_count(&s->iter);
if (force_nonblock) {
/* If the file doesn't support async, just async punt */
@ -3992,7 +3996,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
ppos = io_kiocb_update_pos(req);
ret = rw_verify_area(WRITE, req->file, ppos, req->result);
ret = rw_verify_area(WRITE, req->file, ppos, req->cqe.res);
if (unlikely(ret))
goto out_free;
@ -5777,7 +5781,7 @@ static void io_poll_req_insert(struct io_kiocb *req)
struct io_ring_ctx *ctx = req->ctx;
struct hlist_head *list;
list = &ctx->cancel_hash[hash_long(req->user_data, ctx->cancel_hash_bits)];
list = &ctx->cancel_hash[hash_long(req->cqe.user_data, ctx->cancel_hash_bits)];
hlist_add_head(&req->hash_node, list);
}
@ -5842,7 +5846,7 @@ static void io_poll_remove_entries(struct io_kiocb *req)
*
* Returns a negative error on failure. >0 when no action require, which is
* either spurious wakeup or multishot CQE is served. 0 when it's done with
* the request, then the mask is stored in req->result.
* the request, then the mask is stored in req->cqe.res.
*/
static int io_poll_check_events(struct io_kiocb *req, bool locked)
{
@ -5862,29 +5866,29 @@ static int io_poll_check_events(struct io_kiocb *req, bool locked)
if (v & IO_POLL_CANCEL_FLAG)
return -ECANCELED;
if (!req->result) {
if (!req->cqe.res) {
struct poll_table_struct pt = { ._key = req->apoll_events };
unsigned flags = locked ? 0 : IO_URING_F_UNLOCKED;
if (unlikely(!io_assign_file(req, flags)))
return -EBADF;
req->result = vfs_poll(req->file, &pt) & req->apoll_events;
req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events;
}
/* multishot, just fill an CQE and proceed */
if (req->result && !(req->apoll_events & EPOLLONESHOT)) {
__poll_t mask = mangle_poll(req->result & req->apoll_events);
if (req->cqe.res && !(req->apoll_events & EPOLLONESHOT)) {
__poll_t mask = mangle_poll(req->cqe.res & req->apoll_events);
bool filled;
spin_lock(&ctx->completion_lock);
filled = io_fill_cqe_aux(ctx, req->user_data, mask,
filled = io_fill_cqe_aux(ctx, req->cqe.user_data, mask,
IORING_CQE_F_MORE);
io_commit_cqring(ctx);
spin_unlock(&ctx->completion_lock);
if (unlikely(!filled))
return -ECANCELED;
io_cqring_ev_posted(ctx);
} else if (req->result) {
} else if (req->cqe.res) {
return 0;
}
@ -5907,16 +5911,16 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
return;
if (!ret) {
req->result = mangle_poll(req->result & req->poll.events);
req->cqe.res = mangle_poll(req->cqe.res & req->poll.events);
} else {
req->result = ret;
req->cqe.res = ret;
req_set_fail(req);
}
io_poll_remove_entries(req);
spin_lock(&ctx->completion_lock);
hash_del(&req->hash_node);
__io_req_complete_post(req, req->result, 0);
__io_req_complete_post(req, req->cqe.res, 0);
io_commit_cqring(ctx);
spin_unlock(&ctx->completion_lock);
io_cqring_ev_posted(ctx);
@ -5944,7 +5948,7 @@ static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
static void __io_poll_execute(struct io_kiocb *req, int mask, int events)
{
req->result = mask;
req->cqe.res = mask;
/*
* This is useful for poll that is armed on behalf of another
* request, and where the wakeup path could be on a different
@ -5957,7 +5961,7 @@ static void __io_poll_execute(struct io_kiocb *req, int mask, int events)
else
req->io_task_work.func = io_apoll_task_func;
trace_io_uring_task_add(req->ctx, req, req->user_data, req->opcode, mask);
trace_io_uring_task_add(req->ctx, req, req->cqe.user_data, req->opcode, mask);
io_req_task_work_add(req, false);
}
@ -6207,7 +6211,7 @@ static int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
if (ret || ipt.error)
return ret ? IO_APOLL_READY : IO_APOLL_ABORTED;
trace_io_uring_poll_arm(ctx, req, req->user_data, req->opcode,
trace_io_uring_poll_arm(ctx, req, req->cqe.user_data, req->opcode,
mask, apoll->poll.events);
return IO_APOLL_OK;
}
@ -6249,7 +6253,7 @@ static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, __u64 sqe_addr,
list = &ctx->cancel_hash[hash_long(sqe_addr, ctx->cancel_hash_bits)];
hlist_for_each_entry(req, list, hash_node) {
if (sqe_addr != req->user_data)
if (sqe_addr != req->cqe.user_data)
continue;
if (poll_only && req->opcode != IORING_OP_POLL_ADD)
continue;
@ -6386,7 +6390,7 @@ static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags)
preq->poll.events |= IO_POLL_UNMASK;
}
if (req->poll_update.update_user_data)
preq->user_data = req->poll_update.new_user_data;
preq->cqe.user_data = req->poll_update.new_user_data;
ret2 = io_poll_add(preq, issue_flags);
/* successfully updated, don't complete poll request */
@ -6395,7 +6399,7 @@ static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags)
}
req_set_fail(preq);
preq->result = -ECANCELED;
preq->cqe.res = -ECANCELED;
locked = !(issue_flags & IO_URING_F_UNLOCKED);
io_req_task_complete(preq, &locked);
out:
@ -6423,7 +6427,7 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
if (!(data->flags & IORING_TIMEOUT_ETIME_SUCCESS))
req_set_fail(req);
req->result = -ETIME;
req->cqe.res = -ETIME;
req->io_task_work.func = io_req_task_complete;
io_req_task_work_add(req, false);
return HRTIMER_NORESTART;
@ -6438,7 +6442,7 @@ static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
bool found = false;
list_for_each_entry(req, &ctx->timeout_list, timeout.list) {
found = user_data == req->user_data;
found = user_data == req->cqe.user_data;
if (found)
break;
}
@ -6489,7 +6493,7 @@ static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
bool found = false;
list_for_each_entry(req, &ctx->ltimeout_list, timeout.list) {
found = user_data == req->user_data;
found = user_data == req->cqe.user_data;
if (found)
break;
}
@ -6715,7 +6719,7 @@ static bool io_cancel_cb(struct io_wq_work *work, void *data)
struct io_kiocb *req = container_of(work, struct io_kiocb, work);
struct io_cancel_data *cd = data;
return req->ctx == cd->ctx && req->user_data == cd->user_data;
return req->ctx == cd->ctx && req->cqe.user_data == cd->user_data;
}
static int io_async_cancel_one(struct io_uring_task *tctx, u64 user_data,
@ -7016,7 +7020,7 @@ fail:
goto queue;
}
trace_io_uring_defer(ctx, req, req->user_data, req->opcode);
trace_io_uring_defer(ctx, req, req->cqe.user_data, req->opcode);
de->req = req;
de->seq = seq;
list_add_tail(&de->list, &ctx->defer_list);
@ -7100,14 +7104,14 @@ static bool io_assign_file(struct io_kiocb *req, unsigned int issue_flags)
return true;
if (req->flags & REQ_F_FIXED_FILE)
req->file = io_file_get_fixed(req, req->fd, issue_flags);
req->file = io_file_get_fixed(req, req->cqe.fd, issue_flags);
else
req->file = io_file_get_normal(req, req->fd);
req->file = io_file_get_normal(req, req->cqe.fd);
if (req->file)
return true;
req_set_fail(req);
req->result = -EBADF;
req->cqe.res = -EBADF;
return false;
}
@ -7396,7 +7400,7 @@ static struct file *io_file_get_normal(struct io_kiocb *req, int fd)
{
struct file *file = fget(fd);
trace_io_uring_file_get(req->ctx, req, req->user_data, fd);
trace_io_uring_file_get(req->ctx, req, req->cqe.user_data, fd);
/* we don't allow fixed io_uring files */
if (file && file->f_op == &io_uring_fops)
@ -7411,7 +7415,7 @@ static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked)
if (prev) {
if (!(req->task->flags & PF_EXITING))
ret = io_try_cancel_userdata(req, prev->user_data);
ret = io_try_cancel_userdata(req, prev->cqe.user_data);
io_req_complete_post(req, ret ?: -ETIME, 0);
io_put_req(prev);
} else {
@ -7602,7 +7606,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
req->opcode = opcode = READ_ONCE(sqe->opcode);
/* same numerical values with corresponding REQ_F_*, safe to copy */
req->flags = sqe_flags = READ_ONCE(sqe->flags);
req->user_data = READ_ONCE(sqe->user_data);
req->cqe.user_data = READ_ONCE(sqe->user_data);
req->file = NULL;
req->fixed_rsrc_refs = NULL;
req->task = current;
@ -7643,7 +7647,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
if (io_op_defs[opcode].needs_file) {
struct io_submit_state *state = &ctx->submit_state;
req->fd = READ_ONCE(sqe->fd);
req->cqe.fd = READ_ONCE(sqe->fd);
/*
* Plug now if we have more than 2 IO left after this, and the
@ -7692,7 +7696,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
* we can judge a link req is failed or cancelled by if
* REQ_F_FAIL is set, but the head is an exception since
* it may be set REQ_F_FAIL because of other req's failure
* so let's leverage req->result to distinguish if a head
* so let's leverage req->cqe.res to distinguish if a head
* is set REQ_F_FAIL because of its failure or other req's
* failure so that we can set the correct ret code for it.
* init result here to avoid affecting the normal path.
@ -7711,7 +7715,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
}
/* don't need @sqe from now on */
trace_io_uring_submit_sqe(ctx, req, req->user_data, req->opcode,
trace_io_uring_submit_sqe(ctx, req, req->cqe.user_data, req->opcode,
req->flags, true,
ctx->flags & IORING_SETUP_SQPOLL);