/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _LINUX_IO_URING_H
#define _LINUX_IO_URING_H

#include <linux/sched.h>
#include <linux/xarray.h>
#include <uapi/linux/io_uring.h>

enum io_uring_cmd_flags {
	IO_URING_F_COMPLETE_DEFER	= 1,
	IO_URING_F_UNLOCKED		= 2,
	/* the request is executed from poll, it should not be freed */
	IO_URING_F_MULTISHOT		= 4,
	/* executed by io-wq */
	IO_URING_F_IOWQ			= 8,
	/* int's last bit, sign checks are usually faster than a bit test */
	IO_URING_F_NONBLOCK		= INT_MIN,

	/* ctx state flags, for URING_CMD */
	IO_URING_F_SQE128		= (1 << 8),
	IO_URING_F_CQE32		= (1 << 9),
	IO_URING_F_IOPOLL		= (1 << 10),
};
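/*
 * Illustrative sketch (not part of this header): a driver's ->uring_cmd()
 * issue handler receives these values in its issue_flags argument and
 * usually reacts along these lines:
 *
 *	if (!(issue_flags & IO_URING_F_SQE128))
 *		return -EOPNOTSUPP;	// command expects the big SQE format
 *	if (issue_flags & IO_URING_F_NONBLOCK)
 *		return -EAGAIN;		// must not block; io_uring retries later
 */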

struct io_uring_cmd {
	struct file	*file;
	const struct io_uring_sqe *sqe;
	union {
		/* callback to defer completions to task context */
		void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned);
		/* used for polled completion */
		void *cookie;
	};
	u32		cmd_op;
	u32		flags;
	u8		pdu[32]; /* available inline for free use */
};
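/*
 * Illustrative sketch (struct my_cmd_pdu and my_cmd_pdu() are hypothetical,
 * not part of this header): drivers normally overlay their own per-command
 * state on top of pdu[], keeping it within the 32 available bytes:
 *
 *	struct my_cmd_pdu {
 *		struct request *req;
 *		int status;
 *	};
 *
 *	static inline struct my_cmd_pdu *my_cmd_pdu(struct io_uring_cmd *ioucmd)
 *	{
 *		BUILD_BUG_ON(sizeof(struct my_cmd_pdu) > sizeof(ioucmd->pdu));
 *		return (struct my_cmd_pdu *)&ioucmd->pdu;
 *	}
 */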
#if defined(CONFIG_IO_URING)
int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
			      struct iov_iter *iter, void *ioucmd);
void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2,
			unsigned issue_flags);
void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
			void (*task_work_cb)(struct io_uring_cmd *, unsigned));
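/*
 * Illustrative sketch (my_drv_uring_cmd(), my_drv_cmd_done_cb(), addr, len,
 * status and result are hypothetical driver-side names, not part of this
 * header): a typical ->uring_cmd() path combines these helpers as follows.
 *
 *	static int my_drv_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
 *	{
 *		struct iov_iter iter;
 *		int ret;
 *
 *		// pull the data buffer from a pre-registered (fixed) buffer
 *		ret = io_uring_cmd_import_fixed(addr, len, WRITE, &iter, ioucmd);
 *		if (ret < 0)
 *			return ret;
 *		// ... kick off async hardware I/O described by 'iter' ...
 *		return -EIOCBQUEUED;	// CQE will be posted later
 *	}
 *
 *	// on completion, typically in IRQ context: defer to task context
 *	io_uring_cmd_complete_in_task(ioucmd, my_drv_cmd_done_cb);
 *
 *	// the deferred callback posts the CQE
 *	static void my_drv_cmd_done_cb(struct io_uring_cmd *ioucmd, unsigned issue_flags)
 *	{
 *		io_uring_cmd_done(ioucmd, status, result, issue_flags);
 *	}
 */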
struct sock *io_uring_get_socket(struct file *file);
void __io_uring_cancel(bool cancel_all);
void __io_uring_free(struct task_struct *tsk);
void io_uring_unreg_ringfd(void);
const char *io_uring_get_opcode(u8 opcode);

static inline void io_uring_files_cancel(void)
{
	if (current->io_uring) {
		io_uring_unreg_ringfd();
		__io_uring_cancel(false);
	}
}
static inline void io_uring_task_cancel(void)
{
	if (current->io_uring)
		__io_uring_cancel(true);
}
static inline void io_uring_free(struct task_struct *tsk)
{
	if (tsk->io_uring)
		__io_uring_free(tsk);
}

static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
{
	return sqe->cmd;
}
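/*
 * Illustrative sketch (struct my_drv_cmd is hypothetical): a ->uring_cmd()
 * handler typically reads its command payload through this helper rather
 * than poking at sqe->cmd directly:
 *
 *	const struct my_drv_cmd *dc = io_uring_sqe_cmd(ioucmd->sqe);
 */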
#else
static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
			      struct iov_iter *iter, void *ioucmd)
{
	return -EOPNOTSUPP;
}
static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
		ssize_t ret2, unsigned issue_flags)
{
}
static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
			void (*task_work_cb)(struct io_uring_cmd *, unsigned))
{
}
static inline struct sock *io_uring_get_socket(struct file *file)
{
	return NULL;
}
static inline void io_uring_task_cancel(void)
{
}
static inline void io_uring_files_cancel(void)
{
}
static inline void io_uring_free(struct task_struct *tsk)
{
}
static inline const char *io_uring_get_opcode(u8 opcode)
{
	return "";
}
#endif

#endif