If we are ever going to support multiple types of resources, we need
shared rsrc nodes to avoid bloating requests; that is what this patch
implements. It also gives a nicer API and saves one pointer dereference
in io_req_set_rsrc_node().

We may say that all requests bound to a resource belong to one and only
one rsrc node, and considering that nodes are removed and recycled
strictly in order, this separates requests into generations, where the
generation changes on each node switch (i.e. io_rsrc_node_switch()).
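
To illustrate the generation idea (this is not from the patch): a toy
userspace model. All names and types here, the ctx, the node, the plain
integer refcount, are simplified stand-ins for the kernel structures, and
rsrc_node_switch()/req_set_rsrc_node() are hypothetical reductions of the
real helpers.

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

struct io_rsrc_node {
	int refs;			/* stand-in for struct percpu_ref */
	unsigned generation;
};

struct io_ring_ctx {
	struct io_rsrc_node *rsrc_node;	/* current generation */
	unsigned next_generation;
};

/* A request pins whichever node is current at submission time. */
static struct io_rsrc_node *req_set_rsrc_node(struct io_ring_ctx *ctx)
{
	ctx->rsrc_node->refs++;
	return ctx->rsrc_node;
}

/* Every switch installs a fresh node, i.e. starts a new generation. */
static void rsrc_node_switch(struct io_ring_ctx *ctx)
{
	struct io_rsrc_node *node = calloc(1, sizeof(*node));

	node->generation = ctx->next_generation++;
	ctx->rsrc_node = node;
}

int main(void)
{
	struct io_ring_ctx ctx = { .next_generation = 0 };
	struct io_rsrc_node *a, *b;

	rsrc_node_switch(&ctx);		/* generation 0 becomes current */
	a = req_set_rsrc_node(&ctx);	/* request A pins generation 0 */
	rsrc_node_switch(&ctx);		/* generation 1 becomes current */
	b = req_set_rsrc_node(&ctx);	/* request B pins generation 1 */

	assert(a->generation < b->generation);
	printf("A: gen %u, B: gen %u\n", a->generation, b->generation);
	return 0;
}

A request never changes its node after submission; only the ctx's notion
of "current" moves forward, which is what lets nodes retire strictly in
order.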

The API is simple: io_rsrc_node_switch() switches to a new generation if
needed, and also optionally kills a passed-in io_rsrc_data. Each call to
io_rsrc_node_switch() has to be preceded by
io_rsrc_node_switch_start(). The start function is idempotent and does
not have to be followed by a switch.
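
A minimal sketch of that call pattern, using the same stand-in types as
above; register_resources() is a hypothetical caller, and the teardown the
kernel does through rsrc_ref_list and percpu refs is reduced to a free().

#include <errno.h>
#include <stdlib.h>

struct io_rsrc_node { int refs; };
struct io_rsrc_data { int refs; };

struct io_ring_ctx {
	struct io_rsrc_node *rsrc_node;
	struct io_rsrc_node *rsrc_backup_node;
};

/* Idempotent: safe to call several times before a single switch. */
static int io_rsrc_node_switch_start(struct io_ring_ctx *ctx)
{
	if (ctx->rsrc_backup_node)
		return 0;
	ctx->rsrc_backup_node = calloc(1, sizeof(struct io_rsrc_node));
	return ctx->rsrc_backup_node ? 0 : -ENOMEM;
}

static void io_rsrc_node_switch(struct io_ring_ctx *ctx,
				struct io_rsrc_data *data_to_kill)
{
	if (data_to_kill) {
		/* the kernel queues ctx->rsrc_node on rsrc_ref_list and
		 * kills its percpu ref here; the model just drops it */
		free(ctx->rsrc_node);
		ctx->rsrc_node = NULL;
	}
	if (!ctx->rsrc_node) {
		/* consume the preallocated node; ctx keeps a valid node */
		ctx->rsrc_node = ctx->rsrc_backup_node;
		ctx->rsrc_backup_node = NULL;
	}
}

/* Typical registration path: fail early, switch late (switch can't fail). */
static int register_resources(struct io_ring_ctx *ctx)
{
	int ret = io_rsrc_node_switch_start(ctx);

	if (ret)
		return ret;
	/* ... set up tables; any failure may still bail out here, the
	 * preallocated backup node is simply reused on the next start ... */
	io_rsrc_node_switch(ctx, NULL);
	return 0;
}

int main(void)
{
	struct io_ring_ctx ctx = { 0 };

	return register_resources(&ctx) ? 1 : 0;
}

Splitting the allocation (which can fail) from the switch (which cannot)
is what lets callers do all failure handling before committing to the new
generation.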

One difference is that once a node has been set, the ctx will always
retain a valid rsrc node, even after unregister. That may be a nuisance
at the moment, but it makes much sense for multiple types of resources.
Another change is that a node is bound to/associated with an
io_rsrc_data only later, just before killing (i.e. switching).
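
As a sketch of that late binding (again hypothetical stand-ins, not the
kernel code): queued removals accumulate on the current node, and the node
learns which io_rsrc_data it belongs to only when it is retired.
io_rsrc_node_bind_and_kill() is an invented name for that retire step.

#include <errno.h>
#include <stdlib.h>

struct io_rsrc_data;			/* forward declaration only */

struct rsrc_put {			/* one queued removal */
	void *rsrc;
	struct rsrc_put *next;
};

struct io_rsrc_node {
	struct io_rsrc_data *rsrc_data;	/* NULL until the node is killed */
	struct rsrc_put *removals;
};

/* Removals always go onto the current node, so they are dropped together
 * with that generation once its references reach zero. */
static int io_queue_rsrc_removal(struct io_rsrc_node *node, void *rsrc)
{
	struct rsrc_put *put = malloc(sizeof(*put));

	if (!put)
		return -ENOMEM;
	put->rsrc = rsrc;
	put->next = node->removals;
	node->removals = put;
	return 0;
}

/* Binding happens only here, just before the node is retired; until this
 * point the node is data-agnostic, which is what makes one node type work
 * for multiple types of resources. */
static void io_rsrc_node_bind_and_kill(struct io_rsrc_node *node,
				       struct io_rsrc_data *data)
{
	node->rsrc_data = data;
	/* the kernel would now kill node->refs and free every queued
	 * rsrc_put through data's put callback */
}

int main(void)
{
	struct io_rsrc_node node = { 0 };

	if (io_queue_rsrc_removal(&node, (void *)"some resource"))
		return 1;
	io_rsrc_node_bind_and_kill(&node, NULL);
	return 0;
}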

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/7e9c693b4b9a2f47aa784b616ce29843021bb65a.1617287883.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Pavel Begunkov, 2021-04-01 15:43:46 +01:00; committed by Jens Axboe
Parent: e7c78371bb
Commit: a7f0ed5acd
1 changed file: 38 additions and 37 deletions


@@ -235,7 +235,6 @@ struct io_rsrc_data {
 	struct io_ring_ctx *ctx;
 
 	rsrc_put_fn *do_put;
-	struct io_rsrc_node *node;
 	struct percpu_ref refs;
 	struct completion done;
 	bool quiesce;
@@ -448,6 +447,7 @@ struct io_ring_ctx {
 	struct llist_head rsrc_put_llist;
 	struct list_head rsrc_ref_list;
 	spinlock_t rsrc_ref_lock;
+	struct io_rsrc_node *rsrc_node;
 	struct io_rsrc_node *rsrc_backup_node;
 
 	struct io_restriction restrictions;
@@ -1080,7 +1080,7 @@ static inline void io_req_set_rsrc_node(struct io_kiocb *req)
 	struct io_ring_ctx *ctx = req->ctx;
 
 	if (!req->fixed_rsrc_refs) {
-		req->fixed_rsrc_refs = &ctx->file_data->node->refs;
+		req->fixed_rsrc_refs = &ctx->rsrc_node->refs;
 		percpu_ref_get(req->fixed_rsrc_refs);
 	}
 }
@@ -7093,36 +7093,32 @@ static inline void io_rsrc_ref_unlock(struct io_ring_ctx *ctx)
 	spin_unlock_bh(&ctx->rsrc_ref_lock);
 }
 
-static void io_rsrc_node_set(struct io_ring_ctx *ctx,
-			     struct io_rsrc_data *rsrc_data)
+static void io_rsrc_node_switch(struct io_ring_ctx *ctx,
+				struct io_rsrc_data *data_to_kill)
 {
-	struct io_rsrc_node *rsrc_node = ctx->rsrc_backup_node;
+	WARN_ON_ONCE(!ctx->rsrc_backup_node);
+	WARN_ON_ONCE(data_to_kill && !ctx->rsrc_node);
 
-	WARN_ON_ONCE(!rsrc_node);
+	if (data_to_kill) {
+		struct io_rsrc_node *rsrc_node = ctx->rsrc_node;
 
-	ctx->rsrc_backup_node = NULL;
-	rsrc_node->rsrc_data = rsrc_data;
+		rsrc_node->rsrc_data = data_to_kill;
+		io_rsrc_ref_lock(ctx);
+		list_add_tail(&rsrc_node->node, &ctx->rsrc_ref_list);
+		io_rsrc_ref_unlock(ctx);
 
-	io_rsrc_ref_lock(ctx);
-	rsrc_data->node = rsrc_node;
-	list_add_tail(&rsrc_node->node, &ctx->rsrc_ref_list);
-	io_rsrc_ref_unlock(ctx);
-	percpu_ref_get(&rsrc_data->refs);
-}
+		percpu_ref_get(&data_to_kill->refs);
+		percpu_ref_kill(&rsrc_node->refs);
+		ctx->rsrc_node = NULL;
+	}
 
-static void io_rsrc_node_kill(struct io_ring_ctx *ctx, struct io_rsrc_data *data)
-{
-	struct io_rsrc_node *ref_node = NULL;
-
-	io_rsrc_ref_lock(ctx);
-	ref_node = data->node;
-	data->node = NULL;
-	io_rsrc_ref_unlock(ctx);
-	if (ref_node)
-		percpu_ref_kill(&ref_node->refs);
+	if (!ctx->rsrc_node) {
+		ctx->rsrc_node = ctx->rsrc_backup_node;
+		ctx->rsrc_backup_node = NULL;
+	}
 }
 
-static int io_rsrc_node_prealloc(struct io_ring_ctx *ctx)
+static int io_rsrc_node_switch_start(struct io_ring_ctx *ctx)
 {
 	if (ctx->rsrc_backup_node)
 		return 0;
@@ -7139,10 +7135,11 @@ static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ctx)
 	data->quiesce = true;
 
 	do {
-		ret = io_rsrc_node_prealloc(ctx);
+		ret = io_rsrc_node_switch_start(ctx);
 		if (ret)
 			break;
-		io_rsrc_node_kill(ctx, data);
+		io_rsrc_node_switch(ctx, data);
+
 		percpu_ref_kill(&data->refs);
 		flush_delayed_work(&ctx->rsrc_put_work);
 
@@ -7151,7 +7148,6 @@ static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ctx)
 			break;
 
 		percpu_ref_resurrect(&data->refs);
-		io_rsrc_node_set(ctx, data);
 		reinit_completion(&data->done);
 
 		mutex_unlock(&ctx->uring_lock);
@@ -7632,7 +7628,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 		return -EINVAL;
 	if (nr_args > IORING_MAX_FIXED_FILES)
 		return -EMFILE;
-	ret = io_rsrc_node_prealloc(ctx);
+	ret = io_rsrc_node_switch_start(ctx);
 	if (ret)
 		return ret;
 
@@ -7694,7 +7690,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 		return ret;
 	}
 
-	io_rsrc_node_set(ctx, file_data);
+	io_rsrc_node_switch(ctx, NULL);
 	return ret;
 out_fput:
 	for (i = 0; i < ctx->nr_user_files; i++) {
@@ -7783,7 +7779,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 		return -EOVERFLOW;
 	if (done > ctx->nr_user_files)
 		return -EINVAL;
-	err = io_rsrc_node_prealloc(ctx);
+	err = io_rsrc_node_switch_start(ctx);
 	if (err)
 		return err;
 
@@ -7802,7 +7798,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 		if (*file_slot) {
 			file = (struct file *) ((unsigned long) *file_slot & FFS_MASK);
-			err = io_queue_rsrc_removal(data, data->node, file);
+			err = io_queue_rsrc_removal(data, ctx->rsrc_node, file);
 			if (err)
 				break;
 			*file_slot = NULL;
@@ -7837,10 +7833,8 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 		}
 	}
 
-	if (needs_switch) {
-		percpu_ref_kill(&data->node->refs);
-		io_rsrc_node_set(ctx, data);
-	}
+	if (needs_switch)
+		io_rsrc_node_switch(ctx, data);
 
 	return done ? done : err;
 }
@@ -8514,8 +8508,15 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	io_eventfd_unregister(ctx);
 	io_destroy_buffers(ctx);
 
+	/* there are no registered resources left, nobody uses it */
+	if (ctx->rsrc_node)
+		io_rsrc_node_destroy(ctx->rsrc_node);
 	if (ctx->rsrc_backup_node)
 		io_rsrc_node_destroy(ctx->rsrc_backup_node);
+	flush_delayed_work(&ctx->rsrc_put_work);
+	WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list));
+	WARN_ON_ONCE(!llist_empty(&ctx->rsrc_put_llist));
+
 #if defined(CONFIG_UNIX)
 	if (ctx->ring_sock) {