io_uring: do ctx initiated file note removal

Another preparation patch. When full quiesce is done on ctx exit, use
task_work infra to remove corresponding to the ctx io_uring->xa entries.
For that we use the back tctx map. Also use ->in_idle to prevent
removing it while we traversing ->xa on cancellation, just ignore it.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Pavel Begunkov 2021-03-06 11:02:13 +00:00 коммит произвёл Jens Axboe
Родитель 13bf43f5f4
Коммит d56d938b4b
1 изменённых файлов: 46 добавлений и 2 удалений

Просмотреть файл

@ -987,6 +987,7 @@ static const struct io_op_def io_op_defs[] = {
[IORING_OP_UNLINKAT] = {},
};
static void io_uring_del_task_file(unsigned long index);
static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
struct task_struct *task,
struct files_struct *files);
@ -8536,10 +8537,33 @@ static bool io_run_ctx_fallback(struct io_ring_ctx *ctx)
return executed;
}
struct io_tctx_exit {
struct callback_head task_work;
struct completion completion;
unsigned long index;
};
static void io_tctx_exit_cb(struct callback_head *cb)
{
struct io_uring_task *tctx = current->io_uring;
struct io_tctx_exit *work;
work = container_of(cb, struct io_tctx_exit, task_work);
/*
* When @in_idle, we're in cancellation and it's racy to remove the
* node. It'll be removed by the end of cancellation, just ignore it.
*/
if (!atomic_read(&tctx->in_idle))
io_uring_del_task_file(work->index);
complete(&work->completion);
}
static void io_ring_exit_work(struct work_struct *work)
{
struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx,
exit_work);
struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, exit_work);
struct io_tctx_exit exit;
struct io_tctx_node *node;
int ret;
/*
* If we're doing polled IO and end up having requests being
@ -8550,6 +8574,26 @@ static void io_ring_exit_work(struct work_struct *work)
do {
io_uring_try_cancel_requests(ctx, NULL, NULL);
} while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20));
mutex_lock(&ctx->uring_lock);
while (!list_empty(&ctx->tctx_list)) {
node = list_first_entry(&ctx->tctx_list, struct io_tctx_node,
ctx_node);
exit.index = (unsigned long)node->file;
init_completion(&exit.completion);
init_task_work(&exit.task_work, io_tctx_exit_cb);
ret = task_work_add(node->task, &exit.task_work, TWA_SIGNAL);
if (WARN_ON_ONCE(ret))
continue;
wake_up_process(node->task);
mutex_unlock(&ctx->uring_lock);
wait_for_completion(&exit.completion);
cond_resched();
mutex_lock(&ctx->uring_lock);
}
mutex_unlock(&ctx->uring_lock);
io_ring_ctx_free(ctx);
}