block/rq_qos: protect rq_qos apis with a new lock
commit50e34d7881
("block: disable the elevator int del_gendisk") move rq_qos_exit() from disk_release() to del_gendisk(), this will introduce some problems: 1) If rq_qos_add() is triggered by enabling iocost/iolatency through cgroupfs, then it can concurrent with del_gendisk(), it's not safe to write 'q->rq_qos' concurrently. 2) Activate cgroup policy that is relied on rq_qos will call rq_qos_add() and blkcg_activate_policy(), and if rq_qos_exit() is called in the middle, null-ptr-dereference will be triggered in blkcg_activate_policy(). 3) blkg_conf_open_bdev() can call blkdev_get_no_open() first to find the disk, then if rq_qos_exit() from del_gendisk() is done before rq_qos_add(), then memory will be leaked. This patch add a new disk level mutex 'rq_qos_mutex': 1) The lock will protect rq_qos_exit() directly. 2) For wbt that doesn't relied on blk-cgroup, rq_qos_add() can only be called from disk initialization for now because wbt can't be destructed until rq_qos_exit(), so it's safe not to protect wbt for now. Hoever, in case that rq_qos dynamically destruction is supported in the furture, this patch also protect rq_qos_add() from wbt_init() directly, this is enough because blk-sysfs already synchronize writers with disk removal. 3) For iocost and iolatency, in order to synchronize disk removal and cgroup configuration, the lock is held after blkdev_get_no_open() from blkg_conf_open_bdev(), and is released in blkg_conf_exit(). In order to fix the above memory leak, disk_live() is checked after holding the new lock. Fixes:50e34d7881
("block: disable the elevator int del_gendisk") Signed-off-by: Yu Kuai <yukuai3@huawei.com> Acked-by: Tejun Heo <tj@kernel.org> Link: https://lore.kernel.org/r/20230414084008.2085155-1-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Родитель
712fd23a90
Коммит
a13bd91be2
|
@ -748,6 +748,13 @@ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx)
|
||||||
return -ENODEV;
|
return -ENODEV;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
mutex_lock(&bdev->bd_queue->rq_qos_mutex);
|
||||||
|
if (!disk_live(bdev->bd_disk)) {
|
||||||
|
blkdev_put_no_open(bdev);
|
||||||
|
mutex_unlock(&bdev->bd_queue->rq_qos_mutex);
|
||||||
|
return -ENODEV;
|
||||||
|
}
|
||||||
|
|
||||||
ctx->body = input;
|
ctx->body = input;
|
||||||
ctx->bdev = bdev;
|
ctx->bdev = bdev;
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -892,6 +899,7 @@ EXPORT_SYMBOL_GPL(blkg_conf_prep);
|
||||||
*/
|
*/
|
||||||
void blkg_conf_exit(struct blkg_conf_ctx *ctx)
|
void blkg_conf_exit(struct blkg_conf_ctx *ctx)
|
||||||
__releases(&ctx->bdev->bd_queue->queue_lock)
|
__releases(&ctx->bdev->bd_queue->queue_lock)
|
||||||
|
__releases(&ctx->bdev->bd_queue->rq_qos_mutex)
|
||||||
{
|
{
|
||||||
if (ctx->blkg) {
|
if (ctx->blkg) {
|
||||||
spin_unlock_irq(&bdev_get_queue(ctx->bdev)->queue_lock);
|
spin_unlock_irq(&bdev_get_queue(ctx->bdev)->queue_lock);
|
||||||
|
@ -899,6 +907,7 @@ void blkg_conf_exit(struct blkg_conf_ctx *ctx)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ctx->bdev) {
|
if (ctx->bdev) {
|
||||||
|
mutex_unlock(&ctx->bdev->bd_queue->rq_qos_mutex);
|
||||||
blkdev_put_no_open(ctx->bdev);
|
blkdev_put_no_open(ctx->bdev);
|
||||||
ctx->body = NULL;
|
ctx->body = NULL;
|
||||||
ctx->bdev = NULL;
|
ctx->bdev = NULL;
|
||||||
|
|
|
@ -420,6 +420,7 @@ struct request_queue *blk_alloc_queue(int node_id)
|
||||||
mutex_init(&q->debugfs_mutex);
|
mutex_init(&q->debugfs_mutex);
|
||||||
mutex_init(&q->sysfs_lock);
|
mutex_init(&q->sysfs_lock);
|
||||||
mutex_init(&q->sysfs_dir_lock);
|
mutex_init(&q->sysfs_dir_lock);
|
||||||
|
mutex_init(&q->rq_qos_mutex);
|
||||||
spin_lock_init(&q->queue_lock);
|
spin_lock_init(&q->queue_lock);
|
||||||
|
|
||||||
init_waitqueue_head(&q->mq_freeze_wq);
|
init_waitqueue_head(&q->mq_freeze_wq);
|
||||||
|
|
|
@ -288,11 +288,13 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
|
||||||
|
|
||||||
void rq_qos_exit(struct request_queue *q)
|
void rq_qos_exit(struct request_queue *q)
|
||||||
{
|
{
|
||||||
|
mutex_lock(&q->rq_qos_mutex);
|
||||||
while (q->rq_qos) {
|
while (q->rq_qos) {
|
||||||
struct rq_qos *rqos = q->rq_qos;
|
struct rq_qos *rqos = q->rq_qos;
|
||||||
q->rq_qos = rqos->next;
|
q->rq_qos = rqos->next;
|
||||||
rqos->ops->exit(rqos);
|
rqos->ops->exit(rqos);
|
||||||
}
|
}
|
||||||
|
mutex_unlock(&q->rq_qos_mutex);
|
||||||
}
|
}
|
||||||
|
|
||||||
int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
|
int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
|
||||||
|
@ -300,6 +302,8 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
|
||||||
{
|
{
|
||||||
struct request_queue *q = disk->queue;
|
struct request_queue *q = disk->queue;
|
||||||
|
|
||||||
|
lockdep_assert_held(&q->rq_qos_mutex);
|
||||||
|
|
||||||
rqos->disk = disk;
|
rqos->disk = disk;
|
||||||
rqos->id = id;
|
rqos->id = id;
|
||||||
rqos->ops = ops;
|
rqos->ops = ops;
|
||||||
|
@ -307,18 +311,13 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
|
||||||
/*
|
/*
|
||||||
* No IO can be in-flight when adding rqos, so freeze queue, which
|
* No IO can be in-flight when adding rqos, so freeze queue, which
|
||||||
* is fine since we only support rq_qos for blk-mq queue.
|
* is fine since we only support rq_qos for blk-mq queue.
|
||||||
*
|
|
||||||
* Reuse ->queue_lock for protecting against other concurrent
|
|
||||||
* rq_qos adding/deleting
|
|
||||||
*/
|
*/
|
||||||
blk_mq_freeze_queue(q);
|
blk_mq_freeze_queue(q);
|
||||||
|
|
||||||
spin_lock_irq(&q->queue_lock);
|
|
||||||
if (rq_qos_id(q, rqos->id))
|
if (rq_qos_id(q, rqos->id))
|
||||||
goto ebusy;
|
goto ebusy;
|
||||||
rqos->next = q->rq_qos;
|
rqos->next = q->rq_qos;
|
||||||
q->rq_qos = rqos;
|
q->rq_qos = rqos;
|
||||||
spin_unlock_irq(&q->queue_lock);
|
|
||||||
|
|
||||||
blk_mq_unfreeze_queue(q);
|
blk_mq_unfreeze_queue(q);
|
||||||
|
|
||||||
|
@ -330,7 +329,6 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
ebusy:
|
ebusy:
|
||||||
spin_unlock_irq(&q->queue_lock);
|
|
||||||
blk_mq_unfreeze_queue(q);
|
blk_mq_unfreeze_queue(q);
|
||||||
return -EBUSY;
|
return -EBUSY;
|
||||||
}
|
}
|
||||||
|
@ -340,21 +338,15 @@ void rq_qos_del(struct rq_qos *rqos)
|
||||||
struct request_queue *q = rqos->disk->queue;
|
struct request_queue *q = rqos->disk->queue;
|
||||||
struct rq_qos **cur;
|
struct rq_qos **cur;
|
||||||
|
|
||||||
/*
|
lockdep_assert_held(&q->rq_qos_mutex);
|
||||||
* See comment in rq_qos_add() about freezing queue & using
|
|
||||||
* ->queue_lock.
|
|
||||||
*/
|
|
||||||
blk_mq_freeze_queue(q);
|
|
||||||
|
|
||||||
spin_lock_irq(&q->queue_lock);
|
blk_mq_freeze_queue(q);
|
||||||
for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
|
for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
|
||||||
if (*cur == rqos) {
|
if (*cur == rqos) {
|
||||||
*cur = rqos->next;
|
*cur = rqos->next;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
spin_unlock_irq(&q->queue_lock);
|
|
||||||
|
|
||||||
blk_mq_unfreeze_queue(q);
|
blk_mq_unfreeze_queue(q);
|
||||||
|
|
||||||
mutex_lock(&q->debugfs_mutex);
|
mutex_lock(&q->debugfs_mutex);
|
||||||
|
|
|
@ -942,7 +942,9 @@ int wbt_init(struct gendisk *disk)
|
||||||
/*
|
/*
|
||||||
* Assign rwb and add the stats callback.
|
* Assign rwb and add the stats callback.
|
||||||
*/
|
*/
|
||||||
|
mutex_lock(&q->rq_qos_mutex);
|
||||||
ret = rq_qos_add(&rwb->rqos, disk, RQ_QOS_WBT, &wbt_rqos_ops);
|
ret = rq_qos_add(&rwb->rqos, disk, RQ_QOS_WBT, &wbt_rqos_ops);
|
||||||
|
mutex_unlock(&q->rq_qos_mutex);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto err_free;
|
goto err_free;
|
||||||
|
|
||||||
|
|
|
@ -392,6 +392,7 @@ struct request_queue {
|
||||||
|
|
||||||
struct blk_queue_stats *stats;
|
struct blk_queue_stats *stats;
|
||||||
struct rq_qos *rq_qos;
|
struct rq_qos *rq_qos;
|
||||||
|
struct mutex rq_qos_mutex;
|
||||||
|
|
||||||
const struct blk_mq_ops *mq_ops;
|
const struct blk_mq_ops *mq_ops;
|
||||||
|
|
||||||
|
|
Загрузка…
Ссылка в новой задаче