There's currently no way to experiment with polled IO with null_blk,
which seems like an oversight. This patch adds support for polled IO.
We keep a list of issued IOs on submit, and then process that list
when mq_ops->poll() is invoked.

A new parameter is added, poll_queues. It defaults to 1 like the
submit queues, meaning we'll have 1 poll queue available.

Fixes-by: Bart Van Assche <bvanassche@acm.org>
Fixes-by: Pavel Begunkov <asml.silence@gmail.com>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/baca710d-0f2a-16e2-60bd-b105b854e0ae@kernel.dk
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Jens Axboe 2021-04-17 09:29:49 -06:00
Родитель 4f5022453a
Коммит 0a593fbbc2
2 изменённых файлов: 108 добавлений и 4 удалений

Просмотреть файл

@ -92,6 +92,10 @@ static int g_submit_queues = 1;
module_param_named(submit_queues, g_submit_queues, int, 0444); module_param_named(submit_queues, g_submit_queues, int, 0444);
MODULE_PARM_DESC(submit_queues, "Number of submission queues"); MODULE_PARM_DESC(submit_queues, "Number of submission queues");
static int g_poll_queues = 1;
module_param_named(poll_queues, g_poll_queues, int, 0444);
MODULE_PARM_DESC(poll_queues, "Number of IOPOLL submission queues");
static int g_home_node = NUMA_NO_NODE; static int g_home_node = NUMA_NO_NODE;
module_param_named(home_node, g_home_node, int, 0444); module_param_named(home_node, g_home_node, int, 0444);
MODULE_PARM_DESC(home_node, "Home node for the device"); MODULE_PARM_DESC(home_node, "Home node for the device");
@ -347,6 +351,7 @@ static int nullb_apply_submit_queues(struct nullb_device *dev,
NULLB_DEVICE_ATTR(size, ulong, NULL); NULLB_DEVICE_ATTR(size, ulong, NULL);
NULLB_DEVICE_ATTR(completion_nsec, ulong, NULL); NULLB_DEVICE_ATTR(completion_nsec, ulong, NULL);
NULLB_DEVICE_ATTR(submit_queues, uint, nullb_apply_submit_queues); NULLB_DEVICE_ATTR(submit_queues, uint, nullb_apply_submit_queues);
NULLB_DEVICE_ATTR(poll_queues, uint, nullb_apply_submit_queues);
NULLB_DEVICE_ATTR(home_node, uint, NULL); NULLB_DEVICE_ATTR(home_node, uint, NULL);
NULLB_DEVICE_ATTR(queue_mode, uint, NULL); NULLB_DEVICE_ATTR(queue_mode, uint, NULL);
NULLB_DEVICE_ATTR(blocksize, uint, NULL); NULLB_DEVICE_ATTR(blocksize, uint, NULL);
@ -466,6 +471,7 @@ static struct configfs_attribute *nullb_device_attrs[] = {
&nullb_device_attr_size, &nullb_device_attr_size,
&nullb_device_attr_completion_nsec, &nullb_device_attr_completion_nsec,
&nullb_device_attr_submit_queues, &nullb_device_attr_submit_queues,
&nullb_device_attr_poll_queues,
&nullb_device_attr_home_node, &nullb_device_attr_home_node,
&nullb_device_attr_queue_mode, &nullb_device_attr_queue_mode,
&nullb_device_attr_blocksize, &nullb_device_attr_blocksize,
@ -593,6 +599,7 @@ static struct nullb_device *null_alloc_dev(void)
dev->size = g_gb * 1024; dev->size = g_gb * 1024;
dev->completion_nsec = g_completion_nsec; dev->completion_nsec = g_completion_nsec;
dev->submit_queues = g_submit_queues; dev->submit_queues = g_submit_queues;
dev->poll_queues = g_poll_queues;
dev->home_node = g_home_node; dev->home_node = g_home_node;
dev->queue_mode = g_queue_mode; dev->queue_mode = g_queue_mode;
dev->blocksize = g_bs; dev->blocksize = g_bs;
@ -1454,12 +1461,80 @@ static bool should_requeue_request(struct request *rq)
return false; return false;
} }
/*
 * Fill in the per-type queue maps of a tag set.
 *
 * For every map index below set->nr_maps:
 *  - HCTX_TYPE_DEFAULT takes its queue count from the device's submit_queues,
 *  - HCTX_TYPE_POLL takes its queue count from the device's poll_queues,
 *  - HCTX_TYPE_READ is given zero queues and is otherwise left alone (no
 *    offset assigned, blk_mq_map_queues() not called for it).
 *
 * When the tag set carries no nullb in driver_data, the module-wide
 * g_submit_queues / g_poll_queues values are used instead. Maps are laid out
 * back to back: each mapped type starts at the running offset left by the
 * previous ones.
 *
 * Always returns 0.
 */
static int null_map_queues(struct blk_mq_tag_set *set)
{
	struct nullb *nullb = set->driver_data;
	int type, offset = 0;

	for (type = 0; type < set->nr_maps; type++) {
		struct blk_mq_queue_map *qmap = &set->map[type];

		if (type == HCTX_TYPE_READ) {
			/* No dedicated read queues: empty map, nothing to lay out. */
			qmap->nr_queues = 0;
			continue;
		}

		if (type == HCTX_TYPE_DEFAULT)
			qmap->nr_queues = nullb ? nullb->dev->submit_queues :
						  g_submit_queues;
		else if (type == HCTX_TYPE_POLL)
			qmap->nr_queues = nullb ? nullb->dev->poll_queues :
						  g_poll_queues;

		/* Place this map right after the queues already handed out. */
		qmap->queue_offset = offset;
		offset += qmap->nr_queues;
		blk_mq_map_queues(qmap);
	}

	return 0;
}
static int null_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
{
struct nullb_queue *nq = hctx->driver_data;
LIST_HEAD(list);
int nr = 0;
spin_lock(&nq->poll_lock);
list_splice_init(&nq->poll_list, &list);
spin_unlock(&nq->poll_lock);
while (!list_empty(&list)) {
struct nullb_cmd *cmd;
struct request *req;
req = list_first_entry(&list, struct request, queuelist);
list_del_init(&req->queuelist);
cmd = blk_mq_rq_to_pdu(req);
cmd->error = null_process_cmd(cmd, req_op(req), blk_rq_pos(req),
blk_rq_sectors(req));
end_cmd(cmd);
nr++;
}
return nr;
}
static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res) static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res)
{ {
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq); struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
pr_info("rq %p timed out\n", rq); pr_info("rq %p timed out\n", rq);
if (hctx->type == HCTX_TYPE_POLL) {
struct nullb_queue *nq = hctx->driver_data;
spin_lock(&nq->poll_lock);
list_del_init(&rq->queuelist);
spin_unlock(&nq->poll_lock);
}
/* /*
* If the device is marked as blocking (i.e. memory backed or zoned * If the device is marked as blocking (i.e. memory backed or zoned
* device), the submission path may be blocked waiting for resources * device), the submission path may be blocked waiting for resources
@ -1480,10 +1555,11 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
struct nullb_queue *nq = hctx->driver_data; struct nullb_queue *nq = hctx->driver_data;
sector_t nr_sectors = blk_rq_sectors(bd->rq); sector_t nr_sectors = blk_rq_sectors(bd->rq);
sector_t sector = blk_rq_pos(bd->rq); sector_t sector = blk_rq_pos(bd->rq);
const bool is_poll = hctx->type == HCTX_TYPE_POLL;
might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING); might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
if (nq->dev->irqmode == NULL_IRQ_TIMER) { if (!is_poll && nq->dev->irqmode == NULL_IRQ_TIMER) {
hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
cmd->timer.function = null_cmd_timer_expired; cmd->timer.function = null_cmd_timer_expired;
} }
@ -1507,6 +1583,13 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
return BLK_STS_OK; return BLK_STS_OK;
} }
} }
if (is_poll) {
spin_lock(&nq->poll_lock);
list_add_tail(&bd->rq->queuelist, &nq->poll_list);
spin_unlock(&nq->poll_lock);
return BLK_STS_OK;
}
if (cmd->fake_timeout) if (cmd->fake_timeout)
return BLK_STS_OK; return BLK_STS_OK;
@ -1542,6 +1625,8 @@ static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
init_waitqueue_head(&nq->wait); init_waitqueue_head(&nq->wait);
nq->queue_depth = nullb->queue_depth; nq->queue_depth = nullb->queue_depth;
nq->dev = nullb->dev; nq->dev = nullb->dev;
INIT_LIST_HEAD(&nq->poll_list);
spin_lock_init(&nq->poll_lock);
} }
static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data, static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
@ -1567,6 +1652,8 @@ static const struct blk_mq_ops null_mq_ops = {
.queue_rq = null_queue_rq, .queue_rq = null_queue_rq,
.complete = null_complete_rq, .complete = null_complete_rq,
.timeout = null_timeout_rq, .timeout = null_timeout_rq,
.poll = null_poll,
.map_queues = null_map_queues,
.init_hctx = null_init_hctx, .init_hctx = null_init_hctx,
.exit_hctx = null_exit_hctx, .exit_hctx = null_exit_hctx,
}; };
@ -1663,13 +1750,17 @@ static int setup_commands(struct nullb_queue *nq)
static int setup_queues(struct nullb *nullb) static int setup_queues(struct nullb *nullb)
{ {
nullb->queues = kcalloc(nr_cpu_ids, sizeof(struct nullb_queue), int nqueues = nr_cpu_ids;
if (g_poll_queues)
nqueues += g_poll_queues;
nullb->queues = kcalloc(nqueues, sizeof(struct nullb_queue),
GFP_KERNEL); GFP_KERNEL);
if (!nullb->queues) if (!nullb->queues)
return -ENOMEM; return -ENOMEM;
nullb->queue_depth = nullb->dev->hw_queue_depth; nullb->queue_depth = nullb->dev->hw_queue_depth;
return 0; return 0;
} }
@ -1721,9 +1812,14 @@ static int null_gendisk_register(struct nullb *nullb)
static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set) static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
{ {
int poll_queues;
set->ops = &null_mq_ops; set->ops = &null_mq_ops;
set->nr_hw_queues = nullb ? nullb->dev->submit_queues : set->nr_hw_queues = nullb ? nullb->dev->submit_queues :
g_submit_queues; g_submit_queues;
poll_queues = nullb ? nullb->dev->poll_queues : g_poll_queues;
if (poll_queues)
set->nr_hw_queues += poll_queues;
set->queue_depth = nullb ? nullb->dev->hw_queue_depth : set->queue_depth = nullb ? nullb->dev->hw_queue_depth :
g_hw_queue_depth; g_hw_queue_depth;
set->numa_node = nullb ? nullb->dev->home_node : g_home_node; set->numa_node = nullb ? nullb->dev->home_node : g_home_node;
@ -1733,7 +1829,11 @@ static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
set->flags |= BLK_MQ_F_NO_SCHED; set->flags |= BLK_MQ_F_NO_SCHED;
if (g_shared_tag_bitmap) if (g_shared_tag_bitmap)
set->flags |= BLK_MQ_F_TAG_HCTX_SHARED; set->flags |= BLK_MQ_F_TAG_HCTX_SHARED;
set->driver_data = NULL; set->driver_data = nullb;
if (g_poll_queues)
set->nr_maps = 3;
else
set->nr_maps = 1;
if ((nullb && nullb->dev->blocking) || g_blocking) if ((nullb && nullb->dev->blocking) || g_blocking)
set->flags |= BLK_MQ_F_BLOCKING; set->flags |= BLK_MQ_F_BLOCKING;

Просмотреть файл

@ -32,6 +32,9 @@ struct nullb_queue {
struct nullb_device *dev; struct nullb_device *dev;
unsigned int requeue_selection; unsigned int requeue_selection;
struct list_head poll_list;
spinlock_t poll_lock;
struct nullb_cmd *cmds; struct nullb_cmd *cmds;
}; };
@ -83,6 +86,7 @@ struct nullb_device {
unsigned int zone_max_open; /* max number of open zones */ unsigned int zone_max_open; /* max number of open zones */
unsigned int zone_max_active; /* max number of active zones */ unsigned int zone_max_active; /* max number of active zones */
unsigned int submit_queues; /* number of submission queues */ unsigned int submit_queues; /* number of submission queues */
unsigned int poll_queues; /* number of IOPOLL submission queues */
unsigned int home_node; /* home node for the device */ unsigned int home_node; /* home node for the device */
unsigned int queue_mode; /* block interface */ unsigned int queue_mode; /* block interface */
unsigned int blocksize; /* block size */ unsigned int blocksize; /* block size */