nvmet-loop: Avoid preallocating big SGL for data
nvme_loop_create_io_queues() preallocates a big buffer for the IO SGL based
on SG_CHUNK_SIZE.

Modern DMA engines are often capable of dealing with very big segments so
the SG_CHUNK_SIZE is often too big. SG_CHUNK_SIZE results in a static 4KB
SGL allocation per command.

If a controller has lots of deep queues, preallocation for the sg list can
consume substantial amounts of memory. For nvmet-loop, nr_hw_queues can be
128 and each queue's depth 128. This means the resulting preallocation for
the data SGL is 128*128*4K = 64MB per controller.

Switch to runtime allocation for SGL for lists longer than 2 entries. This
is the approach used by NVMe PCI so it should be reasonable for NVMeOF as
well. Runtime SGL allocation has always been the case for the legacy I/O
path so this is nothing new.

Tested-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Israel Rukshin <israelr@mellanox.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
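For readers who have not used the chained-SGL helpers, here is a minimal sketch of the inline-vs-runtime allocation pattern the driver switches to. It assumes NVME_INLINE_SG_CNT is 2 (the "2 entries" above); struct example_iod, example_map_data() and example_unmap_data() are hypothetical names for illustration, not the driver's actual code:

#include <linux/scatterlist.h>

#define NVME_INLINE_SG_CNT	2	/* assumed value, per "2 entries" above */

struct example_iod {				/* hypothetical per-command PDU */
	struct sg_table		sg_table;
	struct scatterlist	first_sgl[];	/* inline entries reserved via cmd_size */
};

/* Map a request's segments, using the inline entries when they suffice. */
static int example_map_data(struct example_iod *iod, unsigned int nsegs)
{
	iod->sg_table.sgl = iod->first_sgl;
	/*
	 * Up to NVME_INLINE_SG_CNT segments fit in the inline array; longer
	 * lists make sg_alloc_table_chained() allocate and chain additional
	 * scatterlist chunks at runtime.
	 */
	return sg_alloc_table_chained(&iod->sg_table, nsegs,
			iod->sg_table.sgl, NVME_INLINE_SG_CNT);
}

/* Undo example_map_data(): only the runtime-allocated chunks are freed. */
static void example_unmap_data(struct example_iod *iod)
{
	sg_free_table_chained(&iod->sg_table, NVME_INLINE_SG_CNT);
}

The tag set's cmd_size then only has to reserve sizeof(struct example_iod) + NVME_INLINE_SG_CNT * sizeof(struct scatterlist) per command, which is exactly what the hunks below do for struct nvme_loop_iod.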
Parent: b1ae1a2389
Commit: 52e6d8ed16
@@ -76,7 +76,7 @@ static void nvme_loop_complete_rq(struct request *req)
 {
 	struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
 
-	sg_free_table_chained(&iod->sg_table, SG_CHUNK_SIZE);
+	sg_free_table_chained(&iod->sg_table, NVME_INLINE_SG_CNT);
 	nvme_complete_rq(req);
 }
 
@@ -156,7 +156,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 		iod->sg_table.sgl = iod->first_sgl;
 		if (sg_alloc_table_chained(&iod->sg_table,
 				blk_rq_nr_phys_segments(req),
-				iod->sg_table.sgl, SG_CHUNK_SIZE)) {
+				iod->sg_table.sgl, NVME_INLINE_SG_CNT)) {
 			nvme_cleanup_cmd(req);
 			return BLK_STS_RESOURCE;
 		}
@@ -342,7 +342,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
 	ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */
 	ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
 	ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
-		SG_CHUNK_SIZE * sizeof(struct scatterlist);
+		NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
 	ctrl->admin_tag_set.driver_data = ctrl;
 	ctrl->admin_tag_set.nr_hw_queues = 1;
 	ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;
@@ -516,7 +516,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
 	ctrl->tag_set.numa_node = NUMA_NO_NODE;
 	ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
 	ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
-		SG_CHUNK_SIZE * sizeof(struct scatterlist);
+		NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
 	ctrl->tag_set.driver_data = ctrl;
 	ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
 	ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
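Rough numbers for the cmd_size change above, using the figures from the commit message and assuming the 32-byte scatterlist entry implied by the 128-entry / 4KB figure:

	before: 128 queues * 128 tags * 128 entries * 32 B = 64 MB of preallocated SGL per controller
	after:  128 queues * 128 tags *   2 entries * 32 B =  1 MB of preallocated SGL per controller

Requests with more than two segments now allocate the rest of their scatterlist at runtime, only for as long as the request is outstanding.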