NVMe: Add a kthread to handle the congestion list
Instead of trying to resubmit I/Os in the I/O completion path (in interrupt context), wake up a kthread which will resubmit I/O from user context. This allows mke2fs to run to completion. Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
This commit is contained in:
Родитель
eeee322647
Коммит
1fa6aeadf1
|
@ -26,6 +26,7 @@
|
||||||
#include <linux/interrupt.h>
|
#include <linux/interrupt.h>
|
||||||
#include <linux/io.h>
|
#include <linux/io.h>
|
||||||
#include <linux/kdev_t.h>
|
#include <linux/kdev_t.h>
|
||||||
|
#include <linux/kthread.h>
|
||||||
#include <linux/kernel.h>
|
#include <linux/kernel.h>
|
||||||
#include <linux/mm.h>
|
#include <linux/mm.h>
|
||||||
#include <linux/module.h>
|
#include <linux/module.h>
|
||||||
|
@ -50,10 +51,15 @@ module_param(nvme_major, int, 0);
|
||||||
static int use_threaded_interrupts;
|
static int use_threaded_interrupts;
|
||||||
module_param(use_threaded_interrupts, int, 0);
|
module_param(use_threaded_interrupts, int, 0);
|
||||||
|
|
||||||
|
static DEFINE_SPINLOCK(dev_list_lock);
|
||||||
|
static LIST_HEAD(dev_list);
|
||||||
|
static struct task_struct *nvme_thread;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Represents an NVM Express device. Each nvme_dev is a PCI function.
|
* Represents an NVM Express device. Each nvme_dev is a PCI function.
|
||||||
*/
|
*/
|
||||||
struct nvme_dev {
|
struct nvme_dev {
|
||||||
|
struct list_head node;
|
||||||
struct nvme_queue **queues;
|
struct nvme_queue **queues;
|
||||||
u32 __iomem *dbs;
|
u32 __iomem *dbs;
|
||||||
struct pci_dev *pci_dev;
|
struct pci_dev *pci_dev;
|
||||||
|
@ -97,6 +103,7 @@ struct nvme_queue {
|
||||||
dma_addr_t sq_dma_addr;
|
dma_addr_t sq_dma_addr;
|
||||||
dma_addr_t cq_dma_addr;
|
dma_addr_t cq_dma_addr;
|
||||||
wait_queue_head_t sq_full;
|
wait_queue_head_t sq_full;
|
||||||
|
wait_queue_t sq_cong_wait;
|
||||||
struct bio_list sq_cong;
|
struct bio_list sq_cong;
|
||||||
u32 __iomem *q_db;
|
u32 __iomem *q_db;
|
||||||
u16 q_depth;
|
u16 q_depth;
|
||||||
|
@ -108,8 +115,6 @@ struct nvme_queue {
|
||||||
unsigned long cmdid_data[];
|
unsigned long cmdid_data[];
|
||||||
};
|
};
|
||||||
|
|
||||||
static void nvme_resubmit_bio(struct nvme_queue *nvmeq, struct bio *bio);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check we didin't inadvertently grow the command struct
|
* Check we didin't inadvertently grow the command struct
|
||||||
*/
|
*/
|
||||||
|
@ -309,9 +314,6 @@ static void bio_completion(struct nvme_queue *nvmeq, void *ctx,
|
||||||
bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
|
bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
|
||||||
free_nbio(nvmeq, nbio);
|
free_nbio(nvmeq, nbio);
|
||||||
bio_endio(bio, status ? -EIO : 0);
|
bio_endio(bio, status ? -EIO : 0);
|
||||||
bio = bio_list_pop(&nvmeq->sq_cong);
|
|
||||||
if (bio)
|
|
||||||
nvme_resubmit_bio(nvmeq, bio);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* length is in bytes */
|
/* length is in bytes */
|
||||||
|
@ -482,16 +484,6 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void nvme_resubmit_bio(struct nvme_queue *nvmeq, struct bio *bio)
|
|
||||||
{
|
|
||||||
struct nvme_ns *ns = bio->bi_bdev->bd_disk->private_data;
|
|
||||||
if (nvme_submit_bio_queue(nvmeq, ns, bio))
|
|
||||||
bio_list_add_head(&nvmeq->sq_cong, bio);
|
|
||||||
else if (bio_list_empty(&nvmeq->sq_cong))
|
|
||||||
blk_clear_queue_congested(ns->queue, rw_is_sync(bio->bi_rw));
|
|
||||||
/* XXX: Need to duplicate the logic from __freed_request here */
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* NB: return value of non-zero would mean that we were a stacking driver.
|
* NB: return value of non-zero would mean that we were a stacking driver.
|
||||||
* make_request must always succeed.
|
* make_request must always succeed.
|
||||||
|
@ -774,6 +766,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
|
||||||
nvmeq->cq_head = 0;
|
nvmeq->cq_head = 0;
|
||||||
nvmeq->cq_phase = 1;
|
nvmeq->cq_phase = 1;
|
||||||
init_waitqueue_head(&nvmeq->sq_full);
|
init_waitqueue_head(&nvmeq->sq_full);
|
||||||
|
init_waitqueue_entry(&nvmeq->sq_cong_wait, nvme_thread);
|
||||||
bio_list_init(&nvmeq->sq_cong);
|
bio_list_init(&nvmeq->sq_cong);
|
||||||
nvmeq->q_db = &dev->dbs[qid * 2];
|
nvmeq->q_db = &dev->dbs[qid * 2];
|
||||||
nvmeq->q_depth = depth;
|
nvmeq->q_depth = depth;
|
||||||
|
@ -1097,6 +1090,43 @@ static const struct block_device_operations nvme_fops = {
|
||||||
.ioctl = nvme_ioctl,
|
.ioctl = nvme_ioctl,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static void nvme_resubmit_bios(struct nvme_queue *nvmeq)
|
||||||
|
{
|
||||||
|
while (bio_list_peek(&nvmeq->sq_cong)) {
|
||||||
|
struct bio *bio = bio_list_pop(&nvmeq->sq_cong);
|
||||||
|
struct nvme_ns *ns = bio->bi_bdev->bd_disk->private_data;
|
||||||
|
if (nvme_submit_bio_queue(nvmeq, ns, bio)) {
|
||||||
|
bio_list_add_head(&nvmeq->sq_cong, bio);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static int nvme_kthread(void *data)
|
||||||
|
{
|
||||||
|
struct nvme_dev *dev;
|
||||||
|
|
||||||
|
while (!kthread_should_stop()) {
|
||||||
|
__set_current_state(TASK_RUNNING);
|
||||||
|
spin_lock(&dev_list_lock);
|
||||||
|
list_for_each_entry(dev, &dev_list, node) {
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < dev->queue_count; i++) {
|
||||||
|
struct nvme_queue *nvmeq = dev->queues[i];
|
||||||
|
spin_lock_irq(&nvmeq->q_lock);
|
||||||
|
if (nvme_process_cq(nvmeq))
|
||||||
|
printk("process_cq did something\n");
|
||||||
|
nvme_resubmit_bios(nvmeq);
|
||||||
|
spin_unlock_irq(&nvmeq->q_lock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
spin_unlock(&dev_list_lock);
|
||||||
|
set_current_state(TASK_INTERRUPTIBLE);
|
||||||
|
schedule_timeout(HZ);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int index,
|
static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int index,
|
||||||
struct nvme_id_ns *id, struct nvme_lba_range_type *rt)
|
struct nvme_id_ns *id, struct nvme_lba_range_type *rt)
|
||||||
{
|
{
|
||||||
|
@ -1307,6 +1337,10 @@ static int nvme_dev_remove(struct nvme_dev *dev)
|
||||||
{
|
{
|
||||||
struct nvme_ns *ns, *next;
|
struct nvme_ns *ns, *next;
|
||||||
|
|
||||||
|
spin_lock(&dev_list_lock);
|
||||||
|
list_del(&dev->node);
|
||||||
|
spin_unlock(&dev_list_lock);
|
||||||
|
|
||||||
/* TODO: wait all I/O finished or cancel them */
|
/* TODO: wait all I/O finished or cancel them */
|
||||||
|
|
||||||
list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
|
list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
|
||||||
|
@ -1406,6 +1440,11 @@ static int __devinit nvme_probe(struct pci_dev *pdev,
|
||||||
result = nvme_dev_add(dev);
|
result = nvme_dev_add(dev);
|
||||||
if (result)
|
if (result)
|
||||||
goto delete;
|
goto delete;
|
||||||
|
|
||||||
|
spin_lock(&dev_list_lock);
|
||||||
|
list_add(&dev->node, &dev_list);
|
||||||
|
spin_unlock(&dev_list_lock);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
delete:
|
delete:
|
||||||
|
@ -1479,17 +1518,25 @@ static struct pci_driver nvme_driver = {
|
||||||
|
|
||||||
static int __init nvme_init(void)
|
static int __init nvme_init(void)
|
||||||
{
|
{
|
||||||
int result;
|
int result = -EBUSY;
|
||||||
|
|
||||||
|
nvme_thread = kthread_run(nvme_kthread, NULL, "nvme");
|
||||||
|
if (IS_ERR(nvme_thread))
|
||||||
|
return PTR_ERR(nvme_thread);
|
||||||
|
|
||||||
nvme_major = register_blkdev(nvme_major, "nvme");
|
nvme_major = register_blkdev(nvme_major, "nvme");
|
||||||
if (nvme_major <= 0)
|
if (nvme_major <= 0)
|
||||||
return -EBUSY;
|
goto kill_kthread;
|
||||||
|
|
||||||
result = pci_register_driver(&nvme_driver);
|
result = pci_register_driver(&nvme_driver);
|
||||||
if (!result)
|
if (result)
|
||||||
return 0;
|
goto unregister_blkdev;
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
unregister_blkdev:
|
||||||
unregister_blkdev(nvme_major, "nvme");
|
unregister_blkdev(nvme_major, "nvme");
|
||||||
|
kill_kthread:
|
||||||
|
kthread_stop(nvme_thread);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1497,6 +1544,7 @@ static void __exit nvme_exit(void)
|
||||||
{
|
{
|
||||||
pci_unregister_driver(&nvme_driver);
|
pci_unregister_driver(&nvme_driver);
|
||||||
unregister_blkdev(nvme_major, "nvme");
|
unregister_blkdev(nvme_major, "nvme");
|
||||||
|
kthread_stop(nvme_thread);
|
||||||
}
|
}
|
||||||
|
|
||||||
MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>");
|
MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>");
|
||||||
|
|
Загрузка…
Ссылка в новой задаче