From adce7e9856798d4883f42c3d8429123707fa34e8 Mon Sep 17 00:00:00 2001 From: Edmund Nadolski Date: Wed, 27 Nov 2019 10:17:43 -0700 Subject: [PATCH 01/81] nvme: remove unused return code from nvme_alloc_ns The return code of nvme_alloc_ns is never used, so change it to void. Reviewed-by: Christoph Hellwig Signed-off-by: Edmund Nadolski Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a4d8c90ee7cc..414076aaf52b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3480,7 +3480,7 @@ static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns) return 0; } -static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) +static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns *ns; struct gendisk *disk; @@ -3490,13 +3490,11 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node); if (!ns) - return -ENOMEM; + return; ns->queue = blk_mq_init_queue(ctrl->tagset); - if (IS_ERR(ns->queue)) { - ret = PTR_ERR(ns->queue); + if (IS_ERR(ns->queue)) goto out_free_ns; - } if (ctrl->opts && ctrl->opts->data_digest) ns->queue->backing_dev_info->capabilities @@ -3519,10 +3517,8 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) if (ret) goto out_free_queue; - if (id->ncap == 0) { - ret = -EINVAL; + if (id->ncap == 0) /* no namespace (legacy quirk) */ goto out_free_id; - } ret = nvme_init_ns_head(ns, nsid, id); if (ret) @@ -3531,10 +3527,8 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) nvme_set_disk_name(disk_name, ns, ctrl, &flags); disk = alloc_disk_node(0, node); - if (!disk) { - ret = -ENOMEM; + if (!disk) goto out_unlink_ns; - } disk->fops = &nvme_fops; disk->private_data = ns; @@ -3565,7 +3559,7 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) 
nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name); kfree(id); - return 0; + return; out_put_disk: put_disk(ns->disk); out_unlink_ns: @@ -3579,9 +3573,6 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) blk_cleanup_queue(ns->queue); out_free_ns: kfree(ns); - if (ret > 0) - ret = blk_status_to_errno(nvme_error_status(ret)); - return ret; } static void nvme_ns_remove(struct nvme_ns *ns) From 527123c7deafd5aa921773f739887d610d59b437 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Sun, 26 Jan 2020 12:35:44 -0800 Subject: [PATCH 02/81] nvmet: configfs code cleanup This is a pure code cleanup patch which does not change any functionality. This patch removes the extra lines, get rid of else which is duplicate for return. Reviewed-by: Christoph Hellwig Signed-off-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/target/configfs.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 98613a45bd3b..403508a52e17 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -395,14 +395,12 @@ static ssize_t nvmet_ns_device_uuid_store(struct config_item *item, struct nvmet_subsys *subsys = ns->subsys; int ret = 0; - mutex_lock(&subsys->lock); if (ns->enabled) { ret = -EBUSY; goto out_unlock; } - if (uuid_parse(page, &ns->uuid)) ret = -EINVAL; @@ -815,10 +813,10 @@ static ssize_t nvmet_subsys_attr_version_show(struct config_item *item, (int)NVME_MAJOR(subsys->ver), (int)NVME_MINOR(subsys->ver), (int)NVME_TERTIARY(subsys->ver)); - else - return snprintf(page, PAGE_SIZE, "%d.%d\n", - (int)NVME_MAJOR(subsys->ver), - (int)NVME_MINOR(subsys->ver)); + + return snprintf(page, PAGE_SIZE, "%d.%d\n", + (int)NVME_MAJOR(subsys->ver), + (int)NVME_MINOR(subsys->ver)); } static ssize_t nvmet_subsys_attr_version_store(struct config_item *item, @@ -828,7 +826,6 @@ static ssize_t nvmet_subsys_attr_version_store(struct config_item 
*item, int major, minor, tertiary = 0; int ret; - ret = sscanf(page, "%d.%d.%d\n", &major, &minor, &tertiary); if (ret != 2 && ret != 3) return -EINVAL; From 94a39d61f80fcd679debda11e1ca02b88d90e67e Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Thu, 30 Jan 2020 10:29:31 -0800 Subject: [PATCH 03/81] nvmet: make ctrl-id configurable This patch adds a new target subsys attribute which allows user to optionally specify target controller IDs which then used in the nvmet_execute_identify_ctrl() to fill up the nvme_id_ctrl structure. For example, when using a cluster setup with two nodes, with a dual ported NVMe drive and exporting the drive from both the nodes, The connection to the host fails due to the same controller ID and results in the following error message:- "nvme nvmeX: Duplicate cntlid XXX with nvmeX, rejecting" With this patch now user can partition the controller IDs for each subsystem by setting up the cntlid_min and cntlid_max. These values will be used at the time of the controller ID creation. By partitioning the ctrl-ids for each subsystem results in the unique ctrl-id space which avoids the collision. When new attribute is not specified target will fall back to original cntlid calculation method. 
Reviewed-by: Christoph Hellwig Signed-off-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/target/configfs.c | 62 ++++++++++++++++++++++++++++++++++ drivers/nvme/target/core.c | 8 +++-- drivers/nvme/target/nvmet.h | 2 ++ 3 files changed, 70 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 403508a52e17..71c50751b5a6 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -859,10 +859,72 @@ static ssize_t nvmet_subsys_attr_serial_store(struct config_item *item, } CONFIGFS_ATTR(nvmet_subsys_, attr_serial); +static ssize_t nvmet_subsys_attr_cntlid_min_show(struct config_item *item, + char *page) +{ + return snprintf(page, PAGE_SIZE, "%u\n", to_subsys(item)->cntlid_min); +} + +static ssize_t nvmet_subsys_attr_cntlid_min_store(struct config_item *item, + const char *page, size_t cnt) +{ + u16 cntlid_min; + + if (sscanf(page, "%hu\n", &cntlid_min) != 1) + return -EINVAL; + + if (cntlid_min == 0) + return -EINVAL; + + down_write(&nvmet_config_sem); + if (cntlid_min >= to_subsys(item)->cntlid_max) + goto out_unlock; + to_subsys(item)->cntlid_min = cntlid_min; + up_write(&nvmet_config_sem); + return cnt; + +out_unlock: + up_write(&nvmet_config_sem); + return -EINVAL; +} +CONFIGFS_ATTR(nvmet_subsys_, attr_cntlid_min); + +static ssize_t nvmet_subsys_attr_cntlid_max_show(struct config_item *item, + char *page) +{ + return snprintf(page, PAGE_SIZE, "%u\n", to_subsys(item)->cntlid_max); +} + +static ssize_t nvmet_subsys_attr_cntlid_max_store(struct config_item *item, + const char *page, size_t cnt) +{ + u16 cntlid_max; + + if (sscanf(page, "%hu\n", &cntlid_max) != 1) + return -EINVAL; + + if (cntlid_max == 0) + return -EINVAL; + + down_write(&nvmet_config_sem); + if (cntlid_max <= to_subsys(item)->cntlid_min) + goto out_unlock; + to_subsys(item)->cntlid_max = cntlid_max; + up_write(&nvmet_config_sem); + return cnt; + +out_unlock: + up_write(&nvmet_config_sem); + return 
-EINVAL; +} +CONFIGFS_ATTR(nvmet_subsys_, attr_cntlid_max); + static struct configfs_attribute *nvmet_subsys_attrs[] = { &nvmet_subsys_attr_attr_allow_any_host, &nvmet_subsys_attr_attr_version, &nvmet_subsys_attr_attr_serial, + &nvmet_subsys_attr_attr_cntlid_min, + &nvmet_subsys_attr_attr_cntlid_max, NULL, }; diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 576de773b4db..48080c948692 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1289,8 +1289,11 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, if (!ctrl->sqs) goto out_free_cqs; + if (subsys->cntlid_min > subsys->cntlid_max) + goto out_free_cqs; + ret = ida_simple_get(&cntlid_ida, - NVME_CNTLID_MIN, NVME_CNTLID_MAX, + subsys->cntlid_min, subsys->cntlid_max, GFP_KERNEL); if (ret < 0) { status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR; @@ -1438,7 +1441,8 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, kfree(subsys); return ERR_PTR(-ENOMEM); } - + subsys->cntlid_min = NVME_CNTLID_MIN; + subsys->cntlid_max = NVME_CNTLID_MAX; kref_init(&subsys->ref); mutex_init(&subsys->lock); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index eda28b22a2c8..c2d518fb1789 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -211,6 +211,8 @@ struct nvmet_subsys { struct list_head namespaces; unsigned int nr_namespaces; unsigned int max_nsid; + u16 cntlid_min; + u16 cntlid_max; struct list_head ctrls; From 013b7ebe5a0d70e2a02fd225174595e79c591b3e Mon Sep 17 00:00:00 2001 From: Mark Ruijter Date: Thu, 30 Jan 2020 10:29:32 -0800 Subject: [PATCH 04/81] nvmet: make ctrl model configurable This patch adds a new target subsys attribute which allows user to optionally specify model name which then used in the nvmet_execute_identify_ctrl() to fill up the nvme_id_ctrl structure. The default value for the model is set to "Linux" for backward compatibility. 
Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Mark Ruijter [chaitanya.kulkarni@wdc.com *Use macro for default model, coding style fixes. *Use RCU for accessing model in for configfs and in nvmet_execute_identify_ctrl(). ] Signed-off-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/target/admin-cmd.c | 17 ++++++++- drivers/nvme/target/configfs.c | 66 +++++++++++++++++++++++++++++++++ drivers/nvme/target/core.c | 1 + drivers/nvme/target/nvmet.h | 8 ++++ 4 files changed, 90 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 72a7e41f3018..19f949570625 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -322,12 +322,25 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req) nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR); } +static void nvmet_id_set_model_number(struct nvme_id_ctrl *id, + struct nvmet_subsys *subsys) +{ + const char *model = NVMET_DEFAULT_CTRL_MODEL; + struct nvmet_subsys_model *subsys_model; + + rcu_read_lock(); + subsys_model = rcu_dereference(subsys->model); + if (subsys_model) + model = subsys_model->number; + memcpy_and_pad(id->mn, sizeof(id->mn), model, strlen(model), ' '); + rcu_read_unlock(); +} + static void nvmet_execute_identify_ctrl(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; struct nvme_id_ctrl *id; u16 status = 0; - const char model[] = "Linux"; id = kzalloc(sizeof(*id), GFP_KERNEL); if (!id) { @@ -342,7 +355,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) memset(id->sn, ' ', sizeof(id->sn)); bin2hex(id->sn, &ctrl->subsys->serial, min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2)); - memcpy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1, ' '); + nvmet_id_set_model_number(id, ctrl->subsys); memcpy_and_pad(id->fr, sizeof(id->fr), UTS_RELEASE, strlen(UTS_RELEASE), ' '); diff --git a/drivers/nvme/target/configfs.c 
b/drivers/nvme/target/configfs.c index 71c50751b5a6..1654064deea5 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -919,12 +919,78 @@ out_unlock: } CONFIGFS_ATTR(nvmet_subsys_, attr_cntlid_max); +static ssize_t nvmet_subsys_attr_model_show(struct config_item *item, + char *page) +{ + struct nvmet_subsys *subsys = to_subsys(item); + struct nvmet_subsys_model *subsys_model; + char *model = NVMET_DEFAULT_CTRL_MODEL; + int ret; + + rcu_read_lock(); + subsys_model = rcu_dereference(subsys->model); + if (subsys_model) + model = subsys_model->number; + ret = snprintf(page, PAGE_SIZE, "%s\n", model); + rcu_read_unlock(); + + return ret; +} + +/* See Section 1.5 of NVMe 1.4 */ +static bool nvmet_is_ascii(const char c) +{ + return c >= 0x20 && c <= 0x7e; +} + +static ssize_t nvmet_subsys_attr_model_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_subsys *subsys = to_subsys(item); + struct nvmet_subsys_model *new_model; + char *new_model_number; + int pos = 0, len; + + len = strcspn(page, "\n"); + if (!len) + return -EINVAL; + + for (pos = 0; pos < len; pos++) { + if (!nvmet_is_ascii(page[pos])) + return -EINVAL; + } + + new_model_number = kstrndup(page, len, GFP_KERNEL); + if (!new_model_number) + return -ENOMEM; + + new_model = kzalloc(sizeof(*new_model) + len + 1, GFP_KERNEL); + if (!new_model) { + kfree(new_model_number); + return -ENOMEM; + } + memcpy(new_model->number, new_model_number, len); + + down_write(&nvmet_config_sem); + mutex_lock(&subsys->lock); + new_model = rcu_replace_pointer(subsys->model, new_model, + mutex_is_locked(&subsys->lock)); + mutex_unlock(&subsys->lock); + up_write(&nvmet_config_sem); + + kfree_rcu(new_model, rcuhead); + + return count; +} +CONFIGFS_ATTR(nvmet_subsys_, attr_model); + static struct configfs_attribute *nvmet_subsys_attrs[] = { &nvmet_subsys_attr_attr_allow_any_host, &nvmet_subsys_attr_attr_version, &nvmet_subsys_attr_attr_serial, 
&nvmet_subsys_attr_attr_cntlid_min, &nvmet_subsys_attr_attr_cntlid_max, + &nvmet_subsys_attr_attr_model, NULL, }; diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 48080c948692..b685f99d56a1 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1461,6 +1461,7 @@ static void nvmet_subsys_free(struct kref *ref) WARN_ON_ONCE(!list_empty(&subsys->namespaces)); kfree(subsys->subsysnqn); + kfree_rcu(subsys->model, rcuhead); kfree(subsys); } diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index c2d518fb1789..42ba2ddd9e96 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -23,6 +23,7 @@ #define NVMET_ASYNC_EVENTS 4 #define NVMET_ERROR_LOG_SLOTS 128 #define NVMET_NO_ERROR_LOC ((u16)-1) +#define NVMET_DEFAULT_CTRL_MODEL "Linux" /* * Supported optional AENs: @@ -202,6 +203,11 @@ struct nvmet_ctrl { struct nvme_error_slot slots[NVMET_ERROR_LOG_SLOTS]; }; +struct nvmet_subsys_model { + struct rcu_head rcuhead; + char number[]; +}; + struct nvmet_subsys { enum nvme_subsys_type type; @@ -229,6 +235,8 @@ struct nvmet_subsys { struct config_group namespaces_group; struct config_group allowed_hosts_group; + + struct nvmet_subsys_model __rcu *model; }; static inline struct nvmet_subsys *to_subsys(struct config_item *item) From d3a9b0cadf8cea1746a6bf525d049198e705836a Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Thu, 30 Jan 2020 10:29:33 -0800 Subject: [PATCH 05/81] nvmet: check sscanf value for subsys serial attr For nvmet in configfs.c we check return values for all the sscanf() calls. Add similar check into the nvmet_subsys_attr_serial_store(). 
Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/target/configfs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 1654064deea5..7aa10788b7c8 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -849,10 +849,13 @@ static ssize_t nvmet_subsys_attr_serial_show(struct config_item *item, static ssize_t nvmet_subsys_attr_serial_store(struct config_item *item, const char *page, size_t count) { - struct nvmet_subsys *subsys = to_subsys(item); + u64 serial; + + if (sscanf(page, "%llx\n", &serial) != 1) + return -EINVAL; down_write(&nvmet_config_sem); - sscanf(page, "%llx\n", &subsys->serial); + to_subsys(item)->serial = serial; up_write(&nvmet_config_sem); return count; From 9912ade355902adb9dacbec640fac23c4e73019d Mon Sep 17 00:00:00 2001 From: "Wunderlich, Mark" Date: Thu, 16 Jan 2020 00:46:12 +0000 Subject: [PATCH 06/81] nvme-tcp: Set SO_PRIORITY for all host sockets Enable ability to associate all sockets related to NVMf TCP traffic to a priority group that will perform optimized network processing for this traffic class. Maintain initial default behavior of using priority of zero. Signed-off-by: Kiran Patil Signed-off-by: Mark Wunderlich Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 49d4373b84eb..e384239af880 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -20,6 +20,16 @@ struct nvme_tcp_queue; +/* Define the socket priority to use for connections where it is desirable + * that the NIC consider performing optimized packet processing or filtering. + * A non-zero value being sufficient to indicate general consideration of any + * possible optimization.
Making it a module param allows for alternative + * values that may be unique for some NIC implementations. + */ +static int so_priority; +module_param(so_priority, int, 0644); +MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority"); + enum nvme_tcp_send_state { NVME_TCP_SEND_CMD_PDU = 0, NVME_TCP_SEND_H2C_PDU, @@ -1309,6 +1319,17 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, goto err_sock; } + if (so_priority > 0) { + ret = kernel_setsockopt(queue->sock, SOL_SOCKET, SO_PRIORITY, + (char *)&so_priority, sizeof(so_priority)); + if (ret) { + dev_err(ctrl->ctrl.device, + "failed to set SO_PRIORITY sock opt, ret %d\n", + ret); + goto err_sock; + } + } + /* Set socket type of service */ if (nctrl->opts->tos >= 0) { opt = nctrl->opts->tos; From 43cc66892e81bb05283159e489a19cec177e6f9d Mon Sep 17 00:00:00 2001 From: "Wunderlich, Mark" Date: Thu, 16 Jan 2020 00:46:16 +0000 Subject: [PATCH 07/81] nvmet-tcp: set SO_PRIORITY for accepted sockets Enable ability to associate all sockets related to NVMf TCP traffic to a priority group that will perform optimized network processing for this traffic class. Maintain initial default behavior of using priority of zero. Signed-off-by: Kiran Patil Signed-off-by: Mark Wunderlich Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index af674fc0bb1e..cbff1038bdb3 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -19,6 +19,16 @@ #define NVMET_TCP_DEF_INLINE_DATA_SIZE (4 * PAGE_SIZE) +/* Define the socket priority to use for connections where it is desirable + * that the NIC consider performing optimized packet processing or filtering. + * A non-zero value being sufficient to indicate general consideration of any + * possible optimization.
Making it a module param allows for alternative + * values that may be unique for some NIC implementations. + */ +static int so_priority; +module_param(so_priority, int, 0644); +MODULE_PARM_DESC(so_priority, "nvmet tcp socket optimize priority"); + #define NVMET_TCP_RECV_BUDGET 8 #define NVMET_TCP_SEND_BUDGET 8 #define NVMET_TCP_IO_WORK_BUDGET 64 @@ -1433,6 +1443,13 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue) if (ret) return ret; + if (so_priority > 0) { + ret = kernel_setsockopt(sock, SOL_SOCKET, SO_PRIORITY, + (char *)&so_priority, sizeof(so_priority)); + if (ret) + return ret; + } + /* Set socket type of service */ if (inet->rcv_tos > 0) { int tos = inet->rcv_tos; @@ -1622,6 +1639,15 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport) goto err_sock; } + if (so_priority > 0) { + ret = kernel_setsockopt(port->sock, SOL_SOCKET, SO_PRIORITY, + (char *)&so_priority, sizeof(so_priority)); + if (ret) { + pr_err("failed to set SO_PRIORITY sock opt %d\n", ret); + goto err_sock; + } + } + ret = kernel_bind(port->sock, (struct sockaddr *)&port->addr, sizeof(port->addr)); if (ret) { From 7e81f99afd91c937f0e66dc135e26c1c4f78b003 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Tue, 10 Mar 2020 14:12:30 +0100 Subject: [PATCH 08/81] loop: Only change blocksize when needed. Return early in loop_set_block_size() if the requested block size is identical to the one we already have; this avoids expensive calls to freeze the block queue. 
Reviewed-by: Christoph Hellwig Signed-off-by: Martijn Coenen Signed-off-by: Jens Axboe --- drivers/block/loop.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 739b372a5112..93b8d6047c14 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1539,16 +1539,16 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg)) return -EINVAL; - if (lo->lo_queue->limits.logical_block_size != arg) { - sync_blockdev(lo->lo_device); - kill_bdev(lo->lo_device); - } + if (lo->lo_queue->limits.logical_block_size == arg) + return 0; + + sync_blockdev(lo->lo_device); + kill_bdev(lo->lo_device); blk_mq_freeze_queue(lo->lo_queue); /* kill_bdev should have truncated all the pages */ - if (lo->lo_queue->limits.logical_block_size != arg && - lo->lo_device->bd_inode->i_mapping->nrpages) { + if (lo->lo_device->bd_inode->i_mapping->nrpages) { err = -EAGAIN; pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", __func__, lo->lo_number, lo->lo_file_name, From 0fbcf57982346763ec636f176d5afaa367b5f71b Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Tue, 10 Mar 2020 14:06:54 +0100 Subject: [PATCH 09/81] loop: Only freeze block queue when needed. __loop_update_dio() can be called as a part of loop_set_fd(), when the block queue is not yet up and running; avoid freezing the block queue in that case, since that is an expensive operation. 
Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Signed-off-by: Martijn Coenen Signed-off-by: Jens Axboe --- drivers/block/loop.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 93b8d6047c14..a42c49e04954 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -214,7 +214,8 @@ static void __loop_update_dio(struct loop_device *lo, bool dio) * LO_FLAGS_READ_ONLY, both are set from kernel, and losetup * will get updated by ioctl(LOOP_GET_STATUS) */ - blk_mq_freeze_queue(lo->lo_queue); + if (lo->lo_state == Lo_bound) + blk_mq_freeze_queue(lo->lo_queue); lo->use_dio = use_dio; if (use_dio) { blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, lo->lo_queue); @@ -223,7 +224,8 @@ static void __loop_update_dio(struct loop_device *lo, bool dio) blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue); lo->lo_flags &= ~LO_FLAGS_DIRECT_IO; } - blk_mq_unfreeze_queue(lo->lo_queue); + if (lo->lo_state == Lo_bound) + blk_mq_unfreeze_queue(lo->lo_queue); } static int From 034851049082d084a6e616900293e14590b4e0e1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 11 Mar 2020 08:12:58 +0100 Subject: [PATCH 10/81] block: aoe: Use scnprintf() for avoiding potential buffer overflow Since snprintf() returns the would-be-output size instead of the actual output size, the succeeding calls may go beyond the given buffer limit. Fix it by replacing with scnprintf(). 
Signed-off-by: Takashi Iwai Signed-off-by: Jens Axboe --- drivers/block/aoe/aoeblk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 7b32fb673375..a27804d71e12 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -87,9 +87,9 @@ static ssize_t aoedisk_show_netif(struct device *dev, if (*nd == NULL) return snprintf(page, PAGE_SIZE, "none\n"); for (p = page; nd < ne; nd++) - p += snprintf(p, PAGE_SIZE - (p-page), "%s%s", + p += scnprintf(p, PAGE_SIZE - (p-page), "%s%s", p == page ? "" : ",", (*nd)->name); - p += snprintf(p, PAGE_SIZE - (p-page), "\n"); + p += scnprintf(p, PAGE_SIZE - (p-page), "\n"); return p-page; } /* firmware version */ From 7cd37a0006b0489c3e6cc766b1b3f3d5e4c3a4f2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 11 Mar 2020 08:44:39 +0100 Subject: [PATCH 11/81] lightnvm: pblk: Use scnprintf() for avoiding potential buffer overflow Since snprintf() returns the would-be-output size instead of the actual output size, the succeeding calls may go beyond the given buffer limit. Fix it by replacing with scnprintf(). 
Signed-off-by: Takashi Iwai Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-sysfs.c | 42 +++++++++++++++++------------------ 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c index 7d8958df9472..6387302b03f2 100644 --- a/drivers/lightnvm/pblk-sysfs.c +++ b/drivers/lightnvm/pblk-sysfs.c @@ -37,7 +37,7 @@ static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page) active = 0; up(&rlun->wr_sem); } - sz += snprintf(page + sz, PAGE_SIZE - sz, + sz += scnprintf(page + sz, PAGE_SIZE - sz, "pblk: pos:%d, ch:%d, lun:%d - %d\n", i, rlun->bppa.a.ch, @@ -120,7 +120,7 @@ static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page) struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf; struct nvm_addrf_12 *gppaf = (struct nvm_addrf_12 *)&geo->addrf; - sz = snprintf(page, PAGE_SIZE, + sz = scnprintf(page, PAGE_SIZE, "g:(b:%d)blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n", pblk->addrf_len, ppaf->blk_offset, ppaf->blk_len, @@ -130,7 +130,7 @@ static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page) ppaf->pln_offset, ppaf->pln_len, ppaf->sec_offset, ppaf->sec_len); - sz += snprintf(page + sz, PAGE_SIZE - sz, + sz += scnprintf(page + sz, PAGE_SIZE - sz, "d:blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n", gppaf->blk_offset, gppaf->blk_len, gppaf->pg_offset, gppaf->pg_len, @@ -142,7 +142,7 @@ static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page) struct nvm_addrf *ppaf = &pblk->addrf; struct nvm_addrf *gppaf = &geo->addrf; - sz = snprintf(page, PAGE_SIZE, + sz = scnprintf(page, PAGE_SIZE, "pblk:(s:%d)ch:%d/%d,lun:%d/%d,chk:%d/%d/sec:%d/%d\n", pblk->addrf_len, ppaf->ch_offset, ppaf->ch_len, @@ -150,7 +150,7 @@ static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page) ppaf->chk_offset, ppaf->chk_len, ppaf->sec_offset, ppaf->sec_len); - sz += snprintf(page + sz, PAGE_SIZE - sz, + sz += scnprintf(page + sz, PAGE_SIZE - sz, 
"device:ch:%d/%d,lun:%d/%d,chk:%d/%d,sec:%d/%d\n", gppaf->ch_offset, gppaf->ch_len, gppaf->lun_offset, gppaf->lun_len, @@ -278,11 +278,11 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page) pblk_err(pblk, "corrupted free line list:%d/%d\n", nr_free_lines, free_line_cnt); - sz = snprintf(page, PAGE_SIZE - sz, + sz = scnprintf(page, PAGE_SIZE - sz, "line: nluns:%d, nblks:%d, nsecs:%d\n", geo->all_luns, lm->blk_per_line, lm->sec_per_line); - sz += snprintf(page + sz, PAGE_SIZE - sz, + sz += scnprintf(page + sz, PAGE_SIZE - sz, "lines:d:%d,l:%d-f:%d,m:%d/%d,c:%d,b:%d,co:%d(d:%d,l:%d)t:%d\n", cur_data, cur_log, nr_free_lines, @@ -292,12 +292,12 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page) d_line_cnt, l_line_cnt, l_mg->nr_lines); - sz += snprintf(page + sz, PAGE_SIZE - sz, + sz += scnprintf(page + sz, PAGE_SIZE - sz, "GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, werr: %d, queue:%d\n", gc_full, gc_high, gc_mid, gc_low, gc_empty, gc_werr, atomic_read(&pblk->gc.read_inflight_gc)); - sz += snprintf(page + sz, PAGE_SIZE - sz, + sz += scnprintf(page + sz, PAGE_SIZE - sz, "data (%d) cur:%d, left:%d, vsc:%d, s:%d, map:%d/%d (%d)\n", cur_data, cur_sec, msecs, vsc, sec_in_line, map_weight, lm->sec_per_line, @@ -313,19 +313,19 @@ static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page) struct pblk_line_meta *lm = &pblk->lm; ssize_t sz = 0; - sz = snprintf(page, PAGE_SIZE - sz, + sz = scnprintf(page, PAGE_SIZE - sz, "smeta - len:%d, secs:%d\n", lm->smeta_len, lm->smeta_sec); - sz += snprintf(page + sz, PAGE_SIZE - sz, + sz += scnprintf(page + sz, PAGE_SIZE - sz, "emeta - len:%d, sec:%d, bb_start:%d\n", lm->emeta_len[0], lm->emeta_sec[0], lm->emeta_bb); - sz += snprintf(page + sz, PAGE_SIZE - sz, + sz += scnprintf(page + sz, PAGE_SIZE - sz, "bitmap lengths: sec:%d, blk:%d, lun:%d\n", lm->sec_bitmap_len, lm->blk_bitmap_len, lm->lun_bitmap_len); - sz += snprintf(page + sz, PAGE_SIZE - sz, + sz += scnprintf(page + sz, PAGE_SIZE - sz, 
"blk_line:%d, sec_line:%d, sec_blk:%d\n", lm->blk_per_line, lm->sec_per_line, @@ -344,12 +344,12 @@ static ssize_t pblk_get_write_amp(u64 user, u64 gc, u64 pad, { int sz; - sz = snprintf(page, PAGE_SIZE, + sz = scnprintf(page, PAGE_SIZE, "user:%lld gc:%lld pad:%lld WA:", user, gc, pad); if (!user) { - sz += snprintf(page + sz, PAGE_SIZE - sz, "NaN\n"); + sz += scnprintf(page + sz, PAGE_SIZE - sz, "NaN\n"); } else { u64 wa_int; u32 wa_frac; @@ -358,7 +358,7 @@ static ssize_t pblk_get_write_amp(u64 user, u64 gc, u64 pad, wa_int = div64_u64(wa_int, user); wa_int = div_u64_rem(wa_int, 100000, &wa_frac); - sz += snprintf(page + sz, PAGE_SIZE - sz, "%llu.%05u\n", + sz += scnprintf(page + sz, PAGE_SIZE - sz, "%llu.%05u\n", wa_int, wa_frac); } @@ -401,9 +401,9 @@ static ssize_t pblk_sysfs_get_padding_dist(struct pblk *pblk, char *page) total = atomic64_read(&pblk->nr_flush) - pblk->nr_flush_rst; if (!total) { for (i = 0; i < (buckets + 1); i++) - sz += snprintf(page + sz, PAGE_SIZE - sz, + sz += scnprintf(page + sz, PAGE_SIZE - sz, "%d:0 ", i); - sz += snprintf(page + sz, PAGE_SIZE - sz, "\n"); + sz += scnprintf(page + sz, PAGE_SIZE - sz, "\n"); return sz; } @@ -411,7 +411,7 @@ static ssize_t pblk_sysfs_get_padding_dist(struct pblk *pblk, char *page) for (i = 0; i < buckets; i++) total_buckets += atomic64_read(&pblk->pad_dist[i]); - sz += snprintf(page + sz, PAGE_SIZE - sz, "0:%lld%% ", + sz += scnprintf(page + sz, PAGE_SIZE - sz, "0:%lld%% ", bucket_percentage(total - total_buckets, total)); for (i = 0; i < buckets; i++) { @@ -419,10 +419,10 @@ static ssize_t pblk_sysfs_get_padding_dist(struct pblk *pblk, char *page) p = bucket_percentage(atomic64_read(&pblk->pad_dist[i]), total); - sz += snprintf(page + sz, PAGE_SIZE - sz, "%d:%lld%% ", + sz += scnprintf(page + sz, PAGE_SIZE - sz, "%d:%lld%% ", i + 1, p); } - sz += snprintf(page + sz, PAGE_SIZE - sz, "\n"); + sz += scnprintf(page + sz, PAGE_SIZE - sz, "\n"); return sz; } From 91dfa2dd812acc36eb6c8b5cb703886f450209a1 Mon 
Sep 17 00:00:00 2001 From: Jackie Liu Date: Sat, 7 Mar 2020 10:39:25 +0800 Subject: [PATCH 12/81] block/drbd: delete invalid function drbd_md_mark_dirty_ We deleted last_md_mark_dirty long ago, this function no longer needs to exist, delete it, otherwise a compilation error will occur when DEBUG is opened. Fixes: ac0acb9e39ac ("drbd: use drbd_device_post_work() in more place") Signed-off-by: Jackie Liu Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_main.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index a18155cdce41..6fc776ccef3e 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3414,22 +3414,11 @@ int drbd_md_read(struct drbd_device *device, struct drbd_backing_dev *bdev) * the meta-data super block. This function sets MD_DIRTY, and starts a * timer that ensures that within five seconds you have to call drbd_md_sync(). */ -#ifdef DEBUG -void drbd_md_mark_dirty_(struct drbd_device *device, unsigned int line, const char *func) -{ - if (!test_and_set_bit(MD_DIRTY, &device->flags)) { - mod_timer(&device->md_sync_timer, jiffies + HZ); - device->last_md_mark_dirty.line = line; - device->last_md_mark_dirty.func = func; - } -} -#else void drbd_md_mark_dirty(struct drbd_device *device) { if (!test_and_set_bit(MD_DIRTY, &device->flags)) mod_timer(&device->md_sync_timer, jiffies + 5*HZ); } -#endif void drbd_uuid_move_history(struct drbd_device *device) __must_hold(local) { From d970958b2d24e9a40b685ad82bf26a291d6f1c25 Mon Sep 17 00:00:00 2001 From: Hou Pu Date: Fri, 28 Feb 2020 01:40:29 -0500 Subject: [PATCH 13/81] nbd: enable replace socket if only one connection is configured Nbd server with multiple connections could be upgraded since 560bc4b (nbd: handle dead connections). But if only one connection is configured, after we take down nbd server, all inflight IO would finally timeout and return error.
We could requeue them like what we do with multiple connections and wait for new socket in submit path. Reviewed-by: Josef Bacik Signed-off-by: Hou Pu Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 78181908f0df..83070714888b 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -395,16 +395,19 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, } config = nbd->config; - if (config->num_connections > 1) { + if (config->num_connections > 1 || + (config->num_connections == 1 && nbd->tag_set.timeout)) { dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out, retrying (%d/%d alive)\n", atomic_read(&config->live_connections), config->num_connections); /* * Hooray we have more connections, requeue this IO, the submit - * path will put it on a real connection. + * path will put it on a real connection. Or if only one + * connection is configured, the submit path will wait util + * a new connection is reconfigured or util dead timeout. */ - if (config->socks && config->num_connections > 1) { + if (config->socks) { if (cmd->index < config->num_connections) { struct nbd_sock *nsock = config->socks[cmd->index]; @@ -741,14 +744,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", result); /* - * If we've disconnected or we only have 1 - * connection then we need to make sure we + * If we've disconnected, we need to make sure we * complete this request, otherwise error out * and let the timeout stuff handle resubmitting * this request onto another connection. 
*/ - if (nbd_disconnected(config) || - config->num_connections <= 1) { + if (nbd_disconnected(config)) { cmd->status = BLK_STS_IOERR; goto out; } @@ -825,7 +826,7 @@ static int find_fallback(struct nbd_device *nbd, int index) if (config->num_connections <= 1) { dev_err_ratelimited(disk_to_dev(nbd->disk), - "Attempted send on invalid socket\n"); + "Dead connection, failed to find a fallback\n"); return new_index; } From 2c272542baee2c3b9e8e3a260db81227ccefe8b5 Mon Sep 17 00:00:00 2001 From: Hou Pu Date: Fri, 28 Feb 2020 01:40:30 -0500 Subject: [PATCH 14/81] nbd: requeue command if the socket is changed In commit 2da22da5734 (nbd: fix zero cmd timeout handling v2), it is allowed to reset the timer when it fires if tag_set.timeout is set to zero. If the server is shut down and a new socket is reconfigured, the request should be requeued to be processed by the new server instead of waiting for a response from the old one. Reviewed-by: Josef Bacik Signed-off-by: Hou Pu Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 83070714888b..43cff01a5a67 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -434,12 +434,22 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, * Userspace sets timeout=0 to disable socket disconnection, * so just warn and reset the timer. */ + struct nbd_sock *nsock = config->socks[cmd->index]; cmd->retries++; dev_info(nbd_to_dev(nbd), "Possible stuck request %p: control (%s@%llu,%uB).
Runtime %u seconds\n", req, nbdcmd_to_ascii(req_to_nbd_cmd_type(req)), (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req), (req->timeout / HZ) * cmd->retries); + mutex_lock(&nsock->tx_lock); + if (cmd->cookie != nsock->cookie) { + nbd_requeue_cmd(cmd); + mutex_unlock(&nsock->tx_lock); + mutex_unlock(&cmd->lock); + nbd_config_put(nbd); + return BLK_EH_DONE; + } + mutex_unlock(&nsock->tx_lock); mutex_unlock(&cmd->lock); nbd_config_put(nbd); return BLK_EH_RESET_TIMER; From ff77042296d0a54535ddf74412c5ae92cb4ec76a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 12 Feb 2020 23:23:20 +0300 Subject: [PATCH 15/81] null_blk: fix spurious IO errors after failed past-wp access Steps to reproduce: BLKRESETZONE zone 0 // force EIO pwrite(fd, buf, 4096, 4096); [issue more IO including zone ioctls] It will start failing randomly including IO to unrelated zones because of ->error "reuse". Trigger can be partition detection as well if test is not run immediately which is even more entertaining. The fix is of course to clear ->error where necessary. 
Reviewed-by: Christoph Hellwig Signed-off-by: Alexey Dobriyan (SK hynix) Signed-off-by: Jens Axboe --- drivers/block/null_blk_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index 89bb16a99007..8060ffa4bc75 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -615,6 +615,7 @@ static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq) if (tag != -1U) { cmd = &nq->cmds[tag]; cmd->tag = tag; + cmd->error = BLK_STS_OK; cmd->nq = nq; if (nq->dev->irqmode == NULL_IRQ_TIMER) { hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, @@ -1395,6 +1396,7 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, cmd->timer.function = null_cmd_timer_expired; } cmd->rq = bd->rq; + cmd->error = BLK_STS_OK; cmd->nq = nq; blk_mq_start_request(bd->rq); From 290df92a9419363bf88347d3dbfb9fa6cfc2dcb4 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Thu, 12 Mar 2020 15:01:40 -0700 Subject: [PATCH 16/81] null_blk: describe the usage of fault injection param As null_blk is a very good starting point for testing the block layer, this patch adds descriptions and comments to 'timeout', 'requeue' and 'init_hctx' to explain how to use fault injection with null_blk. The nvme driver has something similar for nvme_core.fail_request, in the form of a comment. Reviewed-by: Chaitanya Kulkarni Signed-off-by: Dongli Zhang Signed-off-by: Jens Axboe --- drivers/block/null_blk_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index 8060ffa4bc75..e9d66cc0d6b9 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -97,14 +97,21 @@ module_param_named(home_node, g_home_node, int, 0444); MODULE_PARM_DESC(home_node, "Home node for the device"); #ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION +/* + * For more details about fault injection, please refer to + * Documentation/fault-injection/fault-injection.rst.
+ */ static char g_timeout_str[80]; module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), 0444); +MODULE_PARM_DESC(timeout, "Fault injection. timeout=<interval>,<probability>,<space>,<times>"); static char g_requeue_str[80]; module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), 0444); +MODULE_PARM_DESC(requeue, "Fault injection. requeue=<interval>,<probability>,<space>,<times>"); static char g_init_hctx_str[80]; module_param_string(init_hctx, g_init_hctx_str, sizeof(g_init_hctx_str), 0444); +MODULE_PARM_DESC(init_hctx, "Fault injection to fail hctx init. init_hctx=<interval>,<probability>,<space>,<times>"); #endif static int g_queue_mode = NULL_Q_MQ; From de6048b843bcefe8ec10762a393d92929e44444f Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 24 Feb 2020 22:23:43 +0100 Subject: [PATCH 17/81] floppy: cleanup: expand macro FDCS Macro FDCS silently uses identifier "fdc" which may be either the global one or a local one. Let's expand the macro to make this more obvious. Link: https://lore.kernel.org/r/20200224212352.8640-2-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 183 ++++++++++++++++++++--------------------- 1 file changed, 91 insertions(+), 92 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 8ef65c085640..93e08403556f 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -309,7 +309,6 @@ static bool initialized; #define DP (&drive_params[current_drive]) #define DRS (&drive_state[current_drive]) #define DRWE (&write_errors[current_drive]) -#define FDCS (&fdc_state[fdc]) #define UDP (&drive_params[drive]) #define UDRS (&drive_state[drive]) @@ -742,11 +741,11 @@ static int disk_change(int drive) if (time_before(jiffies, UDRS->select_date + UDP->select_delay)) DPRINT("WARNING disk change called early\n"); - if (!(FDCS->dor & (0x10 << UNIT(drive))) || - (FDCS->dor & 3) != UNIT(drive) || fdc != FDC(drive)) { + if (!(fdc_state[fdc].dor & (0x10 << UNIT(drive))) || + (fdc_state[fdc].dor & 3) != UNIT(drive) || fdc != FDC(drive))
{ DPRINT("probing disk change on unselected drive\n"); DPRINT("drive=%d fdc=%d dor=%x\n", drive, FDC(drive), - (unsigned int)FDCS->dor); + (unsigned int)fdc_state[fdc].dor); } debug_dcl(UDP->flags, @@ -799,10 +798,10 @@ static int set_dor(int fdc, char mask, char data) unsigned char newdor; unsigned char olddor; - if (FDCS->address == -1) + if (fdc_state[fdc].address == -1) return -1; - olddor = FDCS->dor; + olddor = fdc_state[fdc].dor; newdor = (olddor & mask) | data; if (newdor != olddor) { unit = olddor & 0x3; @@ -812,7 +811,7 @@ static int set_dor(int fdc, char mask, char data) "calling disk change from set_dor\n"); disk_change(drive); } - FDCS->dor = newdor; + fdc_state[fdc].dor = newdor; fd_outb(newdor, FD_DOR); unit = newdor & 0x3; @@ -828,8 +827,8 @@ static void twaddle(void) { if (DP->select_delay) return; - fd_outb(FDCS->dor & ~(0x10 << UNIT(current_drive)), FD_DOR); - fd_outb(FDCS->dor, FD_DOR); + fd_outb(fdc_state[fdc].dor & ~(0x10 << UNIT(current_drive)), FD_DOR); + fd_outb(fdc_state[fdc].dor, FD_DOR); DRS->select_date = jiffies; } @@ -841,10 +840,10 @@ static void reset_fdc_info(int mode) { int drive; - FDCS->spec1 = FDCS->spec2 = -1; - FDCS->need_configure = 1; - FDCS->perp_mode = 1; - FDCS->rawcmd = 0; + fdc_state[fdc].spec1 = fdc_state[fdc].spec2 = -1; + fdc_state[fdc].need_configure = 1; + fdc_state[fdc].perp_mode = 1; + fdc_state[fdc].rawcmd = 0; for (drive = 0; drive < N_DRIVE; drive++) if (FDC(drive) == fdc && (mode || UDRS->track != NEED_1_RECAL)) UDRS->track = NEED_2_RECAL; @@ -868,10 +867,10 @@ static void set_fdc(int drive) #if N_FDC > 1 set_dor(1 - fdc, ~8, 0); #endif - if (FDCS->rawcmd == 2) + if (fdc_state[fdc].rawcmd == 2) reset_fdc_info(1); if (fd_inb(FD_STATUS) != STATUS_READY) - FDCS->reset = 1; + fdc_state[fdc].reset = 1; } /* locks the driver */ @@ -924,7 +923,7 @@ static void floppy_off(unsigned int drive) unsigned long volatile delta; int fdc = FDC(drive); - if (!(FDCS->dor & (0x10 << UNIT(drive)))) + if (!(fdc_state[fdc].dor & 
(0x10 << UNIT(drive)))) return; del_timer(motor_off_timer + drive); @@ -1035,7 +1034,7 @@ static void main_command_interrupt(void) static int fd_wait_for_completion(unsigned long expires, void (*function)(void)) { - if (FDCS->reset) { + if (fdc_state[fdc].reset) { reset_fdc(); /* do the reset during sleep to win time * if we don't need to sleep, it's a good * occasion anyways */ @@ -1063,13 +1062,13 @@ static void setup_DMA(void) pr_cont("%x,", raw_cmd->cmd[i]); pr_cont("\n"); cont->done(0); - FDCS->reset = 1; + fdc_state[fdc].reset = 1; return; } if (((unsigned long)raw_cmd->kernel_data) % 512) { pr_info("non aligned address: %p\n", raw_cmd->kernel_data); cont->done(0); - FDCS->reset = 1; + fdc_state[fdc].reset = 1; return; } f = claim_dma_lock(); @@ -1077,10 +1076,10 @@ static void setup_DMA(void) #ifdef fd_dma_setup if (fd_dma_setup(raw_cmd->kernel_data, raw_cmd->length, (raw_cmd->flags & FD_RAW_READ) ? - DMA_MODE_READ : DMA_MODE_WRITE, FDCS->address) < 0) { + DMA_MODE_READ : DMA_MODE_WRITE, fdc_state[fdc].address) < 0) { release_dma_lock(f); cont->done(0); - FDCS->reset = 1; + fdc_state[fdc].reset = 1; return; } release_dma_lock(f); @@ -1091,7 +1090,7 @@ static void setup_DMA(void) DMA_MODE_READ : DMA_MODE_WRITE); fd_set_dma_addr(raw_cmd->kernel_data); fd_set_dma_count(raw_cmd->length); - virtual_dma_port = FDCS->address; + virtual_dma_port = fdc_state[fdc].address; fd_enable_dma(); release_dma_lock(f); #endif @@ -1105,7 +1104,7 @@ static int wait_til_ready(void) int status; int counter; - if (FDCS->reset) + if (fdc_state[fdc].reset) return -1; for (counter = 0; counter < 10000; counter++) { status = fd_inb(FD_STATUS); @@ -1116,7 +1115,7 @@ static int wait_til_ready(void) DPRINT("Getstatus times out (%x) on fdc %d\n", status, fdc); show_floppy(); } - FDCS->reset = 1; + fdc_state[fdc].reset = 1; return -1; } @@ -1136,7 +1135,7 @@ static int output_byte(char byte) output_log_pos = (output_log_pos + 1) % OLOGSIZE; return 0; } - FDCS->reset = 1; + 
fdc_state[fdc].reset = 1; if (initialized) { DPRINT("Unable to send byte %x to FDC. Fdc=%x Status=%x\n", byte, fdc, status); @@ -1171,7 +1170,7 @@ static int result(void) fdc, status, i); show_floppy(); } - FDCS->reset = 1; + fdc_state[fdc].reset = 1; return -1; } @@ -1208,7 +1207,7 @@ static void perpendicular_mode(void) default: DPRINT("Invalid data rate for perpendicular mode!\n"); cont->done(0); - FDCS->reset = 1; + fdc_state[fdc].reset = 1; /* * convenient way to return to * redo without too much hassle @@ -1219,12 +1218,12 @@ static void perpendicular_mode(void) } else perp_mode = 0; - if (FDCS->perp_mode == perp_mode) + if (fdc_state[fdc].perp_mode == perp_mode) return; - if (FDCS->version >= FDC_82077_ORIG) { + if (fdc_state[fdc].version >= FDC_82077_ORIG) { output_byte(FD_PERPENDICULAR); output_byte(perp_mode); - FDCS->perp_mode = perp_mode; + fdc_state[fdc].perp_mode = perp_mode; } else if (perp_mode) { DPRINT("perpendicular mode not supported by this FDC.\n"); } @@ -1279,9 +1278,9 @@ static void fdc_specify(void) int hlt_max_code = 0x7f; int hut_max_code = 0xf; - if (FDCS->need_configure && FDCS->version >= FDC_82072A) { + if (fdc_state[fdc].need_configure && fdc_state[fdc].version >= FDC_82072A) { fdc_configure(); - FDCS->need_configure = 0; + fdc_state[fdc].need_configure = 0; } switch (raw_cmd->rate & 0x03) { @@ -1290,7 +1289,7 @@ static void fdc_specify(void) break; case 1: dtr = 300; - if (FDCS->version >= FDC_82078) { + if (fdc_state[fdc].version >= FDC_82078) { /* chose the default rate table, not the one * where 1 = 2 Mbps */ output_byte(FD_DRIVESPEC); @@ -1305,7 +1304,7 @@ static void fdc_specify(void) break; } - if (FDCS->version >= FDC_82072) { + if (fdc_state[fdc].version >= FDC_82072) { scale_dtr = dtr; hlt_max_code = 0x00; /* 0==256msec*dtr0/dtr (not linear!) */ hut_max_code = 0x0; /* 0==256msec*dtr0/dtr (not linear!) 
*/ @@ -1335,11 +1334,11 @@ static void fdc_specify(void) spec2 = (hlt << 1) | (use_virtual_dma & 1); /* If these parameters did not change, just return with success */ - if (FDCS->spec1 != spec1 || FDCS->spec2 != spec2) { + if (fdc_state[fdc].spec1 != spec1 || fdc_state[fdc].spec2 != spec2) { /* Go ahead and set spec1 and spec2 */ output_byte(FD_SPECIFY); - output_byte(FDCS->spec1 = spec1); - output_byte(FDCS->spec2 = spec2); + output_byte(fdc_state[fdc].spec1 = spec1); + output_byte(fdc_state[fdc].spec2 = spec2); } } /* fdc_specify */ @@ -1350,7 +1349,7 @@ static void fdc_specify(void) static int fdc_dtr(void) { /* If data rate not already set to desired value, set it. */ - if ((raw_cmd->rate & 3) == FDCS->dtr) + if ((raw_cmd->rate & 3) == fdc_state[fdc].dtr) return 0; /* Set dtr */ @@ -1361,7 +1360,7 @@ static int fdc_dtr(void) * enforced after data rate changes before R/W operations. * Pause 5 msec to avoid trouble. (Needs to be 2 jiffies) */ - FDCS->dtr = raw_cmd->rate & 3; + fdc_state[fdc].dtr = raw_cmd->rate & 3; return fd_wait_for_completion(jiffies + 2UL * HZ / 100, floppy_ready); } /* fdc_dtr */ @@ -1414,7 +1413,7 @@ static int interpret_errors(void) if (inr != 7) { DPRINT("-- FDC reply error\n"); - FDCS->reset = 1; + fdc_state[fdc].reset = 1; return 1; } @@ -1548,7 +1547,7 @@ static void check_wp(void) output_byte(FD_GETSTATUS); output_byte(UNIT(current_drive)); if (result() != 1) { - FDCS->reset = 1; + fdc_state[fdc].reset = 1; return; } clear_bit(FD_VERIFY_BIT, &DRS->flags); @@ -1625,7 +1624,7 @@ static void recal_interrupt(void) { debugt(__func__, ""); if (inr != 2) - FDCS->reset = 1; + fdc_state[fdc].reset = 1; else if (ST0 & ST0_ECE) { switch (DRS->track) { case NEED_1_RECAL: @@ -1693,7 +1692,7 @@ irqreturn_t floppy_interrupt(int irq, void *dev_id) release_dma_lock(f); do_floppy = NULL; - if (fdc >= N_FDC || FDCS->address == -1) { + if (fdc >= N_FDC || fdc_state[fdc].address == -1) { /* we don't even know which FDC is the culprit */ 
pr_info("DOR0=%x\n", fdc_state[0].dor); pr_info("floppy interrupt on bizarre fdc %d\n", fdc); @@ -1702,11 +1701,11 @@ irqreturn_t floppy_interrupt(int irq, void *dev_id) return IRQ_NONE; } - FDCS->reset = 0; + fdc_state[fdc].reset = 0; /* We have to clear the reset flag here, because apparently on boxes * with level triggered interrupts (PS/2, Sparc, ...), it is needed to - * emit SENSEI's to clear the interrupt line. And FDCS->reset blocks the - * emission of the SENSEI's. + * emit SENSEI's to clear the interrupt line. And fdc_state[fdc].reset + * blocks the emission of the SENSEI's. * It is OK to emit floppy commands because we are in an interrupt * handler here, and thus we have to fear no interference of other * activity. @@ -1729,7 +1728,7 @@ irqreturn_t floppy_interrupt(int irq, void *dev_id) inr == 2 && max_sensei); } if (!handler) { - FDCS->reset = 1; + fdc_state[fdc].reset = 1; return IRQ_NONE; } schedule_bh(handler); @@ -1755,7 +1754,7 @@ static void reset_interrupt(void) { debugt(__func__, ""); result(); /* get the status ready for set_fdc */ - if (FDCS->reset) { + if (fdc_state[fdc].reset) { pr_info("reset set in interrupt, calling %ps\n", cont->error); cont->error(); /* a reset just after a reset. BAD! */ } @@ -1771,7 +1770,7 @@ static void reset_fdc(void) unsigned long flags; do_floppy = reset_interrupt; - FDCS->reset = 0; + fdc_state[fdc].reset = 0; reset_fdc_info(0); /* Pseudo-DMA may intercept 'reset finished' interrupt. 
*/ @@ -1781,12 +1780,12 @@ static void reset_fdc(void) fd_disable_dma(); release_dma_lock(flags); - if (FDCS->version >= FDC_82072A) - fd_outb(0x80 | (FDCS->dtr & 3), FD_STATUS); + if (fdc_state[fdc].version >= FDC_82072A) + fd_outb(0x80 | (fdc_state[fdc].dtr & 3), FD_STATUS); else { - fd_outb(FDCS->dor & ~0x04, FD_DOR); + fd_outb(fdc_state[fdc].dor & ~0x04, FD_DOR); udelay(FD_RESET_DELAY); - fd_outb(FDCS->dor, FD_DOR); + fd_outb(fdc_state[fdc].dor, FD_DOR); } } @@ -1850,7 +1849,7 @@ static void floppy_shutdown(struct work_struct *arg) if (initialized) DPRINT("floppy timeout called\n"); - FDCS->reset = 1; + fdc_state[fdc].reset = 1; if (cont) { cont->done(0); cont->redo(); /* this will recall reset when needed */ @@ -1870,7 +1869,7 @@ static int start_motor(void (*function)(void)) mask = 0xfc; data = UNIT(current_drive); if (!(raw_cmd->flags & FD_RAW_NO_MOTOR)) { - if (!(FDCS->dor & (0x10 << UNIT(current_drive)))) { + if (!(fdc_state[fdc].dor & (0x10 << UNIT(current_drive)))) { set_debugt(); /* no read since this drive is running */ DRS->first_read_date = 0; @@ -1878,7 +1877,7 @@ static int start_motor(void (*function)(void)) DRS->spinup_date = jiffies; data |= (0x10 << UNIT(current_drive)); } - } else if (FDCS->dor & (0x10 << UNIT(current_drive))) + } else if (fdc_state[fdc].dor & (0x10 << UNIT(current_drive))) mask &= ~(0x10 << UNIT(current_drive)); /* starts motor and selects floppy */ @@ -1892,7 +1891,7 @@ static int start_motor(void (*function)(void)) static void floppy_ready(void) { - if (FDCS->reset) { + if (fdc_state[fdc].reset) { reset_fdc(); return; } @@ -1991,7 +1990,7 @@ static int wait_til_done(void (*handler)(void), bool interruptible) return -EINTR; } - if (FDCS->reset) + if (fdc_state[fdc].reset) command_status = FD_COMMAND_ERROR; if (command_status == FD_COMMAND_OKAY) ret = 0; @@ -2060,7 +2059,7 @@ static void bad_flp_intr(void) if (err_count > DP->max_errors.abort) cont->done(0); if (err_count > DP->max_errors.reset) - FDCS->reset = 1; + 
fdc_state[fdc].reset = 1; else if (err_count > DP->max_errors.recal) DRS->track = NEED_2_RECAL; } @@ -2967,8 +2966,8 @@ static int user_reset_fdc(int drive, int arg, bool interruptible) return -EINTR; if (arg == FD_RESET_ALWAYS) - FDCS->reset = 1; - if (FDCS->reset) { + fdc_state[fdc].reset = 1; + if (fdc_state[fdc].reset) { cont = &reset_cont; ret = wait_til_done(reset_fdc, interruptible); if (ret == -EINTR) @@ -3179,23 +3178,23 @@ static int raw_cmd_ioctl(int cmd, void __user *param) int ret2; int ret; - if (FDCS->rawcmd <= 1) - FDCS->rawcmd = 1; + if (fdc_state[fdc].rawcmd <= 1) + fdc_state[fdc].rawcmd = 1; for (drive = 0; drive < N_DRIVE; drive++) { if (FDC(drive) != fdc) continue; if (drive == current_drive) { if (UDRS->fd_ref > 1) { - FDCS->rawcmd = 2; + fdc_state[fdc].rawcmd = 2; break; } } else if (UDRS->fd_ref) { - FDCS->rawcmd = 2; + fdc_state[fdc].rawcmd = 2; break; } } - if (FDCS->reset) + if (fdc_state[fdc].reset) return -EIO; ret = raw_cmd_copyin(cmd, param, &my_raw_cmd); @@ -3209,7 +3208,7 @@ static int raw_cmd_ioctl(int cmd, void __user *param) ret = wait_til_done(floppy_start, true); debug_dcl(DP->flags, "calling disk change from raw_cmd ioctl\n"); - if (ret != -EINTR && FDCS->reset) + if (ret != -EINTR && fdc_state[fdc].reset) ret = -EIO; DRS->track = NO_TRACK; @@ -4261,7 +4260,7 @@ static char __init get_fdc_version(void) int r; output_byte(FD_DUMPREGS); /* 82072 and better know DUMPREGS */ - if (FDCS->reset) + if (fdc_state[fdc].reset) return FDC_NONE; r = result(); if (r <= 0x00) @@ -4494,7 +4493,7 @@ static int floppy_resume(struct device *dev) int fdc; for (fdc = 0; fdc < N_FDC; fdc++) - if (FDCS->address != -1) + if (fdc_state[fdc].address != -1) user_reset_fdc(-1, FD_RESET_ALWAYS, false); return 0; @@ -4605,15 +4604,15 @@ static int __init do_floppy_init(void) for (i = 0; i < N_FDC; i++) { fdc = i; - memset(FDCS, 0, sizeof(*FDCS)); - FDCS->dtr = -1; - FDCS->dor = 0x4; + memset(&fdc_state[fdc], 0, sizeof(*fdc_state)); + fdc_state[fdc].dtr = 
-1; + fdc_state[fdc].dor = 0x4; #if defined(__sparc__) || defined(__mc68000__) /*sparcs/sun3x don't have a DOR reset which we can fall back on to */ #ifdef __mc68000__ if (MACH_IS_SUN3X) #endif - FDCS->version = FDC_82072A; + fdc_state[fdc].version = FDC_82072A; #endif } @@ -4656,28 +4655,28 @@ static int __init do_floppy_init(void) for (i = 0; i < N_FDC; i++) { fdc = i; - FDCS->driver_version = FD_DRIVER_VERSION; + fdc_state[fdc].driver_version = FD_DRIVER_VERSION; for (unit = 0; unit < 4; unit++) - FDCS->track[unit] = 0; - if (FDCS->address == -1) + fdc_state[fdc].track[unit] = 0; + if (fdc_state[fdc].address == -1) continue; - FDCS->rawcmd = 2; + fdc_state[fdc].rawcmd = 2; if (user_reset_fdc(-1, FD_RESET_ALWAYS, false)) { /* free ioports reserved by floppy_grab_irq_and_dma() */ floppy_release_regions(fdc); - FDCS->address = -1; - FDCS->version = FDC_NONE; + fdc_state[fdc].address = -1; + fdc_state[fdc].version = FDC_NONE; continue; } /* Try to determine the floppy controller type */ - FDCS->version = get_fdc_version(); - if (FDCS->version == FDC_NONE) { + fdc_state[fdc].version = get_fdc_version(); + if (fdc_state[fdc].version == FDC_NONE) { /* free ioports reserved by floppy_grab_irq_and_dma() */ floppy_release_regions(fdc); - FDCS->address = -1; + fdc_state[fdc].address = -1; continue; } - if (can_use_virtual_dma == 2 && FDCS->version < FDC_82072A) + if (can_use_virtual_dma == 2 && fdc_state[fdc].version < FDC_82072A) can_use_virtual_dma = 0; have_no_fdc = 0; @@ -4783,7 +4782,7 @@ static void floppy_release_allocated_regions(int fdc, const struct io_region *p) { while (p != io_regions) { p--; - release_region(FDCS->address + p->offset, p->size); + release_region(fdc_state[fdc].address + p->offset, p->size); } } @@ -4794,10 +4793,10 @@ static int floppy_request_regions(int fdc) const struct io_region *p; for (p = io_regions; p < ARRAY_END(io_regions); p++) { - if (!request_region(FDCS->address + p->offset, + if (!request_region(fdc_state[fdc].address + 
p->offset, p->size, "floppy")) { DPRINT("Floppy io-port 0x%04lx in use\n", - FDCS->address + p->offset); + fdc_state[fdc].address + p->offset); floppy_release_allocated_regions(fdc, p); return -EBUSY; } @@ -4840,23 +4839,23 @@ static int floppy_grab_irq_and_dma(void) } for (fdc = 0; fdc < N_FDC; fdc++) { - if (FDCS->address != -1) { + if (fdc_state[fdc].address != -1) { if (floppy_request_regions(fdc)) goto cleanup; } } for (fdc = 0; fdc < N_FDC; fdc++) { - if (FDCS->address != -1) { + if (fdc_state[fdc].address != -1) { reset_fdc_info(1); - fd_outb(FDCS->dor, FD_DOR); + fd_outb(fdc_state[fdc].dor, FD_DOR); } } fdc = 0; set_dor(0, ~0, 8); /* avoid immediate interrupt */ for (fdc = 0; fdc < N_FDC; fdc++) - if (FDCS->address != -1) - fd_outb(FDCS->dor, FD_DOR); + if (fdc_state[fdc].address != -1) + fd_outb(fdc_state[fdc].dor, FD_DOR); /* * The driver will try and free resources and relies on us * to know if they were allocated or not. @@ -4918,7 +4917,7 @@ static void floppy_release_irq_and_dma(void) pr_info("work still pending\n"); old_fdc = fdc; for (fdc = 0; fdc < N_FDC; fdc++) - if (FDCS->address != -1) + if (fdc_state[fdc].address != -1) floppy_release_regions(fdc); fdc = old_fdc; } From f9d322bdb1e24111704a7eb167b7bdde7496c4cd Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 24 Feb 2020 22:23:44 +0100 Subject: [PATCH 18/81] floppy: cleanup: expand macro UFDCS This macro doesn't bring much value and only slightly obfuscates the code by silently using local variable "drive", let's expand it. 
Link: https://lore.kernel.org/r/20200224212352.8640-3-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 93e08403556f..182148a828c7 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -313,7 +313,6 @@ static bool initialized; #define UDP (&drive_params[drive]) #define UDRS (&drive_state[drive]) #define UDRWE (&write_errors[drive]) -#define UFDCS (&fdc_state[FDC(drive)]) #define PH_HEAD(floppy, head) (((((floppy)->stretch & 2) >> 1) ^ head) << 2) #define STRETCH(floppy) ((floppy)->stretch & FD_STRETCH) @@ -3549,7 +3548,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int case FDRESET: return user_reset_fdc(drive, (int)param, true); case FDGETFDCSTAT: - outparam = UFDCS; + outparam = &fdc_state[FDC(drive)]; break; case FDWERRORCLR: memset(UDRWE, 0, sizeof(*UDRWE)); @@ -3833,7 +3832,7 @@ static int compat_getfdcstat(int drive, struct floppy_fdc_state v; mutex_lock(&floppy_mutex); - v = *UFDCS; + v = fdc_state[FDC(drive)]; mutex_unlock(&floppy_mutex); memset(&v32, 0, sizeof(struct compat_floppy_fdc_state)); @@ -4062,8 +4061,8 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) buffer_track = -1; } - if (UFDCS->rawcmd == 1) - UFDCS->rawcmd = 2; + if (fdc_state[FDC(drive)].rawcmd == 1) + fdc_state[FDC(drive)].rawcmd = 2; if (!(mode & FMODE_NDELAY)) { if (mode & (FMODE_READ|FMODE_WRITE)) { From 1ce9ae9654c910f331dd453c53eb69f10e3e35b9 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 24 Feb 2020 22:23:45 +0100 Subject: [PATCH 19/81] floppy: cleanup: expand macro UDP This macro doesn't bring much value and only slightly obfuscates the code by silently using local variable "drive", let's expand it. 
Link: https://lore.kernel.org/r/20200224212352.8640-4-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 152 +++++++++++++++++++++-------------------- 1 file changed, 77 insertions(+), 75 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 182148a828c7..8fcedb2f5068 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -310,7 +310,6 @@ static bool initialized; #define DRS (&drive_state[current_drive]) #define DRWE (&write_errors[current_drive]) -#define UDP (&drive_params[drive]) #define UDRS (&drive_state[drive]) #define UDRWE (&write_errors[drive]) @@ -681,10 +680,10 @@ static void __reschedule_timeout(int drive, const char *message) delay = 20UL * HZ; drive = 0; } else - delay = UDP->timeout; + delay = drive_params[drive].timeout; mod_delayed_work(floppy_wq, &fd_timeout, delay); - if (UDP->flags & FD_DEBUG) + if (drive_params[drive].flags & FD_DEBUG) DPRINT("reschedule timeout %s\n", message); timeout_message = message; } @@ -738,7 +737,7 @@ static int disk_change(int drive) { int fdc = FDC(drive); - if (time_before(jiffies, UDRS->select_date + UDP->select_delay)) + if (time_before(jiffies, UDRS->select_date + drive_params[drive].select_delay)) DPRINT("WARNING disk change called early\n"); if (!(fdc_state[fdc].dor & (0x10 << UNIT(drive))) || (fdc_state[fdc].dor & 3) != UNIT(drive) || fdc != FDC(drive)) { @@ -747,15 +746,16 @@ static int disk_change(int drive) (unsigned int)fdc_state[fdc].dor); } - debug_dcl(UDP->flags, + debug_dcl(drive_params[drive].flags, "checking disk change line for drive %d\n", drive); - debug_dcl(UDP->flags, "jiffies=%lu\n", jiffies); - debug_dcl(UDP->flags, "disk change line=%x\n", fd_inb(FD_DIR) & 0x80); - debug_dcl(UDP->flags, "flags=%lx\n", UDRS->flags); + debug_dcl(drive_params[drive].flags, "jiffies=%lu\n", jiffies); + debug_dcl(drive_params[drive].flags, "disk change line=%x\n", + fd_inb(FD_DIR) & 0x80); + 
debug_dcl(drive_params[drive].flags, "flags=%lx\n", UDRS->flags); - if (UDP->flags & FD_BROKEN_DCL) + if (drive_params[drive].flags & FD_BROKEN_DCL) return test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags); - if ((fd_inb(FD_DIR) ^ UDP->flags) & 0x80) { + if ((fd_inb(FD_DIR) ^ drive_params[drive].flags) & 0x80) { set_bit(FD_VERIFY_BIT, &UDRS->flags); /* verify write protection */ @@ -764,7 +764,7 @@ static int disk_change(int drive) /* invalidate its geometry */ if (UDRS->keep_data >= 0) { - if ((UDP->flags & FTD_MSG) && + if ((drive_params[drive].flags & FTD_MSG) && current_type[drive] != NULL) DPRINT("Disk type is undefined after disk change\n"); current_type[drive] = NULL; @@ -806,7 +806,7 @@ static int set_dor(int fdc, char mask, char data) unit = olddor & 0x3; if (is_selected(olddor, unit) && !is_selected(newdor, unit)) { drive = REVDRIVE(fdc, unit); - debug_dcl(UDP->flags, + debug_dcl(drive_params[drive].flags, "calling disk change from set_dor\n"); disk_change(drive); } @@ -929,12 +929,12 @@ static void floppy_off(unsigned int drive) /* make spindle stop in a position which minimizes spinup time * next time */ - if (UDP->rps) { + if (drive_params[drive].rps) { delta = jiffies - UDRS->first_read_date + HZ - - UDP->spindown_offset; - delta = ((delta * UDP->rps) % HZ) / UDP->rps; + drive_params[drive].spindown_offset; + delta = ((delta * drive_params[drive].rps) % HZ) / drive_params[drive].rps; motor_off_timer[drive].expires = - jiffies + UDP->spindown - delta; + jiffies + drive_params[drive].spindown - delta; } add_timer(motor_off_timer + drive); } @@ -956,7 +956,7 @@ static void scandrives(void) saved_drive = current_drive; for (i = 0; i < N_DRIVE; i++) { drive = (saved_drive + i + 1) % N_DRIVE; - if (UDRS->fd_ref == 0 || UDP->select_delay != 0) + if (UDRS->fd_ref == 0 || drive_params[drive].select_delay != 0) continue; /* skip closed drives */ set_fdc(drive); if (!(set_dor(fdc, ~3, UNIT(drive) | (0x10 << UNIT(drive))) & @@ -2999,8 +2999,8 @@ static const char 
*drive_name(int type, int drive) if (type) floppy = floppy_type + type; else { - if (UDP->native_format) - floppy = floppy_type + UDP->native_format; + if (drive_params[drive].native_format) + floppy = floppy_type + drive_params[drive].native_format; else return "(null)"; } @@ -3240,7 +3240,7 @@ static int set_geometry(unsigned int cmd, struct floppy_struct *g, (int)(g->sect * g->head) <= 0 || /* check for zero in F_SECT_PER_TRACK */ (unsigned char)((g->sect << 2) >> FD_SIZECODE(g)) == 0 || - g->track <= 0 || g->track > UDP->tracks >> STRETCH(g) || + g->track <= 0 || g->track > drive_params[drive].tracks >> STRETCH(g) || /* check if reserved bits are set */ (g->stretch & ~(FD_STRETCH | FD_SWAPSIDES | FD_SECTBASEMASK)) != 0) return -EINVAL; @@ -3487,10 +3487,10 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int outparam = &inparam.g; break; case FDMSGON: - UDP->flags |= FTD_MSG; + drive_params[drive].flags |= FTD_MSG; return 0; case FDMSGOFF: - UDP->flags &= ~FTD_MSG; + drive_params[drive].flags &= ~FTD_MSG; return 0; case FDFMTBEG: if (lock_fdc(drive)) @@ -3514,13 +3514,13 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int return -EINTR; return invalidate_drive(bdev); case FDSETEMSGTRESH: - UDP->max_errors.reporting = (unsigned short)(param & 0x0f); + drive_params[drive].max_errors.reporting = (unsigned short)(param & 0x0f); return 0; case FDGETMAXERRS: - outparam = &UDP->max_errors; + outparam = &drive_params[drive].max_errors; break; case FDSETMAXERRS: - UDP->max_errors = inparam.max_errors; + drive_params[drive].max_errors = inparam.max_errors; break; case FDGETDRVTYP: outparam = drive_name(type, drive); @@ -3530,10 +3530,10 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int if (!valid_floppy_drive_params(inparam.dp.autodetect, inparam.dp.native_format)) return -EINVAL; - *UDP = inparam.dp; + drive_params[drive] = inparam.dp; break; case FDGETDRVPRM: - outparam = 
UDP; + outparam = &drive_params[drive]; break; case FDPOLLDRVSTAT: if (lock_fdc(drive)) @@ -3730,25 +3730,26 @@ static int compat_setdrvprm(int drive, if (!valid_floppy_drive_params(v.autodetect, v.native_format)) return -EINVAL; mutex_lock(&floppy_mutex); - UDP->cmos = v.cmos; - UDP->max_dtr = v.max_dtr; - UDP->hlt = v.hlt; - UDP->hut = v.hut; - UDP->srt = v.srt; - UDP->spinup = v.spinup; - UDP->spindown = v.spindown; - UDP->spindown_offset = v.spindown_offset; - UDP->select_delay = v.select_delay; - UDP->rps = v.rps; - UDP->tracks = v.tracks; - UDP->timeout = v.timeout; - UDP->interleave_sect = v.interleave_sect; - UDP->max_errors = v.max_errors; - UDP->flags = v.flags; - UDP->read_track = v.read_track; - memcpy(UDP->autodetect, v.autodetect, sizeof(v.autodetect)); - UDP->checkfreq = v.checkfreq; - UDP->native_format = v.native_format; + drive_params[drive].cmos = v.cmos; + drive_params[drive].max_dtr = v.max_dtr; + drive_params[drive].hlt = v.hlt; + drive_params[drive].hut = v.hut; + drive_params[drive].srt = v.srt; + drive_params[drive].spinup = v.spinup; + drive_params[drive].spindown = v.spindown; + drive_params[drive].spindown_offset = v.spindown_offset; + drive_params[drive].select_delay = v.select_delay; + drive_params[drive].rps = v.rps; + drive_params[drive].tracks = v.tracks; + drive_params[drive].timeout = v.timeout; + drive_params[drive].interleave_sect = v.interleave_sect; + drive_params[drive].max_errors = v.max_errors; + drive_params[drive].flags = v.flags; + drive_params[drive].read_track = v.read_track; + memcpy(drive_params[drive].autodetect, v.autodetect, + sizeof(v.autodetect)); + drive_params[drive].checkfreq = v.checkfreq; + drive_params[drive].native_format = v.native_format; mutex_unlock(&floppy_mutex); return 0; } @@ -3760,25 +3761,26 @@ static int compat_getdrvprm(int drive, memset(&v, 0, sizeof(struct compat_floppy_drive_params)); mutex_lock(&floppy_mutex); - v.cmos = UDP->cmos; - v.max_dtr = UDP->max_dtr; - v.hlt = UDP->hlt; - v.hut = 
UDP->hut; - v.srt = UDP->srt; - v.spinup = UDP->spinup; - v.spindown = UDP->spindown; - v.spindown_offset = UDP->spindown_offset; - v.select_delay = UDP->select_delay; - v.rps = UDP->rps; - v.tracks = UDP->tracks; - v.timeout = UDP->timeout; - v.interleave_sect = UDP->interleave_sect; - v.max_errors = UDP->max_errors; - v.flags = UDP->flags; - v.read_track = UDP->read_track; - memcpy(v.autodetect, UDP->autodetect, sizeof(v.autodetect)); - v.checkfreq = UDP->checkfreq; - v.native_format = UDP->native_format; + v.cmos = drive_params[drive].cmos; + v.max_dtr = drive_params[drive].max_dtr; + v.hlt = drive_params[drive].hlt; + v.hut = drive_params[drive].hut; + v.srt = drive_params[drive].srt; + v.spinup = drive_params[drive].spinup; + v.spindown = drive_params[drive].spindown; + v.spindown_offset = drive_params[drive].spindown_offset; + v.select_delay = drive_params[drive].select_delay; + v.rps = drive_params[drive].rps; + v.tracks = drive_params[drive].tracks; + v.timeout = drive_params[drive].timeout; + v.interleave_sect = drive_params[drive].interleave_sect; + v.max_errors = drive_params[drive].max_errors; + v.flags = drive_params[drive].flags; + v.read_track = drive_params[drive].read_track; + memcpy(v.autodetect, drive_params[drive].autodetect, + sizeof(v.autodetect)); + v.checkfreq = drive_params[drive].checkfreq; + v.native_format = drive_params[drive].native_format; mutex_unlock(&floppy_mutex); if (copy_to_user(arg, &v, sizeof(struct compat_floppy_drive_params))) @@ -3931,16 +3933,16 @@ static void __init config_types(void) /* read drive info out of physical CMOS */ drive = 0; - if (!UDP->cmos) - UDP->cmos = FLOPPY0_TYPE; + if (!drive_params[drive].cmos) + drive_params[drive].cmos = FLOPPY0_TYPE; drive = 1; - if (!UDP->cmos) - UDP->cmos = FLOPPY1_TYPE; + if (!drive_params[drive].cmos) + drive_params[drive].cmos = FLOPPY1_TYPE; /* FIXME: additional physical CMOS drive detection should go here */ for (drive = 0; drive < N_DRIVE; drive++) { - unsigned int type = 
UDP->cmos; + unsigned int type = drive_params[drive].cmos; struct floppy_drive_params *params; const char *name = NULL; char temparea[32]; @@ -3970,7 +3972,7 @@ static void __init config_types(void) pr_cont("%s fd%d is %s", prepend, drive, name); } - *UDP = *params; + drive_params[drive] = *params; } if (has_drive) @@ -4012,7 +4014,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) if (opened_bdev[drive] && opened_bdev[drive] != bdev) goto out2; - if (!UDRS->fd_ref && (UDP->flags & FD_BROKEN_DCL)) { + if (!UDRS->fd_ref && (drive_params[drive].flags & FD_BROKEN_DCL)) { set_bit(FD_DISK_CHANGED_BIT, &UDRS->flags); set_bit(FD_VERIFY_BIT, &UDRS->flags); } @@ -4026,7 +4028,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) if (!floppy_track_buffer) { /* if opening an ED drive, reserve a big buffer, * else reserve a small one */ - if ((UDP->cmos == 6) || (UDP->cmos == 5)) + if ((drive_params[drive].cmos == 6) || (drive_params[drive].cmos == 5)) try = 64; /* Only 48 actually useful */ else try = 32; /* Only 24 actually useful */ @@ -4105,7 +4107,7 @@ static unsigned int floppy_check_events(struct gendisk *disk, test_bit(FD_VERIFY_BIT, &UDRS->flags)) return DISK_EVENT_MEDIA_CHANGE; - if (time_after(jiffies, UDRS->last_checked + UDP->checkfreq)) { + if (time_after(jiffies, UDRS->last_checked + drive_params[drive].checkfreq)) { if (lock_fdc(drive)) return 0; poll_drive(false, 0); @@ -4471,7 +4473,7 @@ static ssize_t floppy_cmos_show(struct device *dev, int drive; drive = p->id; - return sprintf(buf, "%X\n", UDP->cmos); + return sprintf(buf, "%X\n", drive_params[drive].cmos); } static DEVICE_ATTR(cmos, 0444, floppy_cmos_show, NULL); From 8d9d34e25a372b3841f102a39e2def18e7dc805a Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 24 Feb 2020 22:23:46 +0100 Subject: [PATCH 20/81] floppy: cleanup: expand macro UDRS This macro doesn't bring much value and only slightly obfuscates the code by silently using local variable "drive", let's 
expand it. Link: https://lore.kernel.org/r/20200224212352.8640-5-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 162 +++++++++++++++++++++-------------------- 1 file changed, 83 insertions(+), 79 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 8fcedb2f5068..522fbccc4e32 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -310,7 +310,6 @@ static bool initialized; #define DRS (&drive_state[current_drive]) #define DRWE (&write_errors[current_drive]) -#define UDRS (&drive_state[drive]) #define UDRWE (&write_errors[drive]) #define PH_HEAD(floppy, head) (((((floppy)->stretch & 2) >> 1) ^ head) << 2) @@ -603,7 +602,7 @@ static unsigned char in_sector_offset; /* offset within physical sector, static inline bool drive_no_geom(int drive) { - return !current_type[drive] && !ITYPE(UDRS->fd_device); + return !current_type[drive] && !ITYPE(drive_state[drive].fd_device); } #ifndef fd_eject @@ -737,7 +736,7 @@ static int disk_change(int drive) { int fdc = FDC(drive); - if (time_before(jiffies, UDRS->select_date + drive_params[drive].select_delay)) + if (time_before(jiffies, drive_state[drive].select_date + drive_params[drive].select_delay)) DPRINT("WARNING disk change called early\n"); if (!(fdc_state[fdc].dor & (0x10 << UNIT(drive))) || (fdc_state[fdc].dor & 3) != UNIT(drive) || fdc != FDC(drive)) { @@ -751,19 +750,22 @@ static int disk_change(int drive) debug_dcl(drive_params[drive].flags, "jiffies=%lu\n", jiffies); debug_dcl(drive_params[drive].flags, "disk change line=%x\n", fd_inb(FD_DIR) & 0x80); - debug_dcl(drive_params[drive].flags, "flags=%lx\n", UDRS->flags); + debug_dcl(drive_params[drive].flags, "flags=%lx\n", + drive_state[drive].flags); if (drive_params[drive].flags & FD_BROKEN_DCL) - return test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags); + return test_bit(FD_DISK_CHANGED_BIT, + &drive_state[drive].flags); if ((fd_inb(FD_DIR) ^ 
drive_params[drive].flags) & 0x80) { - set_bit(FD_VERIFY_BIT, &UDRS->flags); + set_bit(FD_VERIFY_BIT, &drive_state[drive].flags); /* verify write protection */ - if (UDRS->maxblock) /* mark it changed */ - set_bit(FD_DISK_CHANGED_BIT, &UDRS->flags); + if (drive_state[drive].maxblock) /* mark it changed */ + set_bit(FD_DISK_CHANGED_BIT, + &drive_state[drive].flags); /* invalidate its geometry */ - if (UDRS->keep_data >= 0) { + if (drive_state[drive].keep_data >= 0) { if ((drive_params[drive].flags & FTD_MSG) && current_type[drive] != NULL) DPRINT("Disk type is undefined after disk change\n"); @@ -773,8 +775,8 @@ static int disk_change(int drive) return 1; } else { - UDRS->last_checked = jiffies; - clear_bit(FD_DISK_NEWCHANGE_BIT, &UDRS->flags); + drive_state[drive].last_checked = jiffies; + clear_bit(FD_DISK_NEWCHANGE_BIT, &drive_state[drive].flags); } return 0; } @@ -816,7 +818,7 @@ static int set_dor(int fdc, char mask, char data) unit = newdor & 0x3; if (!is_selected(olddor, unit) && is_selected(newdor, unit)) { drive = REVDRIVE(fdc, unit); - UDRS->select_date = jiffies; + drive_state[drive].select_date = jiffies; } } return olddor; @@ -844,8 +846,8 @@ static void reset_fdc_info(int mode) fdc_state[fdc].perp_mode = 1; fdc_state[fdc].rawcmd = 0; for (drive = 0; drive < N_DRIVE; drive++) - if (FDC(drive) == fdc && (mode || UDRS->track != NEED_1_RECAL)) - UDRS->track = NEED_2_RECAL; + if (FDC(drive) == fdc && (mode || drive_state[drive].track != NEED_1_RECAL)) + drive_state[drive].track = NEED_2_RECAL; } /* selects the fdc and drive, and enables the fdc's input/dma. 
*/ @@ -930,7 +932,7 @@ static void floppy_off(unsigned int drive) /* make spindle stop in a position which minimizes spinup time * next time */ if (drive_params[drive].rps) { - delta = jiffies - UDRS->first_read_date + HZ - + delta = jiffies - drive_state[drive].first_read_date + HZ - drive_params[drive].spindown_offset; delta = ((delta * drive_params[drive].rps) % HZ) / drive_params[drive].rps; motor_off_timer[drive].expires = @@ -956,7 +958,7 @@ static void scandrives(void) saved_drive = current_drive; for (i = 0; i < N_DRIVE; i++) { drive = (saved_drive + i + 1) % N_DRIVE; - if (UDRS->fd_ref == 0 || drive_params[drive].select_delay != 0) + if (drive_state[drive].fd_ref == 0 || drive_params[drive].select_delay != 0) continue; /* skip closed drives */ set_fdc(drive); if (!(set_dor(fdc, ~3, UNIT(drive) | (0x10 << UNIT(drive))) & @@ -2065,7 +2067,7 @@ static void bad_flp_intr(void) static void set_floppy(int drive) { - int type = ITYPE(UDRS->fd_device); + int type = ITYPE(drive_state[drive].fd_device); if (type) _floppy = floppy_type + type; @@ -3183,11 +3185,11 @@ static int raw_cmd_ioctl(int cmd, void __user *param) if (FDC(drive) != fdc) continue; if (drive == current_drive) { - if (UDRS->fd_ref > 1) { + if (drive_state[drive].fd_ref > 1) { fdc_state[fdc].rawcmd = 2; break; } - } else if (UDRS->fd_ref) { + } else if (drive_state[drive].fd_ref) { fdc_state[fdc].rawcmd = 2; break; } @@ -3405,7 +3407,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int unsigned long param) { int drive = (long)bdev->bd_disk->private_data; - int type = ITYPE(UDRS->fd_device); + int type = ITYPE(drive_state[drive].fd_device); int i; int ret; int size; @@ -3453,7 +3455,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int switch (cmd) { case FDEJECT: - if (UDRS->fd_ref != 1) + if (drive_state[drive].fd_ref != 1) /* somebody else has this drive open */ return -EBUSY; if (lock_fdc(drive)) @@ -3463,8 +3465,8 @@ static int 
fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int * non-Sparc architectures */ ret = fd_eject(UNIT(drive)); - set_bit(FD_DISK_CHANGED_BIT, &UDRS->flags); - set_bit(FD_VERIFY_BIT, &UDRS->flags); + set_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags); + set_bit(FD_VERIFY_BIT, &drive_state[drive].flags); process_fd_request(); return ret; case FDCLRPRM: @@ -3472,7 +3474,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int return -EINTR; current_type[drive] = NULL; floppy_sizes[drive] = MAX_DISK_SIZE << 1; - UDRS->keep_data = 0; + drive_state[drive].keep_data = 0; return invalidate_drive(bdev); case FDSETPRM: case FDDEFPRM: @@ -3497,7 +3499,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int return -EINTR; if (poll_drive(true, FD_RAW_NEED_DISK) == -EINTR) return -EINTR; - ret = UDRS->flags; + ret = drive_state[drive].flags; process_fd_request(); if (ret & FD_VERIFY) return -ENODEV; @@ -3505,7 +3507,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int return -EROFS; return 0; case FDFMTTRK: - if (UDRS->fd_ref != 1) + if (drive_state[drive].fd_ref != 1) return -EBUSY; return do_format(drive, &inparam.f); case FDFMTEND: @@ -3543,7 +3545,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int process_fd_request(); /* fall through */ case FDGETDRVSTAT: - outparam = UDRS; + outparam = &drive_state[drive]; break; case FDRESET: return user_reset_fdc(drive, (int)param, true); @@ -3690,7 +3692,7 @@ static int compat_set_geometry(struct block_device *bdev, fmode_t mode, unsigned mutex_lock(&floppy_mutex); drive = (long)bdev->bd_disk->private_data; - type = ITYPE(UDRS->fd_device); + type = ITYPE(drive_state[drive].fd_device); err = set_geometry(cmd == FDSETPRM32 ? 
FDSETPRM : FDDEFPRM, &v, drive, type, bdev); mutex_unlock(&floppy_mutex); @@ -3706,7 +3708,8 @@ static int compat_get_prm(int drive, memset(&v, 0, sizeof(v)); mutex_lock(&floppy_mutex); - err = get_floppy_geometry(drive, ITYPE(UDRS->fd_device), &p); + err = get_floppy_geometry(drive, ITYPE(drive_state[drive].fd_device), + &p); if (err) { mutex_unlock(&floppy_mutex); return err; @@ -3803,20 +3806,20 @@ static int compat_getdrvstat(int drive, bool poll, goto Eintr; process_fd_request(); } - v.spinup_date = UDRS->spinup_date; - v.select_date = UDRS->select_date; - v.first_read_date = UDRS->first_read_date; - v.probed_format = UDRS->probed_format; - v.track = UDRS->track; - v.maxblock = UDRS->maxblock; - v.maxtrack = UDRS->maxtrack; - v.generation = UDRS->generation; - v.keep_data = UDRS->keep_data; - v.fd_ref = UDRS->fd_ref; - v.fd_device = UDRS->fd_device; - v.last_checked = UDRS->last_checked; - v.dmabuf = (uintptr_t)UDRS->dmabuf; - v.bufblocks = UDRS->bufblocks; + v.spinup_date = drive_state[drive].spinup_date; + v.select_date = drive_state[drive].select_date; + v.first_read_date = drive_state[drive].first_read_date; + v.probed_format = drive_state[drive].probed_format; + v.track = drive_state[drive].track; + v.maxblock = drive_state[drive].maxblock; + v.maxtrack = drive_state[drive].maxtrack; + v.generation = drive_state[drive].generation; + v.keep_data = drive_state[drive].keep_data; + v.fd_ref = drive_state[drive].fd_ref; + v.fd_device = drive_state[drive].fd_device; + v.last_checked = drive_state[drive].last_checked; + v.dmabuf = (uintptr_t) drive_state[drive].dmabuf; + v.bufblocks = drive_state[drive].bufblocks; mutex_unlock(&floppy_mutex); if (copy_to_user(arg, &v, sizeof(struct compat_floppy_drive_struct))) @@ -3985,11 +3988,11 @@ static void floppy_release(struct gendisk *disk, fmode_t mode) mutex_lock(&floppy_mutex); mutex_lock(&open_lock); - if (!UDRS->fd_ref--) { + if (!drive_state[drive].fd_ref--) { DPRINT("floppy_release with fd_ref == 0"); - 
UDRS->fd_ref = 0; + drive_state[drive].fd_ref = 0; } - if (!UDRS->fd_ref) + if (!drive_state[drive].fd_ref) opened_bdev[drive] = NULL; mutex_unlock(&open_lock); mutex_unlock(&floppy_mutex); @@ -4010,16 +4013,16 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) mutex_lock(&floppy_mutex); mutex_lock(&open_lock); - old_dev = UDRS->fd_device; + old_dev = drive_state[drive].fd_device; if (opened_bdev[drive] && opened_bdev[drive] != bdev) goto out2; - if (!UDRS->fd_ref && (drive_params[drive].flags & FD_BROKEN_DCL)) { - set_bit(FD_DISK_CHANGED_BIT, &UDRS->flags); - set_bit(FD_VERIFY_BIT, &UDRS->flags); + if (!drive_state[drive].fd_ref && (drive_params[drive].flags & FD_BROKEN_DCL)) { + set_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags); + set_bit(FD_VERIFY_BIT, &drive_state[drive].flags); } - UDRS->fd_ref++; + drive_state[drive].fd_ref++; opened_bdev[drive] = bdev; @@ -4056,7 +4059,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) } new_dev = MINOR(bdev->bd_dev); - UDRS->fd_device = new_dev; + drive_state[drive].fd_device = new_dev; set_capacity(disks[drive], floppy_sizes[new_dev]); if (old_dev != -1 && old_dev != new_dev) { if (buffer_drive == drive) @@ -4068,26 +4071,27 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) if (!(mode & FMODE_NDELAY)) { if (mode & (FMODE_READ|FMODE_WRITE)) { - UDRS->last_checked = 0; - clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags); + drive_state[drive].last_checked = 0; + clear_bit(FD_OPEN_SHOULD_FAIL_BIT, + &drive_state[drive].flags); check_disk_change(bdev); - if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags)) + if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags)) goto out; - if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags)) + if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags)) goto out; } res = -EROFS; if ((mode & FMODE_WRITE) && - !test_bit(FD_DISK_WRITABLE_BIT, &UDRS->flags)) + !test_bit(FD_DISK_WRITABLE_BIT, &drive_state[drive].flags)) goto out; } 
mutex_unlock(&open_lock); mutex_unlock(&floppy_mutex); return 0; out: - UDRS->fd_ref--; + drive_state[drive].fd_ref--; - if (!UDRS->fd_ref) + if (!drive_state[drive].fd_ref) opened_bdev[drive] = NULL; out2: mutex_unlock(&open_lock); @@ -4103,19 +4107,19 @@ static unsigned int floppy_check_events(struct gendisk *disk, { int drive = (long)disk->private_data; - if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) || - test_bit(FD_VERIFY_BIT, &UDRS->flags)) + if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags) || + test_bit(FD_VERIFY_BIT, &drive_state[drive].flags)) return DISK_EVENT_MEDIA_CHANGE; - if (time_after(jiffies, UDRS->last_checked + drive_params[drive].checkfreq)) { + if (time_after(jiffies, drive_state[drive].last_checked + drive_params[drive].checkfreq)) { if (lock_fdc(drive)) return 0; poll_drive(false, 0); process_fd_request(); } - if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) || - test_bit(FD_VERIFY_BIT, &UDRS->flags) || + if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags) || + test_bit(FD_VERIFY_BIT, &drive_state[drive].flags) || test_bit(drive, &fake_change) || drive_no_geom(drive)) return DISK_EVENT_MEDIA_CHANGE; @@ -4141,7 +4145,7 @@ static void floppy_rb0_cb(struct bio *bio) if (bio->bi_status) { pr_info("floppy: error %d while reading block 0\n", bio->bi_status); - set_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags); + set_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags); } complete(&cbdata->complete); } @@ -4198,8 +4202,8 @@ static int floppy_revalidate(struct gendisk *disk) int cf; int res = 0; - if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) || - test_bit(FD_VERIFY_BIT, &UDRS->flags) || + if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags) || + test_bit(FD_VERIFY_BIT, &drive_state[drive].flags) || test_bit(drive, &fake_change) || drive_no_geom(drive)) { if (WARN(atomic_read(&usage_count) == 0, @@ -4209,20 +4213,20 @@ static int floppy_revalidate(struct gendisk *disk) res = lock_fdc(drive); if (res) return res; - cf = 
(test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) || - test_bit(FD_VERIFY_BIT, &UDRS->flags)); + cf = (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags) || + test_bit(FD_VERIFY_BIT, &drive_state[drive].flags)); if (!(cf || test_bit(drive, &fake_change) || drive_no_geom(drive))) { process_fd_request(); /*already done by another thread */ return 0; } - UDRS->maxblock = 0; - UDRS->maxtrack = 0; + drive_state[drive].maxblock = 0; + drive_state[drive].maxtrack = 0; if (buffer_drive == drive) buffer_track = -1; clear_bit(drive, &fake_change); - clear_bit(FD_DISK_CHANGED_BIT, &UDRS->flags); + clear_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags); if (cf) - UDRS->generation++; + drive_state[drive].generation++; if (drive_no_geom(drive)) { /* auto-sensing */ res = __floppy_read_block_0(opened_bdev[drive], drive); @@ -4232,7 +4236,7 @@ static int floppy_revalidate(struct gendisk *disk) process_fd_request(); } } - set_capacity(disk, floppy_sizes[UDRS->fd_device]); + set_capacity(disk, floppy_sizes[drive_state[drive].fd_device]); return res; } @@ -4638,12 +4642,12 @@ static int __init do_floppy_init(void) /* initialise drive state */ for (drive = 0; drive < N_DRIVE; drive++) { - memset(UDRS, 0, sizeof(*UDRS)); + memset(&drive_state[drive], 0, sizeof(drive_state[drive])); memset(UDRWE, 0, sizeof(*UDRWE)); - set_bit(FD_DISK_NEWCHANGE_BIT, &UDRS->flags); - set_bit(FD_DISK_CHANGED_BIT, &UDRS->flags); - set_bit(FD_VERIFY_BIT, &UDRS->flags); - UDRS->fd_device = -1; + set_bit(FD_DISK_NEWCHANGE_BIT, &drive_state[drive].flags); + set_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags); + set_bit(FD_VERIFY_BIT, &drive_state[drive].flags); + drive_state[drive].fd_device = -1; floppy_track_buffer = NULL; max_buffer_sectors = 0; } From 121e297955e312bee9edb151c9f68a550c28284b Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 24 Feb 2020 22:23:47 +0100 Subject: [PATCH 21/81] floppy: cleanup: expand macro UDRWE This macro doesn't bring much value and only slightly obfuscates the 
code by silently using local variable "drive", let's expand it. Link: https://lore.kernel.org/r/20200224212352.8640-6-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 522fbccc4e32..a76a9bbaa3e0 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -310,8 +310,6 @@ static bool initialized; #define DRS (&drive_state[current_drive]) #define DRWE (&write_errors[current_drive]) -#define UDRWE (&write_errors[drive]) - #define PH_HEAD(floppy, head) (((((floppy)->stretch & 2) >> 1) ^ head) << 2) #define STRETCH(floppy) ((floppy)->stretch & FD_STRETCH) @@ -3553,10 +3551,10 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int outparam = &fdc_state[FDC(drive)]; break; case FDWERRORCLR: - memset(UDRWE, 0, sizeof(*UDRWE)); + memset(&write_errors[drive], 0, sizeof(write_errors[drive])); return 0; case FDWERRORGET: - outparam = UDRWE; + outparam = &write_errors[drive]; break; case FDRAWCMD: if (type) @@ -3867,7 +3865,7 @@ static int compat_werrorget(int drive, memset(&v32, 0, sizeof(struct compat_floppy_write_errors)); mutex_lock(&floppy_mutex); - v = *UDRWE; + v = write_errors[drive]; mutex_unlock(&floppy_mutex); v32.write_errors = v.write_errors; v32.first_error_sector = v.first_error_sector; @@ -4643,7 +4641,7 @@ static int __init do_floppy_init(void) /* initialise drive state */ for (drive = 0; drive < N_DRIVE; drive++) { memset(&drive_state[drive], 0, sizeof(drive_state[drive])); - memset(UDRWE, 0, sizeof(*UDRWE)); + memset(&write_errors[drive], 0, sizeof(write_errors[drive])); set_bit(FD_DISK_NEWCHANGE_BIT, &drive_state[drive].flags); set_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags); set_bit(FD_VERIFY_BIT, &drive_state[drive].flags); From 031faabd80452f511676402ff3c7a2dce5cd5678 Mon Sep 17 00:00:00 2001 From: Willy Tarreau 
Date: Mon, 24 Feb 2020 22:23:48 +0100 Subject: [PATCH 22/81] floppy: cleanup: expand macro DP This macro doesn't bring much value and only slightly obfuscates the code by silently using global variable "current_drive", let's expand it. Link: https://lore.kernel.org/r/20200224212352.8640-7-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 84 +++++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 37 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index a76a9bbaa3e0..7744e4281743 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -306,7 +306,6 @@ static bool initialized; /* reverse mapping from unit and fdc to drive */ #define REVDRIVE(fdc, unit) ((unit) + ((fdc) << 2)) -#define DP (&drive_params[current_drive]) #define DRS (&drive_state[current_drive]) #define DRWE (&write_errors[current_drive]) @@ -624,7 +623,7 @@ static inline void set_debugt(void) static inline void debugt(const char *func, const char *msg) { - if (DP->flags & DEBUGT) + if (drive_params[current_drive].flags & DEBUGT) pr_info("%s:%s dtime=%lu\n", func, msg, jiffies - debugtimer); } #else @@ -824,7 +823,7 @@ static int set_dor(int fdc, char mask, char data) static void twaddle(void) { - if (DP->select_delay) + if (drive_params[current_drive].select_delay) return; fd_outb(fdc_state[fdc].dor & ~(0x10 << UNIT(current_drive)), FD_DOR); fd_outb(fdc_state[fdc].dor, FD_DOR); @@ -950,7 +949,7 @@ static void scandrives(void) int drive; int saved_drive; - if (DP->select_delay) + if (drive_params[current_drive].select_delay) return; saved_drive = current_drive; @@ -1009,7 +1008,8 @@ static void cancel_activity(void) * transfer */ static void fd_watchdog(void) { - debug_dcl(DP->flags, "calling disk change from watchdog\n"); + debug_dcl(drive_params[current_drive].flags, + "calling disk change from watchdog\n"); if (disk_change(current_drive)) { DPRINT("disk removed during 
i/o\n"); @@ -1310,20 +1310,23 @@ static void fdc_specify(void) } /* Convert step rate from microseconds to milliseconds and 4 bits */ - srt = 16 - DIV_ROUND_UP(DP->srt * scale_dtr / 1000, NOMINAL_DTR); + srt = 16 - DIV_ROUND_UP(drive_params[current_drive].srt * scale_dtr / 1000, + NOMINAL_DTR); if (slow_floppy) srt = srt / 4; SUPBOUND(srt, 0xf); INFBOUND(srt, 0); - hlt = DIV_ROUND_UP(DP->hlt * scale_dtr / 2, NOMINAL_DTR); + hlt = DIV_ROUND_UP(drive_params[current_drive].hlt * scale_dtr / 2, + NOMINAL_DTR); if (hlt < 0x01) hlt = 0x01; else if (hlt > 0x7f) hlt = hlt_max_code; - hut = DIV_ROUND_UP(DP->hut * scale_dtr / 16, NOMINAL_DTR); + hut = DIV_ROUND_UP(drive_params[current_drive].hut * scale_dtr / 16, + NOMINAL_DTR); if (hut < 0x1) hut = 0x1; else if (hut > 0xf) @@ -1430,10 +1433,10 @@ static int interpret_errors(void) } else if (ST1 & ST1_ND) { set_bit(FD_NEED_TWADDLE_BIT, &DRS->flags); } else if (ST1 & ST1_OR) { - if (DP->flags & FTD_MSG) + if (drive_params[current_drive].flags & FTD_MSG) DPRINT("Over/Underrun - retrying\n"); bad = 0; - } else if (*errors >= DP->max_errors.reporting) { + } else if (*errors >= drive_params[current_drive].max_errors.reporting) { print_errors(); } if (ST2 & ST2_WC || ST2 & ST2_BC) @@ -1471,13 +1474,13 @@ static void setup_rw_floppy(void) flags |= FD_RAW_INTR; if ((flags & FD_RAW_SPIN) && !(flags & FD_RAW_NO_MOTOR)) { - ready_date = DRS->spinup_date + DP->spinup; + ready_date = DRS->spinup_date + drive_params[current_drive].spinup; /* If spinup will take a long time, rerun scandrives * again just before spinup completion. Beware that * after scandrives, we must again wait for selection. 
*/ - if (time_after(ready_date, jiffies + DP->select_delay)) { - ready_date -= DP->select_delay; + if (time_after(ready_date, jiffies + drive_params[current_drive].select_delay)) { + ready_date -= drive_params[current_drive].select_delay; function = floppy_start; } else function = setup_rw_floppy; @@ -1528,9 +1531,10 @@ static void seek_interrupt(void) return; } if (DRS->track >= 0 && DRS->track != ST1 && !blind_seek) { - debug_dcl(DP->flags, + debug_dcl(drive_params[current_drive].flags, "clearing NEWCHANGE flag because of effective seek\n"); - debug_dcl(DP->flags, "jiffies=%lu\n", jiffies); + debug_dcl(drive_params[current_drive].flags, "jiffies=%lu\n", + jiffies); clear_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags); /* effective seek */ DRS->select_date = jiffies; @@ -1551,9 +1555,10 @@ static void check_wp(void) } clear_bit(FD_VERIFY_BIT, &DRS->flags); clear_bit(FD_NEED_TWADDLE_BIT, &DRS->flags); - debug_dcl(DP->flags, + debug_dcl(drive_params[current_drive].flags, "checking whether disk is write protected\n"); - debug_dcl(DP->flags, "wp=%x\n", ST3 & 0x40); + debug_dcl(drive_params[current_drive].flags, "wp=%x\n", + ST3 & 0x40); if (!(ST3 & 0x40)) set_bit(FD_DISK_WRITABLE_BIT, &DRS->flags); else @@ -1567,7 +1572,8 @@ static void seek_floppy(void) blind_seek = 0; - debug_dcl(DP->flags, "calling disk change from %s\n", __func__); + debug_dcl(drive_params[current_drive].flags, + "calling disk change from %s\n", __func__); if (!test_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags) && disk_change(current_drive) && (raw_cmd->flags & FD_RAW_NEED_DISK)) { @@ -1591,7 +1597,7 @@ static void seek_floppy(void) if (raw_cmd->track) track = raw_cmd->track - 1; else { - if (DP->flags & FD_SILENT_DCL_CLEAR) { + if (drive_params[current_drive].flags & FD_SILENT_DCL_CLEAR) { set_dor(fdc, ~(0x10 << UNIT(current_drive)), 0); blind_seek = 1; raw_cmd->flags |= FD_RAW_NEED_SEEK; @@ -1643,7 +1649,7 @@ static void recal_interrupt(void) * not to move at recalibration is to * be already at track 0.) 
Clear the * new change flag */ - debug_dcl(DP->flags, + debug_dcl(drive_params[current_drive].flags, "clearing NEWCHANGE flag because of second recalibrate\n"); clear_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags); @@ -1884,7 +1890,7 @@ static int start_motor(void (*function)(void)) set_dor(fdc, mask, data); /* wait_for_completion also schedules reset if needed. */ - return fd_wait_for_completion(DRS->select_date + DP->select_delay, + return fd_wait_for_completion(DRS->select_date + drive_params[current_drive].select_delay, function); } @@ -1899,9 +1905,10 @@ static void floppy_ready(void) if (fdc_dtr()) return; - debug_dcl(DP->flags, "calling disk change from floppy_ready\n"); + debug_dcl(drive_params[current_drive].flags, + "calling disk change from floppy_ready\n"); if (!(raw_cmd->flags & FD_RAW_NO_MOTOR) && - disk_change(current_drive) && !DP->select_delay) + disk_change(current_drive) && !drive_params[current_drive].select_delay) twaddle(); /* this clears the dcl on certain * drive/controller combinations */ @@ -1930,7 +1937,8 @@ static void floppy_start(void) reschedule_timeout(current_reqD, "floppy start"); scandrives(); - debug_dcl(DP->flags, "setting NEWCHANGE in floppy_start\n"); + debug_dcl(drive_params[current_drive].flags, + "setting NEWCHANGE in floppy_start\n"); set_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags); floppy_ready(); } @@ -2032,11 +2040,11 @@ static int next_valid_format(void) probed_format = DRS->probed_format; while (1) { - if (probed_format >= 8 || !DP->autodetect[probed_format]) { + if (probed_format >= 8 || !drive_params[current_drive].autodetect[probed_format]) { DRS->probed_format = 0; return 1; } - if (floppy_type[DP->autodetect[probed_format]].sect) { + if (floppy_type[drive_params[current_drive].autodetect[probed_format]].sect) { DRS->probed_format = probed_format; return 0; } @@ -2055,11 +2063,11 @@ static void bad_flp_intr(void) } err_count = ++(*errors); INFBOUND(DRWE->badness, err_count); - if (err_count > DP->max_errors.abort) + if 
(err_count > drive_params[current_drive].max_errors.abort) cont->done(0); - if (err_count > DP->max_errors.reset) + if (err_count > drive_params[current_drive].max_errors.reset) fdc_state[fdc].reset = 1; - else if (err_count > DP->max_errors.recal) + else if (err_count > drive_params[current_drive].max_errors.recal) DRS->track = NEED_2_RECAL; } @@ -2189,7 +2197,7 @@ static int do_format(int drive, struct format_descr *tmp_format_req) set_floppy(drive); if (!_floppy || - _floppy->track > DP->tracks || + _floppy->track > drive_params[current_drive].tracks || tmp_format_req->track >= _floppy->track || tmp_format_req->head >= _floppy->head || (_floppy->sect << 2) % (1 << FD_SIZECODE(_floppy)) || @@ -2345,7 +2353,7 @@ static void rw_interrupt(void) } if (probing) { - if (DP->flags & FTD_MSG) + if (drive_params[current_drive].flags & FTD_MSG) DPRINT("Auto-detected floppy type %s in fd%d\n", _floppy->name, current_drive); current_type[current_drive] = _floppy; @@ -2675,9 +2683,9 @@ static int make_raw_rw_request(void) */ if (!direct || (indirect * 2 > direct * 3 && - *errors < DP->max_errors.read_track && + *errors < drive_params[current_drive].max_errors.read_track && ((!probing || - (DP->read_track & (1 << DRS->probed_format)))))) { + (drive_params[current_drive].read_track & (1 << DRS->probed_format)))))) { max_size = blk_rq_sectors(current_req); } else { raw_cmd->kernel_data = bio_data(current_req->bio); @@ -2855,7 +2863,7 @@ do_request: } } probing = 1; - _floppy = floppy_type + DP->autodetect[DRS->probed_format]; + _floppy = floppy_type + drive_params[current_drive].autodetect[DRS->probed_format]; } else probing = 0; errors = &(current_req->error_count); @@ -2934,7 +2942,8 @@ static int poll_drive(bool interruptible, int flag) raw_cmd->track = 0; raw_cmd->cmd_count = 0; cont = &poll_cont; - debug_dcl(DP->flags, "setting NEWCHANGE in poll_drive\n"); + debug_dcl(drive_params[current_drive].flags, + "setting NEWCHANGE in poll_drive\n"); set_bit(FD_DISK_NEWCHANGE_BIT, 
&DRS->flags); return wait_til_done(floppy_ready, interruptible); @@ -3205,7 +3214,8 @@ static int raw_cmd_ioctl(int cmd, void __user *param) raw_cmd = my_raw_cmd; cont = &raw_cmd_cont; ret = wait_til_done(floppy_start, true); - debug_dcl(DP->flags, "calling disk change from raw_cmd ioctl\n"); + debug_dcl(drive_params[current_drive].flags, + "calling disk change from raw_cmd ioctl\n"); if (ret != -EINTR && fdc_state[fdc].reset) ret = -EIO; @@ -4386,7 +4396,7 @@ static void __init set_cmos(int *ints, int dummy, int dummy2) if (current_drive >= 4 && !FDC2) FDC2 = 0x370; #endif - DP->cmos = ints[2]; + drive_params[current_drive].cmos = ints[2]; DPRINT("setting CMOS code to %d\n", ints[2]); } From 3bd7f87c685bc588d16b486301aa571b7dff9c8d Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 24 Feb 2020 22:23:49 +0100 Subject: [PATCH 23/81] floppy: cleanup: expand macro DRS This macro doesn't bring much value and only slightly obfuscates the code by silently using global variable "current_drive", let's expand it. 
Link: https://lore.kernel.org/r/20200224212352.8640-8-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 115 ++++++++++++++++++++++------------------- 1 file changed, 61 insertions(+), 54 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 7744e4281743..6d4a2e14799a 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -306,7 +306,6 @@ static bool initialized; /* reverse mapping from unit and fdc to drive */ #define REVDRIVE(fdc, unit) ((unit) + ((fdc) << 2)) -#define DRS (&drive_state[current_drive]) #define DRWE (&write_errors[current_drive]) #define PH_HEAD(floppy, head) (((((floppy)->stretch & 2) >> 1) ^ head) << 2) @@ -827,7 +826,7 @@ static void twaddle(void) return; fd_outb(fdc_state[fdc].dor & ~(0x10 << UNIT(current_drive)), FD_DOR); fd_outb(fdc_state[fdc].dor, FD_DOR); - DRS->select_date = jiffies; + drive_state[current_drive].select_date = jiffies; } /* @@ -1427,11 +1426,13 @@ static int interpret_errors(void) bad = 1; if (ST1 & ST1_WP) { DPRINT("Drive is write protected\n"); - clear_bit(FD_DISK_WRITABLE_BIT, &DRS->flags); + clear_bit(FD_DISK_WRITABLE_BIT, + &drive_state[current_drive].flags); cont->done(0); bad = 2; } else if (ST1 & ST1_ND) { - set_bit(FD_NEED_TWADDLE_BIT, &DRS->flags); + set_bit(FD_NEED_TWADDLE_BIT, + &drive_state[current_drive].flags); } else if (ST1 & ST1_OR) { if (drive_params[current_drive].flags & FTD_MSG) DPRINT("Over/Underrun - retrying\n"); @@ -1441,7 +1442,7 @@ static int interpret_errors(void) } if (ST2 & ST2_WC || ST2 & ST2_BC) /* wrong cylinder => recal */ - DRS->track = NEED_2_RECAL; + drive_state[current_drive].track = NEED_2_RECAL; return bad; case 0x80: /* invalid command given */ DPRINT("Invalid FDC command given!\n"); @@ -1474,7 +1475,7 @@ static void setup_rw_floppy(void) flags |= FD_RAW_INTR; if ((flags & FD_RAW_SPIN) && !(flags & FD_RAW_NO_MOTOR)) { - ready_date = DRS->spinup_date + 
drive_params[current_drive].spinup; + ready_date = drive_state[current_drive].spinup_date + drive_params[current_drive].spinup; /* If spinup will take a long time, rerun scandrives * again just before spinup completion. Beware that * after scandrives, we must again wait for selection. @@ -1525,27 +1526,28 @@ static void seek_interrupt(void) debugt(__func__, ""); if (inr != 2 || (ST0 & 0xF8) != 0x20) { DPRINT("seek failed\n"); - DRS->track = NEED_2_RECAL; + drive_state[current_drive].track = NEED_2_RECAL; cont->error(); cont->redo(); return; } - if (DRS->track >= 0 && DRS->track != ST1 && !blind_seek) { + if (drive_state[current_drive].track >= 0 && drive_state[current_drive].track != ST1 && !blind_seek) { debug_dcl(drive_params[current_drive].flags, "clearing NEWCHANGE flag because of effective seek\n"); debug_dcl(drive_params[current_drive].flags, "jiffies=%lu\n", jiffies); - clear_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags); + clear_bit(FD_DISK_NEWCHANGE_BIT, + &drive_state[current_drive].flags); /* effective seek */ - DRS->select_date = jiffies; + drive_state[current_drive].select_date = jiffies; } - DRS->track = ST1; + drive_state[current_drive].track = ST1; floppy_ready(); } static void check_wp(void) { - if (test_bit(FD_VERIFY_BIT, &DRS->flags)) { + if (test_bit(FD_VERIFY_BIT, &drive_state[current_drive].flags)) { /* check write protection */ output_byte(FD_GETSTATUS); output_byte(UNIT(current_drive)); @@ -1553,16 +1555,19 @@ static void check_wp(void) fdc_state[fdc].reset = 1; return; } - clear_bit(FD_VERIFY_BIT, &DRS->flags); - clear_bit(FD_NEED_TWADDLE_BIT, &DRS->flags); + clear_bit(FD_VERIFY_BIT, &drive_state[current_drive].flags); + clear_bit(FD_NEED_TWADDLE_BIT, + &drive_state[current_drive].flags); debug_dcl(drive_params[current_drive].flags, "checking whether disk is write protected\n"); debug_dcl(drive_params[current_drive].flags, "wp=%x\n", ST3 & 0x40); if (!(ST3 & 0x40)) - set_bit(FD_DISK_WRITABLE_BIT, &DRS->flags); + set_bit(FD_DISK_WRITABLE_BIT, + 
&drive_state[current_drive].flags); else - clear_bit(FD_DISK_WRITABLE_BIT, &DRS->flags); + clear_bit(FD_DISK_WRITABLE_BIT, + &drive_state[current_drive].flags); } } @@ -1575,23 +1580,24 @@ static void seek_floppy(void) debug_dcl(drive_params[current_drive].flags, "calling disk change from %s\n", __func__); - if (!test_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags) && + if (!test_bit(FD_DISK_NEWCHANGE_BIT, &drive_state[current_drive].flags) && disk_change(current_drive) && (raw_cmd->flags & FD_RAW_NEED_DISK)) { /* the media changed flag should be cleared after the seek. * If it isn't, this means that there is really no disk in * the drive. */ - set_bit(FD_DISK_CHANGED_BIT, &DRS->flags); + set_bit(FD_DISK_CHANGED_BIT, + &drive_state[current_drive].flags); cont->done(0); cont->redo(); return; } - if (DRS->track <= NEED_1_RECAL) { + if (drive_state[current_drive].track <= NEED_1_RECAL) { recalibrate_floppy(); return; - } else if (test_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags) && + } else if (test_bit(FD_DISK_NEWCHANGE_BIT, &drive_state[current_drive].flags) && (raw_cmd->flags & FD_RAW_NEED_DISK) && - (DRS->track <= NO_TRACK || DRS->track == raw_cmd->track)) { + (drive_state[current_drive].track <= NO_TRACK || drive_state[current_drive].track == raw_cmd->track)) { /* we seek to clear the media-changed condition. Does anybody * know a more elegant way, which works on all drives? 
*/ if (raw_cmd->track) @@ -1606,7 +1612,7 @@ static void seek_floppy(void) } } else { check_wp(); - if (raw_cmd->track != DRS->track && + if (raw_cmd->track != drive_state[current_drive].track && (raw_cmd->flags & FD_RAW_NEED_SEEK)) track = raw_cmd->track; else { @@ -1631,7 +1637,7 @@ static void recal_interrupt(void) if (inr != 2) fdc_state[fdc].reset = 1; else if (ST0 & ST0_ECE) { - switch (DRS->track) { + switch (drive_state[current_drive].track) { case NEED_1_RECAL: debugt(__func__, "need 1 recal"); /* after a second recalibrate, we still haven't @@ -1652,8 +1658,9 @@ static void recal_interrupt(void) debug_dcl(drive_params[current_drive].flags, "clearing NEWCHANGE flag because of second recalibrate\n"); - clear_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags); - DRS->select_date = jiffies; + clear_bit(FD_DISK_NEWCHANGE_BIT, + &drive_state[current_drive].flags); + drive_state[current_drive].select_date = jiffies; /* fall through */ default: debugt(__func__, "default"); @@ -1663,11 +1670,11 @@ static void recal_interrupt(void) * track 0, this might mean that we * started beyond track 80. Try * again. */ - DRS->track = NEED_1_RECAL; + drive_state[current_drive].track = NEED_1_RECAL; break; } } else - DRS->track = ST1; + drive_state[current_drive].track = ST1; floppy_ready(); } @@ -1877,9 +1884,9 @@ static int start_motor(void (*function)(void)) if (!(fdc_state[fdc].dor & (0x10 << UNIT(current_drive)))) { set_debugt(); /* no read since this drive is running */ - DRS->first_read_date = 0; + drive_state[current_drive].first_read_date = 0; /* note motor start time if motor is not yet running */ - DRS->spinup_date = jiffies; + drive_state[current_drive].spinup_date = jiffies; data |= (0x10 << UNIT(current_drive)); } } else if (fdc_state[fdc].dor & (0x10 << UNIT(current_drive))) @@ -1890,7 +1897,7 @@ static int start_motor(void (*function)(void)) set_dor(fdc, mask, data); /* wait_for_completion also schedules reset if needed. 
*/ - return fd_wait_for_completion(DRS->select_date + drive_params[current_drive].select_delay, + return fd_wait_for_completion(drive_state[current_drive].select_date + drive_params[current_drive].select_delay, function); } @@ -1939,7 +1946,7 @@ static void floppy_start(void) scandrives(); debug_dcl(drive_params[current_drive].flags, "setting NEWCHANGE in floppy_start\n"); - set_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags); + set_bit(FD_DISK_NEWCHANGE_BIT, &drive_state[current_drive].flags); floppy_ready(); } @@ -2038,14 +2045,14 @@ static int next_valid_format(void) { int probed_format; - probed_format = DRS->probed_format; + probed_format = drive_state[current_drive].probed_format; while (1) { if (probed_format >= 8 || !drive_params[current_drive].autodetect[probed_format]) { - DRS->probed_format = 0; + drive_state[current_drive].probed_format = 0; return 1; } if (floppy_type[drive_params[current_drive].autodetect[probed_format]].sect) { - DRS->probed_format = probed_format; + drive_state[current_drive].probed_format = probed_format; return 0; } probed_format++; @@ -2057,7 +2064,7 @@ static void bad_flp_intr(void) int err_count; if (probing) { - DRS->probed_format++; + drive_state[current_drive].probed_format++; if (!next_valid_format()) return; } @@ -2068,7 +2075,7 @@ static void bad_flp_intr(void) if (err_count > drive_params[current_drive].max_errors.reset) fdc_state[fdc].reset = 1; else if (err_count > drive_params[current_drive].max_errors.recal) - DRS->track = NEED_2_RECAL; + drive_state[current_drive].track = NEED_2_RECAL; } static void set_floppy(int drive) @@ -2259,9 +2266,9 @@ static void request_done(int uptodate) /* maintain values for invalidation on geometry * change */ block = current_count_sectors + blk_rq_pos(req); - INFBOUND(DRS->maxblock, block); + INFBOUND(drive_state[current_drive].maxblock, block); if (block > _floppy->sect) - DRS->maxtrack = 1; + drive_state[current_drive].maxtrack = 1; floppy_end_request(req, 0); } else { @@ -2270,10 +2277,10 
@@ static void request_done(int uptodate) DRWE->write_errors++; if (DRWE->write_errors == 1) { DRWE->first_error_sector = blk_rq_pos(req); - DRWE->first_error_generation = DRS->generation; + DRWE->first_error_generation = drive_state[current_drive].generation; } DRWE->last_error_sector = blk_rq_pos(req); - DRWE->last_error_generation = DRS->generation; + DRWE->last_error_generation = drive_state[current_drive].generation; } floppy_end_request(req, BLK_STS_IOERR); } @@ -2294,8 +2301,8 @@ static void rw_interrupt(void) return; } - if (!DRS->first_read_date) - DRS->first_read_date = jiffies; + if (!drive_state[current_drive].first_read_date) + drive_state[current_drive].first_read_date = jiffies; nr_sectors = 0; ssize = DIV_ROUND_UP(1 << SIZECODE, 4); @@ -2568,7 +2575,7 @@ static int make_raw_rw_request(void) HEAD = fsector_t / _floppy->sect; if (((_floppy->stretch & (FD_SWAPSIDES | FD_SECTBASEMASK)) || - test_bit(FD_NEED_TWADDLE_BIT, &DRS->flags)) && + test_bit(FD_NEED_TWADDLE_BIT, &drive_state[current_drive].flags)) && fsector_t < _floppy->sect) max_sector = _floppy->sect; @@ -2685,7 +2692,7 @@ static int make_raw_rw_request(void) (indirect * 2 > direct * 3 && *errors < drive_params[current_drive].max_errors.read_track && ((!probing || - (drive_params[current_drive].read_track & (1 << DRS->probed_format)))))) { + (drive_params[current_drive].read_track & (1 << drive_state[current_drive].probed_format)))))) { max_size = blk_rq_sectors(current_req); } else { raw_cmd->kernel_data = bio_data(current_req->bio); @@ -2847,14 +2854,14 @@ do_request: disk_change(current_drive); if (test_bit(current_drive, &fake_change) || - test_bit(FD_DISK_CHANGED_BIT, &DRS->flags)) { + test_bit(FD_DISK_CHANGED_BIT, &drive_state[current_drive].flags)) { DPRINT("disk absent or changed during operation\n"); request_done(0); goto do_request; } if (!_floppy) { /* Autodetection */ if (!probing) { - DRS->probed_format = 0; + drive_state[current_drive].probed_format = 0; if (next_valid_format()) { 
DPRINT("no autodetectable formats\n"); _floppy = NULL; @@ -2863,7 +2870,7 @@ do_request: } } probing = 1; - _floppy = floppy_type + drive_params[current_drive].autodetect[DRS->probed_format]; + _floppy = floppy_type + drive_params[current_drive].autodetect[drive_state[current_drive].probed_format]; } else probing = 0; errors = &(current_req->error_count); @@ -2873,7 +2880,7 @@ do_request: goto do_request; } - if (test_bit(FD_NEED_TWADDLE_BIT, &DRS->flags)) + if (test_bit(FD_NEED_TWADDLE_BIT, &drive_state[current_drive].flags)) twaddle(); schedule_bh(floppy_start); debugt(__func__, "queue fd request"); @@ -2944,7 +2951,7 @@ static int poll_drive(bool interruptible, int flag) cont = &poll_cont; debug_dcl(drive_params[current_drive].flags, "setting NEWCHANGE in poll_drive\n"); - set_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags); + set_bit(FD_DISK_NEWCHANGE_BIT, &drive_state[current_drive].flags); return wait_til_done(floppy_ready, interruptible); } @@ -3220,7 +3227,7 @@ static int raw_cmd_ioctl(int cmd, void __user *param) if (ret != -EINTR && fdc_state[fdc].reset) ret = -EIO; - DRS->track = NO_TRACK; + drive_state[current_drive].track = NO_TRACK; ret2 = raw_cmd_copyout(cmd, param, my_raw_cmd); if (!ret) @@ -3293,16 +3300,16 @@ static int set_geometry(unsigned int cmd, struct floppy_struct *g, current_type[drive] = &user_params[drive]; floppy_sizes[drive] = user_params[drive].size; if (cmd == FDDEFPRM) - DRS->keep_data = -1; + drive_state[current_drive].keep_data = -1; else - DRS->keep_data = 1; + drive_state[current_drive].keep_data = 1; /* invalidation. Invalidate only when needed, i.e. * when there are already sectors in the buffer cache * whose number will change. 
This is useful, because * mtools often changes the geometry of the disk after * looking at the boot block */ - if (DRS->maxblock > user_params[drive].sect || - DRS->maxtrack || + if (drive_state[current_drive].maxblock > user_params[drive].sect || + drive_state[current_drive].maxtrack || ((user_params[drive].sect ^ oldStretch) & (FD_SWAPSIDES | FD_SECTBASEMASK))) invalidate_drive(bdev); From 2a3487527950a4a84917a9e309ed4c76cb78489a Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 24 Feb 2020 22:23:50 +0100 Subject: [PATCH 24/81] floppy: cleanup: expand macro DRWE This macro doesn't bring much value and only slightly obfuscates the code by silently using global variable "current_drive", let's expand it. Link: https://lore.kernel.org/r/20200224212352.8640-9-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 6d4a2e14799a..d771579df57e 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -306,8 +306,6 @@ static bool initialized; /* reverse mapping from unit and fdc to drive */ #define REVDRIVE(fdc, unit) ((unit) + ((fdc) << 2)) -#define DRWE (&write_errors[current_drive]) - #define PH_HEAD(floppy, head) (((((floppy)->stretch & 2) >> 1) ^ head) << 2) #define STRETCH(floppy) ((floppy)->stretch & FD_STRETCH) @@ -2069,7 +2067,7 @@ static void bad_flp_intr(void) return; } err_count = ++(*errors); - INFBOUND(DRWE->badness, err_count); + INFBOUND(write_errors[current_drive].badness, err_count); if (err_count > drive_params[current_drive].max_errors.abort) cont->done(0); if (err_count > drive_params[current_drive].max_errors.reset) @@ -2274,13 +2272,13 @@ static void request_done(int uptodate) } else { if (rq_data_dir(req) == WRITE) { /* record write error information */ - DRWE->write_errors++; - if (DRWE->write_errors == 1) { - DRWE->first_error_sector 
= blk_rq_pos(req); - DRWE->first_error_generation = drive_state[current_drive].generation; + write_errors[current_drive].write_errors++; + if (write_errors[current_drive].write_errors == 1) { + write_errors[current_drive].first_error_sector = blk_rq_pos(req); + write_errors[current_drive].first_error_generation = drive_state[current_drive].generation; } - DRWE->last_error_sector = blk_rq_pos(req); - DRWE->last_error_generation = drive_state[current_drive].generation; + write_errors[current_drive].last_error_sector = blk_rq_pos(req); + write_errors[current_drive].last_error_generation = drive_state[current_drive].generation; } floppy_end_request(req, BLK_STS_IOERR); } From 76dabe79605bebce6f34ca65c0e42e411e4ccb7b Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 24 Feb 2020 22:23:51 +0100 Subject: [PATCH 25/81] floppy: cleanup: expand the R/W / format command macros Various macros were used to access raw_cmd for R/W or format commands without making it obvious that raw_cmd->cmd[] was used. Let's expand the macros to make this more obvious. 
Link: https://lore.kernel.org/r/20200224212352.8640-10-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 194 +++++++++++++++++++++-------------------- 1 file changed, 98 insertions(+), 96 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index d771579df57e..0d5333570544 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -309,23 +309,23 @@ static bool initialized; #define PH_HEAD(floppy, head) (((((floppy)->stretch & 2) >> 1) ^ head) << 2) #define STRETCH(floppy) ((floppy)->stretch & FD_STRETCH) -/* read/write */ -#define COMMAND (raw_cmd->cmd[0]) -#define DR_SELECT (raw_cmd->cmd[1]) -#define TRACK (raw_cmd->cmd[2]) -#define HEAD (raw_cmd->cmd[3]) -#define SECTOR (raw_cmd->cmd[4]) -#define SIZECODE (raw_cmd->cmd[5]) -#define SECT_PER_TRACK (raw_cmd->cmd[6]) -#define GAP (raw_cmd->cmd[7]) -#define SIZECODE2 (raw_cmd->cmd[8]) +/* read/write commands */ +#define COMMAND 0 +#define DR_SELECT 1 +#define TRACK 2 +#define HEAD 3 +#define SECTOR 4 +#define SIZECODE 5 +#define SECT_PER_TRACK 6 +#define GAP 7 +#define SIZECODE2 8 #define NR_RW 9 -/* format */ -#define F_SIZECODE (raw_cmd->cmd[2]) -#define F_SECT_PER_TRACK (raw_cmd->cmd[3]) -#define F_GAP (raw_cmd->cmd[4]) -#define F_FILL (raw_cmd->cmd[5]) +/* format commands */ +#define F_SIZECODE 2 +#define F_SECT_PER_TRACK 3 +#define F_GAP 4 +#define F_FILL 5 #define NR_F 6 /* @@ -2124,28 +2124,28 @@ static void setup_format_params(int track) FD_RAW_NEED_DISK | FD_RAW_NEED_SEEK); raw_cmd->rate = _floppy->rate & 0x43; raw_cmd->cmd_count = NR_F; - COMMAND = FM_MODE(_floppy, FD_FORMAT); - DR_SELECT = UNIT(current_drive) + PH_HEAD(_floppy, format_req.head); - F_SIZECODE = FD_SIZECODE(_floppy); - F_SECT_PER_TRACK = _floppy->sect << 2 >> F_SIZECODE; - F_GAP = _floppy->fmt_gap; - F_FILL = FD_FILL_BYTE; + raw_cmd->cmd[COMMAND] = FM_MODE(_floppy, FD_FORMAT); + raw_cmd->cmd[DR_SELECT] = UNIT(current_drive) + 
PH_HEAD(_floppy, format_req.head); + raw_cmd->cmd[F_SIZECODE] = FD_SIZECODE(_floppy); + raw_cmd->cmd[F_SECT_PER_TRACK] = _floppy->sect << 2 >> raw_cmd->cmd[F_SIZECODE]; + raw_cmd->cmd[F_GAP] = _floppy->fmt_gap; + raw_cmd->cmd[F_FILL] = FD_FILL_BYTE; raw_cmd->kernel_data = floppy_track_buffer; - raw_cmd->length = 4 * F_SECT_PER_TRACK; + raw_cmd->length = 4 * raw_cmd->cmd[F_SECT_PER_TRACK]; - if (!F_SECT_PER_TRACK) + if (!raw_cmd->cmd[F_SECT_PER_TRACK]) return; /* allow for about 30ms for data transport per track */ - head_shift = (F_SECT_PER_TRACK + 5) / 6; + head_shift = (raw_cmd->cmd[F_SECT_PER_TRACK] + 5) / 6; /* a ``cylinder'' is two tracks plus a little stepping time */ track_shift = 2 * head_shift + 3; /* position of logical sector 1 on this track */ n = (track_shift * format_req.track + head_shift * format_req.head) - % F_SECT_PER_TRACK; + % raw_cmd->cmd[F_SECT_PER_TRACK]; /* determine interleave */ il = 1; @@ -2153,27 +2153,27 @@ static void setup_format_params(int track) il++; /* initialize field */ - for (count = 0; count < F_SECT_PER_TRACK; ++count) { + for (count = 0; count < raw_cmd->cmd[F_SECT_PER_TRACK]; ++count) { here[count].track = format_req.track; here[count].head = format_req.head; here[count].sect = 0; - here[count].size = F_SIZECODE; + here[count].size = raw_cmd->cmd[F_SIZECODE]; } /* place logical sectors */ - for (count = 1; count <= F_SECT_PER_TRACK; ++count) { + for (count = 1; count <= raw_cmd->cmd[F_SECT_PER_TRACK]; ++count) { here[n].sect = count; - n = (n + il) % F_SECT_PER_TRACK; + n = (n + il) % raw_cmd->cmd[F_SECT_PER_TRACK]; if (here[n].sect) { /* sector busy, find next free sector */ ++n; - if (n >= F_SECT_PER_TRACK) { - n -= F_SECT_PER_TRACK; + if (n >= raw_cmd->cmd[F_SECT_PER_TRACK]) { + n -= raw_cmd->cmd[F_SECT_PER_TRACK]; while (here[n].sect) ++n; } } } if (_floppy->stretch & FD_SECTBASEMASK) { - for (count = 0; count < F_SECT_PER_TRACK; count++) + for (count = 0; count < raw_cmd->cmd[F_SECT_PER_TRACK]; count++) 
here[count].sect += FD_SECTBASE(_floppy) - 1; } } @@ -2303,32 +2303,32 @@ static void rw_interrupt(void) drive_state[current_drive].first_read_date = jiffies; nr_sectors = 0; - ssize = DIV_ROUND_UP(1 << SIZECODE, 4); + ssize = DIV_ROUND_UP(1 << raw_cmd->cmd[SIZECODE], 4); if (ST1 & ST1_EOC) eoc = 1; else eoc = 0; - if (COMMAND & 0x80) + if (raw_cmd->cmd[COMMAND] & 0x80) heads = 2; else heads = 1; - nr_sectors = (((R_TRACK - TRACK) * heads + - R_HEAD - HEAD) * SECT_PER_TRACK + - R_SECTOR - SECTOR + eoc) << SIZECODE >> 2; + nr_sectors = (((R_TRACK - raw_cmd->cmd[TRACK]) * heads + + R_HEAD - raw_cmd->cmd[HEAD]) * raw_cmd->cmd[SECT_PER_TRACK] + + R_SECTOR - raw_cmd->cmd[SECTOR] + eoc) << raw_cmd->cmd[SIZECODE] >> 2; if (nr_sectors / ssize > DIV_ROUND_UP(in_sector_offset + current_count_sectors, ssize)) { DPRINT("long rw: %x instead of %lx\n", nr_sectors, current_count_sectors); - pr_info("rs=%d s=%d\n", R_SECTOR, SECTOR); - pr_info("rh=%d h=%d\n", R_HEAD, HEAD); - pr_info("rt=%d t=%d\n", R_TRACK, TRACK); + pr_info("rs=%d s=%d\n", R_SECTOR, raw_cmd->cmd[SECTOR]); + pr_info("rh=%d h=%d\n", R_HEAD, raw_cmd->cmd[HEAD]); + pr_info("rt=%d t=%d\n", R_TRACK, raw_cmd->cmd[TRACK]); pr_info("heads=%d eoc=%d\n", heads, eoc); pr_info("spt=%d st=%d ss=%d\n", - SECT_PER_TRACK, fsector_t, ssize); + raw_cmd->cmd[SECT_PER_TRACK], fsector_t, ssize); pr_info("in_sector_offset=%d\n", in_sector_offset); } @@ -2366,11 +2366,11 @@ static void rw_interrupt(void) probing = 0; } - if (CT(COMMAND) != FD_READ || + if (CT(raw_cmd->cmd[COMMAND]) != FD_READ || raw_cmd->kernel_data == bio_data(current_req->bio)) { /* transfer directly from buffer */ cont->done(1); - } else if (CT(COMMAND) == FD_READ) { + } else if (CT(raw_cmd->cmd[COMMAND]) == FD_READ) { buffer_track = raw_cmd->track; buffer_drive = current_drive; INFBOUND(buffer_max, nr_sectors + fsector_t); @@ -2429,13 +2429,13 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2) min(max_sector, max_sector_2), 
blk_rq_sectors(current_req)); - if (current_count_sectors <= 0 && CT(COMMAND) == FD_WRITE && + if (current_count_sectors <= 0 && CT(raw_cmd->cmd[COMMAND]) == FD_WRITE && buffer_max > fsector_t + blk_rq_sectors(current_req)) current_count_sectors = min_t(int, buffer_max - fsector_t, blk_rq_sectors(current_req)); remaining = current_count_sectors << 9; - if (remaining > blk_rq_bytes(current_req) && CT(COMMAND) == FD_WRITE) { + if (remaining > blk_rq_bytes(current_req) && CT(raw_cmd->cmd[COMMAND]) == FD_WRITE) { DPRINT("in copy buffer\n"); pr_info("current_count_sectors=%ld\n", current_count_sectors); pr_info("remaining=%d\n", remaining >> 9); @@ -2470,16 +2470,16 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2) fsector_t, buffer_min); pr_info("current_count_sectors=%ld\n", current_count_sectors); - if (CT(COMMAND) == FD_READ) + if (CT(raw_cmd->cmd[COMMAND]) == FD_READ) pr_info("read\n"); - if (CT(COMMAND) == FD_WRITE) + if (CT(raw_cmd->cmd[COMMAND]) == FD_WRITE) pr_info("write\n"); break; } if (((unsigned long)buffer) % 512) DPRINT("%p buffer not aligned\n", buffer); - if (CT(COMMAND) == FD_READ) + if (CT(raw_cmd->cmd[COMMAND]) == FD_READ) memcpy(buffer, dma_buffer, size); else memcpy(dma_buffer, buffer, size); @@ -2497,7 +2497,7 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2) /* work around a bug in pseudo DMA * (on some FDCs) pseudo DMA does not stop when the CPU stops * sending data. Hence we need a different way to signal the - * transfer length: We use SECT_PER_TRACK. Unfortunately, this + * transfer length: We use raw_cmd->cmd[SECT_PER_TRACK]. 
Unfortunately, this * does not work with MT, hence we can only transfer one head at * a time */ @@ -2506,18 +2506,18 @@ static void virtualdmabug_workaround(void) int hard_sectors; int end_sector; - if (CT(COMMAND) == FD_WRITE) { - COMMAND &= ~0x80; /* switch off multiple track mode */ + if (CT(raw_cmd->cmd[COMMAND]) == FD_WRITE) { + raw_cmd->cmd[COMMAND] &= ~0x80; /* switch off multiple track mode */ - hard_sectors = raw_cmd->length >> (7 + SIZECODE); - end_sector = SECTOR + hard_sectors - 1; - if (end_sector > SECT_PER_TRACK) { + hard_sectors = raw_cmd->length >> (7 + raw_cmd->cmd[SIZECODE]); + end_sector = raw_cmd->cmd[SECTOR] + hard_sectors - 1; + if (end_sector > raw_cmd->cmd[SECT_PER_TRACK]) { pr_info("too many sectors %d > %d\n", - end_sector, SECT_PER_TRACK); + end_sector, raw_cmd->cmd[SECT_PER_TRACK]); return; } - SECT_PER_TRACK = end_sector; - /* make sure SECT_PER_TRACK + raw_cmd->cmd[SECT_PER_TRACK] = end_sector; + /* make sure raw_cmd->cmd[SECT_PER_TRACK] * points to end of transfer */ } } @@ -2550,10 +2550,10 @@ static int make_raw_rw_request(void) raw_cmd->cmd_count = NR_RW; if (rq_data_dir(current_req) == READ) { raw_cmd->flags |= FD_RAW_READ; - COMMAND = FM_MODE(_floppy, FD_READ); + raw_cmd->cmd[COMMAND] = FM_MODE(_floppy, FD_READ); } else if (rq_data_dir(current_req) == WRITE) { raw_cmd->flags |= FD_RAW_WRITE; - COMMAND = FM_MODE(_floppy, FD_WRITE); + raw_cmd->cmd[COMMAND] = FM_MODE(_floppy, FD_WRITE); } else { DPRINT("%s: unknown command\n", __func__); return 0; @@ -2561,16 +2561,16 @@ static int make_raw_rw_request(void) max_sector = _floppy->sect * _floppy->head; - TRACK = (int)blk_rq_pos(current_req) / max_sector; + raw_cmd->cmd[TRACK] = (int)blk_rq_pos(current_req) / max_sector; fsector_t = (int)blk_rq_pos(current_req) % max_sector; - if (_floppy->track && TRACK >= _floppy->track) { + if (_floppy->track && raw_cmd->cmd[TRACK] >= _floppy->track) { if (blk_rq_cur_sectors(current_req) & 1) { current_count_sectors = 1; return 1; } else return 0; 
} - HEAD = fsector_t / _floppy->sect; + raw_cmd->cmd[HEAD] = fsector_t / _floppy->sect; if (((_floppy->stretch & (FD_SWAPSIDES | FD_SECTBASEMASK)) || test_bit(FD_NEED_TWADDLE_BIT, &drive_state[current_drive].flags)) && @@ -2578,7 +2578,7 @@ static int make_raw_rw_request(void) max_sector = _floppy->sect; /* 2M disks have phantom sectors on the first track */ - if ((_floppy->rate & FD_2M) && (!TRACK) && (!HEAD)) { + if ((_floppy->rate & FD_2M) && (!raw_cmd->cmd[TRACK]) && (!raw_cmd->cmd[HEAD])) { max_sector = 2 * _floppy->sect / 3; if (fsector_t >= max_sector) { current_count_sectors = @@ -2586,23 +2586,24 @@ static int make_raw_rw_request(void) blk_rq_sectors(current_req)); return 1; } - SIZECODE = 2; + raw_cmd->cmd[SIZECODE] = 2; } else - SIZECODE = FD_SIZECODE(_floppy); + raw_cmd->cmd[SIZECODE] = FD_SIZECODE(_floppy); raw_cmd->rate = _floppy->rate & 0x43; - if ((_floppy->rate & FD_2M) && (TRACK || HEAD) && raw_cmd->rate == 2) + if ((_floppy->rate & FD_2M) && + (raw_cmd->cmd[TRACK] || raw_cmd->cmd[HEAD]) && raw_cmd->rate == 2) raw_cmd->rate = 1; - if (SIZECODE) - SIZECODE2 = 0xff; + if (raw_cmd->cmd[SIZECODE]) + raw_cmd->cmd[SIZECODE2] = 0xff; else - SIZECODE2 = 0x80; - raw_cmd->track = TRACK << STRETCH(_floppy); - DR_SELECT = UNIT(current_drive) + PH_HEAD(_floppy, HEAD); - GAP = _floppy->gap; - ssize = DIV_ROUND_UP(1 << SIZECODE, 4); - SECT_PER_TRACK = _floppy->sect << 2 >> SIZECODE; - SECTOR = ((fsector_t % _floppy->sect) << 2 >> SIZECODE) + + raw_cmd->cmd[SIZECODE2] = 0x80; + raw_cmd->track = raw_cmd->cmd[TRACK] << STRETCH(_floppy); + raw_cmd->cmd[DR_SELECT] = UNIT(current_drive) + PH_HEAD(_floppy, raw_cmd->cmd[HEAD]); + raw_cmd->cmd[GAP] = _floppy->gap; + ssize = DIV_ROUND_UP(1 << raw_cmd->cmd[SIZECODE], 4); + raw_cmd->cmd[SECT_PER_TRACK] = _floppy->sect << 2 >> raw_cmd->cmd[SIZECODE]; + raw_cmd->cmd[SECTOR] = ((fsector_t % _floppy->sect) << 2 >> raw_cmd->cmd[SIZECODE]) + FD_SECTBASE(_floppy); /* tracksize describes the size which can be filled up with sectors 
@@ -2610,24 +2611,24 @@ static int make_raw_rw_request(void) */ tracksize = _floppy->sect - _floppy->sect % ssize; if (tracksize < _floppy->sect) { - SECT_PER_TRACK++; + raw_cmd->cmd[SECT_PER_TRACK]++; if (tracksize <= fsector_t % _floppy->sect) - SECTOR--; + raw_cmd->cmd[SECTOR]--; /* if we are beyond tracksize, fill up using smaller sectors */ while (tracksize <= fsector_t % _floppy->sect) { while (tracksize + ssize > _floppy->sect) { - SIZECODE--; + raw_cmd->cmd[SIZECODE]--; ssize >>= 1; } - SECTOR++; - SECT_PER_TRACK++; + raw_cmd->cmd[SECTOR]++; + raw_cmd->cmd[SECT_PER_TRACK]++; tracksize += ssize; } - max_sector = HEAD * _floppy->sect + tracksize; - } else if (!TRACK && !HEAD && !(_floppy->rate & FD_2M) && probing) { + max_sector = raw_cmd->cmd[HEAD] * _floppy->sect + tracksize; + } else if (!raw_cmd->cmd[TRACK] && !raw_cmd->cmd[HEAD] && !(_floppy->rate & FD_2M) && probing) { max_sector = _floppy->sect; - } else if (!HEAD && CT(COMMAND) == FD_WRITE) { + } else if (!raw_cmd->cmd[HEAD] && CT(raw_cmd->cmd[COMMAND]) == FD_WRITE) { /* for virtual DMA bug workaround */ max_sector = _floppy->sect; } @@ -2639,12 +2640,12 @@ static int make_raw_rw_request(void) (current_drive == buffer_drive) && (fsector_t >= buffer_min) && (fsector_t < buffer_max)) { /* data already in track buffer */ - if (CT(COMMAND) == FD_READ) { + if (CT(raw_cmd->cmd[COMMAND]) == FD_READ) { copy_buffer(1, max_sector, buffer_max); return 1; } } else if (in_sector_offset || blk_rq_sectors(current_req) < ssize) { - if (CT(COMMAND) == FD_WRITE) { + if (CT(raw_cmd->cmd[COMMAND]) == FD_WRITE) { unsigned int sectors; sectors = fsector_t + blk_rq_sectors(current_req); @@ -2655,7 +2656,7 @@ static int make_raw_rw_request(void) } raw_cmd->flags &= ~FD_RAW_WRITE; raw_cmd->flags |= FD_RAW_READ; - COMMAND = FM_MODE(_floppy, FD_READ); + raw_cmd->cmd[COMMAND] = FM_MODE(_floppy, FD_READ); } else if ((unsigned long)bio_data(current_req->bio) < MAX_DMA_ADDRESS) { unsigned long dma_limit; int direct, indirect; @@ 
-2706,7 +2707,7 @@ static int make_raw_rw_request(void) } } - if (CT(COMMAND) == FD_READ) + if (CT(raw_cmd->cmd[COMMAND]) == FD_READ) max_size = max_sector; /* unbounded */ /* claim buffer track if needed */ @@ -2714,7 +2715,7 @@ static int make_raw_rw_request(void) buffer_drive != current_drive || /* bad drive */ fsector_t > buffer_max || fsector_t < buffer_min || - ((CT(COMMAND) == FD_READ || + ((CT(raw_cmd->cmd[COMMAND]) == FD_READ || (!in_sector_offset && blk_rq_sectors(current_req) >= ssize)) && max_sector > 2 * max_buffer_sectors + buffer_min && max_size + fsector_t > 2 * max_buffer_sectors + buffer_min)) { @@ -2726,7 +2727,7 @@ static int make_raw_rw_request(void) raw_cmd->kernel_data = floppy_track_buffer + ((aligned_sector_t - buffer_min) << 9); - if (CT(COMMAND) == FD_WRITE) { + if (CT(raw_cmd->cmd[COMMAND]) == FD_WRITE) { /* copy write buffer to track buffer. * if we get here, we know that the write * is either aligned or the data already in the buffer @@ -2748,10 +2749,10 @@ static int make_raw_rw_request(void) raw_cmd->length <<= 9; if ((raw_cmd->length < current_count_sectors << 9) || (raw_cmd->kernel_data != bio_data(current_req->bio) && - CT(COMMAND) == FD_WRITE && + CT(raw_cmd->cmd[COMMAND]) == FD_WRITE && (aligned_sector_t + (raw_cmd->length >> 9) > buffer_max || aligned_sector_t < buffer_min)) || - raw_cmd->length % (128 << SIZECODE) || + raw_cmd->length % (128 << raw_cmd->cmd[SIZECODE]) || raw_cmd->length <= 0 || current_count_sectors <= 0) { DPRINT("fractionary current count b=%lx s=%lx\n", raw_cmd->length, current_count_sectors); @@ -2762,9 +2763,10 @@ static int make_raw_rw_request(void) current_count_sectors); pr_info("st=%d ast=%d mse=%d msi=%d\n", fsector_t, aligned_sector_t, max_sector, max_size); - pr_info("ssize=%x SIZECODE=%d\n", ssize, SIZECODE); + pr_info("ssize=%x SIZECODE=%d\n", ssize, raw_cmd->cmd[SIZECODE]); pr_info("command=%x SECTOR=%d HEAD=%d, TRACK=%d\n", - COMMAND, SECTOR, HEAD, TRACK); + raw_cmd->cmd[COMMAND], 
raw_cmd->cmd[SECTOR], + raw_cmd->cmd[HEAD], raw_cmd->cmd[TRACK]); pr_info("buffer drive=%d\n", buffer_drive); pr_info("buffer track=%d\n", buffer_track); pr_info("buffer_min=%d\n", buffer_min); @@ -2783,9 +2785,9 @@ static int make_raw_rw_request(void) fsector_t, buffer_min, raw_cmd->length >> 9); pr_info("current_count_sectors=%ld\n", current_count_sectors); - if (CT(COMMAND) == FD_READ) + if (CT(raw_cmd->cmd[COMMAND]) == FD_READ) pr_info("read\n"); - if (CT(COMMAND) == FD_WRITE) + if (CT(raw_cmd->cmd[COMMAND]) == FD_WRITE) pr_info("write\n"); return 0; } @@ -3253,7 +3255,7 @@ static int set_geometry(unsigned int cmd, struct floppy_struct *g, (int)g->head <= 0 || /* check for overflow in max_sector */ (int)(g->sect * g->head) <= 0 || - /* check for zero in F_SECT_PER_TRACK */ + /* check for zero in raw_cmd->cmd[F_SECT_PER_TRACK] */ (unsigned char)((g->sect << 2) >> FD_SIZECODE(g)) == 0 || g->track <= 0 || g->track > drive_params[drive].tracks >> STRETCH(g) || /* check if reserved bits are set */ From 8fb3845023e9f25d708c47dd50048a470884a946 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 24 Feb 2020 22:23:52 +0100 Subject: [PATCH 26/81] floppy: cleanup: expand the reply_buffer macros Several macros were used to access reply_buffer[] at discrete positions without making it obvious they were relying on this. These ones have been replaced by their offset in the reply buffer to make these accesses more obvious. 
Link: https://lore.kernel.org/r/20200224212352.8640-11-w@1wt.eu Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 86 +++++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 39 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 0d5333570544..d521899b2a3a 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -341,14 +341,14 @@ static bool initialized; #define MAX_REPLIES 16 static unsigned char reply_buffer[MAX_REPLIES]; static int inr; /* size of reply buffer, when called from interrupt */ -#define ST0 (reply_buffer[0]) -#define ST1 (reply_buffer[1]) -#define ST2 (reply_buffer[2]) -#define ST3 (reply_buffer[0]) /* result of GETSTATUS */ -#define R_TRACK (reply_buffer[3]) -#define R_HEAD (reply_buffer[4]) -#define R_SECTOR (reply_buffer[5]) -#define R_SIZECODE (reply_buffer[6]) +#define ST0 0 +#define ST1 1 +#define ST2 2 +#define ST3 0 /* result of GETSTATUS */ +#define R_TRACK 3 +#define R_HEAD 4 +#define R_SECTOR 5 +#define R_SIZECODE 6 #define SEL_DLY (2 * HZ / 100) @@ -1366,34 +1366,37 @@ static int fdc_dtr(void) static void tell_sector(void) { pr_cont(": track %d, head %d, sector %d, size %d", - R_TRACK, R_HEAD, R_SECTOR, R_SIZECODE); + reply_buffer[R_TRACK], reply_buffer[R_HEAD], + reply_buffer[R_SECTOR], + reply_buffer[R_SIZECODE]); } /* tell_sector */ static void print_errors(void) { DPRINT(""); - if (ST0 & ST0_ECE) { + if (reply_buffer[ST0] & ST0_ECE) { pr_cont("Recalibrate failed!"); - } else if (ST2 & ST2_CRC) { + } else if (reply_buffer[ST2] & ST2_CRC) { pr_cont("data CRC error"); tell_sector(); - } else if (ST1 & ST1_CRC) { + } else if (reply_buffer[ST1] & ST1_CRC) { pr_cont("CRC error"); tell_sector(); - } else if ((ST1 & (ST1_MAM | ST1_ND)) || - (ST2 & ST2_MAM)) { + } else if ((reply_buffer[ST1] & (ST1_MAM | ST1_ND)) || + (reply_buffer[ST2] & ST2_MAM)) { if (!probing) { pr_cont("sector not found"); tell_sector(); } else 
pr_cont("probe failed..."); - } else if (ST2 & ST2_WC) { /* seek error */ + } else if (reply_buffer[ST2] & ST2_WC) { /* seek error */ pr_cont("wrong cylinder"); - } else if (ST2 & ST2_BC) { /* cylinder marked as bad */ + } else if (reply_buffer[ST2] & ST2_BC) { /* cylinder marked as bad */ pr_cont("bad cylinder"); } else { pr_cont("unknown error. ST[0..2] are: 0x%x 0x%x 0x%x", - ST0, ST1, ST2); + reply_buffer[ST0], reply_buffer[ST1], + reply_buffer[ST2]); tell_sector(); } pr_cont("\n"); @@ -1417,28 +1420,28 @@ static int interpret_errors(void) } /* check IC to find cause of interrupt */ - switch (ST0 & ST0_INTR) { + switch (reply_buffer[ST0] & ST0_INTR) { case 0x40: /* error occurred during command execution */ - if (ST1 & ST1_EOC) + if (reply_buffer[ST1] & ST1_EOC) return 0; /* occurs with pseudo-DMA */ bad = 1; - if (ST1 & ST1_WP) { + if (reply_buffer[ST1] & ST1_WP) { DPRINT("Drive is write protected\n"); clear_bit(FD_DISK_WRITABLE_BIT, &drive_state[current_drive].flags); cont->done(0); bad = 2; - } else if (ST1 & ST1_ND) { + } else if (reply_buffer[ST1] & ST1_ND) { set_bit(FD_NEED_TWADDLE_BIT, &drive_state[current_drive].flags); - } else if (ST1 & ST1_OR) { + } else if (reply_buffer[ST1] & ST1_OR) { if (drive_params[current_drive].flags & FTD_MSG) DPRINT("Over/Underrun - retrying\n"); bad = 0; } else if (*errors >= drive_params[current_drive].max_errors.reporting) { print_errors(); } - if (ST2 & ST2_WC || ST2 & ST2_BC) + if (reply_buffer[ST2] & ST2_WC || reply_buffer[ST2] & ST2_BC) /* wrong cylinder => recal */ drive_state[current_drive].track = NEED_2_RECAL; return bad; @@ -1522,14 +1525,16 @@ static int blind_seek; static void seek_interrupt(void) { debugt(__func__, ""); - if (inr != 2 || (ST0 & 0xF8) != 0x20) { + if (inr != 2 || (reply_buffer[ST0] & 0xF8) != 0x20) { DPRINT("seek failed\n"); drive_state[current_drive].track = NEED_2_RECAL; cont->error(); cont->redo(); return; } - if (drive_state[current_drive].track >= 0 && drive_state[current_drive].track != 
ST1 && !blind_seek) { + if (drive_state[current_drive].track >= 0 && + drive_state[current_drive].track != reply_buffer[ST1] && + !blind_seek) { debug_dcl(drive_params[current_drive].flags, "clearing NEWCHANGE flag because of effective seek\n"); debug_dcl(drive_params[current_drive].flags, "jiffies=%lu\n", @@ -1539,7 +1544,7 @@ static void seek_interrupt(void) /* effective seek */ drive_state[current_drive].select_date = jiffies; } - drive_state[current_drive].track = ST1; + drive_state[current_drive].track = reply_buffer[ST1]; floppy_ready(); } @@ -1559,8 +1564,8 @@ static void check_wp(void) debug_dcl(drive_params[current_drive].flags, "checking whether disk is write protected\n"); debug_dcl(drive_params[current_drive].flags, "wp=%x\n", - ST3 & 0x40); - if (!(ST3 & 0x40)) + reply_buffer[ST3] & 0x40); + if (!(reply_buffer[ST3] & 0x40)) set_bit(FD_DISK_WRITABLE_BIT, &drive_state[current_drive].flags); else @@ -1634,7 +1639,7 @@ static void recal_interrupt(void) debugt(__func__, ""); if (inr != 2) fdc_state[fdc].reset = 1; - else if (ST0 & ST0_ECE) { + else if (reply_buffer[ST0] & ST0_ECE) { switch (drive_state[current_drive].track) { case NEED_1_RECAL: debugt(__func__, "need 1 recal"); @@ -1672,7 +1677,7 @@ static void recal_interrupt(void) break; } } else - drive_state[current_drive].track = ST1; + drive_state[current_drive].track = reply_buffer[ST1]; floppy_ready(); } @@ -1734,7 +1739,7 @@ irqreturn_t floppy_interrupt(int irq, void *dev_id) if (do_print) print_result("sensei", inr); max_sensei--; - } while ((ST0 & 0x83) != UNIT(current_drive) && + } while ((reply_buffer[ST0] & 0x83) != UNIT(current_drive) && inr == 2 && max_sensei); } if (!handler) { @@ -2292,7 +2297,7 @@ static void rw_interrupt(void) int heads; int nr_sectors; - if (R_HEAD >= 2) { + if (reply_buffer[R_HEAD] >= 2) { /* some Toshiba floppy controllers occasionnally seem to * return bogus interrupts after read/write operations, which * can be recognized by a bad head number (>= 2) */ @@ -2305,7 
+2310,7 @@ static void rw_interrupt(void) nr_sectors = 0; ssize = DIV_ROUND_UP(1 << raw_cmd->cmd[SIZECODE], 4); - if (ST1 & ST1_EOC) + if (reply_buffer[ST1] & ST1_EOC) eoc = 1; else eoc = 0; @@ -2315,17 +2320,20 @@ static void rw_interrupt(void) else heads = 1; - nr_sectors = (((R_TRACK - raw_cmd->cmd[TRACK]) * heads + - R_HEAD - raw_cmd->cmd[HEAD]) * raw_cmd->cmd[SECT_PER_TRACK] + - R_SECTOR - raw_cmd->cmd[SECTOR] + eoc) << raw_cmd->cmd[SIZECODE] >> 2; + nr_sectors = (((reply_buffer[R_TRACK] - raw_cmd->cmd[TRACK]) * heads + + reply_buffer[R_HEAD] - raw_cmd->cmd[HEAD]) * raw_cmd->cmd[SECT_PER_TRACK] + + reply_buffer[R_SECTOR] - raw_cmd->cmd[SECTOR] + eoc) << raw_cmd->cmd[SIZECODE] >> 2; if (nr_sectors / ssize > DIV_ROUND_UP(in_sector_offset + current_count_sectors, ssize)) { DPRINT("long rw: %x instead of %lx\n", nr_sectors, current_count_sectors); - pr_info("rs=%d s=%d\n", R_SECTOR, raw_cmd->cmd[SECTOR]); - pr_info("rh=%d h=%d\n", R_HEAD, raw_cmd->cmd[HEAD]); - pr_info("rt=%d t=%d\n", R_TRACK, raw_cmd->cmd[TRACK]); + pr_info("rs=%d s=%d\n", reply_buffer[R_SECTOR], + raw_cmd->cmd[SECTOR]); + pr_info("rh=%d h=%d\n", reply_buffer[R_HEAD], + raw_cmd->cmd[HEAD]); + pr_info("rt=%d t=%d\n", reply_buffer[R_TRACK], + raw_cmd->cmd[TRACK]); pr_info("heads=%d eoc=%d\n", heads, eoc); pr_info("spt=%d st=%d ss=%d\n", raw_cmd->cmd[SECT_PER_TRACK], fsector_t, ssize); From 3c6051afa3d4e22273e7eed11186513a12554515 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sun, 1 Mar 2020 20:55:50 +0100 Subject: [PATCH 27/81] floppy: remove dead code for drives scanning on ARM On ARM, function fd_scandrives pre-dates the Git era, is #ifed 0 out, not used, and cannot even compile since it references an fdc variable that's not declared anywhere (supposed to be the global one that we're turning to current_fdc apparently). There was also an ifdef'd-out include of mach/floppy.h that does not exist anymore either. Let's get rid of them since they complicate the fixing of the driver.
Link: https://lore.kernel.org/r/20200301195555.11154-2-w@1wt.eu Cc: Ian Molton Cc: Russell King Cc: Linus Torvalds Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov Signed-off-by: Jens Axboe --- arch/arm/include/asm/floppy.h | 51 ----------------------------------- 1 file changed, 51 deletions(-) diff --git a/arch/arm/include/asm/floppy.h b/arch/arm/include/asm/floppy.h index f4fe4d02cef2..465565274345 100644 --- a/arch/arm/include/asm/floppy.h +++ b/arch/arm/include/asm/floppy.h @@ -8,9 +8,6 @@ */ #ifndef __ASM_ARM_FLOPPY_H #define __ASM_ARM_FLOPPY_H -#if 0 -#include -#endif #define fd_outb(val,port) \ do { \ @@ -69,54 +66,6 @@ do { \ outb(new_dor, FD_DOR); \ } while (0) -/* - * Someday, we'll automatically detect which drives are present... - */ -static inline void fd_scandrives (void) -{ -#if 0 - int floppy, drive_count; - - fd_disable_irq(); - raw_cmd = &default_raw_cmd; - raw_cmd->flags = FD_RAW_SPIN | FD_RAW_NEED_SEEK; - raw_cmd->track = 0; - raw_cmd->rate = ?; - drive_count = 0; - for (floppy = 0; floppy < 4; floppy ++) { - current_drive = drive_count; - /* - * Turn on floppy motor - */ - if (start_motor(redo_fd_request)) - continue; - /* - * Set up FDC - */ - fdc_specify(); - /* - * Tell FDC to recalibrate - */ - output_byte(FD_RECALIBRATE); - LAST_OUT(UNIT(floppy)); - /* wait for command to complete */ - if (!successful) { - int i; - for (i = drive_count; i < 3; i--) - floppy_selects[fdc][i] = floppy_selects[fdc][i + 1]; - floppy_selects[fdc][3] = 0; - floppy -= 1; - } else - drive_count++; - } -#else - floppy_selects[0][0] = 0x10; - floppy_selects[0][1] = 0x21; - floppy_selects[0][2] = 0x23; - floppy_selects[0][3] = 0x33; -#endif -} - #define FDC1 (0x3f0) #define FLOPPY0_TYPE 4 From 336eae37338590ea259954546a44a5f524256b6e Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sun, 1 Mar 2020 20:55:51 +0100 Subject: [PATCH 28/81] floppy: remove incomplete support for second FDC from ARM code The ARM code was written with the apparent hope to one day 
support a second FDC except that the code was incomplete and only touches the first one, which is also reflected by N_FDC==1. However this made its fd_outb() macro artificially depend on the global or local "fdc" variable. Let's get rid of this and make it explicit it doesn't rely on this variable anymore. Link: https://lore.kernel.org/r/20200301195555.11154-3-w@1wt.eu Cc: Ian Molton Cc: Russell King Cc: Linus Torvalds Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov Signed-off-by: Jens Axboe --- arch/arm/include/asm/floppy.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/arch/arm/include/asm/floppy.h b/arch/arm/include/asm/floppy.h index 465565274345..7e58979f02bf 100644 --- a/arch/arm/include/asm/floppy.h +++ b/arch/arm/include/asm/floppy.h @@ -50,17 +50,13 @@ static inline int fd_dma_setup(void *data, unsigned int length, * to a non-zero track, and then restoring it to track 0. If an error occurs, * then there is no floppy drive present. [to be put back in again] */ -static unsigned char floppy_selects[2][4] = -{ - { 0x10, 0x21, 0x23, 0x33 }, - { 0x10, 0x21, 0x23, 0x33 } -}; +static unsigned char floppy_selects[4] = { 0x10, 0x21, 0x23, 0x33 }; #define fd_setdor(dor) \ do { \ int new_dor = (dor); \ if (new_dor & 0xf0) \ - new_dor = (new_dor & 0x0c) | floppy_selects[fdc][new_dor & 3]; \ + new_dor = (new_dor & 0x0c) | floppy_selects[new_dor & 3]; \ else \ new_dor &= 0x0c; \ outb(new_dor, FD_DOR); \ @@ -84,9 +80,7 @@ do { \ */ static void driveswap(int *ints, int dummy, int dummy2) { - floppy_selects[0][0] ^= floppy_selects[0][1]; - floppy_selects[0][1] ^= floppy_selects[0][0]; - floppy_selects[0][0] ^= floppy_selects[0][1]; + swap(floppy_selects[0], floppy_selects[1]); } #define EXTRA_FLOPPY_PARAMS ,{ "driveswap", &driveswap, NULL, 0, 0 } From fc0c5c0c85a83207a16011d34916ec4c5db083a1 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sun, 1 Mar 2020 20:55:52 +0100 Subject: [PATCH 29/81] floppy: prepare ARM code to simplify 
base address separation The fd_outb() macro on ARM relies on a special fd_setdor() macro when the register is FD_DOR and both will need to be changed to accept a separate base address. Let's just remerge them to simplify the change and make this code more easily reviewable. Link: https://lore.kernel.org/r/20200301195555.11154-4-w@1wt.eu Cc: Ian Molton Cc: Russell King Cc: Linus Torvalds Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov Signed-off-by: Jens Axboe --- arch/arm/include/asm/floppy.h | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/arch/arm/include/asm/floppy.h b/arch/arm/include/asm/floppy.h index 7e58979f02bf..34ebd86bf58b 100644 --- a/arch/arm/include/asm/floppy.h +++ b/arch/arm/include/asm/floppy.h @@ -9,12 +9,17 @@ #ifndef __ASM_ARM_FLOPPY_H #define __ASM_ARM_FLOPPY_H -#define fd_outb(val,port) \ - do { \ - if ((port) == (u32)FD_DOR) \ - fd_setdor((val)); \ - else \ - outb((val),(port)); \ +#define fd_outb(val,port) \ + do { \ + int new_val = (val); \ + if ((port) == (u32)FD_DOR) { \ + if (new_val & 0xf0) \ + new_val = (new_val & 0x0c) | \ + floppy_selects[new_val & 3]; \ + else \ + new_val &= 0x0c; \ + } \ + outb(new_val, (port)); \ } while(0) #define fd_inb(port) inb((port)) @@ -52,16 +57,6 @@ static inline int fd_dma_setup(void *data, unsigned int length, */ static unsigned char floppy_selects[4] = { 0x10, 0x21, 0x23, 0x33 }; -#define fd_setdor(dor) \ -do { \ - int new_dor = (dor); \ - if (new_dor & 0xf0) \ - new_dor = (new_dor & 0x0c) | floppy_selects[new_dor & 3]; \ - else \ - new_dor &= 0x0c; \ - outb(new_dor, FD_DOR); \ -} while (0) - #define FDC1 (0x3f0) #define FLOPPY0_TYPE 4 From ac7018614dd958c68ffbb67e8eb6826e1533b96e Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sun, 1 Mar 2020 20:55:53 +0100 Subject: [PATCH 30/81] floppy: introduce new functions fdc_inb() and fdc_outb() These two functions replace fd_inb() and fd_outb() in that they take the FDC in argument. 
This will ease the separation of the base address and the port everywhere the code is used. Link: https://lore.kernel.org/r/20200301195555.11154-5-w@1wt.eu Cc: Linus Torvalds Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index d521899b2a3a..250a451048ac 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -594,6 +594,16 @@ static unsigned char fsector_t; /* sector in track */ static unsigned char in_sector_offset; /* offset within physical sector, * expressed in units of 512 bytes */ +static inline unsigned char fdc_inb(int fdc, unsigned long addr) +{ + return fd_inb(addr); +} + +static inline void fdc_outb(unsigned char value, int fdc, unsigned long addr) +{ + fd_outb(value, addr); +} + static inline bool drive_no_geom(int drive) { return !current_type[drive] && !ITYPE(drive_state[drive].fd_device); @@ -743,14 +753,14 @@ static int disk_change(int drive) "checking disk change line for drive %d\n", drive); debug_dcl(drive_params[drive].flags, "jiffies=%lu\n", jiffies); debug_dcl(drive_params[drive].flags, "disk change line=%x\n", - fd_inb(FD_DIR) & 0x80); + fdc_inb(fdc, FD_DIR) & 0x80); debug_dcl(drive_params[drive].flags, "flags=%lx\n", drive_state[drive].flags); if (drive_params[drive].flags & FD_BROKEN_DCL) return test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags); - if ((fd_inb(FD_DIR) ^ drive_params[drive].flags) & 0x80) { + if ((fdc_inb(fdc, FD_DIR) ^ drive_params[drive].flags) & 0x80) { set_bit(FD_VERIFY_BIT, &drive_state[drive].flags); /* verify write protection */ @@ -807,7 +817,7 @@ static int set_dor(int fdc, char mask, char data) disk_change(drive); } fdc_state[fdc].dor = newdor; - fd_outb(newdor, FD_DOR); + fdc_outb(newdor, fdc, FD_DOR); unit = newdor & 0x3; if (!is_selected(olddor, unit) && is_selected(newdor, 
unit)) { @@ -822,8 +832,8 @@ static void twaddle(void) { if (drive_params[current_drive].select_delay) return; - fd_outb(fdc_state[fdc].dor & ~(0x10 << UNIT(current_drive)), FD_DOR); - fd_outb(fdc_state[fdc].dor, FD_DOR); + fdc_outb(fdc_state[fdc].dor & ~(0x10 << UNIT(current_drive)), fdc, FD_DOR); + fdc_outb(fdc_state[fdc].dor, fdc, FD_DOR); drive_state[current_drive].select_date = jiffies; } @@ -864,7 +874,7 @@ static void set_fdc(int drive) #endif if (fdc_state[fdc].rawcmd == 2) reset_fdc_info(1); - if (fd_inb(FD_STATUS) != STATUS_READY) + if (fdc_inb(fdc, FD_STATUS) != STATUS_READY) fdc_state[fdc].reset = 1; } @@ -1103,7 +1113,7 @@ static int wait_til_ready(void) if (fdc_state[fdc].reset) return -1; for (counter = 0; counter < 10000; counter++) { - status = fd_inb(FD_STATUS); + status = fdc_inb(fdc, FD_STATUS); if (status & STATUS_READY) return status; } @@ -1124,7 +1134,7 @@ static int output_byte(char byte) return -1; if (is_ready_state(status)) { - fd_outb(byte, FD_DATA); + fdc_outb(byte, fdc, FD_DATA); output_log[output_log_pos].data = byte; output_log[output_log_pos].status = status; output_log[output_log_pos].jiffies = jiffies; @@ -1157,7 +1167,7 @@ static int result(void) return i; } if (status == (STATUS_DIR | STATUS_READY | STATUS_BUSY)) - reply_buffer[i] = fd_inb(FD_DATA); + reply_buffer[i] = fdc_inb(fdc, FD_DATA); else break; } @@ -1352,7 +1362,7 @@ static int fdc_dtr(void) return 0; /* Set dtr */ - fd_outb(raw_cmd->rate & 3, FD_DCR); + fdc_outb(raw_cmd->rate & 3, fdc, FD_DCR); /* TODO: some FDC/drive combinations (C&T 82C711 with TEAC 1.2MB) * need a stabilization period of several milliseconds to be @@ -1796,11 +1806,11 @@ static void reset_fdc(void) release_dma_lock(flags); if (fdc_state[fdc].version >= FDC_82072A) - fd_outb(0x80 | (fdc_state[fdc].dtr & 3), FD_STATUS); + fdc_outb(0x80 | (fdc_state[fdc].dtr & 3), fdc, FD_STATUS); else { - fd_outb(fdc_state[fdc].dor & ~0x04, FD_DOR); + fdc_outb(fdc_state[fdc].dor & ~0x04, fdc, FD_DOR); 
udelay(FD_RESET_DELAY); - fd_outb(fdc_state[fdc].dor, FD_DOR); + fdc_outb(fdc_state[fdc].dor, fdc, FD_DOR); } } @@ -1827,7 +1837,7 @@ static void show_floppy(void) print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1, reply_buffer, resultsize, true); - pr_info("status=%x\n", fd_inb(FD_STATUS)); + pr_info("status=%x\n", fdc_inb(fdc, FD_STATUS)); pr_info("fdc_busy=%lu\n", fdc_busy); if (do_floppy) pr_info("do_floppy=%ps\n", do_floppy); @@ -4875,7 +4885,7 @@ static int floppy_grab_irq_and_dma(void) for (fdc = 0; fdc < N_FDC; fdc++) { if (fdc_state[fdc].address != -1) { reset_fdc_info(1); - fd_outb(fdc_state[fdc].dor, FD_DOR); + fdc_outb(fdc_state[fdc].dor, fdc, FD_DOR); } } fdc = 0; @@ -4883,7 +4893,7 @@ static int floppy_grab_irq_and_dma(void) for (fdc = 0; fdc < N_FDC; fdc++) if (fdc_state[fdc].address != -1) - fd_outb(fdc_state[fdc].dor, FD_DOR); + fdc_outb(fdc_state[fdc].dor, fdc, FD_DOR); /* * The driver will try and free resources and relies on us * to know if they were allocated or not. From e2032464fe189c33005a6296fa67c47d2fea4d05 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sun, 1 Mar 2020 20:55:54 +0100 Subject: [PATCH 31/81] floppy: separate the FDC's base address from its registers FDC registers FD_STATUS, FD_DATA, FD_DOR, FD_DIR and FD_DCR used to be defined relative to FD_IOPORT, which is the FDC's base address, itself a macro depending on the "fdc" local or global variable. This patch changes this so that the register macros above now only reference the address offset, and that the FDC's address is explicitly passed in each call to fd_inb() and fd_outb(), thus removing the macro. With this change there is no more implicit usage of the local/global "fdc" variable. One place in the ARM code used to check if the port was equal to FD_DOR, this was changed to testing the register by applying a mask to the port, as was already done in the sparc code. 
There are still occurrences of fd_inb() and fd_outb() in the PARISC code and these ones remain unaffected since they already used to work with a base address and a register offset. The sparc, m68k and parisc code could now be slightly cleaned up to benefit from the macro definitions above instead of the equivalent hard-coded values. Link: https://lore.kernel.org/r/20200301195555.11154-6-w@1wt.eu Cc: Ian Molton Cc: Russell King Cc: Linus Torvalds Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov Signed-off-by: Jens Axboe --- arch/arm/include/asm/floppy.h | 2 +- drivers/block/floppy.c | 9 ++++----- include/uapi/linux/fdreg.h | 18 +++++------------- 3 files changed, 10 insertions(+), 19 deletions(-) diff --git a/arch/arm/include/asm/floppy.h b/arch/arm/include/asm/floppy.h index 34ebd86bf58b..79fa327238e8 100644 --- a/arch/arm/include/asm/floppy.h +++ b/arch/arm/include/asm/floppy.h @@ -12,7 +12,7 @@ #define fd_outb(val,port) \ do { \ int new_val = (val); \ - if ((port) == (u32)FD_DOR) { \ + if (((port) & 7) == FD_DOR) { \ if (new_val & 0xf0) \ new_val = (new_val & 0x0c) | \ floppy_selects[new_val & 3]; \ diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 250a451048ac..4e43a7ef5184 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -171,7 +171,6 @@ static int print_unex = 1; #include #include #include -#define FDPATCHES #include #include #include @@ -594,14 +593,14 @@ static unsigned char fsector_t; /* sector in track */ static unsigned char in_sector_offset; /* offset within physical sector, * expressed in units of 512 bytes */ -static inline unsigned char fdc_inb(int fdc, unsigned long addr) +static inline unsigned char fdc_inb(int fdc, int reg) { - return fd_inb(addr); + return fd_inb(fdc_state[fdc].address + reg); } -static inline void fdc_outb(unsigned char value, int fdc, unsigned long addr) +static inline void fdc_outb(unsigned char value, int fdc, int reg) { - fd_outb(value, addr); + fd_outb(value, 
fdc_state[fdc].address + reg); } static inline bool drive_no_geom(int drive) diff --git a/include/uapi/linux/fdreg.h b/include/uapi/linux/fdreg.h index 5e2981d5c523..1318881954e1 100644 --- a/include/uapi/linux/fdreg.h +++ b/include/uapi/linux/fdreg.h @@ -7,26 +7,18 @@ * Handbook", Sanches and Canton. */ -#ifdef FDPATCHES -#define FD_IOPORT fdc_state[fdc].address -#else -/* It would be a lot saner just to force fdc_state[fdc].address to always - be set ! FIXME */ -#define FD_IOPORT 0x3f0 -#endif - /* Fd controller regs. S&C, about page 340 */ -#define FD_STATUS (4 + FD_IOPORT ) -#define FD_DATA (5 + FD_IOPORT ) +#define FD_STATUS 4 +#define FD_DATA 5 /* Digital Output Register */ -#define FD_DOR (2 + FD_IOPORT ) +#define FD_DOR 2 /* Digital Input Register (read) */ -#define FD_DIR (7 + FD_IOPORT ) +#define FD_DIR 7 /* Diskette Control Register (write)*/ -#define FD_DCR (7 + FD_IOPORT ) +#define FD_DCR 7 /* Bits of main status register */ #define STATUS_BUSYMASK 0x0F /* drive busy mask */ From e83995c9f84161900b80d337d6df358a7803870a Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sun, 1 Mar 2020 20:55:55 +0100 Subject: [PATCH 32/81] floppy: rename the global "fdc" variable to "current_fdc" This is done in order to remove the confusion that arises at some places in the code where local variables or arguments shadow the global variable. It is already visible that some places are a bit awkward and iterate over the global variable, for the sole reason that they used to rely on it being named "fdc" in order to get the correct address when using FD_DOR. These ones are easy to spot by searching for "for (current_fdc...". Some more cleanup is definitely possible. For example "fdc_state[current_fdc].somefield" is used all over the code and would probably be better with "fdc_state->somefield" with fdc_state being set when current_fdc is assigned. This would require to pass the pointer to the current state instead of the current_fdc to the I/O functions. 
Link: https://lore.kernel.org/r/20200301195555.11154-7-w@1wt.eu Cc: Linus Torvalds Signed-off-by: Willy Tarreau Signed-off-by: Denis Efremov Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 267 +++++++++++++++++++++-------------------- 1 file changed, 137 insertions(+), 130 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 4e43a7ef5184..c3daa64cb52c 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -582,7 +582,7 @@ static int buffer_max = -1; /* fdc related variables, should end up in a struct */ static struct floppy_fdc_state fdc_state[N_FDC]; -static int fdc; /* current fdc */ +static int current_fdc; /* current fdc */ static struct workqueue_struct *floppy_wq; @@ -831,8 +831,9 @@ static void twaddle(void) { if (drive_params[current_drive].select_delay) return; - fdc_outb(fdc_state[fdc].dor & ~(0x10 << UNIT(current_drive)), fdc, FD_DOR); - fdc_outb(fdc_state[fdc].dor, fdc, FD_DOR); + fdc_outb(fdc_state[current_fdc].dor & ~(0x10 << UNIT(current_drive)), + current_fdc, FD_DOR); + fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR); drive_state[current_drive].select_date = jiffies; } @@ -844,19 +845,20 @@ static void reset_fdc_info(int mode) { int drive; - fdc_state[fdc].spec1 = fdc_state[fdc].spec2 = -1; - fdc_state[fdc].need_configure = 1; - fdc_state[fdc].perp_mode = 1; - fdc_state[fdc].rawcmd = 0; + fdc_state[current_fdc].spec1 = fdc_state[current_fdc].spec2 = -1; + fdc_state[current_fdc].need_configure = 1; + fdc_state[current_fdc].perp_mode = 1; + fdc_state[current_fdc].rawcmd = 0; for (drive = 0; drive < N_DRIVE; drive++) - if (FDC(drive) == fdc && (mode || drive_state[drive].track != NEED_1_RECAL)) + if (FDC(drive) == current_fdc && + (mode || drive_state[drive].track != NEED_1_RECAL)) drive_state[drive].track = NEED_2_RECAL; } /* selects the fdc and drive, and enables the fdc's input/dma. 
*/ static void set_fdc(int drive) { - unsigned int new_fdc = fdc; + unsigned int new_fdc = current_fdc; if (drive >= 0 && drive < N_DRIVE) { new_fdc = FDC(drive); @@ -866,15 +868,15 @@ static void set_fdc(int drive) pr_info("bad fdc value\n"); return; } - fdc = new_fdc; - set_dor(fdc, ~0, 8); + current_fdc = new_fdc; + set_dor(current_fdc, ~0, 8); #if N_FDC > 1 - set_dor(1 - fdc, ~8, 0); + set_dor(1 - current_fdc, ~8, 0); #endif - if (fdc_state[fdc].rawcmd == 2) + if (fdc_state[current_fdc].rawcmd == 2) reset_fdc_info(1); - if (fdc_inb(fdc, FD_STATUS) != STATUS_READY) - fdc_state[fdc].reset = 1; + if (fdc_inb(current_fdc, FD_STATUS) != STATUS_READY) + fdc_state[current_fdc].reset = 1; } /* locks the driver */ @@ -964,11 +966,11 @@ static void scandrives(void) if (drive_state[drive].fd_ref == 0 || drive_params[drive].select_delay != 0) continue; /* skip closed drives */ set_fdc(drive); - if (!(set_dor(fdc, ~3, UNIT(drive) | (0x10 << UNIT(drive))) & + if (!(set_dor(current_fdc, ~3, UNIT(drive) | (0x10 << UNIT(drive))) & (0x10 << UNIT(drive)))) /* switch the motor off again, if it was off to * begin with */ - set_dor(fdc, ~(0x10 << UNIT(drive)), 0); + set_dor(current_fdc, ~(0x10 << UNIT(drive)), 0); } set_fdc(saved_drive); } @@ -1039,7 +1041,7 @@ static void main_command_interrupt(void) static int fd_wait_for_completion(unsigned long expires, void (*function)(void)) { - if (fdc_state[fdc].reset) { + if (fdc_state[current_fdc].reset) { reset_fdc(); /* do the reset during sleep to win time * if we don't need to sleep, it's a good * occasion anyways */ @@ -1067,13 +1069,13 @@ static void setup_DMA(void) pr_cont("%x,", raw_cmd->cmd[i]); pr_cont("\n"); cont->done(0); - fdc_state[fdc].reset = 1; + fdc_state[current_fdc].reset = 1; return; } if (((unsigned long)raw_cmd->kernel_data) % 512) { pr_info("non aligned address: %p\n", raw_cmd->kernel_data); cont->done(0); - fdc_state[fdc].reset = 1; + fdc_state[current_fdc].reset = 1; return; } f = claim_dma_lock(); @@ -1081,10 
+1083,11 @@ static void setup_DMA(void) #ifdef fd_dma_setup if (fd_dma_setup(raw_cmd->kernel_data, raw_cmd->length, (raw_cmd->flags & FD_RAW_READ) ? - DMA_MODE_READ : DMA_MODE_WRITE, fdc_state[fdc].address) < 0) { + DMA_MODE_READ : DMA_MODE_WRITE, + fdc_state[current_fdc].address) < 0) { release_dma_lock(f); cont->done(0); - fdc_state[fdc].reset = 1; + fdc_state[current_fdc].reset = 1; return; } release_dma_lock(f); @@ -1095,7 +1098,7 @@ static void setup_DMA(void) DMA_MODE_READ : DMA_MODE_WRITE); fd_set_dma_addr(raw_cmd->kernel_data); fd_set_dma_count(raw_cmd->length); - virtual_dma_port = fdc_state[fdc].address; + virtual_dma_port = fdc_state[current_fdc].address; fd_enable_dma(); release_dma_lock(f); #endif @@ -1109,18 +1112,18 @@ static int wait_til_ready(void) int status; int counter; - if (fdc_state[fdc].reset) + if (fdc_state[current_fdc].reset) return -1; for (counter = 0; counter < 10000; counter++) { - status = fdc_inb(fdc, FD_STATUS); + status = fdc_inb(current_fdc, FD_STATUS); if (status & STATUS_READY) return status; } if (initialized) { - DPRINT("Getstatus times out (%x) on fdc %d\n", status, fdc); + DPRINT("Getstatus times out (%x) on fdc %d\n", status, current_fdc); show_floppy(); } - fdc_state[fdc].reset = 1; + fdc_state[current_fdc].reset = 1; return -1; } @@ -1133,17 +1136,17 @@ static int output_byte(char byte) return -1; if (is_ready_state(status)) { - fdc_outb(byte, fdc, FD_DATA); + fdc_outb(byte, current_fdc, FD_DATA); output_log[output_log_pos].data = byte; output_log[output_log_pos].status = status; output_log[output_log_pos].jiffies = jiffies; output_log_pos = (output_log_pos + 1) % OLOGSIZE; return 0; } - fdc_state[fdc].reset = 1; + fdc_state[current_fdc].reset = 1; if (initialized) { DPRINT("Unable to send byte %x to FDC. 
Fdc=%x Status=%x\n", - byte, fdc, status); + byte, current_fdc, status); show_floppy(); } return -1; @@ -1166,16 +1169,16 @@ static int result(void) return i; } if (status == (STATUS_DIR | STATUS_READY | STATUS_BUSY)) - reply_buffer[i] = fdc_inb(fdc, FD_DATA); + reply_buffer[i] = fdc_inb(current_fdc, FD_DATA); else break; } if (initialized) { DPRINT("get result error. Fdc=%d Last status=%x Read bytes=%d\n", - fdc, status, i); + current_fdc, status, i); show_floppy(); } - fdc_state[fdc].reset = 1; + fdc_state[current_fdc].reset = 1; return -1; } @@ -1212,7 +1215,7 @@ static void perpendicular_mode(void) default: DPRINT("Invalid data rate for perpendicular mode!\n"); cont->done(0); - fdc_state[fdc].reset = 1; + fdc_state[current_fdc].reset = 1; /* * convenient way to return to * redo without too much hassle @@ -1223,12 +1226,12 @@ static void perpendicular_mode(void) } else perp_mode = 0; - if (fdc_state[fdc].perp_mode == perp_mode) + if (fdc_state[current_fdc].perp_mode == perp_mode) return; - if (fdc_state[fdc].version >= FDC_82077_ORIG) { + if (fdc_state[current_fdc].version >= FDC_82077_ORIG) { output_byte(FD_PERPENDICULAR); output_byte(perp_mode); - fdc_state[fdc].perp_mode = perp_mode; + fdc_state[current_fdc].perp_mode = perp_mode; } else if (perp_mode) { DPRINT("perpendicular mode not supported by this FDC.\n"); } @@ -1283,9 +1286,10 @@ static void fdc_specify(void) int hlt_max_code = 0x7f; int hut_max_code = 0xf; - if (fdc_state[fdc].need_configure && fdc_state[fdc].version >= FDC_82072A) { + if (fdc_state[current_fdc].need_configure && + fdc_state[current_fdc].version >= FDC_82072A) { fdc_configure(); - fdc_state[fdc].need_configure = 0; + fdc_state[current_fdc].need_configure = 0; } switch (raw_cmd->rate & 0x03) { @@ -1294,7 +1298,7 @@ static void fdc_specify(void) break; case 1: dtr = 300; - if (fdc_state[fdc].version >= FDC_82078) { + if (fdc_state[current_fdc].version >= FDC_82078) { /* chose the default rate table, not the one * where 1 = 2 Mbps */ 
output_byte(FD_DRIVESPEC); @@ -1309,7 +1313,7 @@ static void fdc_specify(void) break; } - if (fdc_state[fdc].version >= FDC_82072) { + if (fdc_state[current_fdc].version >= FDC_82072) { scale_dtr = dtr; hlt_max_code = 0x00; /* 0==256msec*dtr0/dtr (not linear!) */ hut_max_code = 0x0; /* 0==256msec*dtr0/dtr (not linear!) */ @@ -1342,11 +1346,12 @@ static void fdc_specify(void) spec2 = (hlt << 1) | (use_virtual_dma & 1); /* If these parameters did not change, just return with success */ - if (fdc_state[fdc].spec1 != spec1 || fdc_state[fdc].spec2 != spec2) { + if (fdc_state[current_fdc].spec1 != spec1 || + fdc_state[current_fdc].spec2 != spec2) { /* Go ahead and set spec1 and spec2 */ output_byte(FD_SPECIFY); - output_byte(fdc_state[fdc].spec1 = spec1); - output_byte(fdc_state[fdc].spec2 = spec2); + output_byte(fdc_state[current_fdc].spec1 = spec1); + output_byte(fdc_state[current_fdc].spec2 = spec2); } } /* fdc_specify */ @@ -1357,18 +1362,18 @@ static void fdc_specify(void) static int fdc_dtr(void) { /* If data rate not already set to desired value, set it. */ - if ((raw_cmd->rate & 3) == fdc_state[fdc].dtr) + if ((raw_cmd->rate & 3) == fdc_state[current_fdc].dtr) return 0; /* Set dtr */ - fdc_outb(raw_cmd->rate & 3, fdc, FD_DCR); + fdc_outb(raw_cmd->rate & 3, current_fdc, FD_DCR); /* TODO: some FDC/drive combinations (C&T 82C711 with TEAC 1.2MB) * need a stabilization period of several milliseconds to be * enforced after data rate changes before R/W operations. * Pause 5 msec to avoid trouble. 
(Needs to be 2 jiffies) */ - fdc_state[fdc].dtr = raw_cmd->rate & 3; + fdc_state[current_fdc].dtr = raw_cmd->rate & 3; return fd_wait_for_completion(jiffies + 2UL * HZ / 100, floppy_ready); } /* fdc_dtr */ @@ -1424,7 +1429,7 @@ static int interpret_errors(void) if (inr != 7) { DPRINT("-- FDC reply error\n"); - fdc_state[fdc].reset = 1; + fdc_state[current_fdc].reset = 1; return 1; } @@ -1564,7 +1569,7 @@ static void check_wp(void) output_byte(FD_GETSTATUS); output_byte(UNIT(current_drive)); if (result() != 1) { - fdc_state[fdc].reset = 1; + fdc_state[current_fdc].reset = 1; return; } clear_bit(FD_VERIFY_BIT, &drive_state[current_drive].flags); @@ -1616,7 +1621,7 @@ static void seek_floppy(void) track = raw_cmd->track - 1; else { if (drive_params[current_drive].flags & FD_SILENT_DCL_CLEAR) { - set_dor(fdc, ~(0x10 << UNIT(current_drive)), 0); + set_dor(current_fdc, ~(0x10 << UNIT(current_drive)), 0); blind_seek = 1; raw_cmd->flags |= FD_RAW_NEED_SEEK; } @@ -1647,7 +1652,7 @@ static void recal_interrupt(void) { debugt(__func__, ""); if (inr != 2) - fdc_state[fdc].reset = 1; + fdc_state[current_fdc].reset = 1; else if (reply_buffer[ST0] & ST0_ECE) { switch (drive_state[current_drive].track) { case NEED_1_RECAL: @@ -1716,16 +1721,16 @@ irqreturn_t floppy_interrupt(int irq, void *dev_id) release_dma_lock(f); do_floppy = NULL; - if (fdc >= N_FDC || fdc_state[fdc].address == -1) { + if (current_fdc >= N_FDC || fdc_state[current_fdc].address == -1) { /* we don't even know which FDC is the culprit */ pr_info("DOR0=%x\n", fdc_state[0].dor); - pr_info("floppy interrupt on bizarre fdc %d\n", fdc); + pr_info("floppy interrupt on bizarre fdc %d\n", current_fdc); pr_info("handler=%ps\n", handler); is_alive(__func__, "bizarre fdc"); return IRQ_NONE; } - fdc_state[fdc].reset = 0; + fdc_state[current_fdc].reset = 0; /* We have to clear the reset flag here, because apparently on boxes * with level triggered interrupts (PS/2, Sparc, ...), it is needed to * emit SENSEI's to clear the 
interrupt line. And fdc_state[fdc].reset @@ -1752,7 +1757,7 @@ irqreturn_t floppy_interrupt(int irq, void *dev_id) inr == 2 && max_sensei); } if (!handler) { - fdc_state[fdc].reset = 1; + fdc_state[current_fdc].reset = 1; return IRQ_NONE; } schedule_bh(handler); @@ -1778,7 +1783,7 @@ static void reset_interrupt(void) { debugt(__func__, ""); result(); /* get the status ready for set_fdc */ - if (fdc_state[fdc].reset) { + if (fdc_state[current_fdc].reset) { pr_info("reset set in interrupt, calling %ps\n", cont->error); cont->error(); /* a reset just after a reset. BAD! */ } @@ -1794,7 +1799,7 @@ static void reset_fdc(void) unsigned long flags; do_floppy = reset_interrupt; - fdc_state[fdc].reset = 0; + fdc_state[current_fdc].reset = 0; reset_fdc_info(0); /* Pseudo-DMA may intercept 'reset finished' interrupt. */ @@ -1804,12 +1809,13 @@ static void reset_fdc(void) fd_disable_dma(); release_dma_lock(flags); - if (fdc_state[fdc].version >= FDC_82072A) - fdc_outb(0x80 | (fdc_state[fdc].dtr & 3), fdc, FD_STATUS); + if (fdc_state[current_fdc].version >= FDC_82072A) + fdc_outb(0x80 | (fdc_state[current_fdc].dtr & 3), + current_fdc, FD_STATUS); else { - fdc_outb(fdc_state[fdc].dor & ~0x04, fdc, FD_DOR); + fdc_outb(fdc_state[current_fdc].dor & ~0x04, current_fdc, FD_DOR); udelay(FD_RESET_DELAY); - fdc_outb(fdc_state[fdc].dor, fdc, FD_DOR); + fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR); } } @@ -1836,7 +1842,7 @@ static void show_floppy(void) print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1, reply_buffer, resultsize, true); - pr_info("status=%x\n", fdc_inb(fdc, FD_STATUS)); + pr_info("status=%x\n", fdc_inb(current_fdc, FD_STATUS)); pr_info("fdc_busy=%lu\n", fdc_busy); if (do_floppy) pr_info("do_floppy=%ps\n", do_floppy); @@ -1873,7 +1879,7 @@ static void floppy_shutdown(struct work_struct *arg) if (initialized) DPRINT("floppy timeout called\n"); - fdc_state[fdc].reset = 1; + fdc_state[current_fdc].reset = 1; if (cont) { cont->done(0); cont->redo(); /* this 
will recall reset when needed */ @@ -1893,7 +1899,7 @@ static int start_motor(void (*function)(void)) mask = 0xfc; data = UNIT(current_drive); if (!(raw_cmd->flags & FD_RAW_NO_MOTOR)) { - if (!(fdc_state[fdc].dor & (0x10 << UNIT(current_drive)))) { + if (!(fdc_state[current_fdc].dor & (0x10 << UNIT(current_drive)))) { set_debugt(); /* no read since this drive is running */ drive_state[current_drive].first_read_date = 0; @@ -1901,12 +1907,12 @@ static int start_motor(void (*function)(void)) drive_state[current_drive].spinup_date = jiffies; data |= (0x10 << UNIT(current_drive)); } - } else if (fdc_state[fdc].dor & (0x10 << UNIT(current_drive))) + } else if (fdc_state[current_fdc].dor & (0x10 << UNIT(current_drive))) mask &= ~(0x10 << UNIT(current_drive)); /* starts motor and selects floppy */ del_timer(motor_off_timer + current_drive); - set_dor(fdc, mask, data); + set_dor(current_fdc, mask, data); /* wait_for_completion also schedules reset if needed. */ return fd_wait_for_completion(drive_state[current_drive].select_date + drive_params[current_drive].select_delay, @@ -1915,7 +1921,7 @@ static int start_motor(void (*function)(void)) static void floppy_ready(void) { - if (fdc_state[fdc].reset) { + if (fdc_state[current_fdc].reset) { reset_fdc(); return; } @@ -2016,7 +2022,7 @@ static int wait_til_done(void (*handler)(void), bool interruptible) return -EINTR; } - if (fdc_state[fdc].reset) + if (fdc_state[current_fdc].reset) command_status = FD_COMMAND_ERROR; if (command_status == FD_COMMAND_OKAY) ret = 0; @@ -2085,7 +2091,7 @@ static void bad_flp_intr(void) if (err_count > drive_params[current_drive].max_errors.abort) cont->done(0); if (err_count > drive_params[current_drive].max_errors.reset) - fdc_state[fdc].reset = 1; + fdc_state[current_fdc].reset = 1; else if (err_count > drive_params[current_drive].max_errors.recal) drive_state[current_drive].track = NEED_2_RECAL; } @@ -2998,8 +3004,8 @@ static int user_reset_fdc(int drive, int arg, bool interruptible) return 
-EINTR; if (arg == FD_RESET_ALWAYS) - fdc_state[fdc].reset = 1; - if (fdc_state[fdc].reset) { + fdc_state[current_fdc].reset = 1; + if (fdc_state[current_fdc].reset) { cont = &reset_cont; ret = wait_til_done(reset_fdc, interruptible); if (ret == -EINTR) @@ -3210,23 +3216,23 @@ static int raw_cmd_ioctl(int cmd, void __user *param) int ret2; int ret; - if (fdc_state[fdc].rawcmd <= 1) - fdc_state[fdc].rawcmd = 1; + if (fdc_state[current_fdc].rawcmd <= 1) + fdc_state[current_fdc].rawcmd = 1; for (drive = 0; drive < N_DRIVE; drive++) { - if (FDC(drive) != fdc) + if (FDC(drive) != current_fdc) continue; if (drive == current_drive) { if (drive_state[drive].fd_ref > 1) { - fdc_state[fdc].rawcmd = 2; + fdc_state[current_fdc].rawcmd = 2; break; } } else if (drive_state[drive].fd_ref) { - fdc_state[fdc].rawcmd = 2; + fdc_state[current_fdc].rawcmd = 2; break; } } - if (fdc_state[fdc].reset) + if (fdc_state[current_fdc].reset) return -EIO; ret = raw_cmd_copyin(cmd, param, &my_raw_cmd); @@ -3241,7 +3247,7 @@ static int raw_cmd_ioctl(int cmd, void __user *param) debug_dcl(drive_params[current_drive].flags, "calling disk change from raw_cmd ioctl\n"); - if (ret != -EINTR && fdc_state[fdc].reset) + if (ret != -EINTR && fdc_state[current_fdc].reset) ret = -EIO; drive_state[current_drive].track = NO_TRACK; @@ -4297,23 +4303,23 @@ static char __init get_fdc_version(void) int r; output_byte(FD_DUMPREGS); /* 82072 and better know DUMPREGS */ - if (fdc_state[fdc].reset) + if (fdc_state[current_fdc].reset) return FDC_NONE; r = result(); if (r <= 0x00) return FDC_NONE; /* No FDC present ??? 
*/ if ((r == 1) && (reply_buffer[0] == 0x80)) { - pr_info("FDC %d is an 8272A\n", fdc); + pr_info("FDC %d is an 8272A\n", current_fdc); return FDC_8272A; /* 8272a/765 don't know DUMPREGS */ } if (r != 10) { pr_info("FDC %d init: DUMPREGS: unexpected return of %d bytes.\n", - fdc, r); + current_fdc, r); return FDC_UNKNOWN; } if (!fdc_configure()) { - pr_info("FDC %d is an 82072\n", fdc); + pr_info("FDC %d is an 82072\n", current_fdc); return FDC_82072; /* 82072 doesn't know CONFIGURE */ } @@ -4321,50 +4327,50 @@ static char __init get_fdc_version(void) if (need_more_output() == MORE_OUTPUT) { output_byte(0); } else { - pr_info("FDC %d is an 82072A\n", fdc); + pr_info("FDC %d is an 82072A\n", current_fdc); return FDC_82072A; /* 82072A as found on Sparcs. */ } output_byte(FD_UNLOCK); r = result(); if ((r == 1) && (reply_buffer[0] == 0x80)) { - pr_info("FDC %d is a pre-1991 82077\n", fdc); + pr_info("FDC %d is a pre-1991 82077\n", current_fdc); return FDC_82077_ORIG; /* Pre-1991 82077, doesn't know * LOCK/UNLOCK */ } if ((r != 1) || (reply_buffer[0] != 0x00)) { pr_info("FDC %d init: UNLOCK: unexpected return of %d bytes.\n", - fdc, r); + current_fdc, r); return FDC_UNKNOWN; } output_byte(FD_PARTID); r = result(); if (r != 1) { pr_info("FDC %d init: PARTID: unexpected return of %d bytes.\n", - fdc, r); + current_fdc, r); return FDC_UNKNOWN; } if (reply_buffer[0] == 0x80) { - pr_info("FDC %d is a post-1991 82077\n", fdc); + pr_info("FDC %d is a post-1991 82077\n", current_fdc); return FDC_82077; /* Revised 82077AA passes all the tests */ } switch (reply_buffer[0] >> 5) { case 0x0: /* Either a 82078-1 or a 82078SL running at 5Volt */ - pr_info("FDC %d is an 82078.\n", fdc); + pr_info("FDC %d is an 82078.\n", current_fdc); return FDC_82078; case 0x1: - pr_info("FDC %d is a 44pin 82078\n", fdc); + pr_info("FDC %d is a 44pin 82078\n", current_fdc); return FDC_82078; case 0x2: - pr_info("FDC %d is a S82078B\n", fdc); + pr_info("FDC %d is a S82078B\n", current_fdc); return 
FDC_S82078B; case 0x3: - pr_info("FDC %d is a National Semiconductor PC87306\n", fdc); + pr_info("FDC %d is a National Semiconductor PC87306\n", current_fdc); return FDC_87306; default: pr_info("FDC %d init: 82078 variant with unknown PARTID=%d.\n", - fdc, reply_buffer[0] >> 5); + current_fdc, reply_buffer[0] >> 5); return FDC_82078_UNKN; } } /* get_fdc_version */ @@ -4640,16 +4646,16 @@ static int __init do_floppy_init(void) config_types(); for (i = 0; i < N_FDC; i++) { - fdc = i; - memset(&fdc_state[fdc], 0, sizeof(*fdc_state)); - fdc_state[fdc].dtr = -1; - fdc_state[fdc].dor = 0x4; + current_fdc = i; + memset(&fdc_state[current_fdc], 0, sizeof(*fdc_state)); + fdc_state[current_fdc].dtr = -1; + fdc_state[current_fdc].dor = 0x4; #if defined(__sparc__) || defined(__mc68000__) /*sparcs/sun3x don't have a DOR reset which we can fall back on to */ #ifdef __mc68000__ if (MACH_IS_SUN3X) #endif - fdc_state[fdc].version = FDC_82072A; + fdc_state[current_fdc].version = FDC_82072A; #endif } @@ -4664,7 +4670,7 @@ static int __init do_floppy_init(void) fdc_state[1].address = FDC2; #endif - fdc = 0; /* reset fdc in case of unexpected interrupt */ + current_fdc = 0; /* reset fdc in case of unexpected interrupt */ err = floppy_grab_irq_and_dma(); if (err) { cancel_delayed_work(&fd_timeout); @@ -4691,29 +4697,30 @@ static int __init do_floppy_init(void) msleep(10); for (i = 0; i < N_FDC; i++) { - fdc = i; - fdc_state[fdc].driver_version = FD_DRIVER_VERSION; + current_fdc = i; + fdc_state[current_fdc].driver_version = FD_DRIVER_VERSION; for (unit = 0; unit < 4; unit++) - fdc_state[fdc].track[unit] = 0; - if (fdc_state[fdc].address == -1) + fdc_state[current_fdc].track[unit] = 0; + if (fdc_state[current_fdc].address == -1) continue; - fdc_state[fdc].rawcmd = 2; + fdc_state[current_fdc].rawcmd = 2; if (user_reset_fdc(-1, FD_RESET_ALWAYS, false)) { /* free ioports reserved by floppy_grab_irq_and_dma() */ - floppy_release_regions(fdc); - fdc_state[fdc].address = -1; - 
fdc_state[fdc].version = FDC_NONE; + floppy_release_regions(current_fdc); + fdc_state[current_fdc].address = -1; + fdc_state[current_fdc].version = FDC_NONE; continue; } /* Try to determine the floppy controller type */ - fdc_state[fdc].version = get_fdc_version(); - if (fdc_state[fdc].version == FDC_NONE) { + fdc_state[current_fdc].version = get_fdc_version(); + if (fdc_state[current_fdc].version == FDC_NONE) { /* free ioports reserved by floppy_grab_irq_and_dma() */ - floppy_release_regions(fdc); - fdc_state[fdc].address = -1; + floppy_release_regions(current_fdc); + fdc_state[current_fdc].address = -1; continue; } - if (can_use_virtual_dma == 2 && fdc_state[fdc].version < FDC_82072A) + if (can_use_virtual_dma == 2 && + fdc_state[current_fdc].version < FDC_82072A) can_use_virtual_dma = 0; have_no_fdc = 0; @@ -4723,7 +4730,7 @@ static int __init do_floppy_init(void) */ user_reset_fdc(-1, FD_RESET_ALWAYS, false); } - fdc = 0; + current_fdc = 0; cancel_delayed_work(&fd_timeout); current_drive = 0; initialized = true; @@ -4875,36 +4882,36 @@ static int floppy_grab_irq_and_dma(void) } } - for (fdc = 0; fdc < N_FDC; fdc++) { - if (fdc_state[fdc].address != -1) { - if (floppy_request_regions(fdc)) + for (current_fdc = 0; current_fdc < N_FDC; current_fdc++) { + if (fdc_state[current_fdc].address != -1) { + if (floppy_request_regions(current_fdc)) goto cleanup; } } - for (fdc = 0; fdc < N_FDC; fdc++) { - if (fdc_state[fdc].address != -1) { + for (current_fdc = 0; current_fdc < N_FDC; current_fdc++) { + if (fdc_state[current_fdc].address != -1) { reset_fdc_info(1); - fdc_outb(fdc_state[fdc].dor, fdc, FD_DOR); + fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR); } } - fdc = 0; + current_fdc = 0; set_dor(0, ~0, 8); /* avoid immediate interrupt */ - for (fdc = 0; fdc < N_FDC; fdc++) - if (fdc_state[fdc].address != -1) - fdc_outb(fdc_state[fdc].dor, fdc, FD_DOR); + for (current_fdc = 0; current_fdc < N_FDC; current_fdc++) + if (fdc_state[current_fdc].address != -1) + 
fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR); /* * The driver will try and free resources and relies on us * to know if they were allocated or not. */ - fdc = 0; + current_fdc = 0; irqdma_allocated = 1; return 0; cleanup: fd_free_irq(); fd_free_dma(); - while (--fdc >= 0) - floppy_release_regions(fdc); + while (--current_fdc >= 0) + floppy_release_regions(current_fdc); atomic_dec(&usage_count); return -1; } @@ -4952,11 +4959,11 @@ static void floppy_release_irq_and_dma(void) pr_info("auxiliary floppy timer still active\n"); if (work_pending(&floppy_work)) pr_info("work still pending\n"); - old_fdc = fdc; - for (fdc = 0; fdc < N_FDC; fdc++) - if (fdc_state[fdc].address != -1) - floppy_release_regions(fdc); - fdc = old_fdc; + old_fdc = current_fdc; + for (current_fdc = 0; current_fdc < N_FDC; current_fdc++) + if (fdc_state[current_fdc].address != -1) + floppy_release_regions(current_fdc); + current_fdc = old_fdc; } #ifdef MODULE From 6b40bec3b13278d21fa6c1ae7a0bdf2e550eed5f Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Tue, 11 Feb 2020 11:10:04 +0100 Subject: [PATCH 33/81] md: check arrays is suspended in mddev_detach before call quiesce operations Don't call quiesce(1) and quiesce(0) if array is already suspended, otherwise in level_store, the array is writable after mddev_detach in below part though the intention is to make array writable after resume. mddev_suspend(mddev); mddev_detach(mddev); ... mddev_resume(mddev); And it also causes calltrace as follows in [1]. [48005.653834] WARNING: CPU: 1 PID: 45380 at kernel/kthread.c:510 kthread_park+0x77/0x90 [...] [48005.653976] CPU: 1 PID: 45380 Comm: mdadm Tainted: G OE 5.4.10-arch1-1 #1 [48005.653979] Hardware name: To Be Filled By O.E.M. 
To Be Filled By O.E.M./J4105-ITX, BIOS P1.40 08/06/2018 [48005.653984] RIP: 0010:kthread_park+0x77/0x90 [48005.654015] Call Trace: [48005.654039] r5l_quiesce+0x3c/0x70 [raid456] [48005.654052] raid5_quiesce+0x228/0x2e0 [raid456] [48005.654073] mddev_detach+0x30/0x70 [md_mod] [48005.654090] level_store+0x202/0x670 [md_mod] [48005.654099] ? security_capable+0x40/0x60 [48005.654114] md_attr_store+0x7b/0xc0 [md_mod] [48005.654123] kernfs_fop_write+0xce/0x1b0 [48005.654132] vfs_write+0xb6/0x1a0 [48005.654138] ksys_write+0x67/0xe0 [48005.654146] do_syscall_64+0x4e/0x140 [48005.654155] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [48005.654161] RIP: 0033:0x7fa0c8737497 [1]: https://bugzilla.kernel.org/show_bug.cgi?id=206161 Signed-off-by: Guoqing Jiang Signed-off-by: Song Liu --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 469f551863be..0b30ada971c1 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6184,7 +6184,7 @@ EXPORT_SYMBOL_GPL(md_stop_writes); static void mddev_detach(struct mddev *mddev) { md_bitmap_wait_behind_writes(mddev); - if (mddev->pers && mddev->pers->quiesce) { + if (mddev->pers && mddev->pers->quiesce && !mddev->suspended) { mddev->pers->quiesce(mddev, 1); mddev->pers->quiesce(mddev, 0); } From e74d93e96d721c4297f2a900ad0191890d2fc2b0 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 28 Feb 2020 17:51:48 +0300 Subject: [PATCH 34/81] block: keep bdi->io_pages in sync with max_sectors_kb for stacked devices Field bdi->io_pages added in commit 9491ae4aade6 ("mm: don't cap request size based on read-ahead setting") removes unneeded split of read requests. Stacked drivers do not call blk_queue_max_hw_sectors(). Instead they set limits of their devices by blk_set_stacking_limits() + disk_stack_limits(). Field bdi->io_pages stays zero until user set max_sectors_kb via sysfs. This patch updates io_pages after merging limits in disk_stack_limits().
Commit c6d6e9b0f6b4 ("dm: do not allow readahead to limit IO size") fixed the same problem for device-mapper devices, this one fixes MD RAIDs. Fixes: 9491ae4aade6 ("mm: don't cap request size based on read-ahead setting") Reviewed-by: Paul Menzel Reviewed-by: Bob Liu Signed-off-by: Konstantin Khlebnikov Signed-off-by: Song Liu --- block/blk-settings.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/blk-settings.c b/block/blk-settings.c index c8eda2e7b91e..be1dca0103a4 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -664,6 +664,9 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n", top, bottom); } + + t->backing_dev_info->io_pages = + t->limits.max_sectors >> (PAGE_SHIFT - 9); } EXPORT_SYMBOL(disk_stack_limits); From 431d6e3eec203ac6f9a20d2afc23a4c6508281c0 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 12 Feb 2020 13:46:02 -0600 Subject: [PATCH 35/81] rsxx: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle.
[1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Jens Axboe --- drivers/block/rsxx/dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c index 111eb659e66d..1914f5488b22 100644 --- a/drivers/block/rsxx/dma.c +++ b/drivers/block/rsxx/dma.c @@ -80,7 +80,7 @@ struct dma_tracker { struct dma_tracker_list { spinlock_t lock; int head; - struct dma_tracker list[0]; + struct dma_tracker list[]; }; From 253a99d95d5b30377b0193f1f1294f9068849c0b Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sun, 22 Mar 2020 14:02:59 +0800 Subject: [PATCH 36/81] bcache: move macro btree() and btree_root() into btree.h In order to accelerate bcache registration speed, the macro btree() and btree_root() will be referenced out of btree.c. This patch moves them from btree.c into btree.h with other relative function declaration in btree.h, for the following changes. Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/btree.c | 60 +---------------------------------- drivers/md/bcache/btree.h | 66 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 59 deletions(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index fa872df4e770..99cb201809af 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -101,64 +101,6 @@ #define insert_lock(s, b) ((b)->level <= (s)->lock) -/* - * These macros are for recursing down the btree - they handle the details of - * locking and looking up nodes in the cache for you. They're best treated as - * mere syntax when reading code that uses them. - * - * op->lock determines whether we take a read or a write lock at a given depth. - * If you've got a read lock and find that you need a write lock (i.e. 
you're - * going to have to split), set op->lock and return -EINTR; btree_root() will - * call you again and you'll have the correct lock. - */ - -/** - * btree - recurse down the btree on a specified key - * @fn: function to call, which will be passed the child node - * @key: key to recurse on - * @b: parent btree node - * @op: pointer to struct btree_op - */ -#define btree(fn, key, b, op, ...) \ -({ \ - int _r, l = (b)->level - 1; \ - bool _w = l <= (op)->lock; \ - struct btree *_child = bch_btree_node_get((b)->c, op, key, l, \ - _w, b); \ - if (!IS_ERR(_child)) { \ - _r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__); \ - rw_unlock(_w, _child); \ - } else \ - _r = PTR_ERR(_child); \ - _r; \ -}) - -/** - * btree_root - call a function on the root of the btree - * @fn: function to call, which will be passed the child node - * @c: cache set - * @op: pointer to struct btree_op - */ -#define btree_root(fn, c, op, ...) \ -({ \ - int _r = -EINTR; \ - do { \ - struct btree *_b = (c)->root; \ - bool _w = insert_lock(op, _b); \ - rw_lock(_w, _b, _b->level); \ - if (_b == (c)->root && \ - _w == insert_lock(op, _b)) { \ - _r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \ - } \ - rw_unlock(_w, _b); \ - bch_cannibalize_unlock(c); \ - if (_r == -EINTR) \ - schedule(); \ - } while (_r == -EINTR); \ - \ - finish_wait(&(c)->btree_cache_wait, &(op)->wait); \ - _r; \ -}) static inline struct bset *write_block(struct btree *b) { @@ -2422,7 +2364,7 @@ int __bch_btree_map_nodes(struct btree_op *op, struct cache_set *c, return btree_root(map_nodes_recurse, c, op, from, fn, flags); } -static int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op, +int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op, struct bkey *from, btree_map_keys_fn *fn, int flags) { diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index f4dcca449391..f37153db3f6c 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -260,6 +260,13 @@ void 
bch_initial_gc_finish(struct cache_set *c); void bch_moving_gc(struct cache_set *c); int bch_btree_check(struct cache_set *c); void bch_initial_mark_key(struct cache_set *c, int level, struct bkey *k); +typedef int (btree_map_keys_fn)(struct btree_op *op, struct btree *b, + struct bkey *k); +int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op, + struct bkey *from, btree_map_keys_fn *fn, + int flags); +int bch_btree_map_keys(struct btree_op *op, struct cache_set *c, + struct bkey *from, btree_map_keys_fn *fn, int flags); static inline void wake_up_gc(struct cache_set *c) { @@ -284,6 +291,65 @@ static inline void force_wake_up_gc(struct cache_set *c) wake_up_gc(c); } +/* + * These macros are for recursing down the btree - they handle the details of + * locking and looking up nodes in the cache for you. They're best treated as + * mere syntax when reading code that uses them. + * + * op->lock determines whether we take a read or a write lock at a given depth. + * If you've got a read lock and find that you need a write lock (i.e. you're + * going to have to split), set op->lock and return -EINTR; btree_root() will + * call you again and you'll have the correct lock. + */ + +/** + * btree - recurse down the btree on a specified key + * @fn: function to call, which will be passed the child node + * @key: key to recurse on + * @b: parent btree node + * @op: pointer to struct btree_op + */ +#define btree(fn, key, b, op, ...) \ +({ \ + int _r, l = (b)->level - 1; \ + bool _w = l <= (op)->lock; \ + struct btree *_child = bch_btree_node_get((b)->c, op, key, l, \ + _w, b); \ + if (!IS_ERR(_child)) { \ + _r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__); \ + rw_unlock(_w, _child); \ + } else \ + _r = PTR_ERR(_child); \ + _r; \ +}) + +/** + * btree_root - call a function on the root of the btree + * @fn: function to call, which will be passed the child node + * @c: cache set + * @op: pointer to struct btree_op + */ +#define btree_root(fn, c, op, ...) 
\ +({ \ + int _r = -EINTR; \ + do { \ + struct btree *_b = (c)->root; \ + bool _w = insert_lock(op, _b); \ + rw_lock(_w, _b, _b->level); \ + if (_b == (c)->root && \ + _w == insert_lock(op, _b)) { \ + _r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \ + } \ + rw_unlock(_w, _b); \ + bch_cannibalize_unlock(c); \ + if (_r == -EINTR) \ + schedule(); \ + } while (_r == -EINTR); \ + \ + finish_wait(&(c)->btree_cache_wait, &(op)->wait); \ + _r; \ +}) + #define MAP_DONE 0 #define MAP_CONTINUE 1 From feac1a70b806373d076a95b739c4feeceb21e814 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sun, 22 Mar 2020 14:03:00 +0800 Subject: [PATCH 37/81] bcache: add bcache_ prefix to btree_root() and btree() macros This patch changes macro btree_root() and btree() to bcache_btree_root() and bcache_btree(), to avoid potential generic name clash in future. NOTE: for product kernel maintainers, this patch can be skipped if you feel the rename stuffs introduce inconvenience to patch backport. Suggested-by: Christoph Hellwig Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/btree.c | 15 ++++++++------- drivers/md/bcache/btree.h | 4 ++-- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 99cb201809af..faf152524a16 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1790,7 +1790,7 @@ static void bch_btree_gc(struct cache_set *c) /* if CACHE_SET_IO_DISABLE set, gc thread should stop too */ do { - ret = btree_root(gc_root, c, &op, &writes, &stats); + ret = bcache_btree_root(gc_root, c, &op, &writes, &stats); closure_sync(&writes); cond_resched(); @@ -1888,7 +1888,7 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op) } if (p) - ret = btree(check_recurse, p, b, op); + ret = bcache_btree(check_recurse, p, b, op); p = k; } while (p && !ret); @@ -1903,7 +1903,7 @@ int bch_btree_check(struct cache_set *c) bch_btree_op_init(&op, SHRT_MAX); - return
btree_root(check_recurse, c, &op); + return bcache_btree_root(check_recurse, c, &op); } void bch_initial_gc_finish(struct cache_set *c) @@ -2343,7 +2343,7 @@ static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op, while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) { - ret = btree(map_nodes_recurse, k, b, + ret = bcache_btree(map_nodes_recurse, k, b, op, from, fn, flags); from = NULL; @@ -2361,7 +2361,7 @@ static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op, int __bch_btree_map_nodes(struct btree_op *op, struct cache_set *c, struct bkey *from, btree_map_nodes_fn *fn, int flags) { - return btree_root(map_nodes_recurse, c, op, from, fn, flags); + return bcache_btree_root(map_nodes_recurse, c, op, from, fn, flags); } int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op, @@ -2377,7 +2377,8 @@ int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op, while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) { ret = !b->level ? fn(op, b, k) - : btree(map_keys_recurse, k, b, op, from, fn, flags); + : bcache_btree(map_keys_recurse, k, + b, op, from, fn, flags); from = NULL; if (ret != MAP_CONTINUE) @@ -2394,7 +2395,7 @@ int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op, int bch_btree_map_keys(struct btree_op *op, struct cache_set *c, struct bkey *from, btree_map_keys_fn *fn, int flags) { - return btree_root(map_keys_recurse, c, op, from, fn, flags); + return bcache_btree_root(map_keys_recurse, c, op, from, fn, flags); } /* Keybuf code */ diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index f37153db3f6c..19e30266070a 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -309,7 +309,7 @@ static inline void force_wake_up_gc(struct cache_set *c) * @b: parent btree node * @op: pointer to struct btree_op */ -#define btree(fn, key, b, op, ...) \ +#define bcache_btree(fn, key, b, op, ...) 
\ ({ \ int _r, l = (b)->level - 1; \ bool _w = l <= (op)->lock; \ @@ -329,7 +329,7 @@ static inline void force_wake_up_gc(struct cache_set *c) * @c: cache set * @op: pointer to struct btree_op */ -#define btree_root(fn, c, op, ...) \ +#define bcache_btree_root(fn, c, op, ...) \ ({ \ int _r = -EINTR; \ do { \ From 8e7102273f597dbb38af43da874f8c123f8e6dbe Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sun, 22 Mar 2020 14:03:01 +0800 Subject: [PATCH 38/81] bcache: make bch_btree_check() to be multithreaded When registering a cache device, bch_btree_check() is called to check all btree nodes, to make sure the btree is consistent and not corrupted. bch_btree_check() is recursively executed in a single thread, when there are a lot of data cached and the btree is huge, it may take very long time to check all the btree nodes. In my testing, I observed it took around 50 minutes to finish bch_btree_check(). When checking the bcache btree nodes, the cache set is not running yet, and indeed the whole tree is in read-only state, it is safe to create multiple threads to check the btree in parallel. This patch tries to create multiple threads, and each thread tries to one-by-one check the sub-tree indexed by a key from the btree root node. The parallel thread number depends on how many keys in the btree root node. At most BCH_BTR_CHKTHREAD_MAX (64) threads can be created, but in practice it should be min(cpu-number/2, root-node-keys-number). 
Signed-off-by: Coly Li Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/md/bcache/btree.c | 169 +++++++++++++++++++++++++++++++++++++- drivers/md/bcache/btree.h | 22 +++++ 2 files changed, 188 insertions(+), 3 deletions(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index faf152524a16..74d66b641169 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1897,13 +1897,176 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op) return ret; } + +static int bch_btree_check_thread(void *arg) +{ + int ret; + struct btree_check_info *info = arg; + struct btree_check_state *check_state = info->state; + struct cache_set *c = check_state->c; + struct btree_iter iter; + struct bkey *k, *p; + int cur_idx, prev_idx, skip_nr; + int i, n; + + k = p = NULL; + i = n = 0; + cur_idx = prev_idx = 0; + ret = 0; + + /* root node keys are checked before thread created */ + bch_btree_iter_init(&c->root->keys, &iter, NULL); + k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad); + BUG_ON(!k); + + p = k; + while (k) { + /* + * Fetch a root node key index, skip the keys which + * should be fetched by other threads, then check the + * sub-tree indexed by the fetched key. + */ + spin_lock(&check_state->idx_lock); + cur_idx = check_state->key_idx; + check_state->key_idx++; + spin_unlock(&check_state->idx_lock); + + skip_nr = cur_idx - prev_idx; + + while (skip_nr) { + k = bch_btree_iter_next_filter(&iter, + &c->root->keys, + bch_ptr_bad); + if (k) + p = k; + else { + /* + * No more keys to check in root node, + * current checking threads are enough, + * stop creating more. 
+ */ + atomic_set(&check_state->enough, 1); + /* Update check_state->enough earlier */ + smp_mb(); + goto out; + } + skip_nr--; + cond_resched(); + } + + if (p) { + struct btree_op op; + + btree_node_prefetch(c->root, p); + c->gc_stats.nodes++; + bch_btree_op_init(&op, 0); + ret = bcache_btree(check_recurse, p, c->root, &op); + if (ret) + goto out; + } + p = NULL; + prev_idx = cur_idx; + cond_resched(); + } + +out: + info->result = ret; + /* update check_state->started among all CPUs */ + smp_mb(); + if (atomic_dec_and_test(&check_state->started)) + wake_up(&check_state->wait); + + return ret; +} + + + +static int bch_btree_chkthread_nr(void) +{ + int n = num_online_cpus()/2; + + if (n == 0) + n = 1; + else if (n > BCH_BTR_CHKTHREAD_MAX) + n = BCH_BTR_CHKTHREAD_MAX; + + return n; +} + int bch_btree_check(struct cache_set *c) { - struct btree_op op; + int ret = 0; + int i; + struct bkey *k = NULL; + struct btree_iter iter; + struct btree_check_state *check_state; + char name[32]; - bch_btree_op_init(&op, SHRT_MAX); + /* check and mark root node keys */ + for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid) + bch_initial_mark_key(c, c->root->level, k); - return bcache_btree_root(check_recurse, c, &op); + bch_initial_mark_key(c, c->root->level + 1, &c->root->key); + + if (c->root->level == 0) + return 0; + + check_state = kzalloc(sizeof(struct btree_check_state), GFP_KERNEL); + if (!check_state) + return -ENOMEM; + + check_state->c = c; + check_state->total_threads = bch_btree_chkthread_nr(); + check_state->key_idx = 0; + spin_lock_init(&check_state->idx_lock); + atomic_set(&check_state->started, 0); + atomic_set(&check_state->enough, 0); + init_waitqueue_head(&check_state->wait); + + /* + * Run multiple threads to check btree nodes in parallel, + * if check_state->enough is non-zero, it means current + * running check threads are enough, unncessary to create + * more. 
+ */ + for (i = 0; i < check_state->total_threads; i++) { + /* fetch latest check_state->enough earlier */ + smp_mb(); + if (atomic_read(&check_state->enough)) + break; + + check_state->infos[i].result = 0; + check_state->infos[i].state = check_state; + snprintf(name, sizeof(name), "bch_btrchk[%u]", i); + atomic_inc(&check_state->started); + + check_state->infos[i].thread = + kthread_run(bch_btree_check_thread, + &check_state->infos[i], + name); + if (IS_ERR(check_state->infos[i].thread)) { + pr_err("fails to run thread bch_btrchk[%d]", i); + for (--i; i >= 0; i--) + kthread_stop(check_state->infos[i].thread); + ret = -ENOMEM; + goto out; + } + } + + wait_event_interruptible(check_state->wait, + atomic_read(&check_state->started) == 0 || + test_bit(CACHE_SET_IO_DISABLE, &c->flags)); + + for (i = 0; i < check_state->total_threads; i++) { + if (check_state->infos[i].result) { + ret = check_state->infos[i].result; + goto out; + } + } + +out: + kfree(check_state); + return ret; } void bch_initial_gc_finish(struct cache_set *c) diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index 19e30266070a..7c884f278da8 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -145,6 +145,9 @@ struct btree { struct bio *bio; }; + + + #define BTREE_FLAG(flag) \ static inline bool btree_node_ ## flag(struct btree *b) \ { return test_bit(BTREE_NODE_ ## flag, &b->flags); } \ @@ -216,6 +219,25 @@ struct btree_op { unsigned int insert_collision:1; }; +struct btree_check_state; +struct btree_check_info { + struct btree_check_state *state; + struct task_struct *thread; + int result; +}; + +#define BCH_BTR_CHKTHREAD_MAX 64 +struct btree_check_state { + struct cache_set *c; + int total_threads; + int key_idx; + spinlock_t idx_lock; + atomic_t started; + atomic_t enough; + wait_queue_head_t wait; + struct btree_check_info infos[BCH_BTR_CHKTHREAD_MAX]; +}; + static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level) { memset(op, 0, 
sizeof(struct btree_op)); From b144e45fc57649e15cbc79ff2d32a942af1d91d5 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sun, 22 Mar 2020 14:03:02 +0800 Subject: [PATCH 39/81] bcache: make bch_sectors_dirty_init() to be multithreaded When attaching a cached device (a.k.a backing device) to a cache device, bch_sectors_dirty_init() is called to count dirty sectors and stripes (see what bcache_dev_sectors_dirty_add() does) on the cache device. The counting is done by a single thread recursive function bch_btree_map_keys() to iterate all the bcache btree nodes. If the btree has a huge number of nodes, bch_sectors_dirty_init() will take quite a long time. In my testing, if the registering cache set has an existing UUID which matches an already registered cached device, the automatic attachment during the registration may take more than 55 minutes. This is too long for waiting the bcache to work in real deployment. Fortunately when bch_sectors_dirty_init() is called, no other thread will access the btree yet, it is safe to do a read-only parallelized dirty sectors counting by multiple threads. This patch tries to create multiple threads, and each thread tries to one-by-one count dirty sectors from the sub-tree indexed by a root node key which the thread fetched. After the sub-tree is counted, the counting thread will continue to fetch another root node key, until the fetched key is NULL. How many threads in parallel depends on the number of keys from the btree root node, and the number of online CPU cores. The thread number will be the lesser of the two, but no more than BCH_DIRTY_INIT_THRD_MAX. If there are only 2 keys in root node, it can only be 2x times faster by this patch. But if there are 10 keys in the root node, with this patch it can be 10x times faster. 
Signed-off-by: Coly Li Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/md/bcache/writeback.c | 158 +++++++++++++++++++++++++++++++++- drivers/md/bcache/writeback.h | 19 ++++ 2 files changed, 174 insertions(+), 3 deletions(-) diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 4a40f9eadeaf..6673a37c8bd2 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -785,7 +785,9 @@ static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b, return MAP_CONTINUE; } -void bch_sectors_dirty_init(struct bcache_device *d) +static int bch_root_node_dirty_init(struct cache_set *c, + struct bcache_device *d, + struct bkey *k) { struct sectors_dirty_init op; int ret; @@ -796,8 +798,13 @@ void bch_sectors_dirty_init(struct bcache_device *d) op.start = KEY(op.inode, 0, 0); do { - ret = bch_btree_map_keys(&op.op, d->c, &op.start, - sectors_dirty_init_fn, 0); + ret = bcache_btree(map_keys_recurse, + k, + c->root, + &op.op, + &op.start, + sectors_dirty_init_fn, + 0); if (ret == -EAGAIN) schedule_timeout_interruptible( msecs_to_jiffies(INIT_KEYS_SLEEP_MS)); @@ -806,6 +813,151 @@ void bch_sectors_dirty_init(struct bcache_device *d) break; } } while (ret == -EAGAIN); + + return ret; +} + +static int bch_dirty_init_thread(void *arg) +{ + struct dirty_init_thrd_info *info = arg; + struct bch_dirty_init_state *state = info->state; + struct cache_set *c = state->c; + struct btree_iter iter; + struct bkey *k, *p; + int cur_idx, prev_idx, skip_nr; + int i; + + k = p = NULL; + i = 0; + cur_idx = prev_idx = 0; + + bch_btree_iter_init(&c->root->keys, &iter, NULL); + k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad); + BUG_ON(!k); + + p = k; + + while (k) { + spin_lock(&state->idx_lock); + cur_idx = state->key_idx; + state->key_idx++; + spin_unlock(&state->idx_lock); + + skip_nr = cur_idx - prev_idx; + + while (skip_nr) { + k = bch_btree_iter_next_filter(&iter, + &c->root->keys, + bch_ptr_bad); + if (k) + 
p = k; + else { + atomic_set(&state->enough, 1); + /* Update state->enough earlier */ + smp_mb(); + goto out; + } + skip_nr--; + cond_resched(); + } + + if (p) { + if (bch_root_node_dirty_init(c, state->d, p) < 0) + goto out; + } + + p = NULL; + prev_idx = cur_idx; + cond_resched(); + } + +out: + /* In order to wake up state->wait in time */ + smp_mb(); + if (atomic_dec_and_test(&state->started)) + wake_up(&state->wait); + + return 0; +} + +static int bch_btre_dirty_init_thread_nr(void) +{ + int n = num_online_cpus()/2; + + if (n == 0) + n = 1; + else if (n > BCH_DIRTY_INIT_THRD_MAX) + n = BCH_DIRTY_INIT_THRD_MAX; + + return n; +} + +void bch_sectors_dirty_init(struct bcache_device *d) +{ + int i; + struct bkey *k = NULL; + struct btree_iter iter; + struct sectors_dirty_init op; + struct cache_set *c = d->c; + struct bch_dirty_init_state *state; + char name[32]; + + /* Just count root keys if no leaf node */ + if (c->root->level == 0) { + bch_btree_op_init(&op.op, -1); + op.inode = d->id; + op.count = 0; + op.start = KEY(op.inode, 0, 0); + + for_each_key_filter(&c->root->keys, + k, &iter, bch_ptr_invalid) + sectors_dirty_init_fn(&op.op, c->root, k); + return; + } + + state = kzalloc(sizeof(struct bch_dirty_init_state), GFP_KERNEL); + if (!state) { + pr_warn("sectors dirty init failed: cannot allocate memory"); + return; + } + + state->c = c; + state->d = d; + state->total_threads = bch_btre_dirty_init_thread_nr(); + state->key_idx = 0; + spin_lock_init(&state->idx_lock); + atomic_set(&state->started, 0); + atomic_set(&state->enough, 0); + init_waitqueue_head(&state->wait); + + for (i = 0; i < state->total_threads; i++) { + /* Fetch latest state->enough earlier */ + smp_mb(); + if (atomic_read(&state->enough)) + break; + + state->infos[i].state = state; + atomic_inc(&state->started); + snprintf(name, sizeof(name), "bch_dirty_init[%d]", i); + + state->infos[i].thread = + kthread_run(bch_dirty_init_thread, + &state->infos[i], + name); + if 
(IS_ERR(state->infos[i].thread)) { + pr_err("fails to run thread bch_dirty_init[%d]", i); + for (--i; i >= 0; i--) + kthread_stop(state->infos[i].thread); + goto out; + } + } + + wait_event_interruptible(state->wait, + atomic_read(&state->started) == 0 || + test_bit(CACHE_SET_IO_DISABLE, &c->flags)); + +out: + kfree(state); } void bch_cached_dev_writeback_init(struct cached_dev *dc) diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index 4e4c6810dc3c..b029843ce5b6 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -16,6 +16,7 @@ #define BCH_AUTO_GC_DIRTY_THRESHOLD 50 +#define BCH_DIRTY_INIT_THRD_MAX 64 /* * 14 (16384ths) is chosen here as something that each backing device * should be a reasonable fraction of the share, and not to blow up @@ -23,6 +24,24 @@ */ #define WRITEBACK_SHARE_SHIFT 14 +struct bch_dirty_init_state; +struct dirty_init_thrd_info { + struct bch_dirty_init_state *state; + struct task_struct *thread; +}; + +struct bch_dirty_init_state { + struct cache_set *c; + struct bcache_device *d; + int total_threads; + int key_idx; + spinlock_t idx_lock; + atomic_t started; + atomic_t enough; + wait_queue_head_t wait; + struct dirty_init_thrd_info infos[BCH_DIRTY_INIT_THRD_MAX]; +}; + static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d) { uint64_t i, ret = 0; From 9876e38609a8ea98bbb447eb5a8f1c0400a6ccb8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 22 Mar 2020 14:03:03 +0800 Subject: [PATCH 40/81] bcache: Use scnprintf() for avoiding potential buffer overflow Since snprintf() returns the would-be-output size instead of the actual output size, the succeeding calls may go beyond the given buffer limit. Fix it by replacing with scnprintf(). 
Signed-off-by: Takashi Iwai Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 3470fae4eabc..323276994aab 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -154,7 +154,7 @@ static ssize_t bch_snprint_string_list(char *buf, size_t i; for (i = 0; list[i]; i++) - out += snprintf(out, buf + size - out, + out += scnprintf(out, buf + size - out, i == selected ? "[%s] " : "%s ", list[i]); out[-1] = '\n'; From b004aa867c48b3232835b61ed9d44b572e29498e Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 22 Mar 2020 14:03:04 +0800 Subject: [PATCH 41/81] bcache: optimize barrier usage for Rmw atomic bitops We can avoid the unnecessary barrier on non LL/SC architectures, such as x86. Instead, use the smp_mb__after_atomic(). Signed-off-by: Davidlohr Bueso Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/writeback.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 6673a37c8bd2..72ba6d015786 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -183,7 +183,7 @@ static void update_writeback_rate(struct work_struct *work) */ set_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags); /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */ - smp_mb(); + smp_mb__after_atomic(); /* * CACHE_SET_IO_DISABLE might be set via sysfs interface, @@ -193,7 +193,7 @@ static void update_writeback_rate(struct work_struct *work) test_bit(CACHE_SET_IO_DISABLE, &c->flags)) { clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags); /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */ - smp_mb(); + smp_mb__after_atomic(); return; } @@ -229,7 +229,7 @@ static void update_writeback_rate(struct work_struct *work) */ clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags); /* paired 
with where BCACHE_DEV_RATE_DW_RUNNING is tested */ - smp_mb(); + smp_mb__after_atomic(); } static unsigned int writeback_delay(struct cached_dev *dc, From eb9b6666d6ca6f3d9f218fa23ec6135eee1ac3a7 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sun, 22 Mar 2020 14:03:05 +0800 Subject: [PATCH 42/81] bcache: optimize barrier usage for atomic operations The idea of this patch is from Davidlohr Bueso, he posts a patch for bcache to optimize barrier usage for read-modify-write atomic bitops. Indeed such optimization can also apply on other locations where smp_mb() is used before or after an atomic operation. This patch replaces smp_mb() with smp_mb__before_atomic() or smp_mb__after_atomic() in btree.c and writeback.c, where it is used to synchronize memory cache just earlier on other cores. Although the locations are not on hot code path, it is always not bad to make things a little better. Signed-off-by: Coly Li Cc: Davidlohr Bueso Signed-off-by: Jens Axboe --- drivers/md/bcache/btree.c | 6 +++--- drivers/md/bcache/writeback.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 74d66b641169..72856e5f23a3 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1947,7 +1947,7 @@ static int bch_btree_check_thread(void *arg) */ atomic_set(&check_state->enough, 1); /* Update check_state->enough earlier */ - smp_mb(); + smp_mb__after_atomic(); goto out; } skip_nr--; @@ -1972,7 +1972,7 @@ static int bch_btree_check_thread(void *arg) out: info->result = ret; /* update check_state->started among all CPUs */ - smp_mb(); + smp_mb__before_atomic(); if (atomic_dec_and_test(&check_state->started)) wake_up(&check_state->wait); @@ -2031,7 +2031,7 @@ int bch_btree_check(struct cache_set *c) */ for (i = 0; i < check_state->total_threads; i++) { /* fetch latest check_state->enough earlier */ - smp_mb(); + smp_mb__before_atomic(); if (atomic_read(&check_state->enough)) break; diff --git
a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 72ba6d015786..3f7641fb28d5 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -854,7 +854,7 @@ static int bch_dirty_init_thread(void *arg) else { atomic_set(&state->enough, 1); /* Update state->enough earlier */ - smp_mb(); + smp_mb__after_atomic(); goto out; } skip_nr--; @@ -873,7 +873,7 @@ static int bch_dirty_init_thread(void *arg) out: /* In order to wake up state->wait in time */ - smp_mb(); + smp_mb__before_atomic(); if (atomic_dec_and_test(&state->started)) wake_up(&state->wait); @@ -932,7 +932,7 @@ void bch_sectors_dirty_init(struct bcache_device *d) for (i = 0; i < state->total_threads; i++) { /* Fetch latest state->enough earlier */ - smp_mb(); + smp_mb__before_atomic(); if (atomic_read(&state->enough)) break; From 5ae3a2c03d1f5b33f53ce2ba2e57773fc8b35128 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Wed, 25 Mar 2020 09:30:57 +0800 Subject: [PATCH 43/81] bcache: remove dupplicated declaration from btree.h Commit 253a99d95d5b ("bcache: move macro btree() and btree_root() into btree.h") makes two duplicated declaration into btree.h, typedef int (btree_map_keys_fn)(); int bch_btree_map_keys(); The kbuild test robot detects and reports this problem and this patch fixes it by removing the duplicated ones. 
Fixes: 253a99d95d5b ("bcache: move macro btree() and btree_root() into btree.h") Reported-by: kbuild test robot Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/btree.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index 7c884f278da8..257969980c49 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -282,13 +282,6 @@ void bch_initial_gc_finish(struct cache_set *c); void bch_moving_gc(struct cache_set *c); int bch_btree_check(struct cache_set *c); void bch_initial_mark_key(struct cache_set *c, int level, struct bkey *k); -typedef int (btree_map_keys_fn)(struct btree_op *op, struct btree *b, - struct bkey *k); -int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op, - struct bkey *from, btree_map_keys_fn *fn, - int flags); -int bch_btree_map_keys(struct btree_op *op, struct cache_set *c, - struct bkey *from, btree_map_keys_fn *fn, int flags); static inline void wake_up_gc(struct cache_set *c) { @@ -402,6 +395,9 @@ typedef int (btree_map_keys_fn)(struct btree_op *op, struct btree *b, struct bkey *k); int bch_btree_map_keys(struct btree_op *op, struct cache_set *c, struct bkey *from, btree_map_keys_fn *fn, int flags); +int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op, + struct bkey *from, btree_map_keys_fn *fn, + int flags); typedef bool (keybuf_pred_fn)(struct keybuf *buf, struct bkey *k); From 76171c6cdf832bc18b6d8207c9be94d78e54ed09 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 7 Feb 2020 17:13:53 -0800 Subject: [PATCH 44/81] nvme: expose hostnqn via sysfs for fabrics controllers We allow userspace to connect with a custom hostnqn which is useful for certain use-cases. However there is no way to tell what is the hostnqn used to connect to a given controller. Expose this so userspace can correlate controllers based on hostnqn. 
Signed-off-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 414076aaf52b..4633acc0e68f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3242,6 +3242,16 @@ static ssize_t nvme_sysfs_show_subsysnqn(struct device *dev, } static DEVICE_ATTR(subsysnqn, S_IRUGO, nvme_sysfs_show_subsysnqn, NULL); +static ssize_t nvme_sysfs_show_hostnqn(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + return snprintf(buf, PAGE_SIZE, "%s\n", ctrl->opts->host->nqn); +} +static DEVICE_ATTR(hostnqn, S_IRUGO, nvme_sysfs_show_hostnqn, NULL); + static ssize_t nvme_sysfs_show_address(struct device *dev, struct device_attribute *attr, char *buf) @@ -3267,6 +3277,7 @@ static struct attribute *nvme_dev_attrs[] = { &dev_attr_numa_node.attr, &dev_attr_queue_count.attr, &dev_attr_sqsize.attr, + &dev_attr_hostnqn.attr, NULL }; @@ -3280,6 +3291,8 @@ static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj, return 0; if (a == &dev_attr_address.attr && !ctrl->ops->get_address) return 0; + if (a == &dev_attr_hostnqn.attr && !ctrl->opts) + return 0; return a->mode; } From 45fb19f766d94a642cd820fe523ac29f502eece2 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 7 Feb 2020 17:13:54 -0800 Subject: [PATCH 45/81] nvme: expose hostid via sysfs for fabrics controllers We allow userspace to connect with a custom hostid which is useful for certain use-cases. However there is no way to tell what is the hostid used to connect to a given controller. Expose this so userspace can correlate controllers based on hostid. 
Signed-off-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 4633acc0e68f..720840ca875c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3252,6 +3252,16 @@ static ssize_t nvme_sysfs_show_hostnqn(struct device *dev, } static DEVICE_ATTR(hostnqn, S_IRUGO, nvme_sysfs_show_hostnqn, NULL); +static ssize_t nvme_sysfs_show_hostid(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + return snprintf(buf, PAGE_SIZE, "%pU\n", &ctrl->opts->host->id); +} +static DEVICE_ATTR(hostid, S_IRUGO, nvme_sysfs_show_hostid, NULL); + static ssize_t nvme_sysfs_show_address(struct device *dev, struct device_attribute *attr, char *buf) @@ -3278,6 +3288,7 @@ static struct attribute *nvme_dev_attrs[] = { &dev_attr_queue_count.attr, &dev_attr_sqsize.attr, &dev_attr_hostnqn.attr, + &dev_attr_hostid.attr, NULL }; @@ -3293,6 +3304,8 @@ static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj, return 0; if (a == &dev_attr_hostnqn.attr && !ctrl->opts) return 0; + if (a == &dev_attr_hostid.attr && !ctrl->opts) + return 0; return a->mode; } From 228914504cecd1c7d1279b5b884ab55d474cc87e Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 11 Feb 2020 13:41:36 +0100 Subject: [PATCH 46/81] nvme: Don't deter users from enabling hwmon support I see no good reason for the "If unsure, say N" advice in the description of the NVME_HWMON configuration option. It is not dangerous, it does not select any other option, and has a fairly low overhead. As the option is already not enabled by default, further suggesting hesitant users to not enable it is not useful anyway. 
Unlike some other options where the description alone may not be sufficient for users to make a decision, NVME_HWMON is pretty simple to grasp in my opinion, so just let the user do what they want. Signed-off-by: Jean Delvare Reviewed-by: Chaitanya Kulkarni Reviewed-by: Guenter Roeck Cc: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/Kconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index b9358db83e96..9c17ed32be64 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -32,8 +32,6 @@ config NVME_HWMON a hardware monitoring device will be created for each NVMe drive in the system. - If unsure, say N. - config NVME_FABRICS tristate From ad95a613ea447e2404e343ab3636c4d960fa9580 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 19 Feb 2020 08:14:31 -0800 Subject: [PATCH 47/81] nvme: code cleanup nvme_identify_ns_desc() The function nvme_identify_ns_desc() has 3 levels of nesting which make error message to exceeded > 80 char per line which is not aligned with the kernel code standards and rest of the NVMe subsystem code. Add a helper function to move the processing of the log when the command is successful by reducing the nesting and keeping the code < 80 char per line. 
Reviewed-by: Christoph Hellwig Signed-off-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 76 +++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 36 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 720840ca875c..c4dbc852b5e9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1055,6 +1055,43 @@ static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id) return error; } +static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids, + struct nvme_ns_id_desc *cur) +{ + const char *warn_str = "ctrl returned bogus length:"; + void *data = cur; + + switch (cur->nidt) { + case NVME_NIDT_EUI64: + if (cur->nidl != NVME_NIDT_EUI64_LEN) { + dev_warn(ctrl->device, "%s %d for NVME_NIDT_EUI64\n", + warn_str, cur->nidl); + return -1; + } + memcpy(ids->eui64, data + sizeof(*cur), NVME_NIDT_EUI64_LEN); + return NVME_NIDT_EUI64_LEN; + case NVME_NIDT_NGUID: + if (cur->nidl != NVME_NIDT_NGUID_LEN) { + dev_warn(ctrl->device, "%s %d for NVME_NIDT_NGUID\n", + warn_str, cur->nidl); + return -1; + } + memcpy(ids->nguid, data + sizeof(*cur), NVME_NIDT_NGUID_LEN); + return NVME_NIDT_NGUID_LEN; + case NVME_NIDT_UUID: + if (cur->nidl != NVME_NIDT_UUID_LEN) { + dev_warn(ctrl->device, "%s %d for NVME_NIDT_UUID\n", + warn_str, cur->nidl); + return -1; + } + uuid_copy(&ids->uuid, data + sizeof(*cur)); + return NVME_NIDT_UUID_LEN; + default: + /* Skip unknown types */ + return cur->nidl; + } +} + static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid, struct nvme_ns_ids *ids) { @@ -1083,42 +1120,9 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid, if (cur->nidl == 0) break; - switch (cur->nidt) { - case NVME_NIDT_EUI64: - if (cur->nidl != NVME_NIDT_EUI64_LEN) { - dev_warn(ctrl->device, - "ctrl returned bogus length: %d for NVME_NIDT_EUI64\n", - cur->nidl); - goto free_data; - } - len = NVME_NIDT_EUI64_LEN; - 
memcpy(ids->eui64, data + pos + sizeof(*cur), len); - break; - case NVME_NIDT_NGUID: - if (cur->nidl != NVME_NIDT_NGUID_LEN) { - dev_warn(ctrl->device, - "ctrl returned bogus length: %d for NVME_NIDT_NGUID\n", - cur->nidl); - goto free_data; - } - len = NVME_NIDT_NGUID_LEN; - memcpy(ids->nguid, data + pos + sizeof(*cur), len); - break; - case NVME_NIDT_UUID: - if (cur->nidl != NVME_NIDT_UUID_LEN) { - dev_warn(ctrl->device, - "ctrl returned bogus length: %d for NVME_NIDT_UUID\n", - cur->nidl); - goto free_data; - } - len = NVME_NIDT_UUID_LEN; - uuid_copy(&ids->uuid, data + pos + sizeof(*cur)); - break; - default: - /* Skip unknown types */ - len = cur->nidl; - break; - } + len = nvme_process_ns_desc(ctrl, ids, cur); + if (len < 0) + goto free_data; len += sizeof(*cur); } From 94d2e705b6a6fe9c56a990c0cd31a7298cfcee9a Mon Sep 17 00:00:00 2001 From: Rupesh Girase Date: Thu, 27 Feb 2020 22:15:26 +0530 Subject: [PATCH 48/81] nvme: log additional message for controller status Log the controller status to know more about issue if it lies within kernel nvme subsystem or controller is unhealthy. Signed-off-by: Rupesh Girase Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c4dbc852b5e9..c9988942d0aa 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2083,8 +2083,8 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled) return -EINTR; if (time_after(jiffies, timeout)) { dev_err(ctrl->device, - "Device not ready; aborting %s\n", enabled ? - "initialisation" : "reset"); + "Device not ready; aborting %s, CSTS=0x%x\n", + enabled ?
"initialisation" : "reset", csts); return -ENODEV; } } From 3e98c2443f5c7f127b5b7492a3089e92a1c85112 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Fri, 28 Feb 2020 18:52:28 -0800 Subject: [PATCH 49/81] nvme: Check for readiness more quickly, to speed up boot time After initialization, nvme_wait_ready checks for readiness every 100ms, even though the drive may be ready far sooner than that. This delays system boot by hundreds of milliseconds. Reduce the delay, checking for readiness every millisecond instead. Boot-time tests on an AWS c5.12xlarge: Before: [ 0.546936] initcall nvme_init+0x0/0x5b returned 0 after 37 usecs ... [ 0.764178] nvme nvme0: 2/0/0 default/read/poll queues [ 0.768424] nvme0n1: p1 [ 0.774132] EXT4-fs (nvme0n1p1): mounted filesystem with ordered data mode. Opts: (null) [ 0.774146] VFS: Mounted root (ext4 filesystem) on device 259:1. ... [ 0.788141] Run /sbin/init as init process After: [ 0.537088] initcall nvme_init+0x0/0x5b returned 0 after 37 usecs ... [ 0.543457] nvme nvme0: 2/0/0 default/read/poll queues [ 0.548473] nvme0n1: p1 [ 0.554339] EXT4-fs (nvme0n1p1): mounted filesystem with ordered data mode. Opts: (null) [ 0.554344] VFS: Mounted root (ext4 filesystem) on device 259:1. ... 
[ 0.567931] Run /sbin/init as init process Signed-off-by: Josh Triplett Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c9988942d0aa..0e38e07a302f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2078,7 +2078,7 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled) if ((csts & NVME_CSTS_RDY) == bit) break; - msleep(100); + usleep_range(1000, 2000); if (fatal_signal_pending(current)) return -EINTR; if (time_after(jiffies, timeout)) { From 6d525f9755c2ce444de2f3d604d41fbe4df91a8c Mon Sep 17 00:00:00 2001 From: Amit Engel Date: Sat, 29 Feb 2020 16:28:41 -0800 Subject: [PATCH 50/81] nvmet: check ncqr & nsqr for set-features cmd For set feature command when setting up NVME_FEAT_NUM_QUEUES, check Number of I/O Completion Queues Requested (NCQR) and Number of I/O Submission Queues Requested (NSQR) before we proceed, for invalid values (i.e. 65535) return an appropriate NVMe invalid field status. 
Signed-off-by: Amit Engel Signed-off-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/admin-cmd.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 19f949570625..c0aa9c34c699 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -733,13 +733,22 @@ static void nvmet_execute_set_features(struct nvmet_req *req) { struct nvmet_subsys *subsys = req->sq->ctrl->subsys; u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); + u32 cdw11 = le32_to_cpu(req->cmd->common.cdw11); u16 status = 0; + u16 nsqr; + u16 ncqr; if (!nvmet_check_data_len(req, 0)) return; switch (cdw10 & 0xff) { case NVME_FEAT_NUM_QUEUES: + ncqr = (cdw11 >> 16) & 0xffff; + nsqr = cdw11 & 0xffff; + if (ncqr == 0xffff || nsqr == 0xffff) { + status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; + break; + } nvmet_set_result(req, (subsys->max_qid - 1) | ((subsys->max_qid - 1) << 16)); break; From e2a366a4b0feaeba8f0bf6091ddd2ac27507a9d3 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 28 Feb 2020 21:45:19 +0300 Subject: [PATCH 51/81] nvme-pci: slimmer CQ head update Update CQ head with pre-increment operator. This saves subtraction of 1 and a few registers. Also update phase with "^= 1". This generates only one RMW instruction. 
ffffffff815ba150 : ffffffff815ba150: 0f b7 47 70 movzx eax,WORD PTR [rdi+0x70] ffffffff815ba154: 83 c0 01 add eax,0x1 ffffffff815ba157: 66 89 47 70 mov WORD PTR [rdi+0x70],ax ffffffff815ba15b: 66 3b 47 68 cmp ax,WORD PTR [rdi+0x68] ffffffff815ba15f: 74 01 je ffffffff815ba162 ffffffff815ba161: c3 ret ffffffff815ba162: 31 c0 xor eax,eax ffffffff815ba164: 80 77 74 01 ===> xor BYTE PTR [rdi+0x74],0x1 ffffffff815ba168: 66 89 47 70 mov WORD PTR [rdi+0x70],ax ffffffff815ba16c: c3 ret add/remove: 0/0 grow/shrink: 0/3 up/down: 0/-119 (-119) Function old new delta nvme_poll 690 678 -12 nvme_dev_disable 1230 1177 -53 nvme_irq 613 559 -54 Signed-off-by: Alexey Dobriyan --- drivers/nvme/host/pci.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d3f23d6254e4..cdc9b6149d38 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -982,11 +982,9 @@ static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end) static inline void nvme_update_cq_head(struct nvme_queue *nvmeq) { - if (nvmeq->cq_head == nvmeq->q_depth - 1) { + if (++nvmeq->cq_head == nvmeq->q_depth) { nvmeq->cq_head = 0; - nvmeq->cq_phase = !nvmeq->cq_phase; - } else { - nvmeq->cq_head++; + nvmeq->cq_phase ^= 1; } } From bf392a5dc02a9b796f3da89fc5bb42856aca64cb Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 2 Mar 2020 08:45:04 -0800 Subject: [PATCH 52/81] nvme-pci: Remove tag from process cq The only user for tagged completion was for timeout handling. That user, though, really only cares if the timed out command is completed, which we can safely check within the timeout handler. Remove the tag check to simplify completion handling. 
Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index cdc9b6149d38..98d8ddd7aa0f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -989,14 +989,13 @@ static inline void nvme_update_cq_head(struct nvme_queue *nvmeq) } static inline int nvme_process_cq(struct nvme_queue *nvmeq, u16 *start, - u16 *end, unsigned int tag) + u16 *end) { int found = 0; *start = nvmeq->cq_head; while (nvme_cqe_pending(nvmeq)) { - if (tag == -1U || nvmeq->cqes[nvmeq->cq_head].command_id == tag) - found++; + found++; nvme_update_cq_head(nvmeq); } *end = nvmeq->cq_head; @@ -1017,7 +1016,7 @@ static irqreturn_t nvme_irq(int irq, void *data) * the irq handler, even if that was on another CPU. */ rmb(); - nvme_process_cq(nvmeq, &start, &end, -1); + nvme_process_cq(nvmeq, &start, &end); wmb(); if (start != end) { @@ -1040,7 +1039,7 @@ static irqreturn_t nvme_irq_check(int irq, void *data) * Poll for completions any queue, including those not dedicated to polling. * Can be called from any context. 
*/ -static int nvme_poll_irqdisable(struct nvme_queue *nvmeq, unsigned int tag) +static int nvme_poll_irqdisable(struct nvme_queue *nvmeq) { struct pci_dev *pdev = to_pci_dev(nvmeq->dev->dev); u16 start, end; @@ -1053,11 +1052,11 @@ static int nvme_poll_irqdisable(struct nvme_queue *nvmeq, unsigned int tag) */ if (test_bit(NVMEQ_POLLED, &nvmeq->flags)) { spin_lock(&nvmeq->cq_poll_lock); - found = nvme_process_cq(nvmeq, &start, &end, tag); + found = nvme_process_cq(nvmeq, &start, &end); spin_unlock(&nvmeq->cq_poll_lock); } else { disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); - found = nvme_process_cq(nvmeq, &start, &end, tag); + found = nvme_process_cq(nvmeq, &start, &end); enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); } @@ -1075,8 +1074,7 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx) return 0; spin_lock(&nvmeq->cq_poll_lock); - found = nvme_process_cq(nvmeq, &start, &end, -1); - nvme_complete_cqes(nvmeq, start, end); + found = nvme_process_cq(nvmeq, &start, &end); spin_unlock(&nvmeq->cq_poll_lock); return found; @@ -1253,7 +1251,8 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) /* * Did we miss an interrupt? 
*/ - if (nvme_poll_irqdisable(nvmeq, req->tag)) { + nvme_poll_irqdisable(nvmeq); + if (blk_mq_request_completed(req)) { dev_warn(dev->ctrl.device, "I/O %d QID %d timeout, completion polled\n", req->tag, nvmeq->qid); @@ -1396,7 +1395,7 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) else nvme_disable_ctrl(&dev->ctrl); - nvme_poll_irqdisable(nvmeq, -1); + nvme_poll_irqdisable(nvmeq); } /* @@ -1411,7 +1410,7 @@ static void nvme_reap_pending_cqes(struct nvme_dev *dev) int i; for (i = dev->ctrl.queue_count - 1; i > 0; i--) { - nvme_process_cq(&dev->queues[i], &start, &end, -1); + nvme_process_cq(&dev->queues[i], &start, &end); nvme_complete_cqes(&dev->queues[i], start, end); } } From 324b494c286298d51bc5ed5107644ebe23f9dad6 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 2 Mar 2020 08:56:53 -0800 Subject: [PATCH 53/81] nvme-pci: Remove two-pass completions Completion handling had been done in two steps: find all new completions under a lock, then handle those completions outside the lock. This was done to make the locked section as short as possible so that other threads using the same lock wait less time. The driver no longer shares locks during completion, and is in fact lockless for interrupt driven queues, so the optimization no longer serves its original purpose. Replace the two-pass completion queue handler with a single pass that completes entries immediately. 
Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 42 ++++++++++------------------------------- 1 file changed, 10 insertions(+), 32 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 98d8ddd7aa0f..02f22c63adcf 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -971,15 +971,6 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) nvme_end_request(req, cqe->status, cqe->result); } -static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end) -{ - while (start != end) { - nvme_handle_cqe(nvmeq, start); - if (++start == nvmeq->q_depth) - start = 0; - } -} - static inline void nvme_update_cq_head(struct nvme_queue *nvmeq) { if (++nvmeq->cq_head == nvmeq->q_depth) { @@ -988,19 +979,17 @@ static inline void nvme_update_cq_head(struct nvme_queue *nvmeq) } } -static inline int nvme_process_cq(struct nvme_queue *nvmeq, u16 *start, - u16 *end) +static inline int nvme_process_cq(struct nvme_queue *nvmeq) { int found = 0; - *start = nvmeq->cq_head; while (nvme_cqe_pending(nvmeq)) { found++; + nvme_handle_cqe(nvmeq, nvmeq->cq_head); nvme_update_cq_head(nvmeq); } - *end = nvmeq->cq_head; - if (*start != *end) + if (found) nvme_ring_cq_doorbell(nvmeq); return found; } @@ -1009,21 +998,16 @@ static irqreturn_t nvme_irq(int irq, void *data) { struct nvme_queue *nvmeq = data; irqreturn_t ret = IRQ_NONE; - u16 start, end; /* * The rmb/wmb pair ensures we see all updates from a previous run of * the irq handler, even if that was on another CPU. 
*/ rmb(); - nvme_process_cq(nvmeq, &start, &end); + if (nvme_process_cq(nvmeq)) + ret = IRQ_HANDLED; wmb(); - if (start != end) { - nvme_complete_cqes(nvmeq, start, end); - return IRQ_HANDLED; - } - return ret; } @@ -1042,7 +1026,6 @@ static irqreturn_t nvme_irq_check(int irq, void *data) static int nvme_poll_irqdisable(struct nvme_queue *nvmeq) { struct pci_dev *pdev = to_pci_dev(nvmeq->dev->dev); - u16 start, end; int found; /* @@ -1052,29 +1035,27 @@ static int nvme_poll_irqdisable(struct nvme_queue *nvmeq) */ if (test_bit(NVMEQ_POLLED, &nvmeq->flags)) { spin_lock(&nvmeq->cq_poll_lock); - found = nvme_process_cq(nvmeq, &start, &end); + found = nvme_process_cq(nvmeq); spin_unlock(&nvmeq->cq_poll_lock); } else { disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); - found = nvme_process_cq(nvmeq, &start, &end); + found = nvme_process_cq(nvmeq); enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); } - nvme_complete_cqes(nvmeq, start, end); return found; } static int nvme_poll(struct blk_mq_hw_ctx *hctx) { struct nvme_queue *nvmeq = hctx->driver_data; - u16 start, end; bool found; if (!nvme_cqe_pending(nvmeq)) return 0; spin_lock(&nvmeq->cq_poll_lock); - found = nvme_process_cq(nvmeq, &start, &end); + found = nvme_process_cq(nvmeq); spin_unlock(&nvmeq->cq_poll_lock); return found; @@ -1406,13 +1387,10 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) */ static void nvme_reap_pending_cqes(struct nvme_dev *dev) { - u16 start, end; int i; - for (i = dev->ctrl.queue_count - 1; i > 0; i--) { - nvme_process_cq(&dev->queues[i], &start, &end); - nvme_complete_cqes(&dev->queues[i], start, end); - } + for (i = dev->ctrl.queue_count - 1; i > 0; i--) + nvme_process_cq(&dev->queues[i]); } static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues, From fa059b856a593a7bddd4d3779ae8ab1380e05d91 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 4 Mar 2020 09:17:01 -0800 Subject: [PATCH 54/81] nvme-pci: Simplify nvme_poll_irqdisable The timeout 
handler can use the existing nvme_poll() if it needs to check a polled queue, allowing nvme_poll_irqdisable() to handle only irq driven queues for the remaining callers. Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 02f22c63adcf..f45e26e6af7e 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1020,30 +1020,18 @@ static irqreturn_t nvme_irq_check(int irq, void *data) } /* - * Poll for completions any queue, including those not dedicated to polling. + * Poll for completions for any interrupt driven queue * Can be called from any context. */ -static int nvme_poll_irqdisable(struct nvme_queue *nvmeq) +static void nvme_poll_irqdisable(struct nvme_queue *nvmeq) { struct pci_dev *pdev = to_pci_dev(nvmeq->dev->dev); - int found; - /* - * For a poll queue we need to protect against the polling thread - * using the CQ lock. For normal interrupt driven threads we have - * to disable the interrupt to avoid racing with it. - */ - if (test_bit(NVMEQ_POLLED, &nvmeq->flags)) { - spin_lock(&nvmeq->cq_poll_lock); - found = nvme_process_cq(nvmeq); - spin_unlock(&nvmeq->cq_poll_lock); - } else { - disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); - found = nvme_process_cq(nvmeq); - enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); - } + WARN_ON_ONCE(test_bit(NVMEQ_POLLED, &nvmeq->flags)); - return found; + disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); + nvme_process_cq(nvmeq); + enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); } static int nvme_poll(struct blk_mq_hw_ctx *hctx) @@ -1232,7 +1220,11 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) /* * Did we miss an interrupt? 
*/ - nvme_poll_irqdisable(nvmeq); + if (test_bit(NVMEQ_POLLED, &nvmeq->flags)) + nvme_poll(req->mq_hctx); + else + nvme_poll_irqdisable(nvmeq); + if (blk_mq_request_completed(req)) { dev_warn(dev->ctrl.device, "I/O %d QID %d timeout, completion polled\n", From 40510a639ec08db81d5ff9c79856baf9dda94748 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 25 Feb 2020 15:53:09 -0800 Subject: [PATCH 55/81] nvme-tcp: optimize queue io_cpu assignment for multiple queue maps Currently, queue io_cpu assignment is done sequentially for default, read and poll queues based on queue id. This causes misalignment between the context of the CPU initiating I/O and the I/O worker thread processing queued requests or completions. Change the queue io_cpu assignment to take into account the queue maps offset. Each queue io_cpu will start at zero for each queue map. This essentially aligns read/poll queues to start over the same range as default queues. Testing performed by Mark with: - ram device (nvmet) - single CPU core (pinned) - 100% 4k reads - engine io_uring (not using sq_thread option) - hipri flag set Micro-benchmark results show a net gain of: - increase of 18%-29% in IOPs - reduction of 16%-22% in average latency - reduction of 7%-23% in 99.99% latency Baseline: ======== QDepth/Batch | IOPs [k] | Avg. Lat [us] | 99.99% Lat [us] ----------------------------------------------------------------- 1/1 | 32.4 | 30.11 | 50.94 32/8 | 179 | 168.20 | 371 CPU alignment: ============= QDepth/Batch | IOPs [k] | Avg.
Lat [us] | 99.99% Lat [us] ----------------------------------------------------------------- 1/1 | 38.5 | 25.18 | 39.16 32/8 | 231 | 130.75 | 343 Reported-by: Mark Wunderlich Signed-off-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 62 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index e384239af880..11a7c26f8573 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1258,13 +1258,67 @@ free_icreq: return ret; } +static bool nvme_tcp_admin_queue(struct nvme_tcp_queue *queue) +{ + return nvme_tcp_queue_id(queue) == 0; +} + +static bool nvme_tcp_default_queue(struct nvme_tcp_queue *queue) +{ + struct nvme_tcp_ctrl *ctrl = queue->ctrl; + int qid = nvme_tcp_queue_id(queue); + + return !nvme_tcp_admin_queue(queue) && + qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT]; +} + +static bool nvme_tcp_read_queue(struct nvme_tcp_queue *queue) +{ + struct nvme_tcp_ctrl *ctrl = queue->ctrl; + int qid = nvme_tcp_queue_id(queue); + + return !nvme_tcp_admin_queue(queue) && + !nvme_tcp_default_queue(queue) && + qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] + + ctrl->io_queues[HCTX_TYPE_READ]; +} + +static bool nvme_tcp_poll_queue(struct nvme_tcp_queue *queue) +{ + struct nvme_tcp_ctrl *ctrl = queue->ctrl; + int qid = nvme_tcp_queue_id(queue); + + return !nvme_tcp_admin_queue(queue) && + !nvme_tcp_default_queue(queue) && + !nvme_tcp_read_queue(queue) && + qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] + + ctrl->io_queues[HCTX_TYPE_READ] + + ctrl->io_queues[HCTX_TYPE_POLL]; +} + +static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue) +{ + struct nvme_tcp_ctrl *ctrl = queue->ctrl; + int qid = nvme_tcp_queue_id(queue); + int n = 0; + + if (nvme_tcp_default_queue(queue)) + n = qid - 1; + else if (nvme_tcp_read_queue(queue)) + n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] - 1; + else if (nvme_tcp_poll_queue(queue)) + n = qid - 
ctrl->io_queues[HCTX_TYPE_DEFAULT] - + ctrl->io_queues[HCTX_TYPE_READ] - 1; + queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false); +} + static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid, size_t queue_size) { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); struct nvme_tcp_queue *queue = &ctrl->queues[qid]; struct linger sol = { .l_onoff = 1, .l_linger = 0 }; - int ret, opt, rcv_pdu_size, n; + int ret, opt, rcv_pdu_size; queue->ctrl = ctrl; INIT_LIST_HEAD(&queue->send_list); @@ -1343,11 +1397,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, } queue->sock->sk->sk_allocation = GFP_ATOMIC; - if (!qid) - n = 0; - else - n = (qid - 1) % num_online_cpus(); - queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false); + nvme_tcp_set_queue_io_cpu(queue); queue->request = NULL; queue->data_remaining = 0; queue->ddgst_remaining = 0; From 9cda34e37489244a8c8628617e24b2dbc8a8edad Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 25 Feb 2020 16:42:27 -0800 Subject: [PATCH 56/81] nvmet-tcp: fix maxh2cdata icresp parameter MAXH2CDATA is not zero based. Also no reason to limit ourselves to 1M transfers as we can do more easily. Make this an arbitrary limit of 16M. 
Reported-by: Wenhua Liu Cc: stable@vger.kernel.org # v5.0+ Signed-off-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index cbff1038bdb3..1942c941c31c 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -798,7 +798,7 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) icresp->hdr.pdo = 0; icresp->hdr.plen = cpu_to_le32(icresp->hdr.hlen); icresp->pfv = cpu_to_le16(NVME_TCP_PFV_1_0); - icresp->maxdata = cpu_to_le32(0xffff); /* FIXME: support r2t */ + icresp->maxdata = cpu_to_le32(0x400000); /* 16M arbitrary limit */ icresp->cpda = 0; if (queue->hdr_digest) icresp->digest |= NVME_TCP_HDR_DIGEST_ENABLE; From 5ff4e11264780ce49a9acb66e0b4c5a30a569be4 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 25 Feb 2020 16:43:23 -0800 Subject: [PATCH 57/81] nvme-tcp: move send failure to nvme_tcp_try_send Consolidate the request failure handling code to where it is being fetched (nvme_tcp_try_send). 
Signed-off-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 11a7c26f8573..221a5a59aa06 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1027,8 +1027,15 @@ static int nvme_tcp_try_send(struct nvme_tcp_queue *queue) if (req->state == NVME_TCP_SEND_DDGST) ret = nvme_tcp_try_send_ddgst(req); done: - if (ret == -EAGAIN) + if (ret == -EAGAIN) { ret = 0; + } else if (ret < 0) { + dev_err(queue->ctrl->ctrl.device, + "failed to send request %d\n", ret); + if (ret != -EPIPE && ret != -ECONNRESET) + nvme_tcp_fail_request(queue->request); + nvme_tcp_done_send_req(queue); + } return ret; } @@ -1059,21 +1066,10 @@ static void nvme_tcp_io_work(struct work_struct *w) int result; result = nvme_tcp_try_send(queue); - if (result > 0) { + if (result > 0) pending = true; - } else if (unlikely(result < 0)) { - dev_err(queue->ctrl->ctrl.device, - "failed to send request %d\n", result); - - /* - * Fail the request unless peer closed the connection, - * in which case error recovery flow will complete all. - */ - if ((result != -EPIPE) && (result != -ECONNRESET)) - nvme_tcp_fail_request(queue->request); - nvme_tcp_done_send_req(queue); - return; - } + else if (unlikely(result < 0)) + break; result = nvme_tcp_try_recv(queue); if (result > 0) From 761ad26c45b0260a8516bc1fc9d25bb66ca4e25c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 25 Feb 2020 16:43:24 -0800 Subject: [PATCH 58/81] nvme-tcp: break from io_work loop if recv failed If we failed to receive data from the socket, don't try to further process it; we will for sure be handling a queue error at this point. While no issue was seen with the current behavior thus far, it's safer to cease socket processing if we detected an error.
Signed-off-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 221a5a59aa06..4b20301e517c 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1074,6 +1074,8 @@ static void nvme_tcp_io_work(struct work_struct *w) result = nvme_tcp_try_recv(queue); if (result > 0) pending = true; + else if (unlikely(result < 0)) + break; if (!pending) return; From 2db24e4a22bc97c713261a81fc75e2a36db65715 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Mon, 9 Mar 2020 17:04:12 +0200 Subject: [PATCH 59/81] nvme-pci: properly print controller address Align PCI address print with fabrics address that is printed with newline character. Before: [root@server40 linux]# cat /sys/class/nvme/nvme2/address 0000:0b:00.0[root@server40 linux]# After: [root@server40 linux]# cat /sys/class/nvme/nvme2/address 0000:0b:00.0 [root@server40 linux]# Reviewed-by: Christoph Hellwig Signed-off-by: Max Gurtovoy --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f45e26e6af7e..e6fa0c7bb96c 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2656,7 +2656,7 @@ static int nvme_pci_get_address(struct nvme_ctrl *ctrl, char *buf, int size) { struct pci_dev *pdev = to_pci_dev(to_nvme_dev(ctrl)->dev); - return snprintf(buf, size, "%s", dev_name(&pdev->dev)); + return snprintf(buf, size, "%s\n", dev_name(&pdev->dev)); } static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { From 02cb00e233ade7c050e0f476902e63847e78114e Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 8 Mar 2020 12:55:03 +0200 Subject: [PATCH 60/81] nvmet: Add get_mdts op for controllers Some transports, such as RDMA, would like to set the Maximum Data Transfer Size (MDTS) according to device/port/ctrl characteristics. 
This will enable the transport to set the optimal MDTS according to controller needs and device capabilities. Add a new nvmet transport op that is called during ctrl identification. This will not affect transports that don't implement this option. The return value of the new op is according to the NVMe spec definition for MDTS. Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Max Gurtovoy Signed-off-by: Israel Rukshin --- drivers/nvme/target/admin-cmd.c | 8 ++++++-- drivers/nvme/target/nvmet.h | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index c0aa9c34c699..b9ec489dc748 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -369,8 +369,12 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) /* we support multiple ports, multiples hosts and ANA: */ id->cmic = (1 << 0) | (1 << 1) | (1 << 3); - /* no limit on data transfer sizes for now */ - id->mdts = 0; + /* Limit MDTS according to transport capability */ + if (ctrl->ops->get_mdts) + id->mdts = ctrl->ops->get_mdts(ctrl); + else + id->mdts = 0; + id->cntlid = cpu_to_le16(ctrl->cntlid); id->ver = cpu_to_le32(ctrl->subsys->ver); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 42ba2ddd9e96..421dff3ea143 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -289,6 +289,7 @@ struct nvmet_fabrics_ops { struct nvmet_port *port, char *traddr); u16 (*install_queue)(struct nvmet_sq *nvme_sq); void (*discovery_chg)(struct nvmet_port *port); + u8 (*get_mdts)(const struct nvmet_ctrl *ctrl); }; #define NVMET_MAX_INLINE_BIOVEC 8 From ec6d20e16c2d2bef8df2d82d63dcee51caa4ac27 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 8 Mar 2020 12:55:04 +0200 Subject: [PATCH 61/81] nvmet-rdma: Implement get_mdts controller op Set the maximal data transfer size to be 1MB (currently mdts is unlimited).
This will allow calculating the number of MRs that one ctrl should allocate to fulfill its capabilities. Reviewed-by: Christoph Hellwig Signed-off-by: Max Gurtovoy --- drivers/nvme/target/rdma.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 37d262a65877..f47a79b9fc6c 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -31,6 +31,9 @@ #define NVMET_RDMA_MAX_INLINE_SGE 4 #define NVMET_RDMA_MAX_INLINE_DATA_SIZE max_t(int, SZ_16K, PAGE_SIZE) +/* Assume mpsmin == device_page_size == 4KB */ +#define NVMET_RDMA_MAX_MDTS 8 + struct nvmet_rdma_cmd { struct ib_sge sge[NVMET_RDMA_MAX_INLINE_SGE + 1]; struct ib_cqe cqe; @@ -1602,6 +1605,11 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req, } } +static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl) +{ + return NVMET_RDMA_MAX_MDTS; +} + static const struct nvmet_fabrics_ops nvmet_rdma_ops = { .owner = THIS_MODULE, .type = NVMF_TRTYPE_RDMA, @@ -1612,6 +1620,7 @@ static const struct nvmet_fabrics_ops nvmet_rdma_ops = { .queue_response = nvmet_rdma_queue_response, .delete_ctrl = nvmet_rdma_delete_ctrl, .disc_traddr = nvmet_rdma_disc_port_addr, + .get_mdts = nvmet_rdma_get_mdts, }; static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data) From c363f249e7e6576587d8982d9087406fe98beb99 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 8 Mar 2020 12:55:05 +0200 Subject: [PATCH 62/81] nvmet-rdma: allocate RW ctxs according to mdts Current nvmet-rdma code allocates MR pool budget based on queue size, assuming both host and target use the same "max_pages_per_mr" count. After limiting the mdts value for RDMA controllers, we know the factor of maximum MRs per IO operation. Thus, make sure MR pool will be sufficient for the required IO depth and IO size. That is, say host's SQ size is 100, then the MR pool budget allocated currently at target will also be 100 MRs.
But 100 IO WRITE Requests with 256 sg_count(IO size above 1MB) require 200 MRs when target's "max_pages_per_mr" is 128. Reported-by: Krishnamraju Eraparaju Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Max Gurtovoy --- drivers/nvme/target/rdma.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index f47a79b9fc6c..9e1b8c61f54e 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -978,7 +978,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) { struct ib_qp_init_attr qp_attr; struct nvmet_rdma_device *ndev = queue->dev; - int comp_vector, nr_cqe, ret, i; + int comp_vector, nr_cqe, ret, i, factor; /* * Spread the io queues across completion vectors, @@ -1011,7 +1011,9 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) qp_attr.qp_type = IB_QPT_RC; /* +1 for drain */ qp_attr.cap.max_send_wr = queue->send_queue_size + 1; - qp_attr.cap.max_rdma_ctxs = queue->send_queue_size; + factor = rdma_rw_mr_factor(ndev->device, queue->cm_id->port_num, + 1 << NVMET_RDMA_MAX_MDTS); + qp_attr.cap.max_rdma_ctxs = queue->send_queue_size * factor; qp_attr.cap.max_send_sge = max(ndev->device->attrs.max_sge_rd, ndev->device->attrs.max_send_sge); From 764e9332098c0e60251386a507fe46ac91276120 Mon Sep 17 00:00:00 2001 From: John Meneghini Date: Thu, 20 Feb 2020 10:05:38 +0900 Subject: [PATCH 63/81] nvme-multipath: do not reset on unknown status The nvme multipath error handling defaults to controller reset if the error is unknown. There are, however, no existing nvme status codes that indicate a reset should be used, and resetting causes unnecessary disruption to the rest of IO. Change nvme's error handling to first check if failover should happen. If not, let the normal error handling take over rather than reset the controller. 
Based-on-a-patch-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: John Meneghini Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 5 +---- drivers/nvme/host/multipath.c | 21 +++++++++------------ drivers/nvme/host/nvme.h | 5 +++-- 3 files changed, 13 insertions(+), 18 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0e38e07a302f..fde4b3a526ad 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -291,11 +291,8 @@ void nvme_complete_rq(struct request *req) nvme_req(req)->ctrl->comp_seen = true; if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) { - if ((req->cmd_flags & REQ_NVME_MPATH) && - blk_path_error(status)) { - nvme_failover_req(req); + if ((req->cmd_flags & REQ_NVME_MPATH) && nvme_failover_req(req)) return; - } if (!blk_queue_dying(req->q)) { nvme_retry_req(req); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index a11900cf3a36..90dd1d641b7b 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -64,17 +64,12 @@ void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, } } -void nvme_failover_req(struct request *req) +bool nvme_failover_req(struct request *req) { struct nvme_ns *ns = req->q->queuedata; u16 status = nvme_req(req)->status; unsigned long flags; - spin_lock_irqsave(&ns->head->requeue_lock, flags); - blk_steal_bios(&ns->head->requeue_list, req); - spin_unlock_irqrestore(&ns->head->requeue_lock, flags); - blk_mq_end_request(req, 0); - switch (status & 0x7ff) { case NVME_SC_ANA_TRANSITION: case NVME_SC_ANA_INACCESSIBLE: @@ -103,15 +98,17 @@ void nvme_failover_req(struct request *req) nvme_mpath_clear_current_path(ns); break; default: - /* - * Reset the controller for any non-ANA error as we don't know - * what caused the error. - */ - nvme_reset_ctrl(ns->ctrl); - break; + /* This was a non-ANA error so follow the normal error path. 
*/ + return false; } + spin_lock_irqsave(&ns->head->requeue_lock, flags); + blk_steal_bios(&ns->head->requeue_list, req); + spin_unlock_irqrestore(&ns->head->requeue_lock, flags); + blk_mq_end_request(req, 0); + kblockd_schedule_work(&ns->head->requeue_work); + return true; } void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 1024fec7914c..d800b9a51c2c 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -550,7 +550,7 @@ void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys); void nvme_mpath_start_freeze(struct nvme_subsystem *subsys); void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, struct nvme_ctrl *ctrl, int *flags); -void nvme_failover_req(struct request *req); +bool nvme_failover_req(struct request *req); void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl); int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head); void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id); @@ -599,8 +599,9 @@ static inline void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance); } -static inline void nvme_failover_req(struct request *req) +static inline bool nvme_failover_req(struct request *req) { + return false; } static inline void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) { From 8d8a50e20dc2dc41cb788085968b9024dc36f7a5 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 11 Mar 2020 09:50:37 +0100 Subject: [PATCH 64/81] nvme-fabrics: Use scnprintf() for avoiding potential buffer overflow Since snprintf() returns the would-be-output size instead of the actual output size, the succeeding calls may go beyond the given buffer limit. Fix it by replacing with scnprintf(). 
Reviewed-by: Christoph Hellwig Signed-off-by: Takashi Iwai Signed-off-by: Keith Busch --- drivers/nvme/host/fabrics.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 74b8818ac9a1..2a6c8190eeb7 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -105,14 +105,14 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size) int len = 0; if (ctrl->opts->mask & NVMF_OPT_TRADDR) - len += snprintf(buf, size, "traddr=%s", ctrl->opts->traddr); + len += scnprintf(buf, size, "traddr=%s", ctrl->opts->traddr); if (ctrl->opts->mask & NVMF_OPT_TRSVCID) - len += snprintf(buf + len, size - len, "%strsvcid=%s", + len += scnprintf(buf + len, size - len, "%strsvcid=%s", (len) ? "," : "", ctrl->opts->trsvcid); if (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR) - len += snprintf(buf + len, size - len, "%shost_traddr=%s", + len += scnprintf(buf + len, size - len, "%shost_traddr=%s", (len) ? "," : "", ctrl->opts->host_traddr); - len += snprintf(buf + len, size - len, "\n"); + len += scnprintf(buf + len, size - len, "\n"); return len; } From e90d172b11b845b0f2caa9422c2f9d3ef59af575 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 12 Mar 2020 16:06:39 -0700 Subject: [PATCH 65/81] nvmet-tcp: optimize tcp stack TX when data digest is used If we have a 4-byte data digest to send to the wire, but we have more data to send, set MSG_MORE to tell the stack that more is coming. 
Reviewed-by: Mark Wunderlich Signed-off-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 1942c941c31c..dcee4995e22d 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -626,7 +626,7 @@ static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch) return 1; } -static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd) +static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch) { struct nvmet_tcp_queue *queue = cmd->queue; struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; @@ -636,6 +636,9 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd) }; int ret; + if (!last_in_batch && cmd->queue->send_list_len) + msg.msg_flags |= MSG_MORE; + ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); if (unlikely(ret <= 0)) return ret; @@ -676,7 +679,7 @@ static int nvmet_tcp_try_send_one(struct nvmet_tcp_queue *queue, } if (cmd->state == NVMET_TCP_SEND_DDGST) { - ret = nvmet_try_send_ddgst(cmd); + ret = nvmet_try_send_ddgst(cmd, last_in_batch); if (ret <= 0) goto done_send; } From c225b610311bc5695d952cd3590136f26199a227 Mon Sep 17 00:00:00 2001 From: "masahiro31.yamada@kioxia.com" Date: Thu, 5 Mar 2020 11:13:29 +0000 Subject: [PATCH 66/81] nvme: Add compat_ioctl handler for NVME_IOCTL_SUBMIT_IO Currently 32 bit application gets ENOTTY when it calls compat_ioctl with NVME_IOCTL_SUBMIT_IO in 64 bit kernel. The cause is that the results of sizeof(struct nvme_user_io), which is used to define NVME_IOCTL_SUBMIT_IO, are not same between 32 bit compiler and 64 bit compiler. * 32 bit: the result of sizeof nvme_user_io is 44. * 64 bit: the result of sizeof nvme_user_io is 48. 64 bit compiler seems to add 32 bit padding for multiple of 8 bytes. This patch adds a compat_ioctl handler. 
The handler replaces NVME_IOCTL_SUBMIT_IO32 with NVME_IOCTL_SUBMIT_IO in case 32 bit application calls compat_ioctl for submit in 64 bit kernel. Then, it calls nvme_ioctl as usual. Reviewed-by: Christoph Hellwig Signed-off-by: Masahiro Yamada (KIOXIA) Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 45 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index fde4b3a526ad..3c1c826ea491 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1585,6 +1585,47 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, return ret; } +#ifdef CONFIG_COMPAT +struct nvme_user_io32 { + __u8 opcode; + __u8 flags; + __u16 control; + __u16 nblocks; + __u16 rsvd; + __u64 metadata; + __u64 addr; + __u64 slba; + __u32 dsmgmt; + __u32 reftag; + __u16 apptag; + __u16 appmask; +} __attribute__((__packed__)); + +#define NVME_IOCTL_SUBMIT_IO32 _IOW('N', 0x42, struct nvme_user_io32) + +static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + /* + * Corresponds to the difference of NVME_IOCTL_SUBMIT_IO + * between 32 bit programs and 64 bit kernel. + * The cause is that the results of sizeof(struct nvme_user_io), + * which is used to define NVME_IOCTL_SUBMIT_IO, + * are not same between 32 bit compiler and 64 bit compiler. + * NVME_IOCTL_SUBMIT_IO32 is for 64 bit kernel handling + * NVME_IOCTL_SUBMIT_IO issued from 32 bit programs. + * Other IOCTL numbers are same between 32 bit and 64 bit. + * So there is nothing to do regarding to other IOCTL numbers. 
+ */ + if (cmd == NVME_IOCTL_SUBMIT_IO32) + return nvme_ioctl(bdev, mode, NVME_IOCTL_SUBMIT_IO, arg); + + return nvme_ioctl(bdev, mode, cmd, arg); +} +#else +#define nvme_compat_ioctl NULL +#endif /* CONFIG_COMPAT */ + static int nvme_open(struct block_device *bdev, fmode_t mode) { struct nvme_ns *ns = bdev->bd_disk->private_data; @@ -2028,7 +2069,7 @@ EXPORT_SYMBOL_GPL(nvme_sec_submit); static const struct block_device_operations nvme_fops = { .owner = THIS_MODULE, .ioctl = nvme_ioctl, - .compat_ioctl = nvme_ioctl, + .compat_ioctl = nvme_compat_ioctl, .open = nvme_open, .release = nvme_release, .getgeo = nvme_getgeo, @@ -2056,7 +2097,7 @@ const struct block_device_operations nvme_ns_head_ops = { .open = nvme_ns_head_open, .release = nvme_ns_head_release, .ioctl = nvme_ioctl, - .compat_ioctl = nvme_ioctl, + .compat_ioctl = nvme_compat_ioctl, .getgeo = nvme_getgeo, .pr_ops = &nvme_pr_ops, }; From f41cfd5d0a04b12a5dae753cd01163661432ebbb Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Wed, 18 Mar 2020 17:27:59 +0200 Subject: [PATCH 67/81] nvme: release ida resources ida instances allocate some internal memory in addition to the base 'struct ida'. Use ida_destroy() to release that memory at module_exit(). Reviewed-by: Christoph Hellwig Signed-off-by: Max Gurtovoy Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 3c1c826ea491..ad0847b6c769 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4358,6 +4358,7 @@ static void __exit nvme_core_exit(void) destroy_workqueue(nvme_delete_wq); destroy_workqueue(nvme_reset_wq); destroy_workqueue(nvme_wq); + ida_destroy(&nvme_instance_ida); } MODULE_LICENSE("GPL"); From e7c43feae2ab8744c3112b5a714959c8ea71ca19 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 10 Mar 2020 16:39:10 +0200 Subject: [PATCH 68/81] nvme: Use nvme_state_terminal helper Improve code readability. 
Reviewed-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Signed-off-by: Israel Rukshin Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ad0847b6c769..392af3cf0bf9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2633,8 +2633,7 @@ static bool nvme_validate_cntlid(struct nvme_subsystem *subsys, lockdep_assert_held(&nvme_subsystems_lock); list_for_each_entry(tmp, &subsys->ctrls, subsys_entry) { - if (tmp->state == NVME_CTRL_DELETING || - tmp->state == NVME_CTRL_DEAD) + if (nvme_state_terminal(tmp)) continue; if (tmp->cntlid == ctrl->cntlid) { From 6721c18a0610db39cf0110b9be07946bbc208ed7 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 24 Mar 2020 17:29:39 +0200 Subject: [PATCH 69/81] nvme: Remove unused return code from nvme_delete_ctrl_sync The return code of nvme_delete_ctrl_sync is never used, so change it to void. Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 392af3cf0bf9..8a7761c3086e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -192,21 +192,16 @@ int nvme_delete_ctrl(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_delete_ctrl); -static int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl) +static void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl) { - int ret = 0; - /* * Keep a reference until nvme_do_delete_ctrl() complete, * since ->delete_ctrl can free the controller. 
*/ nvme_get_ctrl(ctrl); - if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING)) - ret = -EBUSY; - if (!ret) + if (nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING)) nvme_do_delete_ctrl(ctrl); nvme_put_ctrl(ctrl); - return ret; } static inline bool nvme_ns_has_pi(struct nvme_ns *ns) From 253fd4ac806896293c9b9d12c794195447bad164 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 24 Mar 2020 17:29:40 +0200 Subject: [PATCH 70/81] nvme-pci: Re-order nvme_pci_free_ctrl Destroy the resources in the same order like in nvme_probe error flow to improve code readability. Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e6fa0c7bb96c..ff0bd2d84f3e 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2470,13 +2470,13 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) struct nvme_dev *dev = to_nvme_dev(ctrl); nvme_dbbuf_dma_free(dev); - put_device(dev->dev); nvme_free_tagset(dev); if (dev->ctrl.admin_q) blk_put_queue(dev->ctrl.admin_q); - kfree(dev->queues); free_opal_dev(dev->ctrl.opal_dev); mempool_destroy(dev->iod_mempool); + put_device(dev->dev); + kfree(dev->queues); kfree(dev); } From b780d7415aacec855e2f2370cbf98f918b224903 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 24 Mar 2020 17:29:41 +0200 Subject: [PATCH 71/81] nvme: Fix ctrl use-after-free during sysfs deletion In case nvme_sysfs_delete() is called by the user before taking the ctrl reference count, the ctrl may be freed during the creation and cause the bug. Take the reference as soon as the controller is externally visible, which is done by cdev_device_add() in nvme_init_ctrl(). Also take the reference count at the core layer instead of taking it on each transport separately. 
Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 2 ++ drivers/nvme/host/fc.c | 4 +--- drivers/nvme/host/pci.c | 1 - drivers/nvme/host/rdma.c | 3 +-- drivers/nvme/host/tcp.c | 3 +-- drivers/nvme/target/loop.c | 3 +-- 6 files changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 8a7761c3086e..51f80be0fe90 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4130,6 +4130,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, if (ret) goto out_release_instance; + nvme_get_ctrl(ctrl); cdev_init(&ctrl->cdev, &nvme_dev_fops); ctrl->cdev.owner = ops->module; ret = cdev_device_add(&ctrl->cdev, ctrl->device); @@ -4148,6 +4149,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, return 0; out_free_name: + nvme_put_ctrl(ctrl); kfree_const(ctrl->device->kobj.name); out_release_instance: ida_simple_remove(&nvme_instance_ida, ctrl->instance); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 5a70ac395d53..59d2e2bec179 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3181,10 +3181,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, goto fail_ctrl; } - nvme_get_ctrl(&ctrl->ctrl); - if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) { - nvme_put_ctrl(&ctrl->ctrl); dev_err(ctrl->ctrl.device, "NVME-FC{%d}: failed to schedule initial connect\n", ctrl->cnum); @@ -3209,6 +3206,7 @@ fail_ctrl: /* initiate nvme ctrl ref counting teardown */ nvme_uninit_ctrl(&ctrl->ctrl); + nvme_put_ctrl(&ctrl->ctrl); /* Remove core ctrl ref. 
*/ nvme_put_ctrl(&ctrl->ctrl); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index ff0bd2d84f3e..4e062c3a84bc 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2802,7 +2802,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); nvme_reset_ctrl(&dev->ctrl); - nvme_get_ctrl(&dev->ctrl); async_schedule(nvme_async_probe, dev); return 0; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 3e85c5cacefd..ca782deea72d 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -2043,8 +2043,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n", ctrl->ctrl.opts->subsysnqn, &ctrl->addr); - nvme_get_ctrl(&ctrl->ctrl); - mutex_lock(&nvme_rdma_ctrl_mutex); list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list); mutex_unlock(&nvme_rdma_ctrl_mutex); @@ -2054,6 +2052,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, out_uninit_ctrl: nvme_uninit_ctrl(&ctrl->ctrl); nvme_put_ctrl(&ctrl->ctrl); + nvme_put_ctrl(&ctrl->ctrl); if (ret > 0) ret = -EIO; return ERR_PTR(ret); diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 4b20301e517c..dd569b122a0d 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2428,8 +2428,6 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev, dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n", ctrl->ctrl.opts->subsysnqn, &ctrl->addr); - nvme_get_ctrl(&ctrl->ctrl); - mutex_lock(&nvme_tcp_ctrl_mutex); list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list); mutex_unlock(&nvme_tcp_ctrl_mutex); @@ -2439,6 +2437,7 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev, out_uninit_ctrl: nvme_uninit_ctrl(&ctrl->ctrl); nvme_put_ctrl(&ctrl->ctrl); + nvme_put_ctrl(&ctrl->ctrl); if (ret > 0) ret = -EIO; return ERR_PTR(ret); diff --git 
a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 4df4ebde208a..a425e2858829 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -618,8 +618,6 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, dev_info(ctrl->ctrl.device, "new ctrl: \"%s\"\n", ctrl->ctrl.opts->subsysnqn); - nvme_get_ctrl(&ctrl->ctrl); - changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); WARN_ON_ONCE(!changed); @@ -637,6 +635,7 @@ out_free_queues: kfree(ctrl->queues); out_uninit_ctrl: nvme_uninit_ctrl(&ctrl->ctrl); + nvme_put_ctrl(&ctrl->ctrl); out_put_ctrl: nvme_put_ctrl(&ctrl->ctrl); if (ret > 0) From 726612b6b8259afa41d265a2722991c87f059223 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 24 Mar 2020 17:29:42 +0200 Subject: [PATCH 72/81] nvme: Make nvme_uninit_ctrl symmetric to nvme_init_ctrl Put the ctrl reference count in nvme_uninit_ctrl, mirroring nvme_init_ctrl, which takes it. This decreases the reference count at the core layer instead of decreasing it in each transport separately. Also move the call to nvme_uninit_ctrl in the PCI driver after the calls to nvme_release_prp_pools and nvme_dev_unmap, in order to put the reference count after using the dev. This is safe because those functions use nvme_dev, which is freed only later in nvme_pci_free_ctrl.
Signed-off-by: Israel Rukshin Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 2 +- drivers/nvme/host/fc.c | 1 - drivers/nvme/host/pci.c | 3 +-- drivers/nvme/host/rdma.c | 1 - drivers/nvme/host/tcp.c | 1 - drivers/nvme/target/loop.c | 2 -- 6 files changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 51f80be0fe90..8e6a3ada9d44 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -171,7 +171,6 @@ static void nvme_do_delete_ctrl(struct nvme_ctrl *ctrl) nvme_remove_namespaces(ctrl); ctrl->ops->delete_ctrl(ctrl); nvme_uninit_ctrl(ctrl); - nvme_put_ctrl(ctrl); } static void nvme_delete_ctrl_work(struct work_struct *work) @@ -4048,6 +4047,7 @@ void nvme_uninit_ctrl(struct nvme_ctrl *ctrl) nvme_fault_inject_fini(&ctrl->fault_inject); dev_pm_qos_hide_latency_tolerance(ctrl->device); cdev_device_del(&ctrl->cdev, ctrl->device); + nvme_put_ctrl(ctrl); } EXPORT_SYMBOL_GPL(nvme_uninit_ctrl); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 59d2e2bec179..a8bf2fb1287b 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3206,7 +3206,6 @@ fail_ctrl: /* initiate nvme ctrl ref counting teardown */ nvme_uninit_ctrl(&ctrl->ctrl); - nvme_put_ctrl(&ctrl->ctrl); /* Remove core ctrl ref. 
*/ nvme_put_ctrl(&ctrl->ctrl); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 4e062c3a84bc..4e79e412b276 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2873,10 +2873,9 @@ static void nvme_remove(struct pci_dev *pdev) nvme_free_host_mem(dev); nvme_dev_remove_admin(dev); nvme_free_queues(dev, 0); - nvme_uninit_ctrl(&dev->ctrl); nvme_release_prp_pools(dev); nvme_dev_unmap(dev); - nvme_put_ctrl(&dev->ctrl); + nvme_uninit_ctrl(&dev->ctrl); } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index ca782deea72d..c99a88247660 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -2052,7 +2052,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, out_uninit_ctrl: nvme_uninit_ctrl(&ctrl->ctrl); nvme_put_ctrl(&ctrl->ctrl); - nvme_put_ctrl(&ctrl->ctrl); if (ret > 0) ret = -EIO; return ERR_PTR(ret); diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index dd569b122a0d..f111430bb617 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2437,7 +2437,6 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev, out_uninit_ctrl: nvme_uninit_ctrl(&ctrl->ctrl); nvme_put_ctrl(&ctrl->ctrl); - nvme_put_ctrl(&ctrl->ctrl); if (ret > 0) ret = -EIO; return ERR_PTR(ret); diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index a425e2858829..0d54e730cbf2 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -485,7 +485,6 @@ out_destroy_admin: out_disable: dev_warn(ctrl->ctrl.device, "Removing after reset failure\n"); nvme_uninit_ctrl(&ctrl->ctrl); - nvme_put_ctrl(&ctrl->ctrl); } static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = { @@ -635,7 +634,6 @@ out_free_queues: kfree(ctrl->queues); out_uninit_ctrl: nvme_uninit_ctrl(&ctrl->ctrl); - nvme_put_ctrl(&ctrl->ctrl); out_put_ctrl: nvme_put_ctrl(&ctrl->ctrl); if (ret > 0) From ce1518139e6976cf19c133b555083354fdb629b8 Mon Sep 17 00:00:00 2001 From: 
Israel Rukshin Date: Tue, 24 Mar 2020 17:29:43 +0200 Subject: [PATCH 73/81] nvme: Fix controller creation races with teardown flow Calling nvme_sysfs_delete() when the controller is in the middle of creation may cause several bugs. If the controller is in NEW state we remove delete_controller file and don't delete the controller. The user will not be able to use nvme disconnect command on that controller again, although the controller may be active. Other bugs may happen if the controller is in the middle of create_ctrl callback and nvme_do_delete_ctrl() starts. For example, freeing I/O tagset at nvme_do_delete_ctrl() before it was allocated at create_ctrl callback. To fix all those races don't allow the user to delete the controller before it was fully created. Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 5 +++++ drivers/nvme/host/nvme.h | 1 + 2 files changed, 6 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 8e6a3ada9d44..66fe301d9abb 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3228,6 +3228,10 @@ static ssize_t nvme_sysfs_delete(struct device *dev, { struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + /* Can't delete non-created controllers */ + if (!ctrl->created) + return -EBUSY; + if (device_remove_file_self(dev, attr)) nvme_delete_ctrl_sync(ctrl); return count; @@ -4039,6 +4043,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl) nvme_queue_scan(ctrl); nvme_start_queues(ctrl); } + ctrl->created = true; } EXPORT_SYMBOL_GPL(nvme_start_ctrl); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index d800b9a51c2c..2e04a36296d9 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -259,6 +259,7 @@ struct nvme_ctrl { struct nvme_command ka_cmd; struct work_struct fw_act_work; unsigned long events; + bool created; #ifdef CONFIG_NVME_MULTIPATH /* asymmetric namespace access: */ 
From 96135862dfcce38b98beff7d1009188263b7e6f7 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 24 Mar 2020 17:29:44 +0200 Subject: [PATCH 74/81] nvme-rdma: Add warning on state change failure at nvme_rdma_setup_ctrl The transition to LIVE state should not fail in case of a new controller. Moving to DELETING state before nvme_rdma_create_ctrl() allocates all the resources may lead to a NULL dereference in the teardown flow (e.g., IO tagset, admin_q, connect_q). Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/rdma.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index c99a88247660..3ae3011a95ea 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1022,8 +1022,13 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new) changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); if (!changed) { - /* state change failure is ok if we're in DELETING state */ + /* + * state change failure is ok if we're in DELETING state, + * unless we're during creation of a new controller to + * avoid races with teardown flow. + */ WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING); + WARN_ON_ONCE(new); ret = -EINVAL; goto destroy_io; } From bea54ef53fce57c8b2f11315c9384e43b2ecb321 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 24 Mar 2020 17:29:45 +0200 Subject: [PATCH 75/81] nvme-tcp: Add warning on state change failure at nvme_tcp_setup_ctrl The transition to LIVE state should not fail in case of a new controller. Moving to DELETING state before nvme_tcp_create_ctrl() allocates all the resources may lead to a NULL dereference in the teardown flow (e.g., IO tagset, admin_q, connect_q).
Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index f111430bb617..0ef14f0fad86 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1930,8 +1930,13 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new) } if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) { - /* state change failure is ok if we're in DELETING state */ + /* + * state change failure is ok if we're in DELETING state, + * unless we're during creation of a new controller to + * avoid races with teardown flow. + */ WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING); + WARN_ON_ONCE(new); ret = -EINVAL; goto destroy_io; } From fb314eb0cbb2e11540d1ae1a7b28346397f621ef Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Mar 2020 14:19:35 +0100 Subject: [PATCH 76/81] nvme: refactor nvme_identify_ns_descs error handling Move the handling of an error into the function from the caller, and only do it for an actual error on the admin command itself, not the command parsing, as that should be enough to deal with devices claiming a bogus version compliance. Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 66fe301d9abb..6bd1c6dfac6b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1102,8 +1102,17 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid, status = nvme_submit_sync_cmd(ctrl->admin_q, &c, data, NVME_IDENTIFY_DATA_SIZE); - if (status) + if (status) { + dev_warn(ctrl->device, + "Identify Descriptors failed (%d)\n", status); + /* + * Don't treat an error as fatal, as we potentially already + * have a NGUID or EUI-64. 
+ */ + if (status > 0) + status = 0; goto free_data; + } for (pos = 0; pos < NVME_IDENTIFY_DATA_SIZE; pos += len) { struct nvme_ns_id_desc *cur = data + pos; @@ -1757,26 +1766,15 @@ static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns) static int nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid, struct nvme_id_ns *id, struct nvme_ns_ids *ids) { - int ret = 0; - memset(ids, 0, sizeof(*ids)); if (ctrl->vs >= NVME_VS(1, 1, 0)) memcpy(ids->eui64, id->eui64, sizeof(id->eui64)); if (ctrl->vs >= NVME_VS(1, 2, 0)) memcpy(ids->nguid, id->nguid, sizeof(id->nguid)); - if (ctrl->vs >= NVME_VS(1, 3, 0)) { - /* Don't treat error as fatal we potentially - * already have a NGUID or EUI-64 - */ - ret = nvme_identify_ns_descs(ctrl, nsid, ids); - if (ret) - dev_warn(ctrl->device, - "Identify Descriptors failed (%d)\n", ret); - if (ret > 0) - ret = 0; - } - return ret; + if (ctrl->vs >= NVME_VS(1, 3, 0)) + return nvme_identify_ns_descs(ctrl, nsid, ids); + return 0; } static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids) From 026d2ef752f47f33efd92244b9cf6be65d2a1621 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Mar 2020 14:19:36 +0100 Subject: [PATCH 77/81] nvme: rename __nvme_find_ns_head to nvme_find_ns_head There is no non __-prefixed version, so make the name a little more readable. 
Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 6bd1c6dfac6b..56a0dc18ed2d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3357,7 +3357,7 @@ static const struct attribute_group *nvme_dev_attr_groups[] = { NULL, }; -static struct nvme_ns_head *__nvme_find_ns_head(struct nvme_subsystem *subsys, +static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys, unsigned nsid) { struct nvme_ns_head *h; @@ -3457,7 +3457,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, mutex_lock(&ctrl->subsys->lock); if (is_shared) - head = __nvme_find_ns_head(ctrl->subsys, nsid); + head = nvme_find_ns_head(ctrl->subsys, nsid); if (!head) { head = nvme_alloc_ns_head(ctrl, nsid, id); if (IS_ERR(head)) { From 43fcd9e1eae87c3235b8077f97bc6a286c3ae59b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Mar 2020 14:19:37 +0100 Subject: [PATCH 78/81] nvme: cleanup namespace identifier reporting in nvme_init_ns_head Lift the common namespace identifier reporting between the shared namespace and new nshead cases into common code. This also means one less lock is held while doing I/O. 
Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 56a0dc18ed2d..2b0f693437a8 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3390,7 +3390,8 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys, } static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, - unsigned nsid, struct nvme_id_ns *id) + unsigned nsid, struct nvme_id_ns *id, + struct nvme_ns_ids *ids) { struct nvme_ns_head *head; size_t size = sizeof(*head); @@ -3413,12 +3414,9 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, goto out_ida_remove; head->subsys = ctrl->subsys; head->ns_id = nsid; + head->ids = *ids; kref_init(&head->ref); - ret = nvme_report_ns_ids(ctrl, nsid, id, &head->ids); - if (ret) - goto out_cleanup_srcu; - ret = __nvme_check_ids(ctrl->subsys, head); if (ret) { dev_err(ctrl->device, @@ -3453,24 +3451,23 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, struct nvme_ctrl *ctrl = ns->ctrl; bool is_shared = id->nmic & (1 << 0); struct nvme_ns_head *head = NULL; + struct nvme_ns_ids ids; int ret = 0; + ret = nvme_report_ns_ids(ctrl, nsid, id, &ids); + if (ret) + goto out; + mutex_lock(&ctrl->subsys->lock); if (is_shared) head = nvme_find_ns_head(ctrl->subsys, nsid); if (!head) { - head = nvme_alloc_ns_head(ctrl, nsid, id); + head = nvme_alloc_ns_head(ctrl, nsid, id, &ids); if (IS_ERR(head)) { ret = PTR_ERR(head); goto out_unlock; } } else { - struct nvme_ns_ids ids; - - ret = nvme_report_ns_ids(ctrl, nsid, id, &ids); - if (ret) - goto out_unlock; - if (!nvme_ns_ids_equal(&head->ids, &ids)) { dev_err(ctrl->device, "IDs don't match for shared namespace %d\n", @@ -3485,6 +3482,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, out_unlock: mutex_unlock(&ctrl->subsys->lock); +out: if (ret > 0) ret = 
blk_status_to_errno(nvme_error_status(ret)); return ret; From 02694e86356dcf72d39329e52630234ad687e206 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 25 Mar 2020 10:49:54 -0700 Subject: [PATCH 79/81] block: add a zone condition debug helper Add a helper to stringify the zone conditions. We use this helper in the next patch to track zone conditions in tracepoints. Reviewed-by: Damien Le Moal Signed-off-by: Chaitanya Kulkarni Signed-off-by: Jens Axboe --- block/blk-zoned.c | 32 ++++++++++++++++++++++++++++++++ include/linux/blkdev.h | 4 ++++ 2 files changed, 36 insertions(+) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 05741c6f618b..f18f1ee9d71f 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -20,6 +20,38 @@ #include "blk.h" +#define ZONE_COND_NAME(name) [BLK_ZONE_COND_##name] = #name +static const char *const zone_cond_name[] = { + ZONE_COND_NAME(NOT_WP), + ZONE_COND_NAME(EMPTY), + ZONE_COND_NAME(IMP_OPEN), + ZONE_COND_NAME(EXP_OPEN), + ZONE_COND_NAME(CLOSED), + ZONE_COND_NAME(READONLY), + ZONE_COND_NAME(FULL), + ZONE_COND_NAME(OFFLINE), +}; +#undef ZONE_COND_NAME + +/** + * blk_zone_cond_str - Return string XXX in BLK_ZONE_COND_XXX. + * @zone_cond: BLK_ZONE_COND_XXX. + * + * Description: Centralize block layer function to convert BLK_ZONE_COND_XXX + * into string format. Useful in the debugging and tracing zone conditions. For + * invalid BLK_ZONE_COND_XXX it returns string "UNKNOWN". 
+ */ +const char *blk_zone_cond_str(enum blk_zone_cond zone_cond) +{ + const char *zone_cond_str = "UNKNOWN"; + + if (zone_cond < ARRAY_SIZE(zone_cond_name) && zone_cond_name[zone_cond]) + zone_cond_str = zone_cond_name[zone_cond]; + + return zone_cond_str; +} +EXPORT_SYMBOL_GPL(blk_zone_cond_str); + static inline sector_t blk_zone_start(struct request_queue *q, sector_t sector) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f629d40c645c..a5acf17e7d76 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -952,6 +952,10 @@ static inline unsigned int blk_rq_stats_sectors(const struct request *rq) } #ifdef CONFIG_BLK_DEV_ZONED + +/* Helper to convert BLK_ZONE_COND_XXX to its string format XXX */ +const char *blk_zone_cond_str(enum blk_zone_cond zone_cond); + static inline unsigned int blk_rq_zone_no(struct request *rq) { return blk_queue_zone_no(rq->q, blk_rq_pos(rq)); From c51d04199826824944ed563748c9542eea27c2d7 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 25 Mar 2020 10:49:55 -0700 Subject: [PATCH 80/81] null_blk: add tracepoint helpers for zoned mode This patch adds two new tracepoints for null_blk_zoned.c that allow us to trace report-zones, zone-mgmt-op and zone-write operations which have a direct effect on the zone condition state machine. Also, we update drivers/block/Makefile so that new null_blk related tracefiles can be compiled.
Signed-off-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Signed-off-by: Jens Axboe --- drivers/block/Makefile | 6 +++ drivers/block/null_blk_trace.c | 21 +++++++++ drivers/block/null_blk_trace.h | 79 ++++++++++++++++++++++++++++++++++ 3 files changed, 106 insertions(+) create mode 100644 drivers/block/null_blk_trace.c create mode 100644 drivers/block/null_blk_trace.h diff --git a/drivers/block/Makefile b/drivers/block/Makefile index a53cc1e3a2d3..795facd8cf19 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -6,6 +6,9 @@ # Rewritten to use lists instead of if-statements. # +# needed for trace events +ccflags-y += -I$(src) + obj-$(CONFIG_MAC_FLOPPY) += swim3.o obj-$(CONFIG_BLK_DEV_SWIM) += swim_mod.o obj-$(CONFIG_BLK_DEV_FD) += floppy.o @@ -39,6 +42,9 @@ obj-$(CONFIG_ZRAM) += zram/ obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o null_blk-objs := null_blk_main.o +ifeq ($(CONFIG_BLK_DEV_ZONED), y) +null_blk-$(CONFIG_TRACING) += null_blk_trace.o +endif null_blk-$(CONFIG_BLK_DEV_ZONED) += null_blk_zoned.o skd-y := skd_main.o diff --git a/drivers/block/null_blk_trace.c b/drivers/block/null_blk_trace.c new file mode 100644 index 000000000000..f246e7bff698 --- /dev/null +++ b/drivers/block/null_blk_trace.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * null_blk trace related helpers. + * + * Copyright (C) 2020 Western Digital Corporation or its affiliates. + */ +#include "null_blk_trace.h" + +/* + * Helper to use for all null_blk traces to extract disk name. 
+ */ +const char *nullb_trace_disk_name(struct trace_seq *p, char *name) +{ + const char *ret = trace_seq_buffer_ptr(p); + + if (name && *name) + trace_seq_printf(p, "disk=%s, ", name); + trace_seq_putc(p, 0); + + return ret; +} diff --git a/drivers/block/null_blk_trace.h b/drivers/block/null_blk_trace.h new file mode 100644 index 000000000000..4f83032eb544 --- /dev/null +++ b/drivers/block/null_blk_trace.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * null_blk device driver tracepoints. + * + * Copyright (C) 2020 Western Digital Corporation or its affiliates. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM nullb + +#if !defined(_TRACE_NULLB_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NULLB_H + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +#include "null_blk.h" + +const char *nullb_trace_disk_name(struct trace_seq *p, char *name); + +#define __print_disk_name(name) nullb_trace_disk_name(p, name) + +#ifndef TRACE_HEADER_MULTI_READ +static inline void __assign_disk_name(char *name, struct gendisk *disk) +{ + if (disk) + memcpy(name, disk->disk_name, DISK_NAME_LEN); + else + memset(name, 0, DISK_NAME_LEN); +} +#endif + +TRACE_EVENT(nullb_zone_op, + TP_PROTO(struct nullb_cmd *cmd, unsigned int zone_no, + unsigned int zone_cond), + TP_ARGS(cmd, zone_no, zone_cond), + TP_STRUCT__entry( + __array(char, disk, DISK_NAME_LEN) + __field(enum req_opf, op) + __field(unsigned int, zone_no) + __field(unsigned int, zone_cond) + ), + TP_fast_assign( + __entry->op = req_op(cmd->rq); + __entry->zone_no = zone_no; + __entry->zone_cond = zone_cond; + __assign_disk_name(__entry->disk, cmd->rq->rq_disk); + ), + TP_printk("%s req=%-15s zone_no=%u zone_cond=%-10s", + __print_disk_name(__entry->disk), + blk_op_str(__entry->op), + __entry->zone_no, + blk_zone_cond_str(__entry->zone_cond)) +); + +TRACE_EVENT(nullb_report_zones, + TP_PROTO(struct nullb *nullb, unsigned int nr_zones), + TP_ARGS(nullb, nr_zones), + TP_STRUCT__entry( + __array(char, disk, DISK_NAME_LEN) +
+ __field(unsigned int, nr_zones) + ), + TP_fast_assign( + __entry->nr_zones = nr_zones; + __assign_disk_name(__entry->disk, nullb->disk); + ), + TP_printk("%s nr_zones=%u", + __print_disk_name(__entry->disk), __entry->nr_zones) +); + +#endif /* _TRACE_NULLB_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE null_blk_trace + +/* This part must be outside protection */ +#include <trace/define_trace.h> From 766c3297d7e1584394d4af0cc8368e838124b023 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 25 Mar 2020 10:49:56 -0700 Subject: [PATCH 81/81] null_blk: add trace in null_blk_zoned.c With the help of previously added tracepoints we can now trace report-zones, zone-write and zone-mgmt ops in null_blk_zoned.c. Signed-off-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Signed-off-by: Jens Axboe --- drivers/block/null_blk_zoned.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c index ed34785dd64b..673618d8222a 100644 --- a/drivers/block/null_blk_zoned.c +++ b/drivers/block/null_blk_zoned.c @@ -2,6 +2,9 @@ #include <linux/vmalloc.h> #include "null_blk.h" +#define CREATE_TRACE_POINTS +#include "null_blk_trace.h" + /* zone_size in MBs to sectors.
*/ #define ZONE_SIZE_SHIFT 11 @@ -80,6 +83,8 @@ int null_report_zones(struct gendisk *disk, sector_t sector, return 0; nr_zones = min(nr_zones, dev->nr_zones - first_zone); + trace_nullb_report_zones(nullb, nr_zones); + for (i = 0; i < nr_zones; i++) { /* * Stacked DM target drivers will remap the zone information by @@ -148,6 +153,8 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, /* Invalid zone condition */ return BLK_STS_IOERR; } + + trace_nullb_zone_op(cmd, zno, zone->cond); return BLK_STS_OK; } @@ -155,7 +162,8 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, sector_t sector) { struct nullb_device *dev = cmd->nq->dev; - struct blk_zone *zone = &dev->zones[null_zone_no(dev, sector)]; + unsigned int zone_no = null_zone_no(dev, sector); + struct blk_zone *zone = &dev->zones[zone_no]; size_t i; switch (op) { @@ -203,6 +211,8 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, default: return BLK_STS_NOTSUPP; } + + trace_nullb_zone_op(cmd, zone_no, zone->cond); return BLK_STS_OK; }