From 49bddc73d15c25a68e4294d76fc74519fda984cd Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 7 Aug 2019 09:30:29 +0530 Subject: [PATCH 01/12] libnvdimm/of_pmem: Provide a unique name for bus provider ndctl binaries, v66 and older, mistakenly require the ndbus to have unique names. If not while enumerating the bus in userspace it drops bus with similar names. This results in us not listing devices beneath the bus. Signed-off-by: Aneesh Kumar K.V Tested-by: Vaibhav Jain Link: https://lore.kernel.org/r/20190807040029.11344-1-aneesh.kumar@linux.ibm.com Signed-off-by: Dan Williams --- drivers/nvdimm/of_pmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c index a0c8dcfa0bf9..97187d6c0bdb 100644 --- a/drivers/nvdimm/of_pmem.c +++ b/drivers/nvdimm/of_pmem.c @@ -42,7 +42,7 @@ static int of_pmem_region_probe(struct platform_device *pdev) return -ENOMEM; priv->bus_desc.attr_groups = bus_attr_groups; - priv->bus_desc.provider_name = "of_pmem"; + priv->bus_desc.provider_name = kstrdup(pdev->name, GFP_KERNEL); priv->bus_desc.module = THIS_MODULE; priv->bus_desc.of_node = np; From 5518ba4ebd7d1e15fc558af05fa600198535f074 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 7 Aug 2019 14:31:11 -0700 Subject: [PATCH 02/12] tools/testing/nvdimm: Fix fallthrough warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the expected 'fall through' designation to fix: tools/testing/nvdimm/test/nfit.c: In function ‘nd_intel_test_finish_query’: tools/testing/nvdimm/test/nfit.c:433:13: warning: this statement may fall through [-Wimplicit-fallthrough=] fw->state = FW_STATE_UPDATED; ~~~~~~~~~~^~~~~~~~~~~~~~~~~~ tools/testing/nvdimm/test/nfit.c:435:2: note: here case FW_STATE_UPDATED: ^~~~ Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/156521347159.1442374.1381360879102718899.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 507e6f4cbb53..bf6422a6af7f 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -428,10 +428,9 @@ static int nd_intel_test_finish_query(struct nfit_test *t, dev_dbg(dev, "%s: still verifying\n", __func__); break; } - dev_dbg(dev, "%s: transition out verify\n", __func__); fw->state = FW_STATE_UPDATED; - /* we are going to fall through if it's "done" */ + /* fall through */ case FW_STATE_UPDATED: nd_cmd->status = 0; /* bogus test version */ From 2b90cb223320a93b1be6c2616efe6f9ff14d8b28 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 10 Jun 2019 16:06:13 -0500 Subject: [PATCH 03/12] libnvdimm, region: Use struct_size() in kzalloc() One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct nd_region { ... struct nd_mapping mapping[0]; }; instance = kzalloc(sizeof(struct nd_region) + sizeof(struct nd_mapping) * count, GFP_KERNEL); Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: instance = kzalloc(struct_size(instance, mapping, count), GFP_KERNEL); This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Reviewed-by: Vishal Verma Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20190610210613.GA21989@embeddedor Signed-off-by: Dan Williams --- drivers/nvdimm/region_devs.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index af30cbe7a8ea..b477a8dc0020 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -1025,10 +1025,9 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, } region_buf = ndbr; } else { - nd_region = kzalloc(sizeof(struct nd_region) - + sizeof(struct nd_mapping) - * ndr_desc->num_mappings, - GFP_KERNEL); + nd_region = kzalloc(struct_size(nd_region, mapping, + ndr_desc->num_mappings), + GFP_KERNEL); region_buf = nd_region; } From d78c620a2e824d7b01a6e991208a8aa2c938cabe Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 26 Aug 2019 17:54:54 -0700 Subject: [PATCH 04/12] libnvdimm/security: Introduce a 'frozen' attribute In the process of debugging a system with an NVDIMM that was failing to unlock it was found that the kernel is reporting 'locked' while the DIMM security interface is 'frozen'. Unfortunately the security state is tracked internally as an enum which prevents it from communicating the difference between 'locked' and 'locked + frozen'. It follows that the enum also prevents the kernel from communicating 'unlocked + frozen' which would be useful for debugging why security operations like 'change passphrase' are disabled. Ditch the security state enum for a set of flags and introduce a new sysfs attribute explicitly for the 'frozen' state. The regression risk is low because the 'frozen' state was already blocked behind the 'locked' state, but will need to revisit if there were cases where applications need 'frozen' to show up in the primary 'security' attribute. The expectation is that communicating 'frozen' is mostly a helper for debug and status monitoring. Reviewed-by: Dave Jiang Reported-by: Jeff Moyer Reviewed-by: Jeff Moyer Link: https://lore.kernel.org/r/156686729474.184120.5835135644278860826.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/acpi/nfit/intel.c | 59 ++++++++++--------- drivers/nvdimm/bus.c | 2 +- drivers/nvdimm/dimm_devs.c | 59 +++++++++++-------- drivers/nvdimm/nd-core.h | 23 ++++++-- drivers/nvdimm/security.c | 99 +++++++++++++++----------------- include/linux/libnvdimm.h | 9 ++- tools/testing/nvdimm/dimm_devs.c | 19 ++---- 7 files changed, 142 insertions(+), 128 deletions(-) diff --git a/drivers/acpi/nfit/intel.c b/drivers/acpi/nfit/intel.c index cddd0fcf622c..1113b679cd7b 100644 --- a/drivers/acpi/nfit/intel.c +++ b/drivers/acpi/nfit/intel.c @@ -7,10 +7,11 @@ #include "intel.h" #include "nfit.h" -static enum nvdimm_security_state intel_security_state(struct nvdimm *nvdimm, +static unsigned long intel_security_flags(struct nvdimm *nvdimm, enum nvdimm_passphrase_type ptype) { struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + unsigned long security_flags = 0; struct { struct nd_cmd_pkg pkg; struct nd_intel_get_security_state cmd; @@ -27,7 +28,7 @@ static enum nvdimm_security_state intel_security_state(struct nvdimm *nvdimm, int rc; if (!test_bit(NVDIMM_INTEL_GET_SECURITY_STATE, &nfit_mem->dsm_mask)) - return -ENXIO; + return 0; /* * Short circuit the state retrieval while we are doing overwrite. @@ -35,38 +36,42 @@ static enum nvdimm_security_state intel_security_state(struct nvdimm *nvdimm, * until the overwrite DSM completes. */ if (nvdimm_in_overwrite(nvdimm) && ptype == NVDIMM_USER) - return NVDIMM_SECURITY_OVERWRITE; + return BIT(NVDIMM_SECURITY_OVERWRITE); rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL); - if (rc < 0) - return rc; - if (nd_cmd.cmd.status) - return -EIO; + if (rc < 0 || nd_cmd.cmd.status) { + pr_err("%s: security state retrieval failed (%d:%#x)\n", + nvdimm_name(nvdimm), rc, nd_cmd.cmd.status); + return 0; + } /* check and see if security is enabled and locked */ if (ptype == NVDIMM_MASTER) { if (nd_cmd.cmd.extended_state & ND_INTEL_SEC_ESTATE_ENABLED) - return NVDIMM_SECURITY_UNLOCKED; - else if (nd_cmd.cmd.extended_state & - ND_INTEL_SEC_ESTATE_PLIMIT) - return NVDIMM_SECURITY_FROZEN; - } else { - if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_UNSUPPORTED) - return -ENXIO; - else if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_ENABLED) { - if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_LOCKED) - return NVDIMM_SECURITY_LOCKED; - else if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_FROZEN - || nd_cmd.cmd.state & - ND_INTEL_SEC_STATE_PLIMIT) - return NVDIMM_SECURITY_FROZEN; - else - return NVDIMM_SECURITY_UNLOCKED; - } + set_bit(NVDIMM_SECURITY_UNLOCKED, &security_flags); + else + set_bit(NVDIMM_SECURITY_DISABLED, &security_flags); + if (nd_cmd.cmd.extended_state & ND_INTEL_SEC_ESTATE_PLIMIT) + set_bit(NVDIMM_SECURITY_FROZEN, &security_flags); + return security_flags; } - /* this should cover master security disabled as well */ - return NVDIMM_SECURITY_DISABLED; + if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_UNSUPPORTED) + return 0; + + if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_ENABLED) { + if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_FROZEN || + nd_cmd.cmd.state & ND_INTEL_SEC_STATE_PLIMIT) + set_bit(NVDIMM_SECURITY_FROZEN, &security_flags); + + if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_LOCKED) + set_bit(NVDIMM_SECURITY_LOCKED, &security_flags); + else + set_bit(NVDIMM_SECURITY_UNLOCKED, &security_flags); + } else + set_bit(NVDIMM_SECURITY_DISABLED, &security_flags); + + return security_flags; } static int intel_security_freeze(struct nvdimm *nvdimm) @@ -371,7 +376,7 @@ static void nvdimm_invalidate_cache(void) #endif static const struct nvdimm_security_ops __intel_security_ops = { - .state = intel_security_state, + .get_flags = intel_security_flags, .freeze = intel_security_freeze, .change_key = intel_security_change_key, .disable = intel_security_disable, diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 798c5c4aea9c..29479d3b01b0 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -400,7 +400,7 @@ static int child_unregister(struct device *dev, void *data) /* We are shutting down. Make state frozen artificially. */ nvdimm_bus_lock(dev); - nvdimm->sec.state = NVDIMM_SECURITY_FROZEN; + set_bit(NVDIMM_SECURITY_FROZEN, &nvdimm->sec.flags); if (test_and_clear_bit(NDD_WORK_PENDING, &nvdimm->flags)) dev_put = true; nvdimm_bus_unlock(dev); diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 29a065e769ea..53330625fe07 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -372,24 +372,27 @@ __weak ssize_t security_show(struct device *dev, { struct nvdimm *nvdimm = to_nvdimm(dev); - switch (nvdimm->sec.state) { - case NVDIMM_SECURITY_DISABLED: + if (test_bit(NVDIMM_SECURITY_DISABLED, &nvdimm->sec.flags)) return sprintf(buf, "disabled\n"); - case NVDIMM_SECURITY_UNLOCKED: + if (test_bit(NVDIMM_SECURITY_UNLOCKED, &nvdimm->sec.flags)) return sprintf(buf, "unlocked\n"); - case NVDIMM_SECURITY_LOCKED: + if (test_bit(NVDIMM_SECURITY_LOCKED, &nvdimm->sec.flags)) return sprintf(buf, "locked\n"); - case NVDIMM_SECURITY_FROZEN: - return sprintf(buf, "frozen\n"); - case NVDIMM_SECURITY_OVERWRITE: + if (test_bit(NVDIMM_SECURITY_OVERWRITE, &nvdimm->sec.flags)) return sprintf(buf, "overwrite\n"); - default: - return -ENOTTY; - } - return -ENOTTY; } +static ssize_t frozen_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + + return sprintf(buf, "%d\n", test_bit(NVDIMM_SECURITY_FROZEN, + &nvdimm->sec.flags)); +} +static DEVICE_ATTR_RO(frozen); + #define OPS \ C( OP_FREEZE, "freeze", 1), \ C( OP_DISABLE, "disable", 2), \ @@ -501,6 +504,7 @@ static struct attribute *nvdimm_attributes[] = { &dev_attr_commands.attr, &dev_attr_available_slots.attr, &dev_attr_security.attr, + &dev_attr_frozen.attr, NULL, }; @@ -509,17 +513,24 @@ static umode_t nvdimm_visible(struct kobject *kobj, struct attribute *a, int n) struct device *dev = container_of(kobj, typeof(*dev), kobj); struct nvdimm *nvdimm = to_nvdimm(dev); - if (a != &dev_attr_security.attr) + if (a != &dev_attr_security.attr && a != &dev_attr_frozen.attr) return a->mode; - if (nvdimm->sec.state < 0) + if (!nvdimm->sec.flags) return 0; - /* Are there any state mutation ops? */ - if (nvdimm->sec.ops->freeze || nvdimm->sec.ops->disable - || nvdimm->sec.ops->change_key - || nvdimm->sec.ops->erase - || nvdimm->sec.ops->overwrite) + + if (a == &dev_attr_security.attr) { + /* Are there any state mutation ops (make writable)? */ + if (nvdimm->sec.ops->freeze || nvdimm->sec.ops->disable + || nvdimm->sec.ops->change_key + || nvdimm->sec.ops->erase + || nvdimm->sec.ops->overwrite) + return a->mode; + return 0444; + } + + if (nvdimm->sec.ops->freeze) return a->mode; - return 0444; + return 0; } struct attribute_group nvdimm_attribute_group = { @@ -569,8 +580,8 @@ struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus, * attribute visibility. */ /* get security state and extended (master) state */ - nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER); - nvdimm->sec.ext_state = nvdimm_security_state(nvdimm, NVDIMM_MASTER); + nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER); + nvdimm->sec.ext_flags = nvdimm_security_flags(nvdimm, NVDIMM_MASTER); nd_device_register(dev); return nvdimm; @@ -588,7 +599,7 @@ int nvdimm_security_setup_events(struct device *dev) { struct nvdimm *nvdimm = to_nvdimm(dev); - if (nvdimm->sec.state < 0 || !nvdimm->sec.ops + if (!nvdimm->sec.flags || !nvdimm->sec.ops || !nvdimm->sec.ops->overwrite) return 0; nvdimm->sec.overwrite_state = sysfs_get_dirent(dev->kobj.sd, "security"); @@ -614,7 +625,7 @@ int nvdimm_security_freeze(struct nvdimm *nvdimm) if (!nvdimm->sec.ops || !nvdimm->sec.ops->freeze) return -EOPNOTSUPP; - if (nvdimm->sec.state < 0) + if (!nvdimm->sec.flags) return -EIO; if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) { @@ -623,7 +634,7 @@ int nvdimm_security_freeze(struct nvdimm *nvdimm) } rc = nvdimm->sec.ops->freeze(nvdimm); - nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER); + nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER); return rc; } diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 0ac52b6eb00e..da2bbfd56d9f 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -39,21 +39,32 @@ struct nvdimm { const char *dimm_id; struct { const struct nvdimm_security_ops *ops; - enum nvdimm_security_state state; - enum nvdimm_security_state ext_state; + unsigned long flags; + unsigned long ext_flags; unsigned int overwrite_tmo; struct kernfs_node *overwrite_state; } sec; struct delayed_work dwork; }; -static inline enum nvdimm_security_state nvdimm_security_state( +static inline unsigned long nvdimm_security_flags( struct nvdimm *nvdimm, enum nvdimm_passphrase_type ptype) { - if (!nvdimm->sec.ops) - return -ENXIO; + u64 flags; + const u64 state_flags = 1UL << NVDIMM_SECURITY_DISABLED + | 1UL << NVDIMM_SECURITY_LOCKED + | 1UL << NVDIMM_SECURITY_UNLOCKED + | 1UL << NVDIMM_SECURITY_OVERWRITE; - return nvdimm->sec.ops->state(nvdimm, ptype); + if (!nvdimm->sec.ops) + return 0; + + flags = nvdimm->sec.ops->get_flags(nvdimm, ptype); + /* disabled, locked, unlocked, and overwrite are mutually exclusive */ + dev_WARN_ONCE(&nvdimm->dev, hweight64(flags & state_flags) > 1, + "reported invalid security state: %#llx\n", + (unsigned long long) flags); + return flags; } int nvdimm_security_freeze(struct nvdimm *nvdimm); #if IS_ENABLED(CONFIG_NVDIMM_KEYS) diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c index a570f2263a42..5862d0eee9db 100644 --- a/drivers/nvdimm/security.c +++ b/drivers/nvdimm/security.c @@ -158,7 +158,7 @@ static int nvdimm_key_revalidate(struct nvdimm *nvdimm) } nvdimm_put_key(key); - nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER); + nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER); return 0; } @@ -174,7 +174,7 @@ static int __nvdimm_security_unlock(struct nvdimm *nvdimm) lockdep_assert_held(&nvdimm_bus->reconfig_mutex); if (!nvdimm->sec.ops || !nvdimm->sec.ops->unlock - || nvdimm->sec.state < 0) + || !nvdimm->sec.flags) return -EIO; if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) { @@ -189,7 +189,7 @@ static int __nvdimm_security_unlock(struct nvdimm *nvdimm) * freeze of the security configuration. I.e. if the OS does not * have the key, security is being managed pre-OS. */ - if (nvdimm->sec.state == NVDIMM_SECURITY_UNLOCKED) { + if (test_bit(NVDIMM_SECURITY_UNLOCKED, &nvdimm->sec.flags)) { if (!key_revalidate) return 0; @@ -202,7 +202,7 @@ static int __nvdimm_security_unlock(struct nvdimm *nvdimm) rc == 0 ? "success" : "fail"); nvdimm_put_key(key); - nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER); + nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER); return rc; } @@ -217,6 +217,24 @@ int nvdimm_security_unlock(struct device *dev) return rc; } +static int check_security_state(struct nvdimm *nvdimm) +{ + struct device *dev = &nvdimm->dev; + + if (test_bit(NVDIMM_SECURITY_FROZEN, &nvdimm->sec.flags)) { + dev_dbg(dev, "Incorrect security state: %#lx\n", + nvdimm->sec.flags); + return -EIO; + } + + if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) { + dev_dbg(dev, "Security operation in progress.\n"); + return -EBUSY; + } + + return 0; +} + int nvdimm_security_disable(struct nvdimm *nvdimm, unsigned int keyid) { struct device *dev = &nvdimm->dev; @@ -229,19 +247,12 @@ int nvdimm_security_disable(struct nvdimm *nvdimm, unsigned int keyid) lockdep_assert_held(&nvdimm_bus->reconfig_mutex); if (!nvdimm->sec.ops || !nvdimm->sec.ops->disable - || nvdimm->sec.state < 0) + || !nvdimm->sec.flags) return -EOPNOTSUPP; - if (nvdimm->sec.state >= NVDIMM_SECURITY_FROZEN) { - dev_dbg(dev, "Incorrect security state: %d\n", - nvdimm->sec.state); - return -EIO; - } - - if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) { - dev_dbg(dev, "Security operation in progress.\n"); - return -EBUSY; - } + rc = check_security_state(nvdimm); + if (rc) + return rc; data = nvdimm_get_user_key_payload(nvdimm, keyid, NVDIMM_BASE_KEY, &key); @@ -253,7 +264,7 @@ int nvdimm_security_disable(struct nvdimm *nvdimm, unsigned int keyid) rc == 0 ? "success" : "fail"); nvdimm_put_key(key); - nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER); + nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER); return rc; } @@ -271,14 +282,12 @@ int nvdimm_security_update(struct nvdimm *nvdimm, unsigned int keyid, lockdep_assert_held(&nvdimm_bus->reconfig_mutex); if (!nvdimm->sec.ops || !nvdimm->sec.ops->change_key - || nvdimm->sec.state < 0) + || !nvdimm->sec.flags) return -EOPNOTSUPP; - if (nvdimm->sec.state >= NVDIMM_SECURITY_FROZEN) { - dev_dbg(dev, "Incorrect security state: %d\n", - nvdimm->sec.state); - return -EIO; - } + rc = check_security_state(nvdimm); + if (rc) + return rc; data = nvdimm_get_user_key_payload(nvdimm, keyid, NVDIMM_BASE_KEY, &key); @@ -301,10 +310,10 @@ int nvdimm_security_update(struct nvdimm *nvdimm, unsigned int keyid, nvdimm_put_key(newkey); nvdimm_put_key(key); if (pass_type == NVDIMM_MASTER) - nvdimm->sec.ext_state = nvdimm_security_state(nvdimm, + nvdimm->sec.ext_flags = nvdimm_security_flags(nvdimm, NVDIMM_MASTER); else - nvdimm->sec.state = nvdimm_security_state(nvdimm, + nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER); return rc; } @@ -322,7 +331,7 @@ int nvdimm_security_erase(struct nvdimm *nvdimm, unsigned int keyid, lockdep_assert_held(&nvdimm_bus->reconfig_mutex); if (!nvdimm->sec.ops || !nvdimm->sec.ops->erase - || nvdimm->sec.state < 0) + || !nvdimm->sec.flags) return -EOPNOTSUPP; if (atomic_read(&nvdimm->busy)) { @@ -330,18 +339,11 @@ int nvdimm_security_erase(struct nvdimm *nvdimm, unsigned int keyid, return -EBUSY; } - if (nvdimm->sec.state >= NVDIMM_SECURITY_FROZEN) { - dev_dbg(dev, "Incorrect security state: %d\n", - nvdimm->sec.state); - return -EIO; - } + rc = check_security_state(nvdimm); + if (rc) + return rc; - if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) { - dev_dbg(dev, "Security operation in progress.\n"); - return -EBUSY; - } - - if (nvdimm->sec.ext_state != NVDIMM_SECURITY_UNLOCKED + if (!test_bit(NVDIMM_SECURITY_UNLOCKED, &nvdimm->sec.ext_flags) && pass_type == NVDIMM_MASTER) { dev_dbg(dev, "Attempt to secure erase in wrong master state.\n"); @@ -359,7 +361,7 @@ int nvdimm_security_erase(struct nvdimm *nvdimm, unsigned int keyid, rc == 0 ? "success" : "fail"); nvdimm_put_key(key); - nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER); + nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER); return rc; } @@ -375,7 +377,7 @@ int nvdimm_security_overwrite(struct nvdimm *nvdimm, unsigned int keyid) lockdep_assert_held(&nvdimm_bus->reconfig_mutex); if (!nvdimm->sec.ops || !nvdimm->sec.ops->overwrite - || nvdimm->sec.state < 0) + || !nvdimm->sec.flags) return -EOPNOTSUPP; if (atomic_read(&nvdimm->busy)) { @@ -388,16 +390,9 @@ int nvdimm_security_overwrite(struct nvdimm *nvdimm, unsigned int keyid) return -EINVAL; } - if (nvdimm->sec.state >= NVDIMM_SECURITY_FROZEN) { - dev_dbg(dev, "Incorrect security state: %d\n", - nvdimm->sec.state); - return -EIO; - } - - if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) { - dev_dbg(dev, "Security operation in progress.\n"); - return -EBUSY; - } + rc = check_security_state(nvdimm); + if (rc) + return rc; data = nvdimm_get_user_key_payload(nvdimm, keyid, NVDIMM_BASE_KEY, &key); @@ -412,7 +407,7 @@ int nvdimm_security_overwrite(struct nvdimm *nvdimm, unsigned int keyid) if (rc == 0) { set_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags); set_bit(NDD_WORK_PENDING, &nvdimm->flags); - nvdimm->sec.state = NVDIMM_SECURITY_OVERWRITE; + set_bit(NVDIMM_SECURITY_OVERWRITE, &nvdimm->sec.flags); /* * Make sure we don't lose device while doing overwrite * query. @@ -443,7 +438,7 @@ void __nvdimm_security_overwrite_query(struct nvdimm *nvdimm) tmo = nvdimm->sec.overwrite_tmo; if (!nvdimm->sec.ops || !nvdimm->sec.ops->query_overwrite - || nvdimm->sec.state < 0) + || !nvdimm->sec.flags) return; rc = nvdimm->sec.ops->query_overwrite(nvdimm); @@ -467,8 +462,8 @@ void __nvdimm_security_overwrite_query(struct nvdimm *nvdimm) clear_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags); clear_bit(NDD_WORK_PENDING, &nvdimm->flags); put_device(&nvdimm->dev); - nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER); - nvdimm->sec.ext_state = nvdimm_security_state(nvdimm, NVDIMM_MASTER); + nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER); + nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_MASTER); } void nvdimm_security_overwrite_query(struct work_struct *work) diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 7a64b3ddb408..b6eddf912568 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -160,8 +160,11 @@ static inline struct nd_blk_region_desc *to_blk_region_desc( } -enum nvdimm_security_state { - NVDIMM_SECURITY_ERROR = -1, +/* + * Note that separate bits for locked + unlocked are defined so that + * 'flags == 0' corresponds to an error / not-supported state. + */ +enum nvdimm_security_bits { NVDIMM_SECURITY_DISABLED, NVDIMM_SECURITY_UNLOCKED, NVDIMM_SECURITY_LOCKED, @@ -182,7 +185,7 @@ enum nvdimm_passphrase_type { }; struct nvdimm_security_ops { - enum nvdimm_security_state (*state)(struct nvdimm *nvdimm, + unsigned long (*get_flags)(struct nvdimm *nvdimm, enum nvdimm_passphrase_type pass_type); int (*freeze)(struct nvdimm *nvdimm); int (*change_key)(struct nvdimm *nvdimm, diff --git a/tools/testing/nvdimm/dimm_devs.c b/tools/testing/nvdimm/dimm_devs.c index 2d4baf57822f..57bd27dedf1f 100644 --- a/tools/testing/nvdimm/dimm_devs.c +++ b/tools/testing/nvdimm/dimm_devs.c @@ -18,24 +18,13 @@ ssize_t security_show(struct device *dev, * For the test version we need to poll the "hardware" in order * to get the updated status for unlock testing. */ - nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER); - nvdimm->sec.ext_state = nvdimm_security_state(nvdimm, NVDIMM_MASTER); + nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER); - switch (nvdimm->sec.state) { - case NVDIMM_SECURITY_DISABLED: + if (test_bit(NVDIMM_SECURITY_DISABLED, &nvdimm->sec.flags)) return sprintf(buf, "disabled\n"); - case NVDIMM_SECURITY_UNLOCKED: + if (test_bit(NVDIMM_SECURITY_UNLOCKED, &nvdimm->sec.flags)) return sprintf(buf, "unlocked\n"); - case NVDIMM_SECURITY_LOCKED: + if (test_bit(NVDIMM_SECURITY_LOCKED, &nvdimm->sec.flags)) return sprintf(buf, "locked\n"); - case NVDIMM_SECURITY_FROZEN: - return sprintf(buf, "frozen\n"); - case NVDIMM_SECURITY_OVERWRITE: - return sprintf(buf, "overwrite\n"); - default: - return -ENOTTY; - } - return -ENOTTY; } - From bc4f2199ca3107809df96cf72f618b9559b00a21 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 26 Aug 2019 17:55:00 -0700 Subject: [PATCH 05/12] libnvdimm/security: Tighten scope of nvdimm->busy vs security operations An attempt to freeze DIMMs currently runs afoul of default blocking of all security operations in the entry to the 'store' routine for the 'security' sysfs attribute. The blanket blocking of all security operations while the DIMM is in active use in a region is too restrictive. The only security operations that need to be aware of the ->busy state are those that mutate the state of data, i.e. erase and overwrite. Refactor the ->busy checks to be applied at the entry common entry point in __security_store() rather than each of the helper routines to enable freeze to be run regardless of busy state. Reviewed-by: Dave Jiang Reviewed-by: Jeff Moyer Link: https://lore.kernel.org/r/156686729996.184120.3458026302402493937.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/nvdimm/dimm_devs.c | 33 ++++++++++++++++----------------- drivers/nvdimm/security.c | 10 ---------- 2 files changed, 16 insertions(+), 27 deletions(-) diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 53330625fe07..d837cb9be83d 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -424,9 +424,6 @@ static ssize_t __security_store(struct device *dev, const char *buf, size_t len) unsigned int key, newkey; int i; - if (atomic_read(&nvdimm->busy)) - return -EBUSY; - rc = sscanf(buf, "%"__stringify(SEC_CMD_SIZE)"s" " %"__stringify(KEY_ID_SIZE)"s" " %"__stringify(KEY_ID_SIZE)"s", @@ -451,23 +448,25 @@ static ssize_t __security_store(struct device *dev, const char *buf, size_t len) } else if (i == OP_DISABLE) { dev_dbg(dev, "disable %u\n", key); rc = nvdimm_security_disable(nvdimm, key); - } else if (i == OP_UPDATE) { - dev_dbg(dev, "update %u %u\n", key, newkey); - rc = nvdimm_security_update(nvdimm, key, newkey, NVDIMM_USER); - } else if (i == OP_ERASE) { - dev_dbg(dev, "erase %u\n", key); - rc = nvdimm_security_erase(nvdimm, key, NVDIMM_USER); + } else if (i == OP_UPDATE || i == OP_MASTER_UPDATE) { + dev_dbg(dev, "%s %u %u\n", ops[i].name, key, newkey); + rc = nvdimm_security_update(nvdimm, key, newkey, i == OP_UPDATE + ? NVDIMM_USER : NVDIMM_MASTER); + } else if (i == OP_ERASE || i == OP_MASTER_ERASE) { + dev_dbg(dev, "%s %u\n", ops[i].name, key); + if (atomic_read(&nvdimm->busy)) { + dev_dbg(dev, "Unable to secure erase while DIMM active.\n"); + return -EBUSY; + } + rc = nvdimm_security_erase(nvdimm, key, i == OP_ERASE + ? NVDIMM_USER : NVDIMM_MASTER); } else if (i == OP_OVERWRITE) { dev_dbg(dev, "overwrite %u\n", key); + if (atomic_read(&nvdimm->busy)) { + dev_dbg(dev, "Unable to overwrite while DIMM active.\n"); + return -EBUSY; + } rc = nvdimm_security_overwrite(nvdimm, key); - } else if (i == OP_MASTER_UPDATE) { - dev_dbg(dev, "master_update %u %u\n", key, newkey); - rc = nvdimm_security_update(nvdimm, key, newkey, - NVDIMM_MASTER); - } else if (i == OP_MASTER_ERASE) { - dev_dbg(dev, "master_erase %u\n", key); - rc = nvdimm_security_erase(nvdimm, key, - NVDIMM_MASTER); } else return -EINVAL; diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c index 5862d0eee9db..2166e627383a 100644 --- a/drivers/nvdimm/security.c +++ b/drivers/nvdimm/security.c @@ -334,11 +334,6 @@ int nvdimm_security_erase(struct nvdimm *nvdimm, unsigned int keyid, || !nvdimm->sec.flags) return -EOPNOTSUPP; - if (atomic_read(&nvdimm->busy)) { - dev_dbg(dev, "Unable to secure erase while DIMM active.\n"); - return -EBUSY; - } - rc = check_security_state(nvdimm); if (rc) return rc; @@ -380,11 +375,6 @@ int nvdimm_security_overwrite(struct nvdimm *nvdimm, unsigned int keyid) || !nvdimm->sec.flags) return -EOPNOTSUPP; - if (atomic_read(&nvdimm->busy)) { - dev_dbg(dev, "Unable to overwrite while DIMM active.\n"); - return -EBUSY; - } - if (dev->driver == NULL) { dev_dbg(dev, "Unable to overwrite while DIMM active.\n"); return -EINVAL; From 7b60422cb796d40431337becf2129fd9944b2f05 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 26 Aug 2019 17:55:05 -0700 Subject: [PATCH 06/12] libnvdimm/security: Consolidate 'security' operations The security operations are exported from libnvdimm/security.c to libnvdimm/dimm_devs.c, and libnvdimm/security.c is optionally compiled based on the CONFIG_NVDIMM_KEYS config symbol. Rather than export the operations across compile objects, just move the __security_store() entry point to live with the helpers. Acked-by: Jeff Moyer Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/156686730515.184120.10522747907309996674.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/nvdimm/dimm_devs.c | 84 +---------------------------------- drivers/nvdimm/nd-core.h | 30 ++----------- drivers/nvdimm/security.c | 90 ++++++++++++++++++++++++++++++++++++-- 3 files changed, 90 insertions(+), 114 deletions(-) diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index d837cb9be83d..196aa44c4936 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -393,88 +393,6 @@ static ssize_t frozen_show(struct device *dev, } static DEVICE_ATTR_RO(frozen); -#define OPS \ - C( OP_FREEZE, "freeze", 1), \ - C( OP_DISABLE, "disable", 2), \ - C( OP_UPDATE, "update", 3), \ - C( OP_ERASE, "erase", 2), \ - C( OP_OVERWRITE, "overwrite", 2), \ - C( OP_MASTER_UPDATE, "master_update", 3), \ - C( OP_MASTER_ERASE, "master_erase", 2) -#undef C -#define C(a, b, c) a -enum nvdimmsec_op_ids { OPS }; -#undef C -#define C(a, b, c) { b, c } -static struct { - const char *name; - int args; -} ops[] = { OPS }; -#undef C - -#define SEC_CMD_SIZE 32 -#define KEY_ID_SIZE 10 - -static ssize_t __security_store(struct device *dev, const char *buf, size_t len) -{ - struct nvdimm *nvdimm = to_nvdimm(dev); - ssize_t rc; - char cmd[SEC_CMD_SIZE+1], keystr[KEY_ID_SIZE+1], - nkeystr[KEY_ID_SIZE+1]; - unsigned int key, newkey; - int i; - - rc = sscanf(buf, "%"__stringify(SEC_CMD_SIZE)"s" - " %"__stringify(KEY_ID_SIZE)"s" - " %"__stringify(KEY_ID_SIZE)"s", - cmd, keystr, nkeystr); - if (rc < 1) - return -EINVAL; - for (i = 0; i < ARRAY_SIZE(ops); i++) - if (sysfs_streq(cmd, ops[i].name)) - break; - if (i >= ARRAY_SIZE(ops)) - return -EINVAL; - if (ops[i].args > 1) - rc = kstrtouint(keystr, 0, &key); - if (rc >= 0 && ops[i].args > 2) - rc = kstrtouint(nkeystr, 0, &newkey); - if (rc < 0) - return rc; - - if (i == OP_FREEZE) { - dev_dbg(dev, "freeze\n"); - rc = nvdimm_security_freeze(nvdimm); - } else if (i == OP_DISABLE) { - dev_dbg(dev, "disable %u\n", key); - rc = nvdimm_security_disable(nvdimm, key); - } else if (i == OP_UPDATE || i == OP_MASTER_UPDATE) { - dev_dbg(dev, "%s %u %u\n", ops[i].name, key, newkey); - rc = nvdimm_security_update(nvdimm, key, newkey, i == OP_UPDATE - ? NVDIMM_USER : NVDIMM_MASTER); - } else if (i == OP_ERASE || i == OP_MASTER_ERASE) { - dev_dbg(dev, "%s %u\n", ops[i].name, key); - if (atomic_read(&nvdimm->busy)) { - dev_dbg(dev, "Unable to secure erase while DIMM active.\n"); - return -EBUSY; - } - rc = nvdimm_security_erase(nvdimm, key, i == OP_ERASE - ? NVDIMM_USER : NVDIMM_MASTER); - } else if (i == OP_OVERWRITE) { - dev_dbg(dev, "overwrite %u\n", key); - if (atomic_read(&nvdimm->busy)) { - dev_dbg(dev, "Unable to overwrite while DIMM active.\n"); - return -EBUSY; - } - rc = nvdimm_security_overwrite(nvdimm, key); - } else - return -EINVAL; - - if (rc == 0) - rc = len; - return rc; -} - static ssize_t security_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) @@ -489,7 +407,7 @@ static ssize_t security_store(struct device *dev, nd_device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); - rc = __security_store(dev, buf, len); + rc = nvdimm_security_store(dev, buf, len); nvdimm_bus_unlock(dev); nd_device_unlock(dev); diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index da2bbfd56d9f..454454ba1738 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -68,35 +68,11 @@ static inline unsigned long nvdimm_security_flags( } int nvdimm_security_freeze(struct nvdimm *nvdimm); #if IS_ENABLED(CONFIG_NVDIMM_KEYS) -int nvdimm_security_disable(struct nvdimm *nvdimm, unsigned int keyid); -int nvdimm_security_update(struct nvdimm *nvdimm, unsigned int keyid, - unsigned int new_keyid, - enum nvdimm_passphrase_type pass_type); -int nvdimm_security_erase(struct nvdimm *nvdimm, unsigned int keyid, - enum nvdimm_passphrase_type pass_type); -int nvdimm_security_overwrite(struct nvdimm *nvdimm, unsigned int keyid); +ssize_t nvdimm_security_store(struct device *dev, const char *buf, size_t len); void nvdimm_security_overwrite_query(struct work_struct *work); #else -static inline int nvdimm_security_disable(struct nvdimm *nvdimm, - unsigned int keyid) -{ - return -EOPNOTSUPP; -} -static inline int nvdimm_security_update(struct nvdimm *nvdimm, - unsigned int keyid, - unsigned int new_keyid, - enum nvdimm_passphrase_type pass_type) -{ - return -EOPNOTSUPP; -} -static inline int nvdimm_security_erase(struct nvdimm *nvdimm, - unsigned int keyid, - enum nvdimm_passphrase_type pass_type) -{ - return -EOPNOTSUPP; -} -static inline int nvdimm_security_overwrite(struct nvdimm *nvdimm, - unsigned int keyid) +static inline ssize_t nvdimm_security_store(struct device *dev, + const char *buf, size_t len) { return -EOPNOTSUPP; } diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c index 2166e627383a..9e45b207ff01 100644 --- a/drivers/nvdimm/security.c +++ b/drivers/nvdimm/security.c @@ -235,7 +235,7 @@ static int check_security_state(struct nvdimm *nvdimm) return 0; } -int nvdimm_security_disable(struct nvdimm *nvdimm, unsigned int keyid) +static int security_disable(struct nvdimm *nvdimm, unsigned int keyid) { struct device *dev = &nvdimm->dev; struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); @@ -268,7 +268,7 @@ int nvdimm_security_disable(struct nvdimm *nvdimm, unsigned int keyid) return rc; } -int nvdimm_security_update(struct nvdimm *nvdimm, unsigned int keyid, +static int security_update(struct nvdimm *nvdimm, unsigned int keyid, unsigned int new_keyid, enum nvdimm_passphrase_type pass_type) { @@ -318,7 +318,7 @@ int nvdimm_security_update(struct nvdimm *nvdimm, unsigned int keyid, return rc; } -int nvdimm_security_erase(struct nvdimm *nvdimm, unsigned int keyid, +static int security_erase(struct nvdimm *nvdimm, unsigned int keyid, enum nvdimm_passphrase_type pass_type) { struct device *dev = &nvdimm->dev; @@ -360,7 +360,7 @@ int nvdimm_security_erase(struct nvdimm *nvdimm, unsigned int keyid, return rc; } -int nvdimm_security_overwrite(struct nvdimm *nvdimm, unsigned int keyid) +static int security_overwrite(struct nvdimm *nvdimm, unsigned int keyid) { struct device *dev = &nvdimm->dev; struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); @@ -465,3 +465,85 @@ void nvdimm_security_overwrite_query(struct work_struct *work) __nvdimm_security_overwrite_query(nvdimm); nvdimm_bus_unlock(&nvdimm->dev); } + +#define OPS \ + C( OP_FREEZE, "freeze", 1), \ + C( OP_DISABLE, "disable", 2), \ + C( OP_UPDATE, "update", 3), \ + C( OP_ERASE, "erase", 2), \ + C( OP_OVERWRITE, "overwrite", 2), \ + C( OP_MASTER_UPDATE, "master_update", 3), \ + C( OP_MASTER_ERASE, "master_erase", 2) +#undef C +#define C(a, b, c) a +enum nvdimmsec_op_ids { OPS }; +#undef C +#define C(a, b, c) { b, c } +static struct { + const char *name; + int args; +} ops[] = { OPS }; +#undef C + +#define SEC_CMD_SIZE 32 +#define KEY_ID_SIZE 10 + +ssize_t nvdimm_security_store(struct device *dev, const char *buf, size_t len) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + ssize_t rc; + char cmd[SEC_CMD_SIZE+1], keystr[KEY_ID_SIZE+1], + nkeystr[KEY_ID_SIZE+1]; + unsigned int key, newkey; + int i; + + rc = sscanf(buf, "%"__stringify(SEC_CMD_SIZE)"s" + " %"__stringify(KEY_ID_SIZE)"s" + " %"__stringify(KEY_ID_SIZE)"s", + cmd, keystr, nkeystr); + if (rc < 1) + return -EINVAL; + for (i = 0; i < ARRAY_SIZE(ops); i++) + if (sysfs_streq(cmd, ops[i].name)) + break; + if (i >= ARRAY_SIZE(ops)) + return -EINVAL; + if (ops[i].args > 1) + rc = kstrtouint(keystr, 0, &key); + if (rc >= 0 && ops[i].args > 2) + rc = kstrtouint(nkeystr, 0, &newkey); + if (rc < 0) + return rc; + + if (i == OP_FREEZE) { + dev_dbg(dev, "freeze\n"); + rc = nvdimm_security_freeze(nvdimm); + } else if (i == OP_DISABLE) { + dev_dbg(dev, "disable %u\n", key); + rc = security_disable(nvdimm, key); + } else if (i == OP_UPDATE || i == OP_MASTER_UPDATE) { + dev_dbg(dev, "%s %u %u\n", ops[i].name, key, newkey); + rc = security_update(nvdimm, key, newkey, i == OP_UPDATE + ? NVDIMM_USER : NVDIMM_MASTER); + } else if (i == OP_ERASE || i == OP_MASTER_ERASE) { + dev_dbg(dev, "%s %u\n", ops[i].name, key); + if (atomic_read(&nvdimm->busy)) { + dev_dbg(dev, "Unable to secure erase while DIMM active.\n"); + return -EBUSY; + } + rc = security_erase(nvdimm, key, i == OP_ERASE + ? NVDIMM_USER : NVDIMM_MASTER); + } else if (i == OP_OVERWRITE) { + dev_dbg(dev, "overwrite %u\n", key); + if (atomic_read(&nvdimm->busy)) { + dev_dbg(dev, "Unable to overwrite while DIMM active.\n"); + return -EBUSY; + } + rc = security_overwrite(nvdimm, key); + } else + return -EINVAL; + + if (rc == 0) + rc = len; + return rc; +} From a2d1c7a61db9b1e261410c7d9e2be2243040749b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 5 Sep 2019 21:15:57 +0530 Subject: [PATCH 07/12] libnvdimm/region: Rewrite _probe_success() to _advance_seeds() The nd_region_probe_success() helper collides seed management with nvdimm->busy tracking. Given the 'busy' increment is handled internal to the nd_region driver 'probe' path move the decrement to the 'remove' path. With that cleanup the routine can be renamed to the more descriptive nd_region_advance_seeds(). The change is prompted by an incoming need to optionally advance the seeds on other events besides 'probe' success. Cc: "Aneesh Kumar K.V" Signed-off-by: Dan Williams Signed-off-by: Aneesh Kumar K.V Link: https://lore.kernel.org/r/20190905154603.10349-2-aneesh.kumar@linux.ibm.com Signed-off-by: Dan Williams --- drivers/nvdimm/bus.c | 7 +--- drivers/nvdimm/namespace_devs.c | 34 ++++++++++++++--- drivers/nvdimm/nd-core.h | 3 +- drivers/nvdimm/region_devs.c | 68 +++++---------------------------- 4 files changed, 41 insertions(+), 71 deletions(-) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 29479d3b01b0..ee6de34ae525 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -95,10 +95,8 @@ static int nvdimm_bus_probe(struct device *dev) rc = nd_drv->probe(dev); debug_nvdimm_unlock(dev); - if (rc == 0) - nd_region_probe_success(nvdimm_bus, dev); - else - nd_region_disable(nvdimm_bus, dev); + if (rc == 0 && dev->parent && is_nd_region(dev->parent)) + nd_region_advance_seeds(to_nd_region(dev->parent), dev); nvdimm_bus_probe_end(nvdimm_bus); dev_dbg(&nvdimm_bus->dev, "END: %s.probe(%s) = %d\n", dev->driver->name, @@ -121,7 +119,6 @@ static int nvdimm_bus_remove(struct device *dev) rc = nd_drv->remove(dev); debug_nvdimm_unlock(dev); } - nd_region_disable(nvdimm_bus, dev); dev_dbg(&nvdimm_bus->dev, "%s.remove(%s) = %d\n", dev->driver->name, dev_name(dev), rc); diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index a16e52251a30..3be81f7b9ed3 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -2462,6 +2462,27 @@ static struct device **create_namespaces(struct nd_region *nd_region) return devs; } +static void deactivate_labels(void *region) +{ + struct nd_region *nd_region = region; + int i; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm_drvdata *ndd = nd_mapping->ndd; + struct nvdimm *nvdimm = nd_mapping->nvdimm; + + mutex_lock(&nd_mapping->lock); + nd_mapping_free_labels(nd_mapping); + mutex_unlock(&nd_mapping->lock); + + put_ndd(ndd); + nd_mapping->ndd = NULL; + if (ndd) + atomic_dec(&nvdimm->busy); + } +} + static int init_active_labels(struct nd_region *nd_region) { int i; @@ -2519,16 +2540,17 @@ static int init_active_labels(struct nd_region *nd_region) mutex_unlock(&nd_mapping->lock); } - if (j >= count) - continue; + if (j < count) + break; + } - mutex_lock(&nd_mapping->lock); - nd_mapping_free_labels(nd_mapping); - mutex_unlock(&nd_mapping->lock); + if (i < nd_region->ndr_mappings) { + deactivate_labels(nd_region); return -ENOMEM; } - return 0; + return devm_add_action_or_reset(&nd_region->dev, deactivate_labels, + nd_region); } int nd_region_register_namespaces(struct nd_region *nd_region, int *err) diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 454454ba1738..25fa121104d0 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -115,13 +115,12 @@ int __init nvdimm_bus_init(void); void nvdimm_bus_exit(void); void nvdimm_devs_exit(void); void nd_region_devs_exit(void); -void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev); struct nd_region; +void nd_region_advance_seeds(struct nd_region *nd_region, struct device *dev); void nd_region_create_ns_seed(struct nd_region *nd_region); void nd_region_create_btt_seed(struct nd_region *nd_region); void nd_region_create_pfn_seed(struct nd_region *nd_region); void nd_region_create_dax_seed(struct nd_region *nd_region); -void nd_region_disable(struct nvdimm_bus *nvdimm_bus, struct device *dev); int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus); void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus); void nd_synchronize(void); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index b477a8dc0020..cd6da354eae4 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -715,85 +715,37 @@ void nd_mapping_free_labels(struct nd_mapping *nd_mapping) } /* - * Upon successful probe/remove, take/release a reference on the - * associated interleave set (if present), and plant new btt + namespace - * seeds. Also, on the removal of a BLK region, notify the provider to - * disable the region. + * When a namespace is activated create new seeds for the next + * namespace, or namespace-personality to be configured. */ -static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus, - struct device *dev, bool probe) +void nd_region_advance_seeds(struct nd_region *nd_region, struct device *dev) { - struct nd_region *nd_region; - - if (!probe && is_nd_region(dev)) { - int i; - - nd_region = to_nd_region(dev); - for (i = 0; i < nd_region->ndr_mappings; i++) { - struct nd_mapping *nd_mapping = &nd_region->mapping[i]; - struct nvdimm_drvdata *ndd = nd_mapping->ndd; - struct nvdimm *nvdimm = nd_mapping->nvdimm; - - mutex_lock(&nd_mapping->lock); - nd_mapping_free_labels(nd_mapping); - mutex_unlock(&nd_mapping->lock); - - put_ndd(ndd); - nd_mapping->ndd = NULL; - if (ndd) - atomic_dec(&nvdimm->busy); - } - } - if (dev->parent && is_nd_region(dev->parent) && probe) { - nd_region = to_nd_region(dev->parent); - nvdimm_bus_lock(dev); - if (nd_region->ns_seed == dev) - nd_region_create_ns_seed(nd_region); - nvdimm_bus_unlock(dev); - } - if (is_nd_btt(dev) && probe) { + nvdimm_bus_lock(dev); + if (nd_region->ns_seed == dev) { + nd_region_create_ns_seed(nd_region); + } else if (is_nd_btt(dev)) { struct nd_btt *nd_btt = to_nd_btt(dev); - nd_region = to_nd_region(dev->parent); - nvdimm_bus_lock(dev); if (nd_region->btt_seed == dev) nd_region_create_btt_seed(nd_region); if (nd_region->ns_seed == &nd_btt->ndns->dev) nd_region_create_ns_seed(nd_region); - nvdimm_bus_unlock(dev); - } - if (is_nd_pfn(dev) && probe) { + } else if (is_nd_pfn(dev)) { struct nd_pfn *nd_pfn = to_nd_pfn(dev); - nd_region = to_nd_region(dev->parent); - nvdimm_bus_lock(dev); if (nd_region->pfn_seed == dev) nd_region_create_pfn_seed(nd_region); if (nd_region->ns_seed == &nd_pfn->ndns->dev) nd_region_create_ns_seed(nd_region); - nvdimm_bus_unlock(dev); - } - if (is_nd_dax(dev) && probe) { + } else if (is_nd_dax(dev)) { struct nd_dax *nd_dax = to_nd_dax(dev); - nd_region = to_nd_region(dev->parent); - nvdimm_bus_lock(dev); if (nd_region->dax_seed == dev) nd_region_create_dax_seed(nd_region); if (nd_region->ns_seed == &nd_dax->nd_pfn.ndns->dev) nd_region_create_ns_seed(nd_region); - nvdimm_bus_unlock(dev); } -} - -void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev) -{ - nd_region_notify_driver_action(nvdimm_bus, dev, true); -} - -void nd_region_disable(struct nvdimm_bus *nvdimm_bus, struct device *dev) -{ - nd_region_notify_driver_action(nvdimm_bus, dev, false); + nvdimm_bus_unlock(dev); } static ssize_t mappingN(struct device *dev, char *buf, int n) From 1c97afa714098aab2ca588cc654f8ff67dd46dcb Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 5 Sep 2019 21:15:58 +0530 Subject: [PATCH 08/12] libnvdimm/pmem: Advance namespace seed for specific probe errors In order to support marking namespaces with unsupported feature/versions disabled, nvdimm core should advance the namespace seed on these probe failures. Otherwise, these failed namespaces will be considered a seed namespace and will be wrongly used while creating new namespaces. Add -EOPNOTSUPP as return from pmem probe callback to indicate a namespace initialization failures due to pfn superblock feature/version mismatch. Signed-off-by: Aneesh Kumar K.V Link: https://lore.kernel.org/r/20190905154603.10349-3-aneesh.kumar@linux.ibm.com Signed-off-by: Dan Williams --- drivers/nvdimm/bus.c | 3 ++- drivers/nvdimm/pmem.c | 29 +++++++++++++++++++++++++---- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index ee6de34ae525..75a58a6e9615 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -95,7 +95,8 @@ static int nvdimm_bus_probe(struct device *dev) rc = nd_drv->probe(dev); debug_nvdimm_unlock(dev); - if (rc == 0 && dev->parent && is_nd_region(dev->parent)) + if ((rc == 0 || rc == -EOPNOTSUPP) && + dev->parent && is_nd_region(dev->parent)) nd_region_advance_seeds(to_nd_region(dev->parent), dev); nvdimm_bus_probe_end(nvdimm_bus); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 4c121dd03dd9..f9f76f6ba07b 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -490,6 +490,7 @@ static int pmem_attach_disk(struct device *dev, static int nd_pmem_probe(struct device *dev) { + int ret; struct nd_namespace_common *ndns; ndns = nvdimm_namespace_common_probe(dev); @@ -505,12 +506,32 @@ static int nd_pmem_probe(struct device *dev) if (is_nd_pfn(dev)) return pmem_attach_disk(dev, ndns); - /* if we find a valid info-block we'll come back as that personality */ - if (nd_btt_probe(dev, ndns) == 0 || nd_pfn_probe(dev, ndns) == 0 - || nd_dax_probe(dev, ndns) == 0) + ret = nd_btt_probe(dev, ndns); + if (ret == 0) return -ENXIO; - /* ...otherwise we're just a raw pmem device */ + /* + * We have two failure conditions here, there is no + * info reserver block or we found a valid info reserve block + * but failed to initialize the pfn superblock. + * + * For the first case consider namespace as a raw pmem namespace + * and attach a disk. + * + * For the latter, consider this a success and advance the namespace + * seed. + */ + ret = nd_pfn_probe(dev, ndns); + if (ret == 0) + return -ENXIO; + else if (ret == -EOPNOTSUPP) + return ret; + + ret = nd_dax_probe(dev, ndns); + if (ret == 0) + return -ENXIO; + else if (ret == -EOPNOTSUPP) + return ret; return pmem_attach_disk(dev, ndns); } From e96f0bf2ec92da2bc9c11b0d69e9086f076e7f0b Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 5 Sep 2019 21:15:59 +0530 Subject: [PATCH 09/12] libnvdimm/pfn_dev: Add a build check to make sure we notice when struct page size change Namespaces created with PFN_MODE_PMEM mode stores struct page in the reserve block area. We need to make sure we account for the right struct page size while doing this. Instead of directly depending on sizeof(struct page) which can change based on different kernel config option, use the max struct page size (64) while calculating the reserve block area. This makes sure pmem device can be used across kernels built with different configs. If the above assumption of max struct page size change, we need to update the reserve block allocation space for new namespaces created. Signed-off-by: Aneesh Kumar K.V Link: https://lore.kernel.org/r/20190905154603.10349-4-aneesh.kumar@linux.ibm.com Signed-off-by: Dan Williams --- drivers/nvdimm/nd.h | 4 ++++ drivers/nvdimm/pfn_devs.c | 10 +++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 1b9955651379..e89af4b2d8e9 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -375,6 +375,10 @@ unsigned int pmem_sector_size(struct nd_namespace_common *ndns); void nvdimm_badblocks_populate(struct nd_region *nd_region, struct badblocks *bb, const struct resource *res); #if IS_ENABLED(CONFIG_ND_CLAIM) + +/* max struct page size independent of kernel config */ +#define MAX_STRUCT_PAGE_SIZE 64 + int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap); int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio); void devm_nsio_disable(struct device *dev, struct nd_namespace_io *nsio); diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 3e7b11cf1aae..cd120feb9213 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -701,8 +701,16 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) * The altmap should be padded out to the block size used * when populating the vmemmap. This *should* be equal to * PMD_SIZE for most architectures. + * + * Also make sure size of struct page is less than 64. We + * want to make sure we use large enough size here so that + * we don't have a dynamic reserve space depending on + * struct page size. But we also want to make sure we notice + * when we end up adding new elements to struct page. */ - offset = ALIGN(start + SZ_8K + 64 * npfns, align) - start; + BUILD_BUG_ON(sizeof(struct page) > MAX_STRUCT_PAGE_SIZE); + offset = ALIGN(start + SZ_8K + MAX_STRUCT_PAGE_SIZE * npfns, align) + - start; } else if (nd_pfn->mode == PFN_MODE_RAM) offset = ALIGN(start + SZ_8K, align) - start; else From edbb52c24441ab5203b969eca759483cb533f36d Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 5 Sep 2019 21:16:00 +0530 Subject: [PATCH 10/12] libnvdimm/pfn_dev: Add page size and struct page size to pfn superblock This is needed so that pmem probe don't wrongly initialize a namespace which doesn't have enough space reserved for holding struct pages with the current kernel. Signed-off-by: Aneesh Kumar K.V Link: https://lore.kernel.org/r/20190905154603.10349-5-aneesh.kumar@linux.ibm.com Signed-off-by: Dan Williams --- drivers/nvdimm/pfn.h | 5 ++++- drivers/nvdimm/pfn_devs.c | 25 ++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h index 7381673b7b70..acb19517f678 100644 --- a/drivers/nvdimm/pfn.h +++ b/drivers/nvdimm/pfn.h @@ -29,7 +29,10 @@ struct nd_pfn_sb { /* minor-version-2 record the base alignment of the mapping */ __le32 align; /* minor-version-3 guarantee the padding and flags are zero */ - u8 padding[4000]; + /* minor-version-4 record the page size and struct page size */ + __le32 page_size; + __le16 page_struct_size; + u8 padding[3994]; __le64 checksum; }; diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index cd120feb9213..ce9ef18282dd 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -460,6 +460,11 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) if (__le16_to_cpu(pfn_sb->version_minor) < 2) pfn_sb->align = 0; + if (__le16_to_cpu(pfn_sb->version_minor) < 4) { + pfn_sb->page_struct_size = cpu_to_le16(64); + pfn_sb->page_size = cpu_to_le32(PAGE_SIZE); + } + switch (le32_to_cpu(pfn_sb->mode)) { case PFN_MODE_RAM: case PFN_MODE_PMEM: @@ -475,6 +480,22 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) align = 1UL << ilog2(offset); mode = le32_to_cpu(pfn_sb->mode); + if ((le32_to_cpu(pfn_sb->page_size) > PAGE_SIZE) && + (mode == PFN_MODE_PMEM)) { + dev_err(&nd_pfn->dev, + "init failed, page size mismatch %d\n", + le32_to_cpu(pfn_sb->page_size)); + return -EOPNOTSUPP; + } + + if ((le16_to_cpu(pfn_sb->page_struct_size) < sizeof(struct page)) && + (mode == PFN_MODE_PMEM)) { + dev_err(&nd_pfn->dev, + "init failed, struct page size mismatch %d\n", + le16_to_cpu(pfn_sb->page_struct_size)); + return -EOPNOTSUPP; + } + if (!nd_pfn->uuid) { /* * When probing a namepace via nd_pfn_probe() the uuid @@ -730,8 +751,10 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) memcpy(pfn_sb->uuid, nd_pfn->uuid, 16); memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16); pfn_sb->version_major = cpu_to_le16(1); - pfn_sb->version_minor = cpu_to_le16(3); + pfn_sb->version_minor = cpu_to_le16(4); pfn_sb->align = cpu_to_le32(nd_pfn->align); + pfn_sb->page_struct_size = cpu_to_le16(MAX_STRUCT_PAGE_SIZE); + pfn_sb->page_size = cpu_to_le32(PAGE_SIZE); checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb); pfn_sb->checksum = cpu_to_le64(checksum); From 047e0eff1f150220d615c35c36af225f3e1ed057 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 5 Sep 2019 21:16:01 +0530 Subject: [PATCH 11/12] libnvdimm/label: Remove the dpa align check There's no strict requirement why slot_valid() needs to check for page alignment and it would seem to actively hurt cross-page-size compatibility. Let's delete the check and rely on checksum validation. Signed-off-by: Aneesh Kumar K.V Link: https://lore.kernel.org/r/20190905154603.10349-6-aneesh.kumar@linux.ibm.com Signed-off-by: Dan Williams --- drivers/nvdimm/label.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index 73e197babc2f..47a4828b8b31 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -353,11 +353,6 @@ static bool slot_valid(struct nvdimm_drvdata *ndd, if (slot != __le32_to_cpu(nd_label->slot)) return false; - /* check that DPA allocations are page aligned */ - if ((__le64_to_cpu(nd_label->dpa) - | __le64_to_cpu(nd_label->rawsize)) % SZ_4K) - return false; - /* check checksum */ if (namespace_label_has(ndd, checksum)) { u64 sum, sum_save; From 5b26db95fee3f1ce0d096b2de0ac6f3716171093 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 5 Sep 2019 21:16:02 +0530 Subject: [PATCH 12/12] libnvdimm: Use PAGE_SIZE instead of SZ_4K for align check Architectures have different page size than 4K. Use the PAGE_SIZE to make sure ranges are correctly aligned. Signed-off-by: Aneesh Kumar K.V Link: https://lore.kernel.org/r/20190905154603.10349-7-aneesh.kumar@linux.ibm.com Signed-off-by: Dan Williams --- drivers/nvdimm/namespace_devs.c | 6 +++--- drivers/nvdimm/region_devs.c | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 3be81f7b9ed3..43401325c874 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -1006,10 +1006,10 @@ static ssize_t __size_store(struct device *dev, unsigned long long val) return -ENXIO; } - div_u64_rem(val, SZ_4K * nd_region->ndr_mappings, &remainder); + div_u64_rem(val, PAGE_SIZE * nd_region->ndr_mappings, &remainder); if (remainder) { - dev_dbg(dev, "%llu is not %dK aligned\n", val, - (SZ_4K * nd_region->ndr_mappings) / SZ_1K); + dev_dbg(dev, "%llu is not %ldK aligned\n", val, + (PAGE_SIZE * nd_region->ndr_mappings) / SZ_1K); return -EINVAL; } diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index cd6da354eae4..3fd6b59abd33 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -944,10 +944,10 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, struct nd_mapping_desc *mapping = &ndr_desc->mapping[i]; struct nvdimm *nvdimm = mapping->nvdimm; - if ((mapping->start | mapping->size) % SZ_4K) { - dev_err(&nvdimm_bus->dev, "%s: %s mapping%d is not 4K aligned\n", - caller, dev_name(&nvdimm->dev), i); - + if ((mapping->start | mapping->size) % PAGE_SIZE) { + dev_err(&nvdimm_bus->dev, + "%s: %s mapping%d is not %ld aligned\n", + caller, dev_name(&nvdimm->dev), i, PAGE_SIZE); return NULL; }