From 2d5153526f929838b0912ded26862840f72745f4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 17 May 2023 14:55:09 +0200 Subject: [PATCH 01/13] dax: fix missing-prototype warnings The declaration of dev_dax_probe() was removed along with its only caller outside of device.c. Mark it static to avoid a W=1 warning: drivers/dax/device.c:399:5: error: no previous prototype for 'dev_dax_probe' Similarly, run_dax() causes a warning, but this one is because the declaration needs to be included: drivers/dax/super.c:337:6: error: no previous prototype for 'run_dax' Fixes: 83762cb5c7c4 ("dax: Kill DEV_DAX_PMEM_COMPAT") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20230517125532.931157-1-arnd@kernel.org Signed-off-by: Dan Williams --- drivers/dax/bus.h | 7 ------- drivers/dax/dax-private.h | 7 +++++++ drivers/dax/device.c | 3 +-- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h index 8cd79ab34292..43f490e9ce65 100644 --- a/drivers/dax/bus.h +++ b/drivers/dax/bus.h @@ -49,13 +49,6 @@ void dax_driver_unregister(struct dax_device_driver *dax_drv); void kill_dev_dax(struct dev_dax *dev_dax); bool static_dev_dax(struct dev_dax *dev_dax); -/* - * While run_dax() is potentially a generic operation that could be - * defined in include/linux/dax.h we don't want to grow any users - * outside of drivers/dax/ - */ -void run_dax(struct dax_device *dax_dev); - #define MODULE_ALIAS_DAX_DEVICE(type) \ MODULE_ALIAS("dax:t" __stringify(type) "*") #define DAX_DEVICE_MODALIAS_FMT "dax:t%d" diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h index 1c974b7caae6..db032680d941 100644 --- a/drivers/dax/dax-private.h +++ b/drivers/dax/dax-private.h @@ -76,6 +76,13 @@ struct dev_dax { } *ranges; }; +/* + * While run_dax() is potentially a generic operation that could be + * defined in include/linux/dax.h we don't want to grow any users + * outside of drivers/dax/ + */ +void run_dax(struct dax_device *dax_dev); + 
static inline struct dev_dax *to_dev_dax(struct device *dev) { return container_of(dev, struct dev_dax, dev); diff --git a/drivers/dax/device.c b/drivers/dax/device.c index af9930c03c9c..30665a3ff6ea 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -396,7 +396,7 @@ static void dev_dax_kill(void *dev_dax) kill_dev_dax(dev_dax); } -int dev_dax_probe(struct dev_dax *dev_dax) +static int dev_dax_probe(struct dev_dax *dev_dax) { struct dax_device *dax_dev = dev_dax->dax_dev; struct device *dev = &dev_dax->dev; @@ -471,7 +471,6 @@ int dev_dax_probe(struct dev_dax *dev_dax) run_dax(dax_dev); return devm_add_action_or_reset(dev, dev_dax_kill, dev_dax); } -EXPORT_SYMBOL_GPL(dev_dax_probe); static struct dax_device_driver device_dax_driver = { .probe = dev_dax_probe, From 9e46e541cf19fffbbbd9c6ef393446e7d5ef8c39 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 May 2023 22:14:08 +0200 Subject: [PATCH 02/13] testing: nvdimm: add missing prototypes for wrapped functions The nvdimm test wraps a number of API functions, but these functions don't have a prototype in a header because they are all called by a different name: drivers/nvdimm/../../tools/testing/nvdimm/test/iomap.c:74:15: error: no previous prototype for '__wrap_devm_ioremap' [-Werror=missing-prototypes] 74 | void __iomem *__wrap_devm_ioremap(struct device *dev, | ^~~~~~~~~~~~~~~~~~~ drivers/nvdimm/../../tools/testing/nvdimm/test/iomap.c:86:7: error: no previous prototype for '__wrap_devm_memremap' [-Werror=missing-prototypes] 86 | void *__wrap_devm_memremap(struct device *dev, resource_size_t offset, | ^~~~~~~~~~~~~~~~~~~~ ... Add prototypes to avoid the warning. 
Signed-off-by: Arnd Bergmann Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20230516201415.556858-2-arnd@kernel.org Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit_test.h | 29 +++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h index b5f7a996c4d0..b00583d1eace 100644 --- a/tools/testing/nvdimm/test/nfit_test.h +++ b/tools/testing/nvdimm/test/nfit_test.h @@ -207,7 +207,36 @@ typedef struct nfit_test_resource *(*nfit_test_lookup_fn)(resource_size_t); typedef union acpi_object *(*nfit_test_evaluate_dsm_fn)(acpi_handle handle, const guid_t *guid, u64 rev, u64 func, union acpi_object *argv4); +void __iomem *__wrap_devm_ioremap(struct device *dev, + resource_size_t offset, unsigned long size); +void *__wrap_devm_memremap(struct device *dev, resource_size_t offset, + size_t size, unsigned long flags); +void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap); +pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags); +void *__wrap_memremap(resource_size_t offset, size_t size, + unsigned long flags); +void __wrap_devm_memunmap(struct device *dev, void *addr); +void __iomem *__wrap_ioremap(resource_size_t offset, unsigned long size); +void __iomem *__wrap_ioremap_wc(resource_size_t offset, unsigned long size); void __wrap_iounmap(volatile void __iomem *addr); +void __wrap_memunmap(void *addr); +struct resource *__wrap___request_region(struct resource *parent, + resource_size_t start, resource_size_t n, const char *name, + int flags); +int __wrap_insert_resource(struct resource *parent, struct resource *res); +int __wrap_remove_resource(struct resource *res); +struct resource *__wrap___devm_request_region(struct device *dev, + struct resource *parent, resource_size_t start, + resource_size_t n, const char *name); +void __wrap___release_region(struct resource *parent, resource_size_t start, + resource_size_t n); +void 
__wrap___devm_release_region(struct device *dev, struct resource *parent, + resource_size_t start, resource_size_t n); +acpi_status __wrap_acpi_evaluate_object(acpi_handle handle, acpi_string path, + struct acpi_object_list *p, struct acpi_buffer *buf); +union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid, + u64 rev, u64 func, union acpi_object *argv4); + void nfit_test_setup(nfit_test_lookup_fn lookup, nfit_test_evaluate_dsm_fn evaluate); void nfit_test_teardown(void); From 7f80ab365a1d10cb143c897954199c760272c338 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 May 2023 22:14:09 +0200 Subject: [PATCH 03/13] libnvdimm: mark 'security_show' static again The security_show() function was made global and __weak at some point to allow overriding it. The override was removed later, but it remains global, which causes a warning about the missing declaration: drivers/nvdimm/dimm_devs.c:352:9: error: no previous prototype for 'security_show' This is also not an appropriate name for a global symbol in the kernel, so just make it static again. 
Fixes: 15a8348707ff ("libnvdimm: Introduce CONFIG_NVDIMM_SECURITY_TEST flag") Signed-off-by: Arnd Bergmann Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20230516201415.556858-3-arnd@kernel.org Signed-off-by: Dan Williams --- drivers/nvdimm/dimm_devs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 957f7c3d17ba..10c3cb6a574a 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -349,7 +349,7 @@ static ssize_t available_slots_show(struct device *dev, } static DEVICE_ATTR_RO(available_slots); -ssize_t security_show(struct device *dev, +static ssize_t security_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nvdimm *nvdimm = to_nvdimm(dev); From e98d14fa7315867fded127a98db355f49807dfdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 6 Jun 2023 20:26:00 -0700 Subject: [PATCH 04/13] tools/testing/nvdimm: Drop empty platform remove function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A remove callback just returning 0 is equivalent to no remove callback at all. So drop the useless function. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221213100512.599548-1-u.kleine-koenig@pengutronix.de Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index e4e2d1650dd5..005043bd9623 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -3240,11 +3240,6 @@ static int nfit_test_probe(struct platform_device *pdev) return 0; } -static int nfit_test_remove(struct platform_device *pdev) -{ - return 0; -} - static void nfit_test_release(struct device *dev) { struct nfit_test *nfit_test = to_nfit_test(dev); @@ -3259,7 +3254,6 @@ static const struct platform_device_id nfit_test_id[] = { static struct platform_driver nfit_test_driver = { .probe = nfit_test_probe, - .remove = nfit_test_remove, .driver = { .name = KBUILD_MODNAME, }, From 6d24b170a9db0456f577b1ab01226a2254c016a8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 2 Jun 2023 23:13:54 -0700 Subject: [PATCH 05/13] dax: Fix dax_mapping_release() use after free A CONFIG_DEBUG_KOBJECT_RELEASE test of removing a device-dax region provider (like modprobe -r dax_hmem) yields: kobject: 'mapping0' (ffff93eb460e8800): kobject_release, parent 0000000000000000 (delayed 2000) [..] DEBUG_LOCKS_WARN_ON(1) WARNING: CPU: 23 PID: 282 at kernel/locking/lockdep.c:232 __lock_acquire+0x9fc/0x2260 [..] RIP: 0010:__lock_acquire+0x9fc/0x2260 [..] Call Trace: [..] lock_acquire+0xd4/0x2c0 ? ida_free+0x62/0x130 _raw_spin_lock_irqsave+0x47/0x70 ? ida_free+0x62/0x130 ida_free+0x62/0x130 dax_mapping_release+0x1f/0x30 device_release+0x36/0x90 kobject_delayed_cleanup+0x46/0x150 Due to attempting ida_free() on an ida object that has already been freed. Devices typically only hold a reference on their parent while registered. If a child needs a parent object to complete its release it needs to hold a reference that it drops from its release callback. 
Arrange for a dax_mapping to pin its parent dev_dax instance until dax_mapping_release(). Fixes: 0b07ce872a9e ("device-dax: introduce 'mapping' devices") Signed-off-by: Dan Williams Link: https://lore.kernel.org/r/168577283412.1672036.16111545266174261446.stgit@dwillia2-xfh.jf.intel.com Reviewed-by: Dave Jiang Reviewed-by: Fan Ni Reviewed-by: Ira Weiny Signed-off-by: Vishal Verma --- drivers/dax/bus.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 227800053309..aee695f86b44 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -635,10 +635,12 @@ EXPORT_SYMBOL_GPL(alloc_dax_region); static void dax_mapping_release(struct device *dev) { struct dax_mapping *mapping = to_dax_mapping(dev); - struct dev_dax *dev_dax = to_dev_dax(dev->parent); + struct device *parent = dev->parent; + struct dev_dax *dev_dax = to_dev_dax(parent); ida_free(&dev_dax->ida, mapping->id); kfree(mapping); + put_device(parent); } static void unregister_dax_mapping(void *data) @@ -778,6 +780,7 @@ static int devm_register_dax_mapping(struct dev_dax *dev_dax, int range_id) dev = &mapping->dev; device_initialize(dev); dev->parent = &dev_dax->dev; + get_device(dev->parent); dev->type = &dax_mapping_type; dev_set_name(dev, "mapping%d", mapping->id); rc = device_add(dev); From 82b4ceeccb89cfd0b03706f1b15e31a7db6a027d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 2 Jun 2023 23:13:59 -0700 Subject: [PATCH 06/13] dax: Use device_unregister() in unregister_dax_mapping() Replace an open-coded device_unregister() sequence with the helper. 
Signed-off-by: Dan Williams Link: https://lore.kernel.org/r/168577283989.1672036.7777592498865470652.stgit@dwillia2-xfh.jf.intel.com Reviewed-by: Dave Jiang Reviewed-by: Fan Ni Reviewed-by: Ira Weiny Signed-off-by: Vishal Verma --- drivers/dax/bus.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index aee695f86b44..c99ea08aafc3 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -657,8 +657,7 @@ static void unregister_dax_mapping(void *data) dev_dax->ranges[mapping->range_id].mapping = NULL; mapping->range_id = -1; - device_del(dev); - put_device(dev); + device_unregister(dev); } static struct dev_dax_range *get_dax_range(struct device *dev) From 70aab281e18c68a1284bc387de127c2fc0bed3f8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 2 Jun 2023 23:14:05 -0700 Subject: [PATCH 07/13] dax: Introduce alloc_dev_dax_id() The reference counting of dax_region objects is needlessly complicated, has led to confusion [1], and has hidden a bug [2]. Towards cleaning up that mess introduce alloc_dev_dax_id() to minimize the holding of a dax_region reference to only what dev_dax_release() needs, the dax_region->ida. Part of the reason for the mess was the design to dereference a dax_region in all cases in free_dev_dax_id() even if the id was statically assigned by the upper level dax_region driver. Remove the need to call "is_static(dax_region)" by tracking whether the id is dynamic directly in the dev_dax instance itself. With that flag the dax_region pinning and release per dev_dax instance can move to alloc_dev_dax_id() and free_dev_dax_id() respectively. A follow-on cleanup addresses the unnecessary references in the dax_region setup and drivers. 
Fixes: 0f3da14a4f05 ("device-dax: introduce 'seed' devices") Link: http://lore.kernel.org/r/20221203095858.612027-1-liuyongqiang13@huawei.com [1] Link: http://lore.kernel.org/r/3cf0890b-4eb0-e70e-cd9c-2ecc3d496263@hpe.com [2] Reported-by: Yongqiang Liu Reported-by: Paul Cassella Reported-by: Ira Weiny Signed-off-by: Dan Williams Link: https://lore.kernel.org/r/168577284563.1672036.13493034988900989554.stgit@dwillia2-xfh.jf.intel.com Reviewed-by: Ira Weiny Signed-off-by: Vishal Verma --- drivers/dax/bus.c | 56 ++++++++++++++++++++++++--------------- drivers/dax/dax-private.h | 4 ++- 2 files changed, 37 insertions(+), 23 deletions(-) diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index c99ea08aafc3..a4cc3eca774f 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -446,18 +446,34 @@ static void unregister_dev_dax(void *dev) put_device(dev); } +static void dax_region_free(struct kref *kref) +{ + struct dax_region *dax_region; + + dax_region = container_of(kref, struct dax_region, kref); + kfree(dax_region); +} + +void dax_region_put(struct dax_region *dax_region) +{ + kref_put(&dax_region->kref, dax_region_free); +} +EXPORT_SYMBOL_GPL(dax_region_put); + /* a return value >= 0 indicates this invocation invalidated the id */ static int __free_dev_dax_id(struct dev_dax *dev_dax) { - struct dax_region *dax_region = dev_dax->region; struct device *dev = &dev_dax->dev; + struct dax_region *dax_region; int rc = dev_dax->id; device_lock_assert(dev); - if (is_static(dax_region) || dev_dax->id < 0) + if (!dev_dax->dyn_id || dev_dax->id < 0) return -1; + dax_region = dev_dax->region; ida_free(&dax_region->ida, dev_dax->id); + dax_region_put(dax_region); dev_dax->id = -1; return rc; } @@ -473,6 +489,20 @@ static int free_dev_dax_id(struct dev_dax *dev_dax) return rc; } +static int alloc_dev_dax_id(struct dev_dax *dev_dax) +{ + struct dax_region *dax_region = dev_dax->region; + int id; + + id = ida_alloc(&dax_region->ida, GFP_KERNEL); + if (id < 0) + return id; + 
kref_get(&dax_region->kref); + dev_dax->dyn_id = true; + dev_dax->id = id; + return id; +} + static ssize_t delete_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -560,20 +590,6 @@ static const struct attribute_group *dax_region_attribute_groups[] = { NULL, }; -static void dax_region_free(struct kref *kref) -{ - struct dax_region *dax_region; - - dax_region = container_of(kref, struct dax_region, kref); - kfree(dax_region); -} - -void dax_region_put(struct dax_region *dax_region) -{ - kref_put(&dax_region->kref, dax_region_free); -} -EXPORT_SYMBOL_GPL(dax_region_put); - static void dax_region_unregister(void *region) { struct dax_region *dax_region = region; @@ -1297,12 +1313,10 @@ static const struct attribute_group *dax_attribute_groups[] = { static void dev_dax_release(struct device *dev) { struct dev_dax *dev_dax = to_dev_dax(dev); - struct dax_region *dax_region = dev_dax->region; struct dax_device *dax_dev = dev_dax->dax_dev; put_dax(dax_dev); free_dev_dax_id(dev_dax); - dax_region_put(dax_region); kfree(dev_dax->pgmap); kfree(dev_dax); } @@ -1326,6 +1340,7 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data) if (!dev_dax) return ERR_PTR(-ENOMEM); + dev_dax->region = dax_region; if (is_static(dax_region)) { if (dev_WARN_ONCE(parent, data->id < 0, "dynamic id specified to static region\n")) { @@ -1341,13 +1356,11 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data) goto err_id; } - rc = ida_alloc(&dax_region->ida, GFP_KERNEL); + rc = alloc_dev_dax_id(dev_dax); if (rc < 0) goto err_id; - dev_dax->id = rc; } - dev_dax->region = dax_region; dev = &dev_dax->dev; device_initialize(dev); dev_set_name(dev, "dax%d.%d", dax_region->id, dev_dax->id); @@ -1388,7 +1401,6 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data) dev_dax->target_node = dax_region->target_node; dev_dax->align = dax_region->align; ida_init(&dev_dax->ida); - kref_get(&dax_region->kref); inode = dax_inode(dax_dev); 
dev->devt = inode->i_rdev; diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h index db032680d941..27cf2daaaa79 100644 --- a/drivers/dax/dax-private.h +++ b/drivers/dax/dax-private.h @@ -52,7 +52,8 @@ struct dax_mapping { * @region - parent region * @dax_dev - core dax functionality * @target_node: effective numa node if dev_dax memory range is onlined - * @id: ida allocated id + * @dyn_id: is this a dynamic or statically created instance + * @id: ida allocated id when the dax_region is not static * @ida: mapping id allocator * @dev - device core * @pgmap - pgmap for memmap setup / lifetime (driver owned) @@ -64,6 +65,7 @@ struct dev_dax { struct dax_device *dax_dev; unsigned int align; int target_node; + bool dyn_id; int id; struct ida ida; struct device dev; From 2532f41607c4308733239dd43278f8a5540f3ec7 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 2 Jun 2023 23:14:11 -0700 Subject: [PATCH 08/13] dax: Cleanup extra dax_region references Now that free_dev_dax_id() internally manages the references it needs, the extra references taken by the dax_region drivers are not needed. 
Reported-by: Ira Weiny Signed-off-by: Dan Williams Link: https://lore.kernel.org/r/168577285161.1672036.8111253437794419696.stgit@dwillia2-xfh.jf.intel.com Reviewed-by: Dave Jiang Reviewed-by: Fan Ni Reviewed-by: Ira Weiny Signed-off-by: Vishal Verma --- drivers/dax/bus.c | 4 +--- drivers/dax/bus.h | 1 - drivers/dax/cxl.c | 8 +------- drivers/dax/hmem/hmem.c | 8 +------- drivers/dax/pmem.c | 7 +------ 5 files changed, 4 insertions(+), 24 deletions(-) diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index a4cc3eca774f..0ee96e6fc426 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -454,11 +454,10 @@ static void dax_region_free(struct kref *kref) kfree(dax_region); } -void dax_region_put(struct dax_region *dax_region) +static void dax_region_put(struct dax_region *dax_region) { kref_put(&dax_region->kref, dax_region_free); } -EXPORT_SYMBOL_GPL(dax_region_put); /* a return value >= 0 indicates this invocation invalidated the id */ static int __free_dev_dax_id(struct dev_dax *dev_dax) @@ -641,7 +640,6 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id, return NULL; } - kref_get(&dax_region->kref); if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region)) return NULL; return dax_region; diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h index 43f490e9ce65..1ccd23360124 100644 --- a/drivers/dax/bus.h +++ b/drivers/dax/bus.h @@ -9,7 +9,6 @@ struct dev_dax; struct resource; struct dax_device; struct dax_region; -void dax_region_put(struct dax_region *dax_region); /* dax bus specific ioresource flags */ #define IORESOURCE_DAX_STATIC BIT(0) diff --git a/drivers/dax/cxl.c b/drivers/dax/cxl.c index ccdf8de85bd5..8bc9d04034d6 100644 --- a/drivers/dax/cxl.c +++ b/drivers/dax/cxl.c @@ -13,7 +13,6 @@ static int cxl_dax_region_probe(struct device *dev) struct cxl_region *cxlr = cxlr_dax->cxlr; struct dax_region *dax_region; struct dev_dax_data data; - struct dev_dax *dev_dax; if (nid == NUMA_NO_NODE) nid = 
memory_add_physaddr_to_nid(cxlr_dax->hpa_range.start); @@ -28,13 +27,8 @@ static int cxl_dax_region_probe(struct device *dev) .id = -1, .size = range_len(&cxlr_dax->hpa_range), }; - dev_dax = devm_create_dev_dax(&data); - if (IS_ERR(dev_dax)) - return PTR_ERR(dev_dax); - /* child dev_dax instances now own the lifetime of the dax_region */ - dax_region_put(dax_region); - return 0; + return PTR_ERR_OR_ZERO(devm_create_dev_dax(&data)); } static struct cxl_driver cxl_dax_region_driver = { diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c index e5fe8b39fb94..5d2ddef0f8f5 100644 --- a/drivers/dax/hmem/hmem.c +++ b/drivers/dax/hmem/hmem.c @@ -16,7 +16,6 @@ static int dax_hmem_probe(struct platform_device *pdev) struct dax_region *dax_region; struct memregion_info *mri; struct dev_dax_data data; - struct dev_dax *dev_dax; /* * @region_idle == true indicates that an administrative agent @@ -38,13 +37,8 @@ static int dax_hmem_probe(struct platform_device *pdev) .id = -1, .size = region_idle ? 
0 : range_len(&mri->range), }; - dev_dax = devm_create_dev_dax(&data); - if (IS_ERR(dev_dax)) - return PTR_ERR(dev_dax); - /* child dev_dax instances now own the lifetime of the dax_region */ - dax_region_put(dax_region); - return 0; + return PTR_ERR_OR_ZERO(devm_create_dev_dax(&data)); } static struct platform_driver dax_hmem_driver = { diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c index f050ea78bb83..ae0cb113a5d3 100644 --- a/drivers/dax/pmem.c +++ b/drivers/dax/pmem.c @@ -13,7 +13,6 @@ static struct dev_dax *__dax_pmem_probe(struct device *dev) int rc, id, region_id; resource_size_t offset; struct nd_pfn_sb *pfn_sb; - struct dev_dax *dev_dax; struct dev_dax_data data; struct nd_namespace_io *nsio; struct dax_region *dax_region; @@ -65,12 +64,8 @@ static struct dev_dax *__dax_pmem_probe(struct device *dev) .pgmap = &pgmap, .size = range_len(&range), }; - dev_dax = devm_create_dev_dax(&data); - /* child dev_dax instances now own the lifetime of the dax_region */ - dax_region_put(dax_region); - - return dev_dax; + return devm_create_dev_dax(&data); } static int dax_pmem_probe(struct device *dev) From dd0c64258a9d9e74b4896f05c7e77fa3365b5f12 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 21 Jun 2023 14:02:56 +0100 Subject: [PATCH 09/13] fsdax: remove redundant variable 'error' The variable 'error' is being assigned a value that is never read; the assignment and the variable are redundant and can be removed. 
Cleans up clang scan build warning: fs/dax.c:1880:10: warning: Although the value stored to 'error' is used in the enclosing expression, the value is never actually read from 'error' [deadcode.DeadStores] Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20230621130256.2676126-1-colin.i.king@gmail.com Reviewed-by: Jan Kara Signed-off-by: Vishal Verma --- fs/dax.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 2ababb89918d..cb36c6746fc4 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1830,7 +1830,6 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, vm_fault_t ret = VM_FAULT_FALLBACK; pgoff_t max_pgoff; void *entry; - int error; if (vmf->flags & FAULT_FLAG_WRITE) iter.flags |= IOMAP_WRITE; @@ -1877,7 +1876,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, } iter.pos = (loff_t)xas.xa_index << PAGE_SHIFT; - while ((error = iomap_iter(&iter, ops)) > 0) { + while (iomap_iter(&iter, ops) > 0) { if (iomap_length(&iter) < PMD_SIZE) continue; /* actually breaks out of the loop */ From 46e66dab8565f742374e9cc4ff7d35f344d774e2 Mon Sep 17 00:00:00 2001 From: Tarun Sahu Date: Wed, 21 Jun 2023 21:20:25 +0530 Subject: [PATCH 10/13] dax/kmem: Pass valid argument to memory_group_register_static memory_group_register_static() takes the maximum number of pages as its argument, while dev_dax_kmem_probe() passes total_len (in bytes). IIUC, there is no crash/panic impact as such, since memory_group_register_static() just sets the max_pages limit, which is used in auto_movable_zone_for_pfn() to determine the zone. However, this might cause the following condition to behave differently: it will always be true, so the jump to kernel_zone will always happen ... if (!auto_movable_can_online_movable(NUMA_NO_NODE, group, nr_pages)) goto kernel_zone; ... 
kernel_zone: return default_kernel_zone_for_pfn(nid, pfn, nr_pages); Here, in the code below, the range compared by zone_intersects() will be larger, as nr_pages will be higher (derived from the total_len passed in dev_dax_kmem_probe()). ... static struct zone *default_kernel_zone_for_pfn(int nid, unsigned long start_pfn, unsigned long nr_pages) { struct pglist_data *pgdat = NODE_DATA(nid); int zid; for (zid = 0; zid < ZONE_NORMAL; zid++) { struct zone *zone = &pgdat->node_zones[zid]; if (zone_intersects(zone, start_pfn, nr_pages)) return zone; } return &pgdat->node_zones[ZONE_NORMAL]; } An incorrect zone will be returned here, which might cause a bigger problem later. Fixes: eedf634aac3b ("dax/kmem: use a single static memory group for a single probed unit") Signed-off-by: Tarun Sahu Link: https://lore.kernel.org/r/20230621155025.370672-1-tsahu@linux.ibm.com Reviewed-by: Vishal Verma Signed-off-by: Vishal Verma --- drivers/dax/kmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c index 7b36db6f1cbd..898ca9505754 100644 --- a/drivers/dax/kmem.c +++ b/drivers/dax/kmem.c @@ -99,7 +99,7 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax) if (!data->res_name) goto err_res_name; - rc = memory_group_register_static(numa_node, total_len); + rc = memory_group_register_static(numa_node, PFN_UP(total_len)); if (rc < 0) goto err_reg_mgid; data->mgid = rc; From 191a9f3a611175b3e8e8c9e700fb8bce12ad7aa3 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 16 Jun 2023 17:06:28 +0100 Subject: [PATCH 11/13] nvdimm: make nd_class variable static The nd_class is not used outside of drivers/nvdimm/bus.c and thus sparse is generating the following warning. Remove this by making it static: drivers/nvdimm/bus.c:28:14: warning: symbol 'nd_class' was not declared. Should it be static? 
Signed-off-by: Ben Dooks Link: https://lore.kernel.org/r/20230616160628.11801-1-ben.dooks@codethink.co.uk Reviewed-by: Ira Weiny Signed-off-by: Vishal Verma --- drivers/nvdimm/bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 954dbc105fc8..5852fe290523 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -25,7 +25,7 @@ int nvdimm_major; static int nvdimm_bus_major; -struct class *nd_class; +static struct class *nd_class; static DEFINE_IDA(nd_ida); static int to_nd_device_type(const struct device *dev) From 0e796e3eafc5ba450a1f479f22aef4c8a4c7d686 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 16 Jun 2023 17:09:25 +0100 Subject: [PATCH 12/13] nvdimm: make security_show static The security_show function is not used outside of drivers/nvdimm/dimm_devs.c and the attribute it is for is also already static. Silence the sparse warning for this not being declared by making it static. Fixes: drivers/nvdimm/dimm_devs.c:352:9: warning: symbol 'security_show' was not declared. Should it be static? 
Signed-off-by: Ben Dooks Link: https://lore.kernel.org/r/20230616160925.17687-1-ben.dooks@codethink.co.uk Reviewed-by: Dave Jiang Signed-off-by: Vishal Verma --- drivers/nvdimm/dimm_devs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 10c3cb6a574a..1273873582be 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -350,7 +350,7 @@ static ssize_t available_slots_show(struct device *dev, static DEVICE_ATTR_RO(available_slots); static ssize_t security_show(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, char *buf) { struct nvdimm *nvdimm = to_nvdimm(dev); From 1ea7ca1b090145519aad998679222f0a14ab8fce Mon Sep 17 00:00:00 2001 From: Jane Chu Date: Thu, 15 Jun 2023 12:13:25 -0600 Subject: [PATCH 13/13] dax: enable dax fault handler to report VM_FAULT_HWPOISON When multiple processes mmap() a dax file and, at some point, one process issues a 'load' and consumes a hwpoison, that process receives a SIGBUS with si_code = BUS_MCEERR_AR and with si_lsb set for the poison scope. Soon after, when any other process issues a 'load' to the poisoned page (that is unmapped from the kernel side by memory_failure), it receives a SIGBUS with si_code = BUS_ADRERR and without a valid si_lsb. This is confusing to the user and differs from a page fault due to poison in RAM; some helpful information is also lost. Channel the dax backend driver's poison detection to the filesystem so that, instead of reporting VM_FAULT_SIGBUS, it can report VM_FAULT_HWPOISON. If user-level block IO syscalls fail due to poison, the errno will be converted to EIO to maintain block API consistency. 
Signed-off-by: Jane Chu Link: https://lore.kernel.org/r/20230615181325.1327259-2-jane.chu@oracle.com Reviewed-by: Dan Williams Signed-off-by: Vishal Verma --- drivers/dax/super.c | 5 ++++- drivers/nvdimm/pmem.c | 2 +- drivers/s390/block/dcssblk.c | 3 ++- fs/dax.c | 11 ++++++----- fs/fuse/virtio_fs.c | 3 ++- include/linux/dax.h | 13 +++++++++++++ include/linux/mm.h | 2 ++ 7 files changed, 30 insertions(+), 9 deletions(-) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index c4c4728a36e4..0da9232ea175 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -203,6 +203,8 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, size_t nr_pages) { + int ret; + if (!dax_alive(dax_dev)) return -ENXIO; /* @@ -213,7 +215,8 @@ int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, if (nr_pages != 1) return -EIO; - return dax_dev->ops->zero_page_range(dax_dev, pgoff, nr_pages); + ret = dax_dev->ops->zero_page_range(dax_dev, pgoff, nr_pages); + return dax_mem2blk_err(ret); } EXPORT_SYMBOL_GPL(dax_zero_page_range); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index ceea55f621cc..46e094e56159 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -260,7 +260,7 @@ __weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, long actual_nr; if (mode != DAX_RECOVERY_WRITE) - return -EIO; + return -EHWPOISON; /* * Set the recovery stride is set to kernel page size because diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index c09f2e053bf8..ee47ac520cd4 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -54,7 +54,8 @@ static int dcssblk_dax_zero_page_range(struct dax_device *dax_dev, rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS, &kaddr, NULL); if (rc < 0) - return rc; + return dax_mem2blk_err(rc); + memset(kaddr, 0, nr_pages << PAGE_SHIFT); dax_flush(dax_dev, 
kaddr, nr_pages << PAGE_SHIFT); return 0; diff --git a/fs/dax.c b/fs/dax.c index cb36c6746fc4..906ecbd541a3 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1148,7 +1148,7 @@ static int dax_iomap_copy_around(loff_t pos, uint64_t length, size_t align_size, if (!zero_edge) { ret = dax_iomap_direct_access(srcmap, pos, size, &saddr, NULL); if (ret) - return ret; + return dax_mem2blk_err(ret); } if (copy_all) { @@ -1310,7 +1310,7 @@ static s64 dax_unshare_iter(struct iomap_iter *iter) out_unlock: dax_read_unlock(id); - return ret; + return dax_mem2blk_err(ret); } int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len, @@ -1342,7 +1342,8 @@ static int dax_memzero(struct iomap_iter *iter, loff_t pos, size_t size) ret = dax_direct_access(iomap->dax_dev, pgoff, 1, DAX_ACCESS, &kaddr, NULL); if (ret < 0) - return ret; + return dax_mem2blk_err(ret); + memset(kaddr + offset, 0, size); if (iomap->flags & IOMAP_F_SHARED) ret = dax_iomap_copy_around(pos, size, PAGE_SIZE, srcmap, @@ -1498,7 +1499,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi, map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), DAX_ACCESS, &kaddr, NULL); - if (map_len == -EIO && iov_iter_rw(iter) == WRITE) { + if (map_len == -EHWPOISON && iov_iter_rw(iter) == WRITE) { map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), DAX_RECOVERY_WRITE, &kaddr, NULL); @@ -1506,7 +1507,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi, recovery = true; } if (map_len < 0) { - ret = map_len; + ret = dax_mem2blk_err(map_len); break; } diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 4d8d4f16c727..5f1be1da92ce 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -775,7 +775,8 @@ static int virtio_fs_zero_page_range(struct dax_device *dax_dev, rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS, &kaddr, NULL); if (rc < 0) - return rc; + return dax_mem2blk_err(rc); + memset(kaddr, 0, nr_pages << PAGE_SHIFT); dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT); 
return 0; diff --git a/include/linux/dax.h b/include/linux/dax.h index bf6258472e49..261944ec0887 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -261,6 +261,19 @@ static inline bool dax_mapping(struct address_space *mapping) return mapping->host && IS_DAX(mapping->host); } +/* + * Due to dax's memory and block duo personalities, hwpoison reporting + * takes into consideration which personality is presently visible. + * When dax acts like a block device, such as in block IO, an encounter of + * dax hwpoison is reported as -EIO. + * When dax acts like memory, such as in page fault, a detection of hwpoison + * is reported as -EHWPOISON which leads to VM_FAULT_HWPOISON. + */ +static inline int dax_mem2blk_err(int err) +{ + return (err == -EHWPOISON) ? -EIO : err; +} + #ifdef CONFIG_DEV_DAX_HMEM_DEVICES void hmem_register_resource(int target_nid, struct resource *r); #else diff --git a/include/linux/mm.h b/include/linux/mm.h index 27ce77080c79..052ac9317365 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3342,6 +3342,8 @@ static inline vm_fault_t vmf_error(int err) { if (err == -ENOMEM) return VM_FAULT_OOM; + else if (err == -EHWPOISON) + return VM_FAULT_HWPOISON; return VM_FAULT_SIGBUS; }