From 26c22cfde5dd6e63f25c48458b0185dcb0fbb2fd Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 16 Jul 2021 15:39:12 -0300 Subject: [PATCH 01/37] vfio: Use config not menuconfig for VFIO_NOIOMMU VFIO_NOIOMMU is supposed to be an element in the VFIO menu, not start a new menu. Correct this copy-paste mistake. Fixes: 03a76b60f8ba ("vfio: Include No-IOMMU mode") Signed-off-by: Jason Gunthorpe Reviewed-by: Cornelia Huck Link: https://lore.kernel.org/r/0-v1-3f0b685c3679+478-vfio_menuconfig_jgg@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 67d0bf4efa16..e44bf736e2b2 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -29,7 +29,7 @@ menuconfig VFIO If you don't know what to do here, say N. -menuconfig VFIO_NOIOMMU +config VFIO_NOIOMMU bool "VFIO No-IOMMU support" depends on VFIO help From e7500b3ede2c66380a7e9faa6a81e6df2f8e4e55 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Wed, 21 Jul 2021 10:05:48 -0300 Subject: [PATCH 02/37] vfio/pci: Make vfio_pci_regops->rw() return ssize_t The only implementation of this in IGD returns a -ERRNO which is implicitly cast through a size_t and then casted again and returned as a ssize_t in vfio_pci_rw(). Fix the vfio_pci_regops->rw() return type to be ssize_t so all is consistent. 
Fixes: 28541d41c9e0 ("vfio/pci: Add infrastructure for additional device specific regions") Signed-off-by: Yishai Hadas Signed-off-by: Jason Gunthorpe Reviewed-by: Cornelia Huck Reviewed-by: Max Gurtovoy Link: https://lore.kernel.org/r/0-v3-5db12d1bf576+c910-vfio_rw_jgg@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_igd.c | 10 +++++----- drivers/vfio/pci/vfio_pci_private.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c index 228df565e9bc..aa0a29fd2762 100644 --- a/drivers/vfio/pci/vfio_pci_igd.c +++ b/drivers/vfio/pci/vfio_pci_igd.c @@ -25,8 +25,8 @@ #define OPREGION_RVDS 0x3c2 #define OPREGION_VERSION 0x16 -static size_t vfio_pci_igd_rw(struct vfio_pci_device *vdev, char __user *buf, - size_t count, loff_t *ppos, bool iswrite) +static ssize_t vfio_pci_igd_rw(struct vfio_pci_device *vdev, char __user *buf, + size_t count, loff_t *ppos, bool iswrite) { unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS; void *base = vdev->region[i].data; @@ -160,9 +160,9 @@ static int vfio_pci_igd_opregion_init(struct vfio_pci_device *vdev) return ret; } -static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev, - char __user *buf, size_t count, loff_t *ppos, - bool iswrite) +static ssize_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev, + char __user *buf, size_t count, loff_t *ppos, + bool iswrite) { unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS; struct pci_dev *pdev = vdev->region[i].data; diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 5a36272cecbf..bbc56c857ef0 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -56,7 +56,7 @@ struct vfio_pci_device; struct vfio_pci_region; struct vfio_pci_regops { - size_t (*rw)(struct vfio_pci_device *vdev, char __user *buf, + ssize_t (*rw)(struct vfio_pci_device *vdev, char __user *buf, 
size_t count, loff_t *ppos, bool iswrite); void (*release)(struct vfio_pci_device *vdev, struct vfio_pci_region *region); From 15a5896e61acb7cbad5efd9cf807a4d9a5e8315d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 26 Jul 2021 16:35:23 +0200 Subject: [PATCH 03/37] vfio/mdev: turn mdev_init into a subsys_initcall MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this setups with built-in mdev and mdev-drivers fail to register like this: [1.903149] Driver 'intel_vgpu_mdev' was unable to register with bus_type 'mdev' because the bus was not initialized. Signed-off-by: Christoph Hellwig Reviewed-by: Cornelia Huck Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20210726143524.155779-2-hch@lst.de Signed-off-by: Alex Williamson --- drivers/vfio/mdev/mdev_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c index e4581ec093a6..b16606ebafa1 100644 --- a/drivers/vfio/mdev/mdev_core.c +++ b/drivers/vfio/mdev/mdev_core.c @@ -398,7 +398,7 @@ static void __exit mdev_exit(void) mdev_bus_unregister(); } -module_init(mdev_init) +subsys_initcall(mdev_init) module_exit(mdev_exit) MODULE_VERSION(DRIVER_VERSION); From 3fb1712d85962f81265b5018922a2da13cdf6033 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 26 Jul 2021 16:35:24 +0200 Subject: [PATCH 04/37] vfio/mdev: don't warn if ->request is not set Only a single driver actually sets the ->request method, so don't print a scary warning if it isn't. 
Signed-off-by: Christoph Hellwig Reviewed-by: Cornelia Huck Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20210726143524.155779-3-hch@lst.de Signed-off-by: Alex Williamson --- drivers/vfio/mdev/mdev_core.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c index b16606ebafa1..b314101237fe 100644 --- a/drivers/vfio/mdev/mdev_core.c +++ b/drivers/vfio/mdev/mdev_core.c @@ -138,10 +138,6 @@ int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops) if (!dev) return -EINVAL; - /* Not mandatory, but its absence could be a problem */ - if (!ops->request) - dev_info(dev, "Driver cannot be asked to release device\n"); - mutex_lock(&parent_list_lock); /* Check for duplicate */ From e1706f0764f8bea42db8d33df5d7f9e754b89693 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 5 Aug 2021 22:18:57 -0300 Subject: [PATCH 05/37] vfio/samples: Remove module get/put The patch to move the get/put to core and the patch to convert the samples to use vfio_device crossed in a way that this was missed. When both patches are together the samples do not need their own get/put. 
Fixes: 437e41368c01 ("vfio/mdpy: Convert to use vfio_register_group_dev()") Fixes: 681c1615f891 ("vfio/mbochs: Convert to use vfio_register_group_dev()") Reviewed-by: Cornelia Huck Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe Reviewed-by: Max Gurtovoy Link: https://lore.kernel.org/r/1-v4-9ea22c5e6afb+1adf-vfio_reflck_jgg@nvidia.com Signed-off-by: Alex Williamson --- samples/vfio-mdev/mbochs.c | 4 ---- samples/vfio-mdev/mdpy.c | 4 ---- 2 files changed, 8 deletions(-) diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c index 6c0f229db36a..e81b875b4d87 100644 --- a/samples/vfio-mdev/mbochs.c +++ b/samples/vfio-mdev/mbochs.c @@ -1274,9 +1274,6 @@ static long mbochs_ioctl(struct vfio_device *vdev, unsigned int cmd, static int mbochs_open(struct vfio_device *vdev) { - if (!try_module_get(THIS_MODULE)) - return -ENODEV; - return 0; } @@ -1300,7 +1297,6 @@ static void mbochs_close(struct vfio_device *vdev) mbochs_put_pages(mdev_state); mutex_unlock(&mdev_state->ops_lock); - module_put(THIS_MODULE); } static ssize_t diff --git a/samples/vfio-mdev/mdpy.c b/samples/vfio-mdev/mdpy.c index 393c9df6f6a0..a7d4ed28d664 100644 --- a/samples/vfio-mdev/mdpy.c +++ b/samples/vfio-mdev/mdpy.c @@ -611,15 +611,11 @@ static long mdpy_ioctl(struct vfio_device *vdev, unsigned int cmd, static int mdpy_open(struct vfio_device *vdev) { - if (!try_module_get(THIS_MODULE)) - return -ENODEV; - return 0; } static void mdpy_close(struct vfio_device *vdev) { - module_put(THIS_MODULE); } static ssize_t From de5494af4815a4c9328536c72741229b7de88e7f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 5 Aug 2021 22:18:58 -0300 Subject: [PATCH 06/37] vfio/mbochs: Fix missing error unwind of mbochs_used_mbytes Convert mbochs to use an atomic scheme for this like mtty was changed into. The atomic fixes various race conditions with probing. Add the missing error unwind. Also add the missing kfree of mdev_state->pages. 
Fixes: 681c1615f891 ("vfio/mbochs: Convert to use vfio_register_group_dev()") Reported-by: Cornelia Huck Co-developed-by: Alex Williamson Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe Reviewed-by: Cornelia Huck Link: https://lore.kernel.org/r/2-v4-9ea22c5e6afb+1adf-vfio_reflck_jgg@nvidia.com Signed-off-by: Alex Williamson --- samples/vfio-mdev/mbochs.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c index e81b875b4d87..3e885be7d076 100644 --- a/samples/vfio-mdev/mbochs.c +++ b/samples/vfio-mdev/mbochs.c @@ -129,7 +129,7 @@ static dev_t mbochs_devt; static struct class *mbochs_class; static struct cdev mbochs_cdev; static struct device mbochs_dev; -static int mbochs_used_mbytes; +static atomic_t mbochs_avail_mbytes; static const struct vfio_device_ops mbochs_dev_ops; struct vfio_region_info_ext { @@ -507,18 +507,22 @@ static int mbochs_reset(struct mdev_state *mdev_state) static int mbochs_probe(struct mdev_device *mdev) { + int avail_mbytes = atomic_read(&mbochs_avail_mbytes); const struct mbochs_type *type = &mbochs_types[mdev_get_type_group_id(mdev)]; struct device *dev = mdev_dev(mdev); struct mdev_state *mdev_state; int ret = -ENOMEM; - if (type->mbytes + mbochs_used_mbytes > max_mbytes) - return -ENOMEM; + do { + if (avail_mbytes < type->mbytes) + return -ENOSPC; + } while (!atomic_try_cmpxchg(&mbochs_avail_mbytes, &avail_mbytes, + avail_mbytes - type->mbytes)); mdev_state = kzalloc(sizeof(struct mdev_state), GFP_KERNEL); if (mdev_state == NULL) - return -ENOMEM; + goto err_avail; vfio_init_group_dev(&mdev_state->vdev, &mdev->dev, &mbochs_dev_ops); mdev_state->vconfig = kzalloc(MBOCHS_CONFIG_SPACE_SIZE, GFP_KERNEL); @@ -549,17 +553,17 @@ static int mbochs_probe(struct mdev_device *mdev) mbochs_create_config_space(mdev_state); mbochs_reset(mdev_state); - mbochs_used_mbytes += type->mbytes; - ret = vfio_register_group_dev(&mdev_state->vdev); if 
(ret) goto err_mem; dev_set_drvdata(&mdev->dev, mdev_state); return 0; - err_mem: + kfree(mdev_state->pages); kfree(mdev_state->vconfig); kfree(mdev_state); +err_avail: + atomic_add(type->mbytes, &mbochs_avail_mbytes); return ret; } @@ -567,8 +571,8 @@ static void mbochs_remove(struct mdev_device *mdev) { struct mdev_state *mdev_state = dev_get_drvdata(&mdev->dev); - mbochs_used_mbytes -= mdev_state->type->mbytes; vfio_unregister_group_dev(&mdev_state->vdev); + atomic_add(mdev_state->type->mbytes, &mbochs_avail_mbytes); kfree(mdev_state->pages); kfree(mdev_state->vconfig); kfree(mdev_state); @@ -1351,7 +1355,7 @@ static ssize_t available_instances_show(struct mdev_type *mtype, { const struct mbochs_type *type = &mbochs_types[mtype_get_type_group_id(mtype)]; - int count = (max_mbytes - mbochs_used_mbytes) / type->mbytes; + int count = atomic_read(&mbochs_avail_mbytes) / type->mbytes; return sprintf(buf, "%d\n", count); } @@ -1433,6 +1437,8 @@ static int __init mbochs_dev_init(void) { int ret = 0; + atomic_set(&mbochs_avail_mbytes, max_mbytes); + ret = alloc_chrdev_region(&mbochs_devt, 0, MINORMASK + 1, MBOCHS_NAME); if (ret < 0) { pr_err("Error: failed to register mbochs_dev, err: %d\n", ret); From ae03c3771b8cbbed3802ad1153d896c32015c520 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 5 Aug 2021 22:18:59 -0300 Subject: [PATCH 07/37] vfio: Introduce a vfio_uninit_group_dev() API call This pairs with vfio_init_group_dev() and allows undoing any state that is stored in the vfio_device unrelated to registration. Add appropriately placed calls to all the drivers. The following patch will use this to add pre-registration state for the device set. 
Signed-off-by: Max Gurtovoy Reviewed-by: Cornelia Huck Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/3-v4-9ea22c5e6afb+1adf-vfio_reflck_jgg@nvidia.com Signed-off-by: Alex Williamson --- Documentation/driver-api/vfio.rst | 4 ++- drivers/vfio/fsl-mc/vfio_fsl_mc.c | 7 ++--- drivers/vfio/mdev/vfio_mdev.c | 13 +++++++--- drivers/vfio/pci/vfio_pci.c | 6 +++-- drivers/vfio/platform/vfio_platform_common.c | 7 +++-- drivers/vfio/vfio.c | 5 ++++ include/linux/vfio.h | 1 + samples/vfio-mdev/mbochs.c | 2 ++ samples/vfio-mdev/mdpy.c | 25 ++++++++++-------- samples/vfio-mdev/mtty.c | 27 ++++++++++++-------- 10 files changed, 64 insertions(+), 33 deletions(-) diff --git a/Documentation/driver-api/vfio.rst b/Documentation/driver-api/vfio.rst index 606eed8823ce..c663b6f97825 100644 --- a/Documentation/driver-api/vfio.rst +++ b/Documentation/driver-api/vfio.rst @@ -255,11 +255,13 @@ vfio_unregister_group_dev() respectively:: void vfio_init_group_dev(struct vfio_device *device, struct device *dev, const struct vfio_device_ops *ops); + void vfio_uninit_group_dev(struct vfio_device *device); int vfio_register_group_dev(struct vfio_device *device); void vfio_unregister_group_dev(struct vfio_device *device); The driver should embed the vfio_device in its own structure and call -vfio_init_group_dev() to pre-configure it before going to registration. +vfio_init_group_dev() to pre-configure it before going to registration +and call vfio_uninit_group_dev() after completing the un-registration. vfio_register_group_dev() indicates to the core to begin tracking the iommu_group of the specified dev and register the dev as owned by a VFIO bus driver. 
Once vfio_register_group_dev() returns it is possible for userspace to diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c index 90cad109583b..122997c61ba4 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c @@ -627,7 +627,7 @@ static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev) ret = vfio_fsl_mc_reflck_attach(vdev); if (ret) - goto out_kfree; + goto out_uninit; ret = vfio_fsl_mc_init_device(vdev); if (ret) @@ -657,7 +657,8 @@ out_device: vfio_fsl_uninit_device(vdev); out_reflck: vfio_fsl_mc_reflck_put(vdev->reflck); -out_kfree: +out_uninit: + vfio_uninit_group_dev(&vdev->vdev); kfree(vdev); out_group_put: vfio_iommu_group_put(group, dev); @@ -675,7 +676,7 @@ static int vfio_fsl_mc_remove(struct fsl_mc_device *mc_dev) dprc_remove_devices(mc_dev, NULL, 0); vfio_fsl_uninit_device(vdev); vfio_fsl_mc_reflck_put(vdev->reflck); - + vfio_uninit_group_dev(&vdev->vdev); kfree(vdev); vfio_iommu_group_put(mc_dev->dev.iommu_group, dev); diff --git a/drivers/vfio/mdev/vfio_mdev.c b/drivers/vfio/mdev/vfio_mdev.c index 39ef7489fe47..a5c77ccb24f7 100644 --- a/drivers/vfio/mdev/vfio_mdev.c +++ b/drivers/vfio/mdev/vfio_mdev.c @@ -120,12 +120,16 @@ static int vfio_mdev_probe(struct mdev_device *mdev) vfio_init_group_dev(vdev, &mdev->dev, &vfio_mdev_dev_ops); ret = vfio_register_group_dev(vdev); - if (ret) { - kfree(vdev); - return ret; - } + if (ret) + goto out_uninit; + dev_set_drvdata(&mdev->dev, vdev); return 0; + +out_uninit: + vfio_uninit_group_dev(vdev); + kfree(vdev); + return ret; } static void vfio_mdev_remove(struct mdev_device *mdev) @@ -133,6 +137,7 @@ static void vfio_mdev_remove(struct mdev_device *mdev) struct vfio_device *vdev = dev_get_drvdata(&mdev->dev); vfio_unregister_group_dev(vdev); + vfio_uninit_group_dev(vdev); kfree(vdev); } diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 318864d52837..fab3715d60d4 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ 
b/drivers/vfio/pci/vfio_pci.c @@ -2022,7 +2022,7 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) ret = vfio_pci_reflck_attach(vdev); if (ret) - goto out_free; + goto out_uninit; ret = vfio_pci_vf_init(vdev); if (ret) goto out_reflck; @@ -2059,7 +2059,8 @@ out_vf: vfio_pci_vf_uninit(vdev); out_reflck: vfio_pci_reflck_put(vdev->reflck); -out_free: +out_uninit: + vfio_uninit_group_dev(&vdev->vdev); kfree(vdev->pm_save); kfree(vdev); out_group_put: @@ -2077,6 +2078,7 @@ static void vfio_pci_remove(struct pci_dev *pdev) vfio_pci_vf_uninit(vdev); vfio_pci_reflck_put(vdev->reflck); + vfio_uninit_group_dev(&vdev->vdev); vfio_pci_vga_uninit(vdev); vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev); diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index 703164df7637..bdde8605178c 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -667,7 +667,7 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev, ret = vfio_platform_of_probe(vdev, dev); if (ret) - return ret; + goto out_uninit; vdev->device = dev; @@ -675,7 +675,7 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev, if (ret && vdev->reset_required) { dev_err(dev, "No reset function found for device %s\n", vdev->name); - return ret; + goto out_uninit; } group = vfio_iommu_group_get(dev); @@ -698,6 +698,8 @@ put_iommu: vfio_iommu_group_put(group, dev); put_reset: vfio_platform_put_reset(vdev); +out_uninit: + vfio_uninit_group_dev(&vdev->vdev); return ret; } EXPORT_SYMBOL_GPL(vfio_platform_probe_common); @@ -708,6 +710,7 @@ void vfio_platform_remove_common(struct vfio_platform_device *vdev) pm_runtime_disable(vdev->device); vfio_platform_put_reset(vdev); + vfio_uninit_group_dev(&vdev->vdev); vfio_iommu_group_put(vdev->vdev.dev->iommu_group, vdev->vdev.dev); } EXPORT_SYMBOL_GPL(vfio_platform_remove_common); diff --git a/drivers/vfio/vfio.c 
b/drivers/vfio/vfio.c index 02cc51ce6891..cc375df0fd5d 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -749,6 +749,11 @@ void vfio_init_group_dev(struct vfio_device *device, struct device *dev, } EXPORT_SYMBOL_GPL(vfio_init_group_dev); +void vfio_uninit_group_dev(struct vfio_device *device) +{ +} +EXPORT_SYMBOL_GPL(vfio_uninit_group_dev); + int vfio_register_group_dev(struct vfio_device *device) { struct vfio_device *existing_device; diff --git a/include/linux/vfio.h b/include/linux/vfio.h index a2c5b30e1763..b0875cf8e496 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -61,6 +61,7 @@ extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev); void vfio_init_group_dev(struct vfio_device *device, struct device *dev, const struct vfio_device_ops *ops); +void vfio_uninit_group_dev(struct vfio_device *device); int vfio_register_group_dev(struct vfio_device *device); void vfio_unregister_group_dev(struct vfio_device *device); extern struct vfio_device *vfio_device_get_from_dev(struct device *dev); diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c index 3e885be7d076..0f1511849b7c 100644 --- a/samples/vfio-mdev/mbochs.c +++ b/samples/vfio-mdev/mbochs.c @@ -559,6 +559,7 @@ static int mbochs_probe(struct mdev_device *mdev) dev_set_drvdata(&mdev->dev, mdev_state); return 0; err_mem: + vfio_uninit_group_dev(&mdev_state->vdev); kfree(mdev_state->pages); kfree(mdev_state->vconfig); kfree(mdev_state); @@ -572,6 +573,7 @@ static void mbochs_remove(struct mdev_device *mdev) struct mdev_state *mdev_state = dev_get_drvdata(&mdev->dev); vfio_unregister_group_dev(&mdev_state->vdev); + vfio_uninit_group_dev(&mdev_state->vdev); atomic_add(mdev_state->type->mbytes, &mbochs_avail_mbytes); kfree(mdev_state->pages); kfree(mdev_state->vconfig); diff --git a/samples/vfio-mdev/mdpy.c b/samples/vfio-mdev/mdpy.c index a7d4ed28d664..57334034cde6 100644 --- a/samples/vfio-mdev/mdpy.c +++ b/samples/vfio-mdev/mdpy.c @@ -235,17 
+235,16 @@ static int mdpy_probe(struct mdev_device *mdev) mdev_state->vconfig = kzalloc(MDPY_CONFIG_SPACE_SIZE, GFP_KERNEL); if (mdev_state->vconfig == NULL) { - kfree(mdev_state); - return -ENOMEM; + ret = -ENOMEM; + goto err_state; } fbsize = roundup_pow_of_two(type->width * type->height * type->bytepp); mdev_state->memblk = vmalloc_user(fbsize); if (!mdev_state->memblk) { - kfree(mdev_state->vconfig); - kfree(mdev_state); - return -ENOMEM; + ret = -ENOMEM; + goto err_vconfig; } dev_info(dev, "%s: %s (%dx%d)\n", __func__, type->name, type->width, type->height); @@ -260,13 +259,18 @@ static int mdpy_probe(struct mdev_device *mdev) mdpy_count++; ret = vfio_register_group_dev(&mdev_state->vdev); - if (ret) { - kfree(mdev_state->vconfig); - kfree(mdev_state); - return ret; - } + if (ret) + goto err_mem; dev_set_drvdata(&mdev->dev, mdev_state); return 0; +err_mem: + vfree(mdev_state->memblk); +err_vconfig: + kfree(mdev_state->vconfig); +err_state: + vfio_uninit_group_dev(&mdev_state->vdev); + kfree(mdev_state); + return ret; } static void mdpy_remove(struct mdev_device *mdev) @@ -278,6 +282,7 @@ static void mdpy_remove(struct mdev_device *mdev) vfio_unregister_group_dev(&mdev_state->vdev); vfree(mdev_state->memblk); kfree(mdev_state->vconfig); + vfio_uninit_group_dev(&mdev_state->vdev); kfree(mdev_state); mdpy_count--; diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c index 8b26fecc4afe..37cc9067e160 100644 --- a/samples/vfio-mdev/mtty.c +++ b/samples/vfio-mdev/mtty.c @@ -718,8 +718,8 @@ static int mtty_probe(struct mdev_device *mdev) mdev_state = kzalloc(sizeof(struct mdev_state), GFP_KERNEL); if (mdev_state == NULL) { - atomic_add(nr_ports, &mdev_avail_ports); - return -ENOMEM; + ret = -ENOMEM; + goto err_nr_ports; } vfio_init_group_dev(&mdev_state->vdev, &mdev->dev, &mtty_dev_ops); @@ -732,9 +732,8 @@ static int mtty_probe(struct mdev_device *mdev) mdev_state->vconfig = kzalloc(MTTY_CONFIG_SPACE_SIZE, GFP_KERNEL); if (mdev_state->vconfig == NULL) { 
- kfree(mdev_state); - atomic_add(nr_ports, &mdev_avail_ports); - return -ENOMEM; + ret = -ENOMEM; + goto err_state; } mutex_init(&mdev_state->ops_lock); @@ -743,14 +742,19 @@ static int mtty_probe(struct mdev_device *mdev) mtty_create_config_space(mdev_state); ret = vfio_register_group_dev(&mdev_state->vdev); - if (ret) { - kfree(mdev_state); - atomic_add(nr_ports, &mdev_avail_ports); - return ret; - } - + if (ret) + goto err_vconfig; dev_set_drvdata(&mdev->dev, mdev_state); return 0; + +err_vconfig: + kfree(mdev_state->vconfig); +err_state: + vfio_uninit_group_dev(&mdev_state->vdev); + kfree(mdev_state); +err_nr_ports: + atomic_add(nr_ports, &mdev_avail_ports); + return ret; } static void mtty_remove(struct mdev_device *mdev) @@ -761,6 +765,7 @@ static void mtty_remove(struct mdev_device *mdev) vfio_unregister_group_dev(&mdev_state->vdev); kfree(mdev_state->vconfig); + vfio_uninit_group_dev(&mdev_state->vdev); kfree(mdev_state); atomic_add(nr_ports, &mdev_avail_ports); } From 2fd585f4ed9de9b9259e95affdd7d8cde06b48c3 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 5 Aug 2021 22:19:00 -0300 Subject: [PATCH 08/37] vfio: Provide better generic support for open/release vfio_device_ops Currently the driver ops have an open/release pair that is called once each time a device FD is opened or closed. Add an additional set of open/close_device() ops which are called when the device FD is opened for the first time and closed for the last time. An analysis shows that all of the drivers require this semantic. Some are open coding it as part of their reflck implementation, and some are just buggy and miss it completely. To retain the current semantics PCI and FSL depend on, introduce the idea of a "device set" which is a grouping of vfio_device's that share the same lock around opening. The device set is established by providing a 'set_id' pointer. 
All vfio_device's that provide the same pointer will be joined to the same singleton memory and lock across the whole set. This effectively replaces the oddly named reflck. After conversion the set_id will be sourced from: - A struct device from a fsl_mc_device (fsl) - A struct pci_slot (pci) - A struct pci_bus (pci) - The struct vfio_device (everything) The design ensures that the above pointers are live as long as the vfio_device is registered, so they form reliable unique keys to group vfio_devices into sets. This implementation uses xarray instead of searching through the driver core structures, which simplifies the somewhat tricky locking in this area. Following patches convert all the drivers. Signed-off-by: Yishai Hadas Reviewed-by: Cornelia Huck Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/4-v4-9ea22c5e6afb+1adf-vfio_reflck_jgg@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/mdev/vfio_mdev.c | 26 +++++- drivers/vfio/vfio.c | 149 +++++++++++++++++++++++++++++----- include/linux/mdev.h | 2 + include/linux/vfio.h | 21 +++++ 4 files changed, 174 insertions(+), 24 deletions(-) diff --git a/drivers/vfio/mdev/vfio_mdev.c b/drivers/vfio/mdev/vfio_mdev.c index a5c77ccb24f7..e12196ffd487 100644 --- a/drivers/vfio/mdev/vfio_mdev.c +++ b/drivers/vfio/mdev/vfio_mdev.c @@ -17,13 +17,33 @@ #include "mdev_private.h" +static int vfio_mdev_open_device(struct vfio_device *core_vdev) +{ + struct mdev_device *mdev = to_mdev_device(core_vdev->dev); + struct mdev_parent *parent = mdev->type->parent; + + if (unlikely(!parent->ops->open_device)) + return 0; + + return parent->ops->open_device(mdev); +} + +static void vfio_mdev_close_device(struct vfio_device *core_vdev) +{ + struct mdev_device *mdev = to_mdev_device(core_vdev->dev); + struct mdev_parent *parent = mdev->type->parent; + + if (likely(parent->ops->close_device)) + parent->ops->close_device(mdev); +} + static int vfio_mdev_open(struct vfio_device *core_vdev) { 
struct mdev_device *mdev = to_mdev_device(core_vdev->dev); struct mdev_parent *parent = mdev->type->parent; if (unlikely(!parent->ops->open)) - return -EINVAL; + return 0; return parent->ops->open(mdev); } @@ -44,7 +64,7 @@ static long vfio_mdev_unlocked_ioctl(struct vfio_device *core_vdev, struct mdev_parent *parent = mdev->type->parent; if (unlikely(!parent->ops->ioctl)) - return -EINVAL; + return 0; return parent->ops->ioctl(mdev, cmd, arg); } @@ -100,6 +120,8 @@ static void vfio_mdev_request(struct vfio_device *core_vdev, unsigned int count) static const struct vfio_device_ops vfio_mdev_dev_ops = { .name = "vfio-mdev", + .open_device = vfio_mdev_open_device, + .close_device = vfio_mdev_close_device, .open = vfio_mdev_open, .release = vfio_mdev_release, .ioctl = vfio_mdev_unlocked_ioctl, diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index cc375df0fd5d..9cc17768c425 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -96,6 +96,79 @@ module_param_named(enable_unsafe_noiommu_mode, MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. 
(default: false)"); #endif +static DEFINE_XARRAY(vfio_device_set_xa); + +int vfio_assign_device_set(struct vfio_device *device, void *set_id) +{ + unsigned long idx = (unsigned long)set_id; + struct vfio_device_set *new_dev_set; + struct vfio_device_set *dev_set; + + if (WARN_ON(!set_id)) + return -EINVAL; + + /* + * Atomically acquire a singleton object in the xarray for this set_id + */ + xa_lock(&vfio_device_set_xa); + dev_set = xa_load(&vfio_device_set_xa, idx); + if (dev_set) + goto found_get_ref; + xa_unlock(&vfio_device_set_xa); + + new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL); + if (!new_dev_set) + return -ENOMEM; + mutex_init(&new_dev_set->lock); + INIT_LIST_HEAD(&new_dev_set->device_list); + new_dev_set->set_id = set_id; + + xa_lock(&vfio_device_set_xa); + dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set, + GFP_KERNEL); + if (!dev_set) { + dev_set = new_dev_set; + goto found_get_ref; + } + + kfree(new_dev_set); + if (xa_is_err(dev_set)) { + xa_unlock(&vfio_device_set_xa); + return xa_err(dev_set); + } + +found_get_ref: + dev_set->device_count++; + xa_unlock(&vfio_device_set_xa); + mutex_lock(&dev_set->lock); + device->dev_set = dev_set; + list_add_tail(&device->dev_set_list, &dev_set->device_list); + mutex_unlock(&dev_set->lock); + return 0; +} +EXPORT_SYMBOL_GPL(vfio_assign_device_set); + +static void vfio_release_device_set(struct vfio_device *device) +{ + struct vfio_device_set *dev_set = device->dev_set; + + if (!dev_set) + return; + + mutex_lock(&dev_set->lock); + list_del(&device->dev_set_list); + mutex_unlock(&dev_set->lock); + + xa_lock(&vfio_device_set_xa); + if (!--dev_set->device_count) { + __xa_erase(&vfio_device_set_xa, + (unsigned long)dev_set->set_id); + mutex_destroy(&dev_set->lock); + kfree(dev_set); + } + xa_unlock(&vfio_device_set_xa); +} + /* * vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe * and remove functions, any use cases other than acquiring the first @@ -751,6 +824,7 @@ 
EXPORT_SYMBOL_GPL(vfio_init_group_dev); void vfio_uninit_group_dev(struct vfio_device *device) { + vfio_release_device_set(device); } EXPORT_SYMBOL_GPL(vfio_uninit_group_dev); @@ -760,6 +834,13 @@ int vfio_register_group_dev(struct vfio_device *device) struct iommu_group *iommu_group; struct vfio_group *group; + /* + * If the driver doesn't specify a set then the device is added to a + * singleton set just for itself. + */ + if (!device->dev_set) + vfio_assign_device_set(device, device); + iommu_group = iommu_group_get(device->dev); if (!iommu_group) return -EINVAL; @@ -1361,7 +1442,8 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) { struct vfio_device *device; struct file *filep; - int ret; + int fdno; + int ret = 0; if (0 == atomic_read(&group->container_users) || !group->container->iommu_driver || !vfio_group_viable(group)) @@ -1375,38 +1457,38 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) return PTR_ERR(device); if (!try_module_get(device->dev->driver->owner)) { - vfio_device_put(device); - return -ENODEV; + ret = -ENODEV; + goto err_device_put; } - ret = device->ops->open(device); - if (ret) { - module_put(device->dev->driver->owner); - vfio_device_put(device); - return ret; + mutex_lock(&device->dev_set->lock); + device->open_count++; + if (device->open_count == 1 && device->ops->open_device) { + ret = device->ops->open_device(device); + if (ret) + goto err_undo_count; + } + mutex_unlock(&device->dev_set->lock); + + if (device->ops->open) { + ret = device->ops->open(device); + if (ret) + goto err_close_device; } /* * We can't use anon_inode_getfd() because we need to modify * the f_mode flags directly to allow more than just ioctls */ - ret = get_unused_fd_flags(O_CLOEXEC); - if (ret < 0) { - device->ops->release(device); - module_put(device->dev->driver->owner); - vfio_device_put(device); - return ret; - } + fdno = ret = get_unused_fd_flags(O_CLOEXEC); + if (ret < 0) + goto err_release; filep = 
anon_inode_getfile("[vfio-device]", &vfio_device_fops, device, O_RDWR); if (IS_ERR(filep)) { - put_unused_fd(ret); ret = PTR_ERR(filep); - device->ops->release(device); - module_put(device->dev->driver->owner); - vfio_device_put(device); - return ret; + goto err_fd; } /* @@ -1418,12 +1500,28 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) atomic_inc(&group->container_users); - fd_install(ret, filep); + fd_install(fdno, filep); if (group->noiommu) dev_warn(device->dev, "vfio-noiommu device opened by user " "(%s:%d)\n", current->comm, task_pid_nr(current)); + return fdno; +err_fd: + put_unused_fd(fdno); +err_release: + if (device->ops->release) + device->ops->release(device); +err_close_device: + mutex_lock(&device->dev_set->lock); + if (device->open_count == 1 && device->ops->close_device) + device->ops->close_device(device); +err_undo_count: + device->open_count--; + mutex_unlock(&device->dev_set->lock); + module_put(device->dev->driver->owner); +err_device_put: + vfio_device_put(device); return ret; } @@ -1561,7 +1659,13 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep) { struct vfio_device *device = filep->private_data; - device->ops->release(device); + if (device->ops->release) + device->ops->release(device); + + mutex_lock(&device->dev_set->lock); + if (!--device->open_count && device->ops->close_device) + device->ops->close_device(device); + mutex_unlock(&device->dev_set->lock); module_put(device->dev->driver->owner); @@ -2364,6 +2468,7 @@ static void __exit vfio_cleanup(void) class_destroy(vfio.class); vfio.class = NULL; misc_deregister(&vfio_dev); + xa_destroy(&vfio_device_set_xa); } module_init(vfio_init); diff --git a/include/linux/mdev.h b/include/linux/mdev.h index 3a38598c2605..cb5b7ed1d7c3 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -111,6 +111,8 @@ struct mdev_parent_ops { int (*create)(struct mdev_device *mdev); int (*remove)(struct mdev_device *mdev); + int 
(*open_device)(struct mdev_device *mdev); + void (*close_device)(struct mdev_device *mdev); int (*open)(struct mdev_device *mdev); void (*release)(struct mdev_device *mdev); ssize_t (*read)(struct mdev_device *mdev, char __user *buf, diff --git a/include/linux/vfio.h b/include/linux/vfio.h index b0875cf8e496..f0e6a72875e4 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -15,13 +15,28 @@ #include #include +/* + * VFIO devices can be placed in a set, this allows all devices to share this + * structure and the VFIO core will provide a lock that is held around + * open_device()/close_device() for all devices in the set. + */ +struct vfio_device_set { + void *set_id; + struct mutex lock; + struct list_head device_list; + unsigned int device_count; +}; + struct vfio_device { struct device *dev; const struct vfio_device_ops *ops; struct vfio_group *group; + struct vfio_device_set *dev_set; + struct list_head dev_set_list; /* Members below here are private, not for driver use */ refcount_t refcount; + unsigned int open_count; struct completion comp; struct list_head group_next; }; @@ -29,6 +44,8 @@ struct vfio_device { /** * struct vfio_device_ops - VFIO bus driver device callbacks * + * @open_device: Called when the first file descriptor is opened for this device + * @close_device: Opposite of open_device * @open: Called when userspace creates new file descriptor for device * @release: Called when userspace releases file descriptor for device * @read: Perform read(2) on device file descriptor @@ -43,6 +60,8 @@ struct vfio_device { */ struct vfio_device_ops { char *name; + int (*open_device)(struct vfio_device *vdev); + void (*close_device)(struct vfio_device *vdev); int (*open)(struct vfio_device *vdev); void (*release)(struct vfio_device *vdev); ssize_t (*read)(struct vfio_device *vdev, char __user *buf, @@ -67,6 +86,8 @@ void vfio_unregister_group_dev(struct vfio_device *device); extern struct vfio_device *vfio_device_get_from_dev(struct device *dev); 
extern void vfio_device_put(struct vfio_device *device); +int vfio_assign_device_set(struct vfio_device *device, void *set_id); + /* events for the backend driver notify callback */ enum vfio_iommu_notify_type { VFIO_IOMMU_CONTAINER_CLOSE = 0, From 17a1e4fa3f7f07f541c751745b5aa6f2fcab9a48 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 5 Aug 2021 22:19:01 -0300 Subject: [PATCH 09/37] vfio/samples: Delete useless open/close The core code no longer requires these ops to be defined, so delete these empty functions and leave the op as NULL. mtty's functions only log a pointless message, delete that entirely. Signed-off-by: Yishai Hadas Reviewed-by: Cornelia Huck Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/5-v4-9ea22c5e6afb+1adf-vfio_reflck_jgg@nvidia.com Signed-off-by: Alex Williamson --- samples/vfio-mdev/mbochs.c | 6 ------ samples/vfio-mdev/mdpy.c | 11 ----------- samples/vfio-mdev/mtty.c | 13 ------------- 3 files changed, 30 deletions(-) diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c index 0f1511849b7c..7b2e12fe7082 100644 --- a/samples/vfio-mdev/mbochs.c +++ b/samples/vfio-mdev/mbochs.c @@ -1278,11 +1278,6 @@ static long mbochs_ioctl(struct vfio_device *vdev, unsigned int cmd, return -ENOTTY; } -static int mbochs_open(struct vfio_device *vdev) -{ - return 0; -} - static void mbochs_close(struct vfio_device *vdev) { struct mdev_state *mdev_state = @@ -1401,7 +1396,6 @@ static struct attribute_group *mdev_type_groups[] = { }; static const struct vfio_device_ops mbochs_dev_ops = { - .open = mbochs_open, .release = mbochs_close, .read = mbochs_read, .write = mbochs_write, diff --git a/samples/vfio-mdev/mdpy.c b/samples/vfio-mdev/mdpy.c index 57334034cde6..8d1a80a0722a 100644 --- a/samples/vfio-mdev/mdpy.c +++ b/samples/vfio-mdev/mdpy.c @@ -614,15 +614,6 @@ static long mdpy_ioctl(struct vfio_device *vdev, unsigned int cmd, return -ENOTTY; } -static int mdpy_open(struct vfio_device *vdev) 
-{ - return 0; -} - -static void mdpy_close(struct vfio_device *vdev) -{ -} - static ssize_t resolution_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -717,8 +708,6 @@ static struct attribute_group *mdev_type_groups[] = { }; static const struct vfio_device_ops mdpy_dev_ops = { - .open = mdpy_open, - .release = mdpy_close, .read = mdpy_read, .write = mdpy_write, .ioctl = mdpy_ioctl, diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c index 37cc9067e160..5983cdb16e3d 100644 --- a/samples/vfio-mdev/mtty.c +++ b/samples/vfio-mdev/mtty.c @@ -1207,17 +1207,6 @@ static long mtty_ioctl(struct vfio_device *vdev, unsigned int cmd, return -ENOTTY; } -static int mtty_open(struct vfio_device *vdev) -{ - pr_info("%s\n", __func__); - return 0; -} - -static void mtty_close(struct vfio_device *mdev) -{ - pr_info("%s\n", __func__); -} - static ssize_t sample_mtty_dev_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -1325,8 +1314,6 @@ static struct attribute_group *mdev_type_groups[] = { static const struct vfio_device_ops mtty_dev_ops = { .name = "vfio-mtty", - .open = mtty_open, - .release = mtty_close, .read = mtty_read, .write = mtty_write, .ioctl = mtty_ioctl, From da119f387e94642da959a22ae9c22e09abe34926 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 5 Aug 2021 22:19:02 -0300 Subject: [PATCH 10/37] vfio/fsl: Move to the device set infrastructure FSL uses the internal reflck to implement the open_device() functionality, conversion to the core code is straightforward. The decision on which set to be part of is trivially based on the is_fsl_mc_bus_dprc() and we use a 'struct device *' pointer as the set_id. The dev_set lock is protecting the interrupts setup. The FSL MC devices are using MSIs and only the DPRC device is allocating the MSIs from the MSI domain. The other devices just take interrupts from a pool. The lock is protecting the access to this pool. 
Signed-off-by: Yishai Hadas Tested-by: Diana Craciun OSS Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/6-v4-9ea22c5e6afb+1adf-vfio_reflck_jgg@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/fsl-mc/vfio_fsl_mc.c | 156 ++++------------------ drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c | 6 +- drivers/vfio/fsl-mc/vfio_fsl_mc_private.h | 7 - 3 files changed, 29 insertions(+), 140 deletions(-) diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c index 122997c61ba4..0ead91bfa838 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c @@ -19,81 +19,10 @@ static struct fsl_mc_driver vfio_fsl_mc_driver; -static DEFINE_MUTEX(reflck_lock); - -static void vfio_fsl_mc_reflck_get(struct vfio_fsl_mc_reflck *reflck) -{ - kref_get(&reflck->kref); -} - -static void vfio_fsl_mc_reflck_release(struct kref *kref) -{ - struct vfio_fsl_mc_reflck *reflck = container_of(kref, - struct vfio_fsl_mc_reflck, - kref); - - mutex_destroy(&reflck->lock); - kfree(reflck); - mutex_unlock(&reflck_lock); -} - -static void vfio_fsl_mc_reflck_put(struct vfio_fsl_mc_reflck *reflck) -{ - kref_put_mutex(&reflck->kref, vfio_fsl_mc_reflck_release, &reflck_lock); -} - -static struct vfio_fsl_mc_reflck *vfio_fsl_mc_reflck_alloc(void) -{ - struct vfio_fsl_mc_reflck *reflck; - - reflck = kzalloc(sizeof(*reflck), GFP_KERNEL); - if (!reflck) - return ERR_PTR(-ENOMEM); - - kref_init(&reflck->kref); - mutex_init(&reflck->lock); - - return reflck; -} - -static int vfio_fsl_mc_reflck_attach(struct vfio_fsl_mc_device *vdev) -{ - int ret = 0; - - mutex_lock(&reflck_lock); - if (is_fsl_mc_bus_dprc(vdev->mc_dev)) { - vdev->reflck = vfio_fsl_mc_reflck_alloc(); - ret = PTR_ERR_OR_ZERO(vdev->reflck); - } else { - struct device *mc_cont_dev = vdev->mc_dev->dev.parent; - struct vfio_device *device; - struct vfio_fsl_mc_device *cont_vdev; - - device = vfio_device_get_from_dev(mc_cont_dev); - if (!device) { - ret = -ENODEV; - goto unlock; - } - - 
cont_vdev = - container_of(device, struct vfio_fsl_mc_device, vdev); - if (!cont_vdev || !cont_vdev->reflck) { - vfio_device_put(device); - ret = -ENODEV; - goto unlock; - } - vfio_fsl_mc_reflck_get(cont_vdev->reflck); - vdev->reflck = cont_vdev->reflck; - vfio_device_put(device); - } - -unlock: - mutex_unlock(&reflck_lock); - return ret; -} - -static int vfio_fsl_mc_regions_init(struct vfio_fsl_mc_device *vdev) +static int vfio_fsl_mc_open_device(struct vfio_device *core_vdev) { + struct vfio_fsl_mc_device *vdev = + container_of(core_vdev, struct vfio_fsl_mc_device, vdev); struct fsl_mc_device *mc_dev = vdev->mc_dev; int count = mc_dev->obj_desc.region_count; int i; @@ -136,58 +65,30 @@ static void vfio_fsl_mc_regions_cleanup(struct vfio_fsl_mc_device *vdev) kfree(vdev->regions); } -static int vfio_fsl_mc_open(struct vfio_device *core_vdev) -{ - struct vfio_fsl_mc_device *vdev = - container_of(core_vdev, struct vfio_fsl_mc_device, vdev); - int ret = 0; - - mutex_lock(&vdev->reflck->lock); - if (!vdev->refcnt) { - ret = vfio_fsl_mc_regions_init(vdev); - if (ret) - goto out; - } - vdev->refcnt++; -out: - mutex_unlock(&vdev->reflck->lock); - - return ret; -} - -static void vfio_fsl_mc_release(struct vfio_device *core_vdev) + +static void vfio_fsl_mc_close_device(struct vfio_device *core_vdev) { struct vfio_fsl_mc_device *vdev = container_of(core_vdev, struct vfio_fsl_mc_device, vdev); + struct fsl_mc_device *mc_dev = vdev->mc_dev; + struct device *cont_dev = fsl_mc_cont_dev(&mc_dev->dev); + struct fsl_mc_device *mc_cont = to_fsl_mc_device(cont_dev); int ret; - mutex_lock(&vdev->reflck->lock); + vfio_fsl_mc_regions_cleanup(vdev); - if (!(--vdev->refcnt)) { - struct fsl_mc_device *mc_dev = vdev->mc_dev; - struct device *cont_dev = fsl_mc_cont_dev(&mc_dev->dev); - struct fsl_mc_device *mc_cont = to_fsl_mc_device(cont_dev); + /* reset the device before cleaning up the interrupts */ + ret = dprc_reset_container(mc_cont->mc_io, 0, mc_cont->mc_handle, + 
mc_cont->obj_desc.id, + DPRC_RESET_OPTION_NON_RECURSIVE); - vfio_fsl_mc_regions_cleanup(vdev); + if (WARN_ON(ret)) + dev_warn(&mc_cont->dev, + "VFIO_FLS_MC: reset device has failed (%d)\n", ret); - /* reset the device before cleaning up the interrupts */ - ret = dprc_reset_container(mc_cont->mc_io, 0, - mc_cont->mc_handle, - mc_cont->obj_desc.id, - DPRC_RESET_OPTION_NON_RECURSIVE); + vfio_fsl_mc_irqs_cleanup(vdev); - if (ret) { - dev_warn(&mc_cont->dev, "VFIO_FLS_MC: reset device has failed (%d)\n", - ret); - WARN_ON(1); - } - - vfio_fsl_mc_irqs_cleanup(vdev); - - fsl_mc_cleanup_irq_pool(mc_cont); - } - - mutex_unlock(&vdev->reflck->lock); + fsl_mc_cleanup_irq_pool(mc_cont); } static long vfio_fsl_mc_ioctl(struct vfio_device *core_vdev, @@ -504,8 +405,8 @@ static int vfio_fsl_mc_mmap(struct vfio_device *core_vdev, static const struct vfio_device_ops vfio_fsl_mc_ops = { .name = "vfio-fsl-mc", - .open = vfio_fsl_mc_open, - .release = vfio_fsl_mc_release, + .open_device = vfio_fsl_mc_open_device, + .close_device = vfio_fsl_mc_close_device, .ioctl = vfio_fsl_mc_ioctl, .read = vfio_fsl_mc_read, .write = vfio_fsl_mc_write, @@ -625,13 +526,16 @@ static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev) vdev->mc_dev = mc_dev; mutex_init(&vdev->igate); - ret = vfio_fsl_mc_reflck_attach(vdev); + if (is_fsl_mc_bus_dprc(mc_dev)) + ret = vfio_assign_device_set(&vdev->vdev, &mc_dev->dev); + else + ret = vfio_assign_device_set(&vdev->vdev, mc_dev->dev.parent); if (ret) goto out_uninit; ret = vfio_fsl_mc_init_device(vdev); if (ret) - goto out_reflck; + goto out_uninit; ret = vfio_register_group_dev(&vdev->vdev); if (ret) { @@ -639,12 +543,6 @@ static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev) goto out_device; } - /* - * This triggers recursion into vfio_fsl_mc_probe() on another device - * and the vfio_fsl_mc_reflck_attach() must succeed, which relies on the - * vfio_add_group_dev() above. 
It has no impact on this vdev, so it is - * safe to be after the vfio device is made live. - */ ret = vfio_fsl_mc_scan_container(mc_dev); if (ret) goto out_group_dev; @@ -655,8 +553,6 @@ out_group_dev: vfio_unregister_group_dev(&vdev->vdev); out_device: vfio_fsl_uninit_device(vdev); -out_reflck: - vfio_fsl_mc_reflck_put(vdev->reflck); out_uninit: vfio_uninit_group_dev(&vdev->vdev); kfree(vdev); @@ -675,7 +571,7 @@ static int vfio_fsl_mc_remove(struct fsl_mc_device *mc_dev) dprc_remove_devices(mc_dev, NULL, 0); vfio_fsl_uninit_device(vdev); - vfio_fsl_mc_reflck_put(vdev->reflck); + vfio_uninit_group_dev(&vdev->vdev); kfree(vdev); vfio_iommu_group_put(mc_dev->dev.iommu_group, dev); diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c index 0d9f3002df7f..77e584093a23 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c @@ -120,7 +120,7 @@ static int vfio_fsl_mc_set_irq_trigger(struct vfio_fsl_mc_device *vdev, if (start != 0 || count != 1) return -EINVAL; - mutex_lock(&vdev->reflck->lock); + mutex_lock(&vdev->vdev.dev_set->lock); ret = fsl_mc_populate_irq_pool(mc_cont, FSL_MC_IRQ_POOL_MAX_TOTAL_IRQS); if (ret) @@ -129,7 +129,7 @@ static int vfio_fsl_mc_set_irq_trigger(struct vfio_fsl_mc_device *vdev, ret = vfio_fsl_mc_irqs_allocate(vdev); if (ret) goto unlock; - mutex_unlock(&vdev->reflck->lock); + mutex_unlock(&vdev->vdev.dev_set->lock); if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { s32 fd = *(s32 *)data; @@ -154,7 +154,7 @@ static int vfio_fsl_mc_set_irq_trigger(struct vfio_fsl_mc_device *vdev, return 0; unlock: - mutex_unlock(&vdev->reflck->lock); + mutex_unlock(&vdev->vdev.dev_set->lock); return ret; } diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h index 89700e00e77d..4ad63ececb91 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h @@ -22,11 +22,6 @@ struct vfio_fsl_mc_irq { char *name; 
}; -struct vfio_fsl_mc_reflck { - struct kref kref; - struct mutex lock; -}; - struct vfio_fsl_mc_region { u32 flags; u32 type; @@ -39,9 +34,7 @@ struct vfio_fsl_mc_device { struct vfio_device vdev; struct fsl_mc_device *mc_dev; struct notifier_block nb; - int refcnt; struct vfio_fsl_mc_region *regions; - struct vfio_fsl_mc_reflck *reflck; struct mutex igate; struct vfio_fsl_mc_irq *mc_irqs; }; From ab7e5e34a9f661f5649f48d4531c4a75713ff7cf Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 5 Aug 2021 22:19:03 -0300 Subject: [PATCH 11/37] vfio/platform: Use open_device() instead of open coding a refcnt scheme Platform simply wants to run some code when the device is first opened/last closed. Use the core framework and locking for this. Aside from removing a bit of code this narrows the locking scope from a global lock. Reviewed-by: Cornelia Huck Reviewed-by: Eric Auger Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/7-v4-9ea22c5e6afb+1adf-vfio_reflck_jgg@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/platform/vfio_platform_common.c | 95 ++++++++----------- drivers/vfio/platform/vfio_platform_private.h | 1 - 2 files changed, 40 insertions(+), 56 deletions(-) diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index bdde8605178c..6af7ce7d619c 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -218,65 +218,52 @@ static int vfio_platform_call_reset(struct vfio_platform_device *vdev, return -EINVAL; } -static void vfio_platform_release(struct vfio_device *core_vdev) -{ - struct vfio_platform_device *vdev = - container_of(core_vdev, struct vfio_platform_device, vdev); - - mutex_lock(&driver_lock); - - if (!(--vdev->refcnt)) { - const char *extra_dbg = NULL; - int ret; - - ret = vfio_platform_call_reset(vdev, &extra_dbg); - if (ret && vdev->reset_required) { - 
dev_warn(vdev->device, "reset driver is required and reset call failed in release (%d) %s\n", - ret, extra_dbg ? extra_dbg : ""); - WARN_ON(1); - } - pm_runtime_put(vdev->device); - vfio_platform_regions_cleanup(vdev); - vfio_platform_irq_cleanup(vdev); - } - - mutex_unlock(&driver_lock); -} - -static int vfio_platform_open(struct vfio_device *core_vdev) +static void vfio_platform_close_device(struct vfio_device *core_vdev) { struct vfio_platform_device *vdev = container_of(core_vdev, struct vfio_platform_device, vdev); + const char *extra_dbg = NULL; int ret; - mutex_lock(&driver_lock); - - if (!vdev->refcnt) { - const char *extra_dbg = NULL; - - ret = vfio_platform_regions_init(vdev); - if (ret) - goto err_reg; - - ret = vfio_platform_irq_init(vdev); - if (ret) - goto err_irq; - - ret = pm_runtime_get_sync(vdev->device); - if (ret < 0) - goto err_rst; - - ret = vfio_platform_call_reset(vdev, &extra_dbg); - if (ret && vdev->reset_required) { - dev_warn(vdev->device, "reset driver is required and reset call failed in open (%d) %s\n", - ret, extra_dbg ? extra_dbg : ""); - goto err_rst; - } + ret = vfio_platform_call_reset(vdev, &extra_dbg); + if (WARN_ON(ret && vdev->reset_required)) { + dev_warn( + vdev->device, + "reset driver is required and reset call failed in release (%d) %s\n", + ret, extra_dbg ? 
extra_dbg : ""); } + pm_runtime_put(vdev->device); + vfio_platform_regions_cleanup(vdev); + vfio_platform_irq_cleanup(vdev); +} - vdev->refcnt++; +static int vfio_platform_open_device(struct vfio_device *core_vdev) +{ + struct vfio_platform_device *vdev = + container_of(core_vdev, struct vfio_platform_device, vdev); + const char *extra_dbg = NULL; + int ret; - mutex_unlock(&driver_lock); + ret = vfio_platform_regions_init(vdev); + if (ret) + return ret; + + ret = vfio_platform_irq_init(vdev); + if (ret) + goto err_irq; + + ret = pm_runtime_get_sync(vdev->device); + if (ret < 0) + goto err_rst; + + ret = vfio_platform_call_reset(vdev, &extra_dbg); + if (ret && vdev->reset_required) { + dev_warn( + vdev->device, + "reset driver is required and reset call failed in open (%d) %s\n", + ret, extra_dbg ? extra_dbg : ""); + goto err_rst; + } return 0; err_rst: @@ -284,8 +271,6 @@ err_rst: vfio_platform_irq_cleanup(vdev); err_irq: vfio_platform_regions_cleanup(vdev); -err_reg: - mutex_unlock(&driver_lock); return ret; } @@ -616,8 +601,8 @@ static int vfio_platform_mmap(struct vfio_device *core_vdev, struct vm_area_stru static const struct vfio_device_ops vfio_platform_ops = { .name = "vfio-platform", - .open = vfio_platform_open, - .release = vfio_platform_release, + .open_device = vfio_platform_open_device, + .close_device = vfio_platform_close_device, .ioctl = vfio_platform_ioctl, .read = vfio_platform_read, .write = vfio_platform_write, diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h index dfb834c13659..520d2a8e8375 100644 --- a/drivers/vfio/platform/vfio_platform_private.h +++ b/drivers/vfio/platform/vfio_platform_private.h @@ -48,7 +48,6 @@ struct vfio_platform_device { u32 num_regions; struct vfio_platform_irq *irqs; u32 num_irqs; - int refcnt; struct mutex igate; const char *compat; const char *acpihid; From 2cd8b14aaa667f5c6b7918e8d769872fa9acb0ac Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 5 Aug 
2021 22:19:04 -0300 Subject: [PATCH 12/37] vfio/pci: Move to the device set infrastructure PCI wants to have the usual open/close_device() logic with the slight twist that the open/close_device() must be done under a singleton lock shared by all of the vfio_devices that are in the PCI "reset group". The reset group, and thus the device set, is determined by what devices pci_reset_bus() touches, which is either the entire bus or only the slot. Rely on the core code to do everything reflck was doing and delete reflck entirely. Signed-off-by: Yishai Hadas Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe Reviewed-by: Cornelia Huck Link: https://lore.kernel.org/r/8-v4-9ea22c5e6afb+1adf-vfio_reflck_jgg@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 162 +++++++--------------------- drivers/vfio/pci/vfio_pci_private.h | 7 -- 2 files changed, 37 insertions(+), 132 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index fab3715d60d4..5d6db93d6c68 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -530,53 +530,40 @@ static void vfio_pci_vf_token_user_add(struct vfio_pci_device *vdev, int val) vfio_device_put(&pf_vdev->vdev); } -static void vfio_pci_release(struct vfio_device *core_vdev) +static void vfio_pci_close_device(struct vfio_device *core_vdev) { struct vfio_pci_device *vdev = container_of(core_vdev, struct vfio_pci_device, vdev); - mutex_lock(&vdev->reflck->lock); + vfio_pci_vf_token_user_add(vdev, -1); + vfio_spapr_pci_eeh_release(vdev->pdev); + vfio_pci_disable(vdev); - if (!(--vdev->refcnt)) { - vfio_pci_vf_token_user_add(vdev, -1); - vfio_spapr_pci_eeh_release(vdev->pdev); - vfio_pci_disable(vdev); - - mutex_lock(&vdev->igate); - if (vdev->err_trigger) { - eventfd_ctx_put(vdev->err_trigger); - vdev->err_trigger = NULL; - } - if (vdev->req_trigger) { - eventfd_ctx_put(vdev->req_trigger); - vdev->req_trigger = NULL; - } - mutex_unlock(&vdev->igate); + 
mutex_lock(&vdev->igate); + if (vdev->err_trigger) { + eventfd_ctx_put(vdev->err_trigger); + vdev->err_trigger = NULL; } - - mutex_unlock(&vdev->reflck->lock); + if (vdev->req_trigger) { + eventfd_ctx_put(vdev->req_trigger); + vdev->req_trigger = NULL; + } + mutex_unlock(&vdev->igate); } -static int vfio_pci_open(struct vfio_device *core_vdev) +static int vfio_pci_open_device(struct vfio_device *core_vdev) { struct vfio_pci_device *vdev = container_of(core_vdev, struct vfio_pci_device, vdev); int ret = 0; - mutex_lock(&vdev->reflck->lock); + ret = vfio_pci_enable(vdev); + if (ret) + return ret; - if (!vdev->refcnt) { - ret = vfio_pci_enable(vdev); - if (ret) - goto error; - - vfio_spapr_pci_eeh_open(vdev->pdev); - vfio_pci_vf_token_user_add(vdev, 1); - } - vdev->refcnt++; -error: - mutex_unlock(&vdev->reflck->lock); - return ret; + vfio_spapr_pci_eeh_open(vdev->pdev); + vfio_pci_vf_token_user_add(vdev, 1); + return 0; } static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type) @@ -1870,8 +1857,8 @@ static int vfio_pci_match(struct vfio_device *core_vdev, char *buf) static const struct vfio_device_ops vfio_pci_ops = { .name = "vfio-pci", - .open = vfio_pci_open, - .release = vfio_pci_release, + .open_device = vfio_pci_open_device, + .close_device = vfio_pci_close_device, .ioctl = vfio_pci_ioctl, .read = vfio_pci_read, .write = vfio_pci_write, @@ -1880,9 +1867,6 @@ static const struct vfio_device_ops vfio_pci_ops = { .match = vfio_pci_match, }; -static int vfio_pci_reflck_attach(struct vfio_pci_device *vdev); -static void vfio_pci_reflck_put(struct vfio_pci_reflck *reflck); - static int vfio_pci_bus_notifier(struct notifier_block *nb, unsigned long action, void *data) { @@ -2020,12 +2004,23 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) INIT_LIST_HEAD(&vdev->vma_list); init_rwsem(&vdev->memory_lock); - ret = vfio_pci_reflck_attach(vdev); + if (pci_is_root_bus(pdev->bus)) { + ret = 
vfio_assign_device_set(&vdev->vdev, vdev); + } else if (!pci_probe_reset_slot(pdev->slot)) { + ret = vfio_assign_device_set(&vdev->vdev, pdev->slot); + } else { + /* + * If there is no slot reset support for this device, the whole + * bus needs to be grouped together to support bus-wide resets. + */ + ret = vfio_assign_device_set(&vdev->vdev, pdev->bus); + } + if (ret) goto out_uninit; ret = vfio_pci_vf_init(vdev); if (ret) - goto out_reflck; + goto out_uninit; ret = vfio_pci_vga_init(vdev); if (ret) goto out_vf; @@ -2057,8 +2052,6 @@ out_power: vfio_pci_set_power_state(vdev, PCI_D0); out_vf: vfio_pci_vf_uninit(vdev); -out_reflck: - vfio_pci_reflck_put(vdev->reflck); out_uninit: vfio_uninit_group_dev(&vdev->vdev); kfree(vdev->pm_save); @@ -2077,7 +2070,6 @@ static void vfio_pci_remove(struct pci_dev *pdev) vfio_unregister_group_dev(&vdev->vdev); vfio_pci_vf_uninit(vdev); - vfio_pci_reflck_put(vdev->reflck); vfio_uninit_group_dev(&vdev->vdev); vfio_pci_vga_uninit(vdev); @@ -2153,86 +2145,6 @@ static struct pci_driver vfio_pci_driver = { .err_handler = &vfio_err_handlers, }; -static DEFINE_MUTEX(reflck_lock); - -static struct vfio_pci_reflck *vfio_pci_reflck_alloc(void) -{ - struct vfio_pci_reflck *reflck; - - reflck = kzalloc(sizeof(*reflck), GFP_KERNEL); - if (!reflck) - return ERR_PTR(-ENOMEM); - - kref_init(&reflck->kref); - mutex_init(&reflck->lock); - - return reflck; -} - -static void vfio_pci_reflck_get(struct vfio_pci_reflck *reflck) -{ - kref_get(&reflck->kref); -} - -static int vfio_pci_reflck_find(struct pci_dev *pdev, void *data) -{ - struct vfio_pci_reflck **preflck = data; - struct vfio_device *device; - struct vfio_pci_device *vdev; - - device = vfio_device_get_from_dev(&pdev->dev); - if (!device) - return 0; - - if (pci_dev_driver(pdev) != &vfio_pci_driver) { - vfio_device_put(device); - return 0; - } - - vdev = container_of(device, struct vfio_pci_device, vdev); - - if (vdev->reflck) { - vfio_pci_reflck_get(vdev->reflck); - *preflck = vdev->reflck; 
- vfio_device_put(device); - return 1; - } - - vfio_device_put(device); - return 0; -} - -static int vfio_pci_reflck_attach(struct vfio_pci_device *vdev) -{ - bool slot = !pci_probe_reset_slot(vdev->pdev->slot); - - mutex_lock(&reflck_lock); - - if (pci_is_root_bus(vdev->pdev->bus) || - vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_reflck_find, - &vdev->reflck, slot) <= 0) - vdev->reflck = vfio_pci_reflck_alloc(); - - mutex_unlock(&reflck_lock); - - return PTR_ERR_OR_ZERO(vdev->reflck); -} - -static void vfio_pci_reflck_release(struct kref *kref) -{ - struct vfio_pci_reflck *reflck = container_of(kref, - struct vfio_pci_reflck, - kref); - - kfree(reflck); - mutex_unlock(&reflck_lock); -} - -static void vfio_pci_reflck_put(struct vfio_pci_reflck *reflck) -{ - kref_put_mutex(&reflck->kref, vfio_pci_reflck_release, &reflck_lock); -} - static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data) { struct vfio_devices *devs = data; @@ -2254,7 +2166,7 @@ static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data) vdev = container_of(device, struct vfio_pci_device, vdev); /* Fault if the device is not unused */ - if (vdev->refcnt) { + if (device->open_count) { vfio_device_put(device); return -EBUSY; } @@ -2303,7 +2215,7 @@ static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data) * - At least one of the affected devices is marked dirty via * needs_reset (such as by lack of FLR support) * Then attempt to perform that bus or slot reset. Callers are required - * to hold vdev->reflck->lock, protecting the bus/slot reset group from + * to hold vdev->dev_set->lock, protecting the bus/slot reset group from * concurrent opens. A vfio_device reference is acquired for each device * to prevent unbinds during the reset operation. 
* diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index bbc56c857ef0..70414b6c904d 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -83,11 +83,6 @@ struct vfio_pci_dummy_resource { struct list_head res_next; }; -struct vfio_pci_reflck { - struct kref kref; - struct mutex lock; -}; - struct vfio_pci_vf_token { struct mutex lock; uuid_t uuid; @@ -130,8 +125,6 @@ struct vfio_pci_device { bool needs_pm_restore; struct pci_saved_state *pci_saved_state; struct pci_saved_state *pm_save; - struct vfio_pci_reflck *reflck; - int refcnt; int ioeventfds_nr; struct eventfd_ctx *err_trigger; struct eventfd_ctx *req_trigger; From a882c16a2b7ef6e0ab3f0d1e41345b667893cbfd Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 5 Aug 2021 22:19:05 -0300 Subject: [PATCH 13/37] vfio/pci: Change vfio_pci_try_bus_reset() to use the dev_set vfio_pci_try_bus_reset() is triggering a reset of the entire dev_set if any device within it has accumulated a needs_reset. This reset can only be done once all of the drivers operating the PCI devices to be reset are in a known safe state. Make this clearer by directly operating on the dev_set instead of the vfio_pci_device. Rename the function to vfio_pci_dev_set_try_reset(). Use the device list inside the dev_set to check that all drivers are in a safe state instead of working backwards from the pci_device. The dev_set->lock directly prevents devices from joining/leaving the set, or changing their state, which further implies the pci_device cannot change drivers or that the vfio_device be freed, eliminating the need for get/put's. If a pci_device to be reset is not in the dev_set then the reset cannot be used as we can't know what the state of that driver is. Directly measure this by checking that every pci_device is in the dev_set - which effectively proves that VFIO drivers are attached to everything. 
Remove the odd interaction around vfio_pci_set_power_state() - have the only caller avoid its redundant vfio_pci_set_power_state() instead of avoiding it inside vfio_pci_dev_set_try_reset(). This restructuring corrects a call to pci_dev_driver() without holding the device_lock() and removes a hard wiring to &vfio_pci_driver. Signed-off-by: Jason Gunthorpe Reviewed-by: Christoph Hellwig Reviewed-by: Cornelia Huck Link: https://lore.kernel.org/r/9-v4-9ea22c5e6afb+1adf-vfio_reflck_jgg@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 182 +++++++++++++++++------------------- 1 file changed, 86 insertions(+), 96 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 5d6db93d6c68..0147f04c91b2 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -223,7 +223,7 @@ no_mmap: } } -static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev); +static bool vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set); static void vfio_pci_disable(struct vfio_pci_device *vdev); static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data); @@ -404,6 +404,9 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) struct vfio_pci_ioeventfd *ioeventfd, *ioeventfd_tmp; int i, bar; + /* For needs_reset */ + lockdep_assert_held(&vdev->vdev.dev_set->lock); + /* Stop the device from further DMA */ pci_clear_master(pdev); @@ -487,9 +490,7 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) out: pci_disable_device(pdev); - vfio_pci_try_bus_reset(vdev); - - if (!disable_idle_d3) + if (!vfio_pci_dev_set_try_reset(vdev->vdev.dev_set) && !disable_idle_d3) vfio_pci_set_power_state(vdev, PCI_D3hot); } @@ -2145,36 +2146,6 @@ static struct pci_driver vfio_pci_driver = { .err_handler = &vfio_err_handlers, }; -static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data) -{ - struct vfio_devices *devs = data; - struct vfio_device *device; - struct vfio_pci_device *vdev; - - 
if (devs->cur_index == devs->max_index) - return -ENOSPC; - - device = vfio_device_get_from_dev(&pdev->dev); - if (!device) - return -EINVAL; - - if (pci_dev_driver(pdev) != &vfio_pci_driver) { - vfio_device_put(device); - return -EBUSY; - } - - vdev = container_of(device, struct vfio_pci_device, vdev); - - /* Fault if the device is not unused */ - if (device->open_count) { - vfio_device_put(device); - return -EBUSY; - } - - devs->devices[devs->cur_index++] = vdev; - return 0; -} - static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data) { struct vfio_devices *devs = data; @@ -2208,79 +2179,98 @@ static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data) return 0; } +static int vfio_pci_is_device_in_set(struct pci_dev *pdev, void *data) +{ + struct vfio_device_set *dev_set = data; + struct vfio_device *cur; + + list_for_each_entry(cur, &dev_set->device_list, dev_set_list) + if (cur->dev == &pdev->dev) + return 0; + return -EBUSY; +} + /* - * If a bus or slot reset is available for the provided device and: + * vfio-core considers a group to be viable and will create a vfio_device even + * if some devices are bound to drivers like pci-stub or pcieport. Here we + * require all PCI devices to be inside our dev_set since that ensures they stay + * put and that every driver controlling the device can co-ordinate with the + * device reset. + * + * Returns the pci_dev to pass to pci_reset_bus() if every PCI device to be + * reset is inside the dev_set, and pci_reset_bus() can succeed. NULL otherwise. + */ +static struct pci_dev * +vfio_pci_dev_set_resettable(struct vfio_device_set *dev_set) +{ + struct pci_dev *pdev; + + lockdep_assert_held(&dev_set->lock); + + /* + * By definition all PCI devices in the dev_set share the same PCI + * reset, so any pci_dev will have the same outcomes for + * pci_probe_reset_*() and pci_reset_bus(). 
+ */ + pdev = list_first_entry(&dev_set->device_list, struct vfio_pci_device, + vdev.dev_set_list)->pdev; + + /* pci_reset_bus() is supported */ + if (pci_probe_reset_slot(pdev->slot) && pci_probe_reset_bus(pdev->bus)) + return NULL; + + if (vfio_pci_for_each_slot_or_bus(pdev, vfio_pci_is_device_in_set, + dev_set, + !pci_probe_reset_slot(pdev->slot))) + return NULL; + return pdev; +} + +static bool vfio_pci_dev_set_needs_reset(struct vfio_device_set *dev_set) +{ + struct vfio_pci_device *cur; + bool needs_reset = false; + + list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) { + /* No VFIO device in the set can have an open device FD */ + if (cur->vdev.open_count) + return false; + needs_reset |= cur->needs_reset; + } + return needs_reset; +} + +/* + * If a bus or slot reset is available for the provided dev_set and: * - All of the devices affected by that bus or slot reset are unused - * (!refcnt) * - At least one of the affected devices is marked dirty via * needs_reset (such as by lack of FLR support) - * Then attempt to perform that bus or slot reset. Callers are required - * to hold vdev->dev_set->lock, protecting the bus/slot reset group from - * concurrent opens. A vfio_device reference is acquired for each device - * to prevent unbinds during the reset operation. - * - * NB: vfio-core considers a group to be viable even if some devices are - * bound to drivers like pci-stub or pcieport. Here we require all devices - * to be bound to vfio_pci since that's the only way we can be sure they - * stay put. + * Then attempt to perform that bus or slot reset. + * Returns true if the dev_set was reset. 
*/ -static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev) +static bool vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set) { - struct vfio_devices devs = { .cur_index = 0 }; - int i = 0, ret = -EINVAL; - bool slot = false; - struct vfio_pci_device *tmp; + struct vfio_pci_device *cur; + struct pci_dev *pdev; + int ret; - if (!pci_probe_reset_slot(vdev->pdev->slot)) - slot = true; - else if (pci_probe_reset_bus(vdev->pdev->bus)) - return; + if (!vfio_pci_dev_set_needs_reset(dev_set)) + return false; - if (vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_count_devs, - &i, slot) || !i) - return; + pdev = vfio_pci_dev_set_resettable(dev_set); + if (!pdev) + return false; - devs.max_index = i; - devs.devices = kcalloc(i, sizeof(struct vfio_device *), GFP_KERNEL); - if (!devs.devices) - return; + ret = pci_reset_bus(pdev); + if (ret) + return false; - if (vfio_pci_for_each_slot_or_bus(vdev->pdev, - vfio_pci_get_unused_devs, - &devs, slot)) - goto put_devs; - - /* Does at least one need a reset? */ - for (i = 0; i < devs.cur_index; i++) { - tmp = devs.devices[i]; - if (tmp->needs_reset) { - ret = pci_reset_bus(vdev->pdev); - break; - } + list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) { + cur->needs_reset = false; + if (!disable_idle_d3) + vfio_pci_set_power_state(cur, PCI_D3hot); } - -put_devs: - for (i = 0; i < devs.cur_index; i++) { - tmp = devs.devices[i]; - - /* - * If reset was successful, affected devices no longer need - * a reset and we should return all the collateral devices - * to low power. If not successful, we either didn't reset - * the bus or timed out waiting for it, so let's not touch - * the power state. 
- */ - if (!ret) { - tmp->needs_reset = false; - - if (tmp != vdev && !disable_idle_d3) - vfio_pci_set_power_state(tmp, PCI_D3hot); - } - - vfio_device_put(&tmp->vdev); - } - - kfree(devs.devices); + return true; } static void __exit vfio_pci_cleanup(void) From db44c17458fb54880b9a65479e464b64c365a87d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 5 Aug 2021 22:19:06 -0300 Subject: [PATCH 14/37] vfio/pci: Reorganize VFIO_DEVICE_PCI_HOT_RESET to use the device set Like vfio_pci_dev_set_try_reset() this code wants to reset all of the devices in the "reset group" which is the same membership as the device set. Instead of trying to reconstruct the device set from the PCI list go directly from the device set's device list to execute the reset. The same basic structure as vfio_pci_dev_set_try_reset() is used. The 'vfio_devices' struct is replaced with the device set linked list and we simply sweep it multiple times under the lock. This eliminates a memory allocation and get/put traffic and another improperly locked test of pci_dev_driver(). 
Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe Reviewed-by: Cornelia Huck Link: https://lore.kernel.org/r/10-v4-9ea22c5e6afb+1adf-vfio_reflck_jgg@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 213 +++++++++++++++--------------------- 1 file changed, 89 insertions(+), 124 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 0147f04c91b2..a4f44ea52fa3 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -223,9 +223,11 @@ no_mmap: } } +struct vfio_pci_group_info; static bool vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set); static void vfio_pci_disable(struct vfio_pci_device *vdev); -static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data); +static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, + struct vfio_pci_group_info *groups); /* * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND @@ -643,37 +645,11 @@ static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data) return 0; } -struct vfio_pci_group_entry { - struct vfio_group *group; - int id; -}; - struct vfio_pci_group_info { int count; - struct vfio_pci_group_entry *groups; + struct vfio_group **groups; }; -static int vfio_pci_validate_devs(struct pci_dev *pdev, void *data) -{ - struct vfio_pci_group_info *info = data; - struct iommu_group *group; - int id, i; - - group = iommu_group_get(&pdev->dev); - if (!group) - return -EPERM; - - id = iommu_group_id(group); - - for (i = 0; i < info->count; i++) - if (info->groups[i].id == id) - break; - - iommu_group_put(group); - - return (i == info->count) ?
-EINVAL : 0; -} - static bool vfio_pci_dev_below_slot(struct pci_dev *pdev, struct pci_slot *slot) { for (; pdev; pdev = pdev->bus->self) @@ -751,12 +727,6 @@ int vfio_pci_register_dev_region(struct vfio_pci_device *vdev, return 0; } -struct vfio_devices { - struct vfio_pci_device **devices; - int cur_index; - int max_index; -}; - static long vfio_pci_ioctl(struct vfio_device *core_vdev, unsigned int cmd, unsigned long arg) { @@ -1125,11 +1095,10 @@ reset_info_exit: } else if (cmd == VFIO_DEVICE_PCI_HOT_RESET) { struct vfio_pci_hot_reset hdr; int32_t *group_fds; - struct vfio_pci_group_entry *groups; + struct vfio_group **groups; struct vfio_pci_group_info info; - struct vfio_devices devs = { .cur_index = 0 }; bool slot = false; - int i, group_idx, mem_idx = 0, count = 0, ret = 0; + int group_idx, count = 0, ret = 0; minsz = offsetofend(struct vfio_pci_hot_reset, count); @@ -1196,9 +1165,7 @@ reset_info_exit: break; } - groups[group_idx].group = group; - groups[group_idx].id = - vfio_external_user_iommu_id(group); + groups[group_idx] = group; } kfree(group_fds); @@ -1210,64 +1177,11 @@ reset_info_exit: info.count = hdr.count; info.groups = groups; - /* - * Test whether all the affected devices are contained - * by the set of groups provided by the user. - */ - ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, - vfio_pci_validate_devs, - &info, slot); - if (ret) - goto hot_reset_release; - - devs.max_index = count; - devs.devices = kcalloc(count, sizeof(struct vfio_device *), - GFP_KERNEL); - if (!devs.devices) { - ret = -ENOMEM; - goto hot_reset_release; - } - - /* - * We need to get memory_lock for each device, but devices - * can share mmap_lock, therefore we need to zap and hold - * the vma_lock for each device, and only then get each - * memory_lock. 
- */ - ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, - vfio_pci_try_zap_and_vma_lock_cb, - &devs, slot); - if (ret) - goto hot_reset_release; - - for (; mem_idx < devs.cur_index; mem_idx++) { - struct vfio_pci_device *tmp = devs.devices[mem_idx]; - - ret = down_write_trylock(&tmp->memory_lock); - if (!ret) { - ret = -EBUSY; - goto hot_reset_release; - } - mutex_unlock(&tmp->vma_lock); - } - - /* User has access, do the reset */ - ret = pci_reset_bus(vdev->pdev); + ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info); hot_reset_release: - for (i = 0; i < devs.cur_index; i++) { - struct vfio_pci_device *tmp = devs.devices[i]; - - if (i < mem_idx) - up_write(&tmp->memory_lock); - else - mutex_unlock(&tmp->vma_lock); - vfio_device_put(&tmp->vdev); - } - kfree(devs.devices); - for (group_idx--; group_idx >= 0; group_idx--) - vfio_group_put_external_user(groups[group_idx].group); + vfio_group_put_external_user(groups[group_idx]); kfree(groups); return ret; @@ -2146,37 +2060,15 @@ static struct pci_driver vfio_pci_driver = { .err_handler = &vfio_err_handlers, }; -static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data) +static bool vfio_dev_in_groups(struct vfio_pci_device *vdev, + struct vfio_pci_group_info *groups) { - struct vfio_devices *devs = data; - struct vfio_device *device; - struct vfio_pci_device *vdev; + unsigned int i; - if (devs->cur_index == devs->max_index) - return -ENOSPC; - - device = vfio_device_get_from_dev(&pdev->dev); - if (!device) - return -EINVAL; - - if (pci_dev_driver(pdev) != &vfio_pci_driver) { - vfio_device_put(device); - return -EBUSY; - } - - vdev = container_of(device, struct vfio_pci_device, vdev); - - /* - * Locking multiple devices is prone to deadlock, runaway and - * unwind if we hit contention. 
- */ - if (!vfio_pci_zap_and_vma_lock(vdev, true)) { - vfio_device_put(device); - return -EBUSY; - } - - devs->devices[devs->cur_index++] = vdev; - return 0; + for (i = 0; i < groups->count; i++) + if (groups->groups[i] == vdev->vdev.group) + return true; + return false; } static int vfio_pci_is_device_in_set(struct pci_dev *pdev, void *data) @@ -2226,6 +2118,79 @@ vfio_pci_dev_set_resettable(struct vfio_device_set *dev_set) return pdev; } +/* + * We need to get memory_lock for each device, but devices can share mmap_lock, + * therefore we need to zap and hold the vma_lock for each device, and only then + * get each memory_lock. + */ +static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, + struct vfio_pci_group_info *groups) +{ + struct vfio_pci_device *cur_mem; + struct vfio_pci_device *cur_vma; + struct vfio_pci_device *cur; + struct pci_dev *pdev; + bool is_mem = true; + int ret; + + mutex_lock(&dev_set->lock); + cur_mem = list_first_entry(&dev_set->device_list, + struct vfio_pci_device, vdev.dev_set_list); + + pdev = vfio_pci_dev_set_resettable(dev_set); + if (!pdev) { + ret = -EINVAL; + goto err_unlock; + } + + list_for_each_entry(cur_vma, &dev_set->device_list, vdev.dev_set_list) { + /* + * Test whether all the affected devices are contained by the + * set of groups provided by the user. + */ + if (!vfio_dev_in_groups(cur_vma, groups)) { + ret = -EINVAL; + goto err_undo; + } + + /* + * Locking multiple devices is prone to deadlock, runaway and + * unwind if we hit contention. 
+ */ + if (!vfio_pci_zap_and_vma_lock(cur_vma, true)) { + ret = -EBUSY; + goto err_undo; + } + } + cur_vma = NULL; + + list_for_each_entry(cur_mem, &dev_set->device_list, vdev.dev_set_list) { + if (!down_write_trylock(&cur_mem->memory_lock)) { + ret = -EBUSY; + goto err_undo; + } + mutex_unlock(&cur_mem->vma_lock); + } + cur_mem = NULL; + + ret = pci_reset_bus(pdev); + +err_undo: + list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) { + if (cur == cur_mem) + is_mem = false; + if (cur == cur_vma) + break; + if (is_mem) + up_write(&cur->memory_lock); + else + mutex_unlock(&cur->vma_lock); + } +err_unlock: + mutex_unlock(&dev_set->lock); + return ret; +} + static bool vfio_pci_dev_set_needs_reset(struct vfio_device_set *dev_set) { struct vfio_pci_device *cur; From 3cb24827147b75557bddc5b39d63897786935b14 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 5 Aug 2021 22:19:07 -0300 Subject: [PATCH 15/37] vfio/mbochs: Fix close when multiple device FDs are open mbochs_close() iterates over global device state and frees it. Currently this is done every time a device FD is closed, but if multiple device FDs are open this could corrupt other still active FDs. Change this to use close_device() so it only runs on the last close. 
Reviewed-by: Cornelia Huck Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/11-v4-9ea22c5e6afb+1adf-vfio_reflck_jgg@nvidia.com Signed-off-by: Alex Williamson --- samples/vfio-mdev/mbochs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c index 7b2e12fe7082..c313ab4d1f4e 100644 --- a/samples/vfio-mdev/mbochs.c +++ b/samples/vfio-mdev/mbochs.c @@ -1278,7 +1278,7 @@ static long mbochs_ioctl(struct vfio_device *vdev, unsigned int cmd, return -ENOTTY; } -static void mbochs_close(struct vfio_device *vdev) +static void mbochs_close_device(struct vfio_device *vdev) { struct mdev_state *mdev_state = container_of(vdev, struct mdev_state, vdev); @@ -1396,7 +1396,7 @@ static struct attribute_group *mdev_type_groups[] = { }; static const struct vfio_device_ops mbochs_dev_ops = { - .release = mbochs_close, + .close_device = mbochs_close_device, .read = mbochs_read, .write = mbochs_write, .ioctl = mbochs_ioctl, From 9b0d6b7e28a9bbbf4cee0727a299c2107047b1a5 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 5 Aug 2021 22:19:08 -0300 Subject: [PATCH 16/37] vfio/ap,ccw: Fix open/close when multiple device FDs are open The user can open multiple device FDs if it likes, however these open() functions call vfio_register_notifier() on some device global state. Calling vfio_register_notifier() twice will trigger a WARN_ON from notifier_chain_register() and the first close will wrongly delete the notifier and more. Since these really want the new open/close_device() semantics just change the functions over.
Reviewed-by: Cornelia Huck Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/12-v4-9ea22c5e6afb+1adf-vfio_reflck_jgg@nvidia.com Signed-off-by: Alex Williamson --- drivers/s390/cio/vfio_ccw_ops.c | 8 ++++---- drivers/s390/crypto/vfio_ap_ops.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index c57d2a7f0919..7f540ad0b568 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -159,7 +159,7 @@ static int vfio_ccw_mdev_remove(struct mdev_device *mdev) return 0; } -static int vfio_ccw_mdev_open(struct mdev_device *mdev) +static int vfio_ccw_mdev_open_device(struct mdev_device *mdev) { struct vfio_ccw_private *private = dev_get_drvdata(mdev_parent_dev(mdev)); @@ -194,7 +194,7 @@ out_unregister: return ret; } -static void vfio_ccw_mdev_release(struct mdev_device *mdev) +static void vfio_ccw_mdev_close_device(struct mdev_device *mdev) { struct vfio_ccw_private *private = dev_get_drvdata(mdev_parent_dev(mdev)); @@ -638,8 +638,8 @@ static const struct mdev_parent_ops vfio_ccw_mdev_ops = { .supported_type_groups = mdev_type_groups, .create = vfio_ccw_mdev_create, .remove = vfio_ccw_mdev_remove, - .open = vfio_ccw_mdev_open, - .release = vfio_ccw_mdev_release, + .open_device = vfio_ccw_mdev_open_device, + .close_device = vfio_ccw_mdev_close_device, .read = vfio_ccw_mdev_read, .write = vfio_ccw_mdev_write, .ioctl = vfio_ccw_mdev_ioctl, diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 122c85c22469..cee5626fe0a4 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -1315,7 +1315,7 @@ static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev) return rc; } -static int vfio_ap_mdev_open(struct mdev_device *mdev) +static int vfio_ap_mdev_open_device(struct mdev_device *mdev) { struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); unsigned long events; @@ -1348,7 
+1348,7 @@ static int vfio_ap_mdev_open(struct mdev_device *mdev) return ret; } -static void vfio_ap_mdev_release(struct mdev_device *mdev) +static void vfio_ap_mdev_close_device(struct mdev_device *mdev) { struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); @@ -1427,8 +1427,8 @@ static const struct mdev_parent_ops vfio_ap_matrix_ops = { .mdev_attr_groups = vfio_ap_mdev_attr_groups, .create = vfio_ap_mdev_create, .remove = vfio_ap_mdev_remove, - .open = vfio_ap_mdev_open, - .release = vfio_ap_mdev_release, + .open_device = vfio_ap_mdev_open_device, + .close_device = vfio_ap_mdev_close_device, .ioctl = vfio_ap_mdev_ioctl, }; From dd574d9b728d583e30289244be139f82d0de3fb3 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 5 Aug 2021 22:19:09 -0300 Subject: [PATCH 17/37] vfio/gvt: Fix open/close when multiple device FDs are open The user can open multiple device FDs if it likes, however the open function calls vfio_register_notifier() on device global state. Calling vfio_register_notifier() twice will trigger a WARN_ON from notifier_chain_register() and the first close will wrongly delete the notifier and more. Since these really want the new open/close_device() semantics just change the function over. 
Reviewed-by: Zhenyu Wang Reviewed-by: Cornelia Huck Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/13-v4-9ea22c5e6afb+1adf-vfio_reflck_jgg@nvidia.com Signed-off-by: Alex Williamson --- drivers/gpu/drm/i915/gvt/kvmgt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 1ac98f8aba31..7efa386449d1 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -885,7 +885,7 @@ static int intel_vgpu_group_notifier(struct notifier_block *nb, return NOTIFY_OK; } -static int intel_vgpu_open(struct mdev_device *mdev) +static int intel_vgpu_open_device(struct mdev_device *mdev) { struct intel_vgpu *vgpu = mdev_get_drvdata(mdev); struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); @@ -1004,7 +1004,7 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu) vgpu->handle = 0; } -static void intel_vgpu_release(struct mdev_device *mdev) +static void intel_vgpu_close_device(struct mdev_device *mdev) { struct intel_vgpu *vgpu = mdev_get_drvdata(mdev); @@ -1753,8 +1753,8 @@ static struct mdev_parent_ops intel_vgpu_ops = { .create = intel_vgpu_create, .remove = intel_vgpu_remove, - .open = intel_vgpu_open, - .release = intel_vgpu_release, + .open_device = intel_vgpu_open_device, + .close_device = intel_vgpu_close_device, .read = intel_vgpu_read, .write = intel_vgpu_write, From eb24c1007e6852e024dc33b0dd9617b8500a1291 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 5 Aug 2021 22:19:10 -0300 Subject: [PATCH 18/37] vfio: Remove struct vfio_device_ops open/release Nothing uses this anymore, delete it. 
Signed-off-by: Yishai Hadas Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe Reviewed-by: Cornelia Huck Link: https://lore.kernel.org/r/14-v4-9ea22c5e6afb+1adf-vfio_reflck_jgg@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/mdev/vfio_mdev.c | 22 ---------------------- drivers/vfio/vfio.c | 14 +------------- include/linux/mdev.h | 7 ------- include/linux/vfio.h | 4 ---- 4 files changed, 1 insertion(+), 46 deletions(-) diff --git a/drivers/vfio/mdev/vfio_mdev.c b/drivers/vfio/mdev/vfio_mdev.c index e12196ffd487..7a9883048216 100644 --- a/drivers/vfio/mdev/vfio_mdev.c +++ b/drivers/vfio/mdev/vfio_mdev.c @@ -37,26 +37,6 @@ static void vfio_mdev_close_device(struct vfio_device *core_vdev) parent->ops->close_device(mdev); } -static int vfio_mdev_open(struct vfio_device *core_vdev) -{ - struct mdev_device *mdev = to_mdev_device(core_vdev->dev); - struct mdev_parent *parent = mdev->type->parent; - - if (unlikely(!parent->ops->open)) - return 0; - - return parent->ops->open(mdev); -} - -static void vfio_mdev_release(struct vfio_device *core_vdev) -{ - struct mdev_device *mdev = to_mdev_device(core_vdev->dev); - struct mdev_parent *parent = mdev->type->parent; - - if (likely(parent->ops->release)) - parent->ops->release(mdev); -} - static long vfio_mdev_unlocked_ioctl(struct vfio_device *core_vdev, unsigned int cmd, unsigned long arg) { @@ -122,8 +102,6 @@ static const struct vfio_device_ops vfio_mdev_dev_ops = { .name = "vfio-mdev", .open_device = vfio_mdev_open_device, .close_device = vfio_mdev_close_device, - .open = vfio_mdev_open, - .release = vfio_mdev_release, .ioctl = vfio_mdev_unlocked_ioctl, .read = vfio_mdev_read, .write = vfio_mdev_write, diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 9cc17768c425..3c034fe14ccb 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1470,19 +1470,13 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) } mutex_unlock(&device->dev_set->lock); - if 
(device->ops->open) { - ret = device->ops->open(device); - if (ret) - goto err_close_device; - } - /* * We can't use anon_inode_getfd() because we need to modify * the f_mode flags directly to allow more than just ioctls */ fdno = ret = get_unused_fd_flags(O_CLOEXEC); if (ret < 0) - goto err_release; + goto err_close_device; filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops, device, O_RDWR); @@ -1509,9 +1503,6 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) err_fd: put_unused_fd(fdno); -err_release: - if (device->ops->release) - device->ops->release(device); err_close_device: mutex_lock(&device->dev_set->lock); if (device->open_count == 1 && device->ops->close_device) @@ -1659,9 +1650,6 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep) { struct vfio_device *device = filep->private_data; - if (device->ops->release) - device->ops->release(device); - mutex_lock(&device->dev_set->lock); if (!--device->open_count && device->ops->close_device) device->ops->close_device(device); diff --git a/include/linux/mdev.h b/include/linux/mdev.h index cb5b7ed1d7c3..68427e8fadeb 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -72,11 +72,6 @@ struct device *mtype_get_parent_dev(struct mdev_type *mtype); * @mdev: mdev_device device structure which is being * destroyed * Returns integer: success (0) or error (< 0) - * @open: Open mediated device. - * @mdev: mediated device. - * Returns integer: success (0) or error (< 0) - * @release: release mediated device - * @mdev: mediated device. 
* @read: Read emulation callback * @mdev: mediated device structure * @buf: read buffer @@ -113,8 +108,6 @@ struct mdev_parent_ops { int (*remove)(struct mdev_device *mdev); int (*open_device)(struct mdev_device *mdev); void (*close_device)(struct mdev_device *mdev); - int (*open)(struct mdev_device *mdev); - void (*release)(struct mdev_device *mdev); ssize_t (*read)(struct mdev_device *mdev, char __user *buf, size_t count, loff_t *ppos); ssize_t (*write)(struct mdev_device *mdev, const char __user *buf, diff --git a/include/linux/vfio.h b/include/linux/vfio.h index f0e6a72875e4..b53a9557884a 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -46,8 +46,6 @@ struct vfio_device { * * @open_device: Called when the first file descriptor is opened for this device * @close_device: Opposite of open_device - * @open: Called when userspace creates new file descriptor for device - * @release: Called when userspace releases file descriptor for device * @read: Perform read(2) on device file descriptor * @write: Perform write(2) on device file descriptor * @ioctl: Perform ioctl(2) on device file descriptor, supporting VFIO_DEVICE_* @@ -62,8 +60,6 @@ struct vfio_device_ops { char *name; int (*open_device)(struct vfio_device *vdev); void (*close_device)(struct vfio_device *vdev); - int (*open)(struct vfio_device *vdev); - void (*release)(struct vfio_device *vdev); ssize_t (*read)(struct vfio_device *vdev, char __user *buf, size_t count, loff_t *ppos); ssize_t (*write)(struct vfio_device *vdev, const char __user *buf, From ab78130e6e99e74c2c6d5670fa3e7f290e07c2c5 Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Sun, 22 Aug 2021 12:36:43 +0800 Subject: [PATCH 19/37] vfio: platform: reset: Convert to SPDX identifier use SPDX-License-Identifier instead of a verbose license text Signed-off-by: Cai Huoqing Link: https://lore.kernel.org/r/20210822043643.2040-1-caihuoqing@baidu.com Signed-off-by: Alex Williamson --- drivers/vfio/platform/reset/vfio_platform_bcmflexrm.c | 10 
+--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/vfio/platform/reset/vfio_platform_bcmflexrm.c b/drivers/vfio/platform/reset/vfio_platform_bcmflexrm.c index 96064ef8f629..1131ebe4837d 100644 --- a/drivers/vfio/platform/reset/vfio_platform_bcmflexrm.c +++ b/drivers/vfio/platform/reset/vfio_platform_bcmflexrm.c @@ -1,14 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2017 Broadcom - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ /* From 29848a034ac749622f5ef4686f9e48d69254f4dc Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Tue, 24 Aug 2021 08:37:49 +0800 Subject: [PATCH 20/37] vfio-pci/zdev: Remove repeated verbose license text The SPDX and verbose license text are redundant, however in this case the verbose license indicates a GPL v2 only while SPDX specifies v2+. Remove the verbose license and correct SPDX to the more restricted version. Signed-off-by: Cai Huoqing Reviewed-by: Matthew Rosato Link: https://lore.kernel.org/r/20210824003749.1039-1-caihuoqing@baidu.com [aw: commit log] Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_zdev.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_zdev.c b/drivers/vfio/pci/vfio_pci_zdev.c index 7b011b62c766..104fcf6658db 100644 --- a/drivers/vfio/pci/vfio_pci_zdev.c +++ b/drivers/vfio/pci/vfio_pci_zdev.c @@ -1,15 +1,10 @@ -// SPDX-License-Identifier: GPL-2.0+ +// SPDX-License-Identifier: GPL-2.0-only /* * VFIO ZPCI devices support * * Copyright (C) IBM Corp. 2020. All rights reserved. 
* Author(s): Pierre Morel * Matthew Rosato - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * */ #include #include From ffc95d1b8edb80d2dab77f2e8a823c8d93b06419 Mon Sep 17 00:00:00 2001 From: Anthony Yznaga Date: Mon, 23 Aug 2021 09:35:50 -0700 Subject: [PATCH 21/37] vfio/type1: Fix vfio_find_dma_valid return vfio_find_dma_valid is defined to return WAITED on success if it was necessary to wait. However, the loop forgets the WAITED value returned by vfio_wait() and returns 0 in a later iteration. Fix it. Signed-off-by: Anthony Yznaga Reviewed-by: Steve Sistare Link: https://lore.kernel.org/r/1629736550-2388-1-git-send-email-anthony.yznaga@oracle.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 0b4f7c174c7a..0e9217687f5c 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -612,17 +612,17 @@ static int vfio_wait(struct vfio_iommu *iommu) static int vfio_find_dma_valid(struct vfio_iommu *iommu, dma_addr_t start, size_t size, struct vfio_dma **dma_p) { - int ret; + int ret = 0; do { *dma_p = vfio_find_dma(iommu, start, size); if (!*dma_p) - ret = -EINVAL; + return -EINVAL; else if (!(*dma_p)->vaddr_invalid) - ret = 0; + return ret; else ret = vfio_wait(iommu); - } while (ret > 0); + } while (ret == WAITED); return ret; } From 1e753732bda6dcf888ea0b90b2a91ac1c1a0bae9 Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Mon, 23 Aug 2021 17:20:46 -0400 Subject: [PATCH 22/37] s390/vfio-ap: r/w lock for PQAP interception handler function pointer The function pointer to the interception handler for the PQAP instruction can get changed during the interception process. 
Let's add a semaphore to struct kvm_s390_crypto to control read/write access to the function pointer contained therein. The semaphore must be locked for write access by the vfio_ap device driver when notified that the KVM pointer has been set or cleared. It must be locked for read access by the interception framework when the PQAP instruction is intercepted. Signed-off-by: Tony Krowiak Reviewed-by: Jason Gunthorpe Reviewed-by: Christian Borntraeger Link: https://lore.kernel.org/r/20210823212047.1476436-2-akrowiak@linux.ibm.com Signed-off-by: Alex Williamson --- arch/s390/include/asm/kvm_host.h | 8 +++----- arch/s390/kvm/kvm-s390.c | 1 + arch/s390/kvm/priv.c | 15 +++++++++------ drivers/s390/crypto/vfio_ap_ops.c | 23 +++++++++++++++++------ drivers/s390/crypto/vfio_ap_private.h | 2 +- 5 files changed, 31 insertions(+), 18 deletions(-) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 161a9e12bfb8..d681ae462350 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -798,14 +798,12 @@ struct kvm_s390_cpu_model { unsigned short ibc; }; -struct kvm_s390_module_hook { - int (*hook)(struct kvm_vcpu *vcpu); - struct module *owner; -}; +typedef int (*crypto_hook)(struct kvm_vcpu *vcpu); struct kvm_s390_crypto { struct kvm_s390_crypto_cb *crycb; - struct kvm_s390_module_hook *pqap_hook; + struct rw_semaphore pqap_hook_rwsem; + crypto_hook *pqap_hook; __u32 crycbd; __u8 aes_kw; __u8 dea_kw; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4527ac7b5961..5c4f559bcd60 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2630,6 +2630,7 @@ static void kvm_s390_crypto_init(struct kvm *kvm) { kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb; kvm_s390_set_crycb_format(kvm); + init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem); if (!test_kvm_facility(kvm, 76)) return; diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 9928f785c677..53da4ceb16a3 100644 --- 
a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -610,6 +610,7 @@ static int handle_io_inst(struct kvm_vcpu *vcpu) static int handle_pqap(struct kvm_vcpu *vcpu) { struct ap_queue_status status = {}; + crypto_hook pqap_hook; unsigned long reg0; int ret; uint8_t fc; @@ -654,18 +655,20 @@ static int handle_pqap(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); /* - * Verify that the hook callback is registered, lock the owner - * and call the hook. + * If the hook callback is registered, there will be a pointer to the + * hook function pointer in the kvm_s390_crypto structure. Lock the + * owner, retrieve the hook function pointer and call the hook. */ + down_read(&vcpu->kvm->arch.crypto.pqap_hook_rwsem); if (vcpu->kvm->arch.crypto.pqap_hook) { - if (!try_module_get(vcpu->kvm->arch.crypto.pqap_hook->owner)) - return -EOPNOTSUPP; - ret = vcpu->kvm->arch.crypto.pqap_hook->hook(vcpu); - module_put(vcpu->kvm->arch.crypto.pqap_hook->owner); + pqap_hook = *vcpu->kvm->arch.crypto.pqap_hook; + ret = pqap_hook(vcpu); if (!ret && vcpu->run->s.regs.gprs[1] & 0x00ff0000) kvm_s390_set_psw_cc(vcpu, 3); + up_read(&vcpu->kvm->arch.crypto.pqap_hook_rwsem); return ret; } + up_read(&vcpu->kvm->arch.crypto.pqap_hook_rwsem); /* * A vfio_driver must register a hook. * No hook means no driver to enable the SIE CRYCB and no queues. 
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index cee5626fe0a4..439ca7768eb7 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -352,8 +352,7 @@ static int vfio_ap_mdev_create(struct mdev_device *mdev) vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix); init_waitqueue_head(&matrix_mdev->wait_for_kvm); mdev_set_drvdata(mdev, matrix_mdev); - matrix_mdev->pqap_hook.hook = handle_pqap; - matrix_mdev->pqap_hook.owner = THIS_MODULE; + matrix_mdev->pqap_hook = handle_pqap; mutex_lock(&matrix_dev->lock); list_add(&matrix_mdev->node, &matrix_dev->mdev_list); mutex_unlock(&matrix_dev->lock); @@ -1115,15 +1114,20 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev, } kvm_get_kvm(kvm); + matrix_mdev->kvm = kvm; matrix_mdev->kvm_busy = true; mutex_unlock(&matrix_dev->lock); + + down_write(&matrix_mdev->kvm->arch.crypto.pqap_hook_rwsem); + kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook; + up_write(&matrix_mdev->kvm->arch.crypto.pqap_hook_rwsem); + kvm_arch_crypto_set_masks(kvm, matrix_mdev->matrix.apm, matrix_mdev->matrix.aqm, matrix_mdev->matrix.adm); + mutex_lock(&matrix_dev->lock); - kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook; - matrix_mdev->kvm = kvm; matrix_mdev->kvm_busy = false; wake_up_all(&matrix_mdev->wait_for_kvm); } @@ -1189,10 +1193,17 @@ static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev) if (matrix_mdev->kvm) { matrix_mdev->kvm_busy = true; mutex_unlock(&matrix_dev->lock); - kvm_arch_crypto_clear_masks(matrix_mdev->kvm); + + if (matrix_mdev->kvm->arch.crypto.crycbd) { + down_write(&matrix_mdev->kvm->arch.crypto.pqap_hook_rwsem); + matrix_mdev->kvm->arch.crypto.pqap_hook = NULL; + up_write(&matrix_mdev->kvm->arch.crypto.pqap_hook_rwsem); + + kvm_arch_crypto_clear_masks(matrix_mdev->kvm); + } + mutex_lock(&matrix_dev->lock); vfio_ap_mdev_reset_queues(matrix_mdev->mdev); - matrix_mdev->kvm->arch.crypto.pqap_hook = NULL; 
kvm_put_kvm(matrix_mdev->kvm); matrix_mdev->kvm = NULL; matrix_mdev->kvm_busy = false; diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h index f82a6396acae..e12218e5a629 100644 --- a/drivers/s390/crypto/vfio_ap_private.h +++ b/drivers/s390/crypto/vfio_ap_private.h @@ -86,7 +86,7 @@ struct ap_matrix_mdev { bool kvm_busy; wait_queue_head_t wait_for_kvm; struct kvm *kvm; - struct kvm_s390_module_hook pqap_hook; + crypto_hook pqap_hook; struct mdev_device *mdev; }; From 86956e70761b3292156d668e87126844334dd71b Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Mon, 23 Aug 2021 17:20:47 -0400 Subject: [PATCH 23/37] s390/vfio-ap: replace open coded locks for VFIO_GROUP_NOTIFY_SET_KVM notification It was pointed out during an unrelated patch review that locks should not be open coded - i.e., writing the algorithm of a standard lock in a function instead of using a lock from the standard library. The setting and testing of a busy flag and sleeping on a wait_event is the same thing a lock does. The open coded locks are invisible to lockdep, so potential locking problems are not detected. This patch removes the open coded locks used during VFIO_GROUP_NOTIFY_SET_KVM notification. The busy flag and wait queue were introduced to resolve a possible circular locking dependency reported by lockdep when starting a secure execution guest configured with AP adapters and domains. Reversing the order in which the kvm->lock mutex and matrix_dev->lock mutex are locked resolves the issue reported by lockdep, thus enabling the removal of the open coded locks. 
Signed-off-by: Tony Krowiak Acked-by: Halil Pasic Link: https://lore.kernel.org/r/20210823212047.1476436-3-akrowiak@linux.ibm.com Signed-off-by: Alex Williamson --- arch/s390/kvm/kvm-s390.c | 31 +++++- drivers/s390/crypto/vfio_ap_ops.c | 132 ++++++++------------------ drivers/s390/crypto/vfio_ap_private.h | 2 - 3 files changed, 67 insertions(+), 98 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 5c4f559bcd60..efda0615741f 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2559,12 +2559,26 @@ static void kvm_s390_set_crycb_format(struct kvm *kvm) kvm->arch.crypto.crycbd |= CRYCB_FORMAT1; } +/* + * kvm_arch_crypto_set_masks + * + * @kvm: pointer to the target guest's KVM struct containing the crypto masks + * to be set. + * @apm: the mask identifying the accessible AP adapters + * @aqm: the mask identifying the accessible AP domains + * @adm: the mask identifying the accessible AP control domains + * + * Set the masks that identify the adapters, domains and control domains to + * which the KVM guest is granted access. + * + * Note: The kvm->lock mutex must be locked by the caller before invoking this + * function. + */ void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, unsigned long *aqm, unsigned long *adm) { struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb; - mutex_lock(&kvm->lock); kvm_s390_vcpu_block_all(kvm); switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) { @@ -2595,13 +2609,23 @@ void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, /* recreate the shadow crycb for each vcpu */ kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); kvm_s390_vcpu_unblock_all(kvm); - mutex_unlock(&kvm->lock); } EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks); +/* + * kvm_arch_crypto_clear_masks + * + * @kvm: pointer to the target guest's KVM struct containing the crypto masks + * to be cleared. 
+ * + * Clear the masks that identify the adapters, domains and control domains to + * which the KVM guest is granted access. + * + * Note: The kvm->lock mutex must be locked by the caller before invoking this + * function. + */ void kvm_arch_crypto_clear_masks(struct kvm *kvm) { - mutex_lock(&kvm->lock); kvm_s390_vcpu_block_all(kvm); memset(&kvm->arch.crypto.crycb->apcb0, 0, @@ -2613,7 +2637,6 @@ void kvm_arch_crypto_clear_masks(struct kvm *kvm) /* recreate the shadow crycb for each vcpu */ kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); kvm_s390_vcpu_unblock_all(kvm); - mutex_unlock(&kvm->lock); } EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks); diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 439ca7768eb7..c46937de5758 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -294,15 +294,6 @@ static int handle_pqap(struct kvm_vcpu *vcpu) matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook, struct ap_matrix_mdev, pqap_hook); - /* - * If the KVM pointer is in the process of being set, wait until the - * process has completed. 
- */ - wait_event_cmd(matrix_mdev->wait_for_kvm, - !matrix_mdev->kvm_busy, - mutex_unlock(&matrix_dev->lock), - mutex_lock(&matrix_dev->lock)); - /* If the there is no guest using the mdev, there is nothing to do */ if (!matrix_mdev->kvm) goto out_unlock; @@ -350,7 +341,6 @@ static int vfio_ap_mdev_create(struct mdev_device *mdev) matrix_mdev->mdev = mdev; vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix); - init_waitqueue_head(&matrix_mdev->wait_for_kvm); mdev_set_drvdata(mdev, matrix_mdev); matrix_mdev->pqap_hook = handle_pqap; mutex_lock(&matrix_dev->lock); @@ -619,11 +609,8 @@ static ssize_t assign_adapter_store(struct device *dev, mutex_lock(&matrix_dev->lock); - /* - * If the KVM pointer is in flux or the guest is running, disallow - * un-assignment of adapter - */ - if (matrix_mdev->kvm_busy || matrix_mdev->kvm) { + /* If the KVM guest is running, disallow assignment of adapter */ + if (matrix_mdev->kvm) { ret = -EBUSY; goto done; } @@ -692,11 +679,8 @@ static ssize_t unassign_adapter_store(struct device *dev, mutex_lock(&matrix_dev->lock); - /* - * If the KVM pointer is in flux or the guest is running, disallow - * un-assignment of adapter - */ - if (matrix_mdev->kvm_busy || matrix_mdev->kvm) { + /* If the KVM guest is running, disallow unassignment of adapter */ + if (matrix_mdev->kvm) { ret = -EBUSY; goto done; } @@ -782,11 +766,8 @@ static ssize_t assign_domain_store(struct device *dev, mutex_lock(&matrix_dev->lock); - /* - * If the KVM pointer is in flux or the guest is running, disallow - * assignment of domain - */ - if (matrix_mdev->kvm_busy || matrix_mdev->kvm) { + /* If the KVM guest is running, disallow assignment of domain */ + if (matrix_mdev->kvm) { ret = -EBUSY; goto done; } @@ -850,11 +831,8 @@ static ssize_t unassign_domain_store(struct device *dev, mutex_lock(&matrix_dev->lock); - /* - * If the KVM pointer is in flux or the guest is running, disallow - * un-assignment of domain - */ - if (matrix_mdev->kvm_busy || 
matrix_mdev->kvm) { + /* If the KVM guest is running, disallow unassignment of domain */ + if (matrix_mdev->kvm) { ret = -EBUSY; goto done; } @@ -904,11 +882,8 @@ static ssize_t assign_control_domain_store(struct device *dev, mutex_lock(&matrix_dev->lock); - /* - * If the KVM pointer is in flux or the guest is running, disallow - * assignment of control domain. - */ - if (matrix_mdev->kvm_busy || matrix_mdev->kvm) { + /* If the KVM guest is running, disallow assignment of control domain */ + if (matrix_mdev->kvm) { ret = -EBUSY; goto done; } @@ -963,11 +938,8 @@ static ssize_t unassign_control_domain_store(struct device *dev, mutex_lock(&matrix_dev->lock); - /* - * If the KVM pointer is in flux or the guest is running, disallow - * un-assignment of control domain. - */ - if (matrix_mdev->kvm_busy || matrix_mdev->kvm) { + /* If a KVM guest is running, disallow unassignment of control domain */ + if (matrix_mdev->kvm) { ret = -EBUSY; goto done; } @@ -1108,28 +1080,30 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev, struct ap_matrix_mdev *m; if (kvm->arch.crypto.crycbd) { + down_write(&kvm->arch.crypto.pqap_hook_rwsem); + kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook; + up_write(&kvm->arch.crypto.pqap_hook_rwsem); + + mutex_lock(&kvm->lock); + mutex_lock(&matrix_dev->lock); + list_for_each_entry(m, &matrix_dev->mdev_list, node) { - if (m != matrix_mdev && m->kvm == kvm) + if (m != matrix_mdev && m->kvm == kvm) { + mutex_unlock(&kvm->lock); + mutex_unlock(&matrix_dev->lock); return -EPERM; + } } kvm_get_kvm(kvm); matrix_mdev->kvm = kvm; - matrix_mdev->kvm_busy = true; - mutex_unlock(&matrix_dev->lock); - - down_write(&matrix_mdev->kvm->arch.crypto.pqap_hook_rwsem); - kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook; - up_write(&matrix_mdev->kvm->arch.crypto.pqap_hook_rwsem); - kvm_arch_crypto_set_masks(kvm, matrix_mdev->matrix.apm, matrix_mdev->matrix.aqm, matrix_mdev->matrix.adm); - mutex_lock(&matrix_dev->lock); - 
matrix_mdev->kvm_busy = false; - wake_up_all(&matrix_mdev->wait_for_kvm); + mutex_unlock(&kvm->lock); + mutex_unlock(&matrix_dev->lock); } return 0; @@ -1179,35 +1153,24 @@ static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb, * done under the @matrix_mdev->lock. * */ -static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev) +static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev, + struct kvm *kvm) { - /* - * If the KVM pointer is in the process of being set, wait until the - * process has completed. - */ - wait_event_cmd(matrix_mdev->wait_for_kvm, - !matrix_mdev->kvm_busy, - mutex_unlock(&matrix_dev->lock), - mutex_lock(&matrix_dev->lock)); - - if (matrix_mdev->kvm) { - matrix_mdev->kvm_busy = true; - mutex_unlock(&matrix_dev->lock); - - if (matrix_mdev->kvm->arch.crypto.crycbd) { - down_write(&matrix_mdev->kvm->arch.crypto.pqap_hook_rwsem); - matrix_mdev->kvm->arch.crypto.pqap_hook = NULL; - up_write(&matrix_mdev->kvm->arch.crypto.pqap_hook_rwsem); - - kvm_arch_crypto_clear_masks(matrix_mdev->kvm); - } + if (kvm && kvm->arch.crypto.crycbd) { + down_write(&kvm->arch.crypto.pqap_hook_rwsem); + kvm->arch.crypto.pqap_hook = NULL; + up_write(&kvm->arch.crypto.pqap_hook_rwsem); + mutex_lock(&kvm->lock); mutex_lock(&matrix_dev->lock); + + kvm_arch_crypto_clear_masks(kvm); vfio_ap_mdev_reset_queues(matrix_mdev->mdev); - kvm_put_kvm(matrix_mdev->kvm); + kvm_put_kvm(kvm); matrix_mdev->kvm = NULL; - matrix_mdev->kvm_busy = false; - wake_up_all(&matrix_mdev->wait_for_kvm); + + mutex_unlock(&kvm->lock); + mutex_unlock(&matrix_dev->lock); } } @@ -1220,16 +1183,13 @@ static int vfio_ap_mdev_group_notifier(struct notifier_block *nb, if (action != VFIO_GROUP_NOTIFY_SET_KVM) return NOTIFY_OK; - mutex_lock(&matrix_dev->lock); matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier); if (!data) - vfio_ap_mdev_unset_kvm(matrix_mdev); + vfio_ap_mdev_unset_kvm(matrix_mdev, matrix_mdev->kvm); else if 
(vfio_ap_mdev_set_kvm(matrix_mdev, data)) notify_rc = NOTIFY_DONE; - mutex_unlock(&matrix_dev->lock); - return notify_rc; } @@ -1363,14 +1323,11 @@ static void vfio_ap_mdev_close_device(struct mdev_device *mdev) { struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); - mutex_lock(&matrix_dev->lock); - vfio_ap_mdev_unset_kvm(matrix_mdev); - mutex_unlock(&matrix_dev->lock); - vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &matrix_mdev->iommu_notifier); vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &matrix_mdev->group_notifier); + vfio_ap_mdev_unset_kvm(matrix_mdev, matrix_mdev->kvm); module_put(THIS_MODULE); } @@ -1412,15 +1369,6 @@ static ssize_t vfio_ap_mdev_ioctl(struct mdev_device *mdev, break; } - /* - * If the KVM pointer is in the process of being set, wait until - * the process has completed. - */ - wait_event_cmd(matrix_mdev->wait_for_kvm, - !matrix_mdev->kvm_busy, - mutex_unlock(&matrix_dev->lock), - mutex_lock(&matrix_dev->lock)); - ret = vfio_ap_mdev_reset_queues(mdev); break; default: diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h index e12218e5a629..22d2e0ca3ae5 100644 --- a/drivers/s390/crypto/vfio_ap_private.h +++ b/drivers/s390/crypto/vfio_ap_private.h @@ -83,8 +83,6 @@ struct ap_matrix_mdev { struct ap_matrix matrix; struct notifier_block group_notifier; struct notifier_block iommu_notifier; - bool kvm_busy; - wait_queue_head_t wait_for_kvm; struct kvm *kvm; crypto_hook pqap_hook; struct mdev_device *mdev; From eb0feefd4c025b2697464d141f7ff178095f34df Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 23 Aug 2021 11:42:04 -0300 Subject: [PATCH 24/37] vfio/ap_ops: Convert to use vfio_register_group_dev() This is a straightforward conversion: the ap_matrix_mdev is actually serving as the vfio_device, and we can replace all the mdev_get_drvdata() calls with a simple container_of(), or a dev_get_drvdata() for sysfs paths. 
Cc: Alex Williamson Cc: Cornelia Huck Cc: kvm@vger.kernel.org Cc: Christoph Hellwig Reviewed-by: Tony Krowiak Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/0-v4-0203a4ab0596+f7-vfio_ap_jgg@nvidia.com Signed-off-by: Alex Williamson --- drivers/s390/crypto/vfio_ap_ops.c | 155 +++++++++++++++----------- drivers/s390/crypto/vfio_ap_private.h | 2 + 2 files changed, 91 insertions(+), 66 deletions(-) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index c46937de5758..2347808fa3e4 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -24,8 +24,9 @@ #define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough" #define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device" -static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev); +static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev); static struct vfio_ap_queue *vfio_ap_find_queue(int apqn); +static const struct vfio_device_ops vfio_ap_matrix_dev_ops; static int match_apqn(struct device *dev, const void *data) { @@ -326,43 +327,57 @@ static void vfio_ap_matrix_init(struct ap_config_info *info, matrix->adm_max = info->apxa ? 
info->Nd : 15; } -static int vfio_ap_mdev_create(struct mdev_device *mdev) +static int vfio_ap_mdev_probe(struct mdev_device *mdev) { struct ap_matrix_mdev *matrix_mdev; + int ret; if ((atomic_dec_if_positive(&matrix_dev->available_instances) < 0)) return -EPERM; matrix_mdev = kzalloc(sizeof(*matrix_mdev), GFP_KERNEL); if (!matrix_mdev) { - atomic_inc(&matrix_dev->available_instances); - return -ENOMEM; + ret = -ENOMEM; + goto err_dec_available; } + vfio_init_group_dev(&matrix_mdev->vdev, &mdev->dev, + &vfio_ap_matrix_dev_ops); matrix_mdev->mdev = mdev; vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix); - mdev_set_drvdata(mdev, matrix_mdev); matrix_mdev->pqap_hook = handle_pqap; mutex_lock(&matrix_dev->lock); list_add(&matrix_mdev->node, &matrix_dev->mdev_list); mutex_unlock(&matrix_dev->lock); + ret = vfio_register_group_dev(&matrix_mdev->vdev); + if (ret) + goto err_list; + dev_set_drvdata(&mdev->dev, matrix_mdev); return 0; + +err_list: + mutex_lock(&matrix_dev->lock); + list_del(&matrix_mdev->node); + mutex_unlock(&matrix_dev->lock); + kfree(matrix_mdev); +err_dec_available: + atomic_inc(&matrix_dev->available_instances); + return ret; } -static int vfio_ap_mdev_remove(struct mdev_device *mdev) +static void vfio_ap_mdev_remove(struct mdev_device *mdev) { - struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); + struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(&mdev->dev); + + vfio_unregister_group_dev(&matrix_mdev->vdev); mutex_lock(&matrix_dev->lock); - vfio_ap_mdev_reset_queues(mdev); + vfio_ap_mdev_reset_queues(matrix_mdev); list_del(&matrix_mdev->node); kfree(matrix_mdev); - mdev_set_drvdata(mdev, NULL); atomic_inc(&matrix_dev->available_instances); mutex_unlock(&matrix_dev->lock); - - return 0; } static ssize_t name_show(struct mdev_type *mtype, @@ -604,8 +619,7 @@ static ssize_t assign_adapter_store(struct device *dev, { int ret; unsigned long apid; - struct mdev_device *mdev = mdev_from_dev(dev); - struct ap_matrix_mdev *matrix_mdev 
= mdev_get_drvdata(mdev); + struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); mutex_lock(&matrix_dev->lock); @@ -674,8 +688,7 @@ static ssize_t unassign_adapter_store(struct device *dev, { int ret; unsigned long apid; - struct mdev_device *mdev = mdev_from_dev(dev); - struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); + struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); mutex_lock(&matrix_dev->lock); @@ -760,8 +773,7 @@ static ssize_t assign_domain_store(struct device *dev, { int ret; unsigned long apqi; - struct mdev_device *mdev = mdev_from_dev(dev); - struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); + struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); unsigned long max_apqi = matrix_mdev->matrix.aqm_max; mutex_lock(&matrix_dev->lock); @@ -826,8 +838,7 @@ static ssize_t unassign_domain_store(struct device *dev, { int ret; unsigned long apqi; - struct mdev_device *mdev = mdev_from_dev(dev); - struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); + struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); mutex_lock(&matrix_dev->lock); @@ -877,8 +888,7 @@ static ssize_t assign_control_domain_store(struct device *dev, { int ret; unsigned long id; - struct mdev_device *mdev = mdev_from_dev(dev); - struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); + struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); mutex_lock(&matrix_dev->lock); @@ -932,8 +942,7 @@ static ssize_t unassign_control_domain_store(struct device *dev, { int ret; unsigned long domid; - struct mdev_device *mdev = mdev_from_dev(dev); - struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); + struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); unsigned long max_domid = matrix_mdev->matrix.adm_max; mutex_lock(&matrix_dev->lock); @@ -968,8 +977,7 @@ static ssize_t control_domains_show(struct device *dev, int nchars = 0; int n; char *bufpos = buf; - struct mdev_device *mdev = mdev_from_dev(dev); - struct ap_matrix_mdev 
*matrix_mdev = mdev_get_drvdata(mdev); + struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); unsigned long max_domid = matrix_mdev->matrix.adm_max; mutex_lock(&matrix_dev->lock); @@ -987,8 +995,7 @@ static DEVICE_ATTR_RO(control_domains); static ssize_t matrix_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct mdev_device *mdev = mdev_from_dev(dev); - struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); + struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); char *bufpos = buf; unsigned long apid; unsigned long apqi; @@ -1165,7 +1172,7 @@ static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev, mutex_lock(&matrix_dev->lock); kvm_arch_crypto_clear_masks(kvm); - vfio_ap_mdev_reset_queues(matrix_mdev->mdev); + vfio_ap_mdev_reset_queues(matrix_mdev); kvm_put_kvm(kvm); matrix_mdev->kvm = NULL; @@ -1259,13 +1266,12 @@ free_resources: return ret; } -static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev) +static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev) { int ret; int rc = 0; unsigned long apid, apqi; struct vfio_ap_queue *q; - struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, matrix_mdev->matrix.apm_max + 1) { @@ -1286,49 +1292,45 @@ static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev) return rc; } -static int vfio_ap_mdev_open_device(struct mdev_device *mdev) +static int vfio_ap_mdev_open_device(struct vfio_device *vdev) { - struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); + struct ap_matrix_mdev *matrix_mdev = + container_of(vdev, struct ap_matrix_mdev, vdev); unsigned long events; int ret; - - if (!try_module_get(THIS_MODULE)) - return -ENODEV; - matrix_mdev->group_notifier.notifier_call = vfio_ap_mdev_group_notifier; events = VFIO_GROUP_NOTIFY_SET_KVM; - ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, + ret = vfio_register_notifier(vdev->dev, VFIO_GROUP_NOTIFY, &events, 
&matrix_mdev->group_notifier); - if (ret) { - module_put(THIS_MODULE); + if (ret) return ret; - } matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier; events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; - ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, + ret = vfio_register_notifier(vdev->dev, VFIO_IOMMU_NOTIFY, &events, &matrix_mdev->iommu_notifier); - if (!ret) - return ret; + if (ret) + goto out_unregister_group; + return 0; - vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, +out_unregister_group: + vfio_unregister_notifier(vdev->dev, VFIO_GROUP_NOTIFY, &matrix_mdev->group_notifier); - module_put(THIS_MODULE); return ret; } -static void vfio_ap_mdev_close_device(struct mdev_device *mdev) +static void vfio_ap_mdev_close_device(struct vfio_device *vdev) { - struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); + struct ap_matrix_mdev *matrix_mdev = + container_of(vdev, struct ap_matrix_mdev, vdev); - vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, + vfio_unregister_notifier(vdev->dev, VFIO_IOMMU_NOTIFY, &matrix_mdev->iommu_notifier); - vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, + vfio_unregister_notifier(vdev->dev, VFIO_GROUP_NOTIFY, &matrix_mdev->group_notifier); vfio_ap_mdev_unset_kvm(matrix_mdev, matrix_mdev->kvm); - module_put(THIS_MODULE); } static int vfio_ap_mdev_get_device_info(unsigned long arg) @@ -1351,11 +1353,12 @@ static int vfio_ap_mdev_get_device_info(unsigned long arg) return copy_to_user((void __user *)arg, &info, minsz) ? 
-EFAULT : 0; } -static ssize_t vfio_ap_mdev_ioctl(struct mdev_device *mdev, +static ssize_t vfio_ap_mdev_ioctl(struct vfio_device *vdev, unsigned int cmd, unsigned long arg) { + struct ap_matrix_mdev *matrix_mdev = + container_of(vdev, struct ap_matrix_mdev, vdev); int ret; - struct ap_matrix_mdev *matrix_mdev; mutex_lock(&matrix_dev->lock); switch (cmd) { @@ -1363,13 +1366,7 @@ static ssize_t vfio_ap_mdev_ioctl(struct mdev_device *mdev, ret = vfio_ap_mdev_get_device_info(arg); break; case VFIO_DEVICE_RESET: - matrix_mdev = mdev_get_drvdata(mdev); - if (WARN(!matrix_mdev, "Driver data missing from mdev!!")) { - ret = -EINVAL; - break; - } - - ret = vfio_ap_mdev_reset_queues(mdev); + ret = vfio_ap_mdev_reset_queues(matrix_mdev); break; default: ret = -EOPNOTSUPP; @@ -1380,25 +1377,51 @@ static ssize_t vfio_ap_mdev_ioctl(struct mdev_device *mdev, return ret; } +static const struct vfio_device_ops vfio_ap_matrix_dev_ops = { + .open_device = vfio_ap_mdev_open_device, + .close_device = vfio_ap_mdev_close_device, + .ioctl = vfio_ap_mdev_ioctl, +}; + +static struct mdev_driver vfio_ap_matrix_driver = { + .driver = { + .name = "vfio_ap_mdev", + .owner = THIS_MODULE, + .mod_name = KBUILD_MODNAME, + .dev_groups = vfio_ap_mdev_attr_groups, + }, + .probe = vfio_ap_mdev_probe, + .remove = vfio_ap_mdev_remove, +}; + static const struct mdev_parent_ops vfio_ap_matrix_ops = { .owner = THIS_MODULE, + .device_driver = &vfio_ap_matrix_driver, .supported_type_groups = vfio_ap_mdev_type_groups, - .mdev_attr_groups = vfio_ap_mdev_attr_groups, - .create = vfio_ap_mdev_create, - .remove = vfio_ap_mdev_remove, - .open_device = vfio_ap_mdev_open_device, - .close_device = vfio_ap_mdev_close_device, - .ioctl = vfio_ap_mdev_ioctl, }; int vfio_ap_mdev_register(void) { + int ret; + atomic_set(&matrix_dev->available_instances, MAX_ZDEV_ENTRIES_EXT); - return mdev_register_device(&matrix_dev->device, &vfio_ap_matrix_ops); + ret = mdev_register_driver(&vfio_ap_matrix_driver); + if (ret) + return 
ret; + + ret = mdev_register_device(&matrix_dev->device, &vfio_ap_matrix_ops); + if (ret) + goto err_driver; + return 0; + +err_driver: + mdev_unregister_driver(&vfio_ap_matrix_driver); + return ret; } void vfio_ap_mdev_unregister(void) { mdev_unregister_device(&matrix_dev->device); + mdev_unregister_driver(&vfio_ap_matrix_driver); } diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h index 22d2e0ca3ae5..77760e2b546f 100644 --- a/drivers/s390/crypto/vfio_ap_private.h +++ b/drivers/s390/crypto/vfio_ap_private.h @@ -18,6 +18,7 @@ #include #include #include +#include #include "ap_bus.h" @@ -79,6 +80,7 @@ struct ap_matrix { * @kvm: the struct holding guest's state */ struct ap_matrix_mdev { + struct vfio_device vdev; struct list_head node; struct ap_matrix matrix; struct notifier_block group_notifier; From 1cbd70fe37870b938463fd0a4a07e45fc4a3db3c Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 26 Aug 2021 13:39:00 +0300 Subject: [PATCH 25/37] vfio/pci: Rename vfio_pci.c to vfio_pci_core.c This is a preparation patch for separating the vfio_pci driver to a subsystem driver and a generic pci driver. This patch doesn't change any logic. 
Signed-off-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20210826103912.128972-2-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/Makefile | 2 +- drivers/vfio/pci/{vfio_pci.c => vfio_pci_core.c} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename drivers/vfio/pci/{vfio_pci.c => vfio_pci_core.c} (100%) diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile index 3ff42093962f..66a40488e967 100644 --- a/drivers/vfio/pci/Makefile +++ b/drivers/vfio/pci/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only -vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o +vfio-pci-y := vfio_pci_core.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o vfio-pci-$(CONFIG_S390) += vfio_pci_zdev.o diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci_core.c similarity index 100% rename from drivers/vfio/pci/vfio_pci.c rename to drivers/vfio/pci/vfio_pci_core.c From 9a389938695a068a3149c2d21a16c34b63ca002f Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 26 Aug 2021 13:39:01 +0300 Subject: [PATCH 26/37] vfio/pci: Rename vfio_pci_private.h to vfio_pci_core.h This is a preparation patch for separating the vfio_pci driver to a subsystem driver and a generic pci driver. This patch doesn't change any logic. 
Signed-off-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20210826103912.128972-3-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_config.c | 2 +- drivers/vfio/pci/vfio_pci_core.c | 2 +- drivers/vfio/pci/{vfio_pci_private.h => vfio_pci_core.h} | 6 +++--- drivers/vfio/pci/vfio_pci_igd.c | 2 +- drivers/vfio/pci/vfio_pci_intrs.c | 2 +- drivers/vfio/pci/vfio_pci_rdwr.c | 2 +- drivers/vfio/pci/vfio_pci_zdev.c | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) rename drivers/vfio/pci/{vfio_pci_private.h => vfio_pci_core.h} (98%) diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 70e28efbc51f..0bc269c0b03f 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -26,7 +26,7 @@ #include #include -#include "vfio_pci_private.h" +#include "vfio_pci_core.h" /* Fake capability ID for standard config space */ #define PCI_CAP_ID_BASIC 0 diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index a4f44ea52fa3..2a5dca0823c4 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -28,7 +28,7 @@ #include #include -#include "vfio_pci_private.h" +#include "vfio_pci_core.h" #define DRIVER_VERSION "0.2" #define DRIVER_AUTHOR "Alex Williamson " diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_core.h similarity index 98% rename from drivers/vfio/pci/vfio_pci_private.h rename to drivers/vfio/pci/vfio_pci_core.h index 70414b6c904d..ef26e781961d 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_core.h @@ -15,8 +15,8 @@ #include #include -#ifndef VFIO_PCI_PRIVATE_H -#define VFIO_PCI_PRIVATE_H +#ifndef VFIO_PCI_CORE_H +#define VFIO_PCI_CORE_H #define VFIO_PCI_OFFSET_SHIFT 40 @@ -205,4 +205,4 @@ static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev, } #endif -#endif /* VFIO_PCI_PRIVATE_H */ 
+#endif /* VFIO_PCI_CORE_H */ diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c index aa0a29fd2762..d57c409b4033 100644 --- a/drivers/vfio/pci/vfio_pci_igd.c +++ b/drivers/vfio/pci/vfio_pci_igd.c @@ -15,7 +15,7 @@ #include #include -#include "vfio_pci_private.h" +#include "vfio_pci_core.h" #define OPREGION_SIGNATURE "IntelGraphicsMem" #define OPREGION_SIZE (8 * 1024) diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 869dce5f134d..df1e8c8c274c 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -20,7 +20,7 @@ #include #include -#include "vfio_pci_private.h" +#include "vfio_pci_core.h" /* * INTx diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index a0b5fc8e46f4..667e82726e75 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -17,7 +17,7 @@ #include #include -#include "vfio_pci_private.h" +#include "vfio_pci_core.h" #ifdef __LITTLE_ENDIAN #define vfio_ioread64 ioread64 diff --git a/drivers/vfio/pci/vfio_pci_zdev.c b/drivers/vfio/pci/vfio_pci_zdev.c index 7b011b62c766..ecae0c3d95a0 100644 --- a/drivers/vfio/pci/vfio_pci_zdev.c +++ b/drivers/vfio/pci/vfio_pci_zdev.c @@ -19,7 +19,7 @@ #include #include -#include "vfio_pci_private.h" +#include "vfio_pci_core.h" /* * Add the Base PCI Function information to the device info region. From 536475109c82841126ca341ef0f138e7298880c1 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 26 Aug 2021 13:39:02 +0300 Subject: [PATCH 27/37] vfio/pci: Rename vfio_pci_device to vfio_pci_core_device This is a preparation patch for separating the vfio_pci driver to a subsystem driver and a generic pci driver. This patch doesn't change any logic. The new vfio_pci_core_device structure will be the main structure of the core driver and later on vfio_pci_device structure will be the main structure of the generic vfio_pci driver. 
Signed-off-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20210826103912.128972-4-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_config.c | 68 ++++++++--------- drivers/vfio/pci/vfio_pci_core.c | 118 +++++++++++++++-------------- drivers/vfio/pci/vfio_pci_core.h | 52 ++++++------- drivers/vfio/pci/vfio_pci_igd.c | 17 +++-- drivers/vfio/pci/vfio_pci_intrs.c | 40 +++++----- drivers/vfio/pci/vfio_pci_rdwr.c | 16 ++-- drivers/vfio/pci/vfio_pci_zdev.c | 2 +- 7 files changed, 158 insertions(+), 155 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 0bc269c0b03f..1f034f768a27 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -108,9 +108,9 @@ static const u16 pci_ext_cap_length[PCI_EXT_CAP_ID_MAX + 1] = { struct perm_bits { u8 *virt; /* read/write virtual data, not hw */ u8 *write; /* writeable bits */ - int (*readfn)(struct vfio_pci_device *vdev, int pos, int count, + int (*readfn)(struct vfio_pci_core_device *vdev, int pos, int count, struct perm_bits *perm, int offset, __le32 *val); - int (*writefn)(struct vfio_pci_device *vdev, int pos, int count, + int (*writefn)(struct vfio_pci_core_device *vdev, int pos, int count, struct perm_bits *perm, int offset, __le32 val); }; @@ -171,7 +171,7 @@ static int vfio_user_config_write(struct pci_dev *pdev, int offset, return ret; } -static int vfio_default_config_read(struct vfio_pci_device *vdev, int pos, +static int vfio_default_config_read(struct vfio_pci_core_device *vdev, int pos, int count, struct perm_bits *perm, int offset, __le32 *val) { @@ -197,7 +197,7 @@ static int vfio_default_config_read(struct vfio_pci_device *vdev, int pos, return count; } -static int vfio_default_config_write(struct vfio_pci_device *vdev, int pos, +static int vfio_default_config_write(struct vfio_pci_core_device *vdev, int pos, int count, struct perm_bits *perm, 
int offset, __le32 val) { @@ -244,7 +244,7 @@ static int vfio_default_config_write(struct vfio_pci_device *vdev, int pos, } /* Allow direct read from hardware, except for capability next pointer */ -static int vfio_direct_config_read(struct vfio_pci_device *vdev, int pos, +static int vfio_direct_config_read(struct vfio_pci_core_device *vdev, int pos, int count, struct perm_bits *perm, int offset, __le32 *val) { @@ -269,7 +269,7 @@ static int vfio_direct_config_read(struct vfio_pci_device *vdev, int pos, } /* Raw access skips any kind of virtualization */ -static int vfio_raw_config_write(struct vfio_pci_device *vdev, int pos, +static int vfio_raw_config_write(struct vfio_pci_core_device *vdev, int pos, int count, struct perm_bits *perm, int offset, __le32 val) { @@ -282,7 +282,7 @@ static int vfio_raw_config_write(struct vfio_pci_device *vdev, int pos, return count; } -static int vfio_raw_config_read(struct vfio_pci_device *vdev, int pos, +static int vfio_raw_config_read(struct vfio_pci_core_device *vdev, int pos, int count, struct perm_bits *perm, int offset, __le32 *val) { @@ -296,7 +296,7 @@ static int vfio_raw_config_read(struct vfio_pci_device *vdev, int pos, } /* Virt access uses only virtualization */ -static int vfio_virt_config_write(struct vfio_pci_device *vdev, int pos, +static int vfio_virt_config_write(struct vfio_pci_core_device *vdev, int pos, int count, struct perm_bits *perm, int offset, __le32 val) { @@ -304,7 +304,7 @@ static int vfio_virt_config_write(struct vfio_pci_device *vdev, int pos, return count; } -static int vfio_virt_config_read(struct vfio_pci_device *vdev, int pos, +static int vfio_virt_config_read(struct vfio_pci_core_device *vdev, int pos, int count, struct perm_bits *perm, int offset, __le32 *val) { @@ -396,7 +396,7 @@ static inline void p_setd(struct perm_bits *p, int off, u32 virt, u32 write) } /* Caller should hold memory_lock semaphore */ -bool __vfio_pci_memory_enabled(struct vfio_pci_device *vdev) +bool 
__vfio_pci_memory_enabled(struct vfio_pci_core_device *vdev) { struct pci_dev *pdev = vdev->pdev; u16 cmd = le16_to_cpu(*(__le16 *)&vdev->vconfig[PCI_COMMAND]); @@ -413,7 +413,7 @@ bool __vfio_pci_memory_enabled(struct vfio_pci_device *vdev) * Restore the *real* BARs after we detect a FLR or backdoor reset. * (backdoor = some device specific technique that we didn't catch) */ -static void vfio_bar_restore(struct vfio_pci_device *vdev) +static void vfio_bar_restore(struct vfio_pci_core_device *vdev) { struct pci_dev *pdev = vdev->pdev; u32 *rbar = vdev->rbar; @@ -460,7 +460,7 @@ static __le32 vfio_generate_bar_flags(struct pci_dev *pdev, int bar) * Pretend we're hardware and tweak the values of the *virtual* PCI BARs * to reflect the hardware capabilities. This implements BAR sizing. */ -static void vfio_bar_fixup(struct vfio_pci_device *vdev) +static void vfio_bar_fixup(struct vfio_pci_core_device *vdev) { struct pci_dev *pdev = vdev->pdev; int i; @@ -514,7 +514,7 @@ static void vfio_bar_fixup(struct vfio_pci_device *vdev) vdev->bardirty = false; } -static int vfio_basic_config_read(struct vfio_pci_device *vdev, int pos, +static int vfio_basic_config_read(struct vfio_pci_core_device *vdev, int pos, int count, struct perm_bits *perm, int offset, __le32 *val) { @@ -536,7 +536,7 @@ static int vfio_basic_config_read(struct vfio_pci_device *vdev, int pos, } /* Test whether BARs match the value we think they should contain */ -static bool vfio_need_bar_restore(struct vfio_pci_device *vdev) +static bool vfio_need_bar_restore(struct vfio_pci_core_device *vdev) { int i = 0, pos = PCI_BASE_ADDRESS_0, ret; u32 bar; @@ -552,7 +552,7 @@ static bool vfio_need_bar_restore(struct vfio_pci_device *vdev) return false; } -static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos, +static int vfio_basic_config_write(struct vfio_pci_core_device *vdev, int pos, int count, struct perm_bits *perm, int offset, __le32 val) { @@ -692,7 +692,7 @@ static int __init 
init_pci_cap_basic_perm(struct perm_bits *perm) return 0; } -static int vfio_pm_config_write(struct vfio_pci_device *vdev, int pos, +static int vfio_pm_config_write(struct vfio_pci_core_device *vdev, int pos, int count, struct perm_bits *perm, int offset, __le32 val) { @@ -747,7 +747,7 @@ static int __init init_pci_cap_pm_perm(struct perm_bits *perm) return 0; } -static int vfio_vpd_config_write(struct vfio_pci_device *vdev, int pos, +static int vfio_vpd_config_write(struct vfio_pci_core_device *vdev, int pos, int count, struct perm_bits *perm, int offset, __le32 val) { @@ -829,7 +829,7 @@ static int __init init_pci_cap_pcix_perm(struct perm_bits *perm) return 0; } -static int vfio_exp_config_write(struct vfio_pci_device *vdev, int pos, +static int vfio_exp_config_write(struct vfio_pci_core_device *vdev, int pos, int count, struct perm_bits *perm, int offset, __le32 val) { @@ -913,7 +913,7 @@ static int __init init_pci_cap_exp_perm(struct perm_bits *perm) return 0; } -static int vfio_af_config_write(struct vfio_pci_device *vdev, int pos, +static int vfio_af_config_write(struct vfio_pci_core_device *vdev, int pos, int count, struct perm_bits *perm, int offset, __le32 val) { @@ -1072,7 +1072,7 @@ int __init vfio_pci_init_perm_bits(void) return ret; } -static int vfio_find_cap_start(struct vfio_pci_device *vdev, int pos) +static int vfio_find_cap_start(struct vfio_pci_core_device *vdev, int pos) { u8 cap; int base = (pos >= PCI_CFG_SPACE_SIZE) ? 
PCI_CFG_SPACE_SIZE : @@ -1089,7 +1089,7 @@ static int vfio_find_cap_start(struct vfio_pci_device *vdev, int pos) return pos; } -static int vfio_msi_config_read(struct vfio_pci_device *vdev, int pos, +static int vfio_msi_config_read(struct vfio_pci_core_device *vdev, int pos, int count, struct perm_bits *perm, int offset, __le32 *val) { @@ -1109,7 +1109,7 @@ static int vfio_msi_config_read(struct vfio_pci_device *vdev, int pos, return vfio_default_config_read(vdev, pos, count, perm, offset, val); } -static int vfio_msi_config_write(struct vfio_pci_device *vdev, int pos, +static int vfio_msi_config_write(struct vfio_pci_core_device *vdev, int pos, int count, struct perm_bits *perm, int offset, __le32 val) { @@ -1189,7 +1189,7 @@ static int init_pci_cap_msi_perm(struct perm_bits *perm, int len, u16 flags) } /* Determine MSI CAP field length; initialize msi_perms on 1st call per vdev */ -static int vfio_msi_cap_len(struct vfio_pci_device *vdev, u8 pos) +static int vfio_msi_cap_len(struct vfio_pci_core_device *vdev, u8 pos) { struct pci_dev *pdev = vdev->pdev; int len, ret; @@ -1222,7 +1222,7 @@ static int vfio_msi_cap_len(struct vfio_pci_device *vdev, u8 pos) } /* Determine extended capability length for VC (2 & 9) and MFVC */ -static int vfio_vc_cap_len(struct vfio_pci_device *vdev, u16 pos) +static int vfio_vc_cap_len(struct vfio_pci_core_device *vdev, u16 pos) { struct pci_dev *pdev = vdev->pdev; u32 tmp; @@ -1263,7 +1263,7 @@ static int vfio_vc_cap_len(struct vfio_pci_device *vdev, u16 pos) return len; } -static int vfio_cap_len(struct vfio_pci_device *vdev, u8 cap, u8 pos) +static int vfio_cap_len(struct vfio_pci_core_device *vdev, u8 cap, u8 pos) { struct pci_dev *pdev = vdev->pdev; u32 dword; @@ -1338,7 +1338,7 @@ static int vfio_cap_len(struct vfio_pci_device *vdev, u8 cap, u8 pos) return 0; } -static int vfio_ext_cap_len(struct vfio_pci_device *vdev, u16 ecap, u16 epos) +static int vfio_ext_cap_len(struct vfio_pci_core_device *vdev, u16 ecap, u16 epos) { 
struct pci_dev *pdev = vdev->pdev; u8 byte; @@ -1412,7 +1412,7 @@ static int vfio_ext_cap_len(struct vfio_pci_device *vdev, u16 ecap, u16 epos) return 0; } -static int vfio_fill_vconfig_bytes(struct vfio_pci_device *vdev, +static int vfio_fill_vconfig_bytes(struct vfio_pci_core_device *vdev, int offset, int size) { struct pci_dev *pdev = vdev->pdev; @@ -1459,7 +1459,7 @@ static int vfio_fill_vconfig_bytes(struct vfio_pci_device *vdev, return ret; } -static int vfio_cap_init(struct vfio_pci_device *vdev) +static int vfio_cap_init(struct vfio_pci_core_device *vdev) { struct pci_dev *pdev = vdev->pdev; u8 *map = vdev->pci_config_map; @@ -1549,7 +1549,7 @@ static int vfio_cap_init(struct vfio_pci_device *vdev) return 0; } -static int vfio_ecap_init(struct vfio_pci_device *vdev) +static int vfio_ecap_init(struct vfio_pci_core_device *vdev) { struct pci_dev *pdev = vdev->pdev; u8 *map = vdev->pci_config_map; @@ -1669,7 +1669,7 @@ static const struct pci_device_id known_bogus_vf_intx_pin[] = { * for each area requiring emulated bits, but the array of pointers * would be comparable in size (at least for standard config space). */ -int vfio_config_init(struct vfio_pci_device *vdev) +int vfio_config_init(struct vfio_pci_core_device *vdev) { struct pci_dev *pdev = vdev->pdev; u8 *map, *vconfig; @@ -1773,7 +1773,7 @@ out: return pcibios_err_to_errno(ret); } -void vfio_config_free(struct vfio_pci_device *vdev) +void vfio_config_free(struct vfio_pci_core_device *vdev) { kfree(vdev->vconfig); vdev->vconfig = NULL; @@ -1790,7 +1790,7 @@ void vfio_config_free(struct vfio_pci_device *vdev) * Find the remaining number of bytes in a dword that match the given * position. Stop at either the end of the capability or the dword boundary. 
*/ -static size_t vfio_pci_cap_remaining_dword(struct vfio_pci_device *vdev, +static size_t vfio_pci_cap_remaining_dword(struct vfio_pci_core_device *vdev, loff_t pos) { u8 cap = vdev->pci_config_map[pos]; @@ -1802,7 +1802,7 @@ static size_t vfio_pci_cap_remaining_dword(struct vfio_pci_device *vdev, return i; } -static ssize_t vfio_config_do_rw(struct vfio_pci_device *vdev, char __user *buf, +static ssize_t vfio_config_do_rw(struct vfio_pci_core_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite) { struct pci_dev *pdev = vdev->pdev; @@ -1885,7 +1885,7 @@ static ssize_t vfio_config_do_rw(struct vfio_pci_device *vdev, char __user *buf, return ret; } -ssize_t vfio_pci_config_rw(struct vfio_pci_device *vdev, char __user *buf, +ssize_t vfio_pci_config_rw(struct vfio_pci_core_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite) { size_t done = 0; diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 2a5dca0823c4..2729479390a0 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -121,7 +121,7 @@ static bool vfio_pci_is_denylisted(struct pci_dev *pdev) */ static unsigned int vfio_pci_set_vga_decode(void *opaque, bool single_vga) { - struct vfio_pci_device *vdev = opaque; + struct vfio_pci_core_device *vdev = opaque; struct pci_dev *tmp = NULL, *pdev = vdev->pdev; unsigned char max_busnr; unsigned int decodes; @@ -155,7 +155,7 @@ static inline bool vfio_pci_is_vga(struct pci_dev *pdev) return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA; } -static void vfio_pci_probe_mmaps(struct vfio_pci_device *vdev) +static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev) { struct resource *res; int i; @@ -225,7 +225,7 @@ no_mmap: struct vfio_pci_group_info; static bool vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set); -static void vfio_pci_disable(struct vfio_pci_device *vdev); +static void vfio_pci_disable(struct vfio_pci_core_device *vdev); static int 
vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, struct vfio_pci_group_info *groups); @@ -260,7 +260,7 @@ static bool vfio_pci_nointx(struct pci_dev *pdev) return false; } -static void vfio_pci_probe_power_state(struct vfio_pci_device *vdev) +static void vfio_pci_probe_power_state(struct vfio_pci_core_device *vdev) { struct pci_dev *pdev = vdev->pdev; u16 pmcsr; @@ -280,7 +280,7 @@ static void vfio_pci_probe_power_state(struct vfio_pci_device *vdev) * by PM capability emulation and separately from pci_dev internal saved state * to avoid it being overwritten and consumed around other resets. */ -int vfio_pci_set_power_state(struct vfio_pci_device *vdev, pci_power_t state) +int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev, pci_power_t state) { struct pci_dev *pdev = vdev->pdev; bool needs_restore = false, needs_save = false; @@ -311,7 +311,7 @@ int vfio_pci_set_power_state(struct vfio_pci_device *vdev, pci_power_t state) return ret; } -static int vfio_pci_enable(struct vfio_pci_device *vdev) +static int vfio_pci_enable(struct vfio_pci_core_device *vdev) { struct pci_dev *pdev = vdev->pdev; int ret; @@ -399,7 +399,7 @@ disable_exit: return ret; } -static void vfio_pci_disable(struct vfio_pci_device *vdev) +static void vfio_pci_disable(struct vfio_pci_core_device *vdev) { struct pci_dev *pdev = vdev->pdev; struct vfio_pci_dummy_resource *dummy_res, *tmp; @@ -498,7 +498,7 @@ out: static struct pci_driver vfio_pci_driver; -static struct vfio_pci_device *get_pf_vdev(struct vfio_pci_device *vdev) +static struct vfio_pci_core_device *get_pf_vdev(struct vfio_pci_core_device *vdev) { struct pci_dev *physfn = pci_physfn(vdev->pdev); struct vfio_device *pf_dev; @@ -515,12 +515,12 @@ static struct vfio_pci_device *get_pf_vdev(struct vfio_pci_device *vdev) return NULL; } - return container_of(pf_dev, struct vfio_pci_device, vdev); + return container_of(pf_dev, struct vfio_pci_core_device, vdev); } -static void vfio_pci_vf_token_user_add(struct 
vfio_pci_device *vdev, int val) +static void vfio_pci_vf_token_user_add(struct vfio_pci_core_device *vdev, int val) { - struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev); + struct vfio_pci_core_device *pf_vdev = get_pf_vdev(vdev); if (!pf_vdev) return; @@ -535,8 +535,8 @@ static void vfio_pci_vf_token_user_add(struct vfio_pci_device *vdev, int val) static void vfio_pci_close_device(struct vfio_device *core_vdev) { - struct vfio_pci_device *vdev = - container_of(core_vdev, struct vfio_pci_device, vdev); + struct vfio_pci_core_device *vdev = + container_of(core_vdev, struct vfio_pci_core_device, vdev); vfio_pci_vf_token_user_add(vdev, -1); vfio_spapr_pci_eeh_release(vdev->pdev); @@ -556,8 +556,8 @@ static void vfio_pci_close_device(struct vfio_device *core_vdev) static int vfio_pci_open_device(struct vfio_device *core_vdev) { - struct vfio_pci_device *vdev = - container_of(core_vdev, struct vfio_pci_device, vdev); + struct vfio_pci_core_device *vdev = + container_of(core_vdev, struct vfio_pci_core_device, vdev); int ret = 0; ret = vfio_pci_enable(vdev); @@ -569,7 +569,7 @@ static int vfio_pci_open_device(struct vfio_device *core_vdev) return 0; } -static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type) +static int vfio_pci_get_irq_count(struct vfio_pci_core_device *vdev, int irq_type) { if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) { u8 pin; @@ -690,7 +690,7 @@ static int vfio_pci_for_each_slot_or_bus(struct pci_dev *pdev, return walk.ret; } -static int msix_mmappable_cap(struct vfio_pci_device *vdev, +static int msix_mmappable_cap(struct vfio_pci_core_device *vdev, struct vfio_info_cap *caps) { struct vfio_info_cap_header header = { @@ -701,7 +701,7 @@ static int msix_mmappable_cap(struct vfio_pci_device *vdev, return vfio_info_add_capability(caps, &header, sizeof(header)); } -int vfio_pci_register_dev_region(struct vfio_pci_device *vdev, +int vfio_pci_register_dev_region(struct vfio_pci_core_device *vdev, unsigned int type, unsigned int subtype, 
const struct vfio_pci_regops *ops, size_t size, u32 flags, void *data) @@ -730,8 +730,8 @@ int vfio_pci_register_dev_region(struct vfio_pci_device *vdev, static long vfio_pci_ioctl(struct vfio_device *core_vdev, unsigned int cmd, unsigned long arg) { - struct vfio_pci_device *vdev = - container_of(core_vdev, struct vfio_pci_device, vdev); + struct vfio_pci_core_device *vdev = + container_of(core_vdev, struct vfio_pci_core_device, vdev); unsigned long minsz; if (cmd == VFIO_DEVICE_GET_INFO) { @@ -1271,7 +1271,7 @@ hot_reset_release: return -ENOTTY; } -static ssize_t vfio_pci_rw(struct vfio_pci_device *vdev, char __user *buf, +static ssize_t vfio_pci_rw(struct vfio_pci_core_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite) { unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); @@ -1305,8 +1305,8 @@ static ssize_t vfio_pci_rw(struct vfio_pci_device *vdev, char __user *buf, static ssize_t vfio_pci_read(struct vfio_device *core_vdev, char __user *buf, size_t count, loff_t *ppos) { - struct vfio_pci_device *vdev = - container_of(core_vdev, struct vfio_pci_device, vdev); + struct vfio_pci_core_device *vdev = + container_of(core_vdev, struct vfio_pci_core_device, vdev); if (!count) return 0; @@ -1317,8 +1317,8 @@ static ssize_t vfio_pci_read(struct vfio_device *core_vdev, char __user *buf, static ssize_t vfio_pci_write(struct vfio_device *core_vdev, const char __user *buf, size_t count, loff_t *ppos) { - struct vfio_pci_device *vdev = - container_of(core_vdev, struct vfio_pci_device, vdev); + struct vfio_pci_core_device *vdev = + container_of(core_vdev, struct vfio_pci_core_device, vdev); if (!count) return 0; @@ -1327,7 +1327,7 @@ static ssize_t vfio_pci_write(struct vfio_device *core_vdev, const char __user * } /* Return 1 on zap and vma_lock acquired, 0 on contention (only with @try) */ -static int vfio_pci_zap_and_vma_lock(struct vfio_pci_device *vdev, bool try) +static int vfio_pci_zap_and_vma_lock(struct vfio_pci_core_device *vdev, bool try) 
{ struct vfio_pci_mmap_vma *mmap_vma, *tmp; @@ -1415,14 +1415,14 @@ static int vfio_pci_zap_and_vma_lock(struct vfio_pci_device *vdev, bool try) } } -void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_device *vdev) +void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device *vdev) { vfio_pci_zap_and_vma_lock(vdev, false); down_write(&vdev->memory_lock); mutex_unlock(&vdev->vma_lock); } -u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_device *vdev) +u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev) { u16 cmd; @@ -1435,14 +1435,14 @@ u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_device *vdev) return cmd; } -void vfio_pci_memory_unlock_and_restore(struct vfio_pci_device *vdev, u16 cmd) +void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev, u16 cmd) { pci_write_config_word(vdev->pdev, PCI_COMMAND, cmd); up_write(&vdev->memory_lock); } /* Caller holds vma_lock */ -static int __vfio_pci_add_vma(struct vfio_pci_device *vdev, +static int __vfio_pci_add_vma(struct vfio_pci_core_device *vdev, struct vm_area_struct *vma) { struct vfio_pci_mmap_vma *mmap_vma; @@ -1468,7 +1468,7 @@ static void vfio_pci_mmap_open(struct vm_area_struct *vma) static void vfio_pci_mmap_close(struct vm_area_struct *vma) { - struct vfio_pci_device *vdev = vma->vm_private_data; + struct vfio_pci_core_device *vdev = vma->vm_private_data; struct vfio_pci_mmap_vma *mmap_vma; mutex_lock(&vdev->vma_lock); @@ -1485,7 +1485,7 @@ static void vfio_pci_mmap_close(struct vm_area_struct *vma) static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; - struct vfio_pci_device *vdev = vma->vm_private_data; + struct vfio_pci_core_device *vdev = vma->vm_private_data; struct vfio_pci_mmap_vma *mmap_vma; vm_fault_t ret = VM_FAULT_NOPAGE; @@ -1535,8 +1535,8 @@ static const struct vm_operations_struct vfio_pci_mmap_ops = { static int vfio_pci_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma) 
{ - struct vfio_pci_device *vdev = - container_of(core_vdev, struct vfio_pci_device, vdev); + struct vfio_pci_core_device *vdev = + container_of(core_vdev, struct vfio_pci_core_device, vdev); struct pci_dev *pdev = vdev->pdev; unsigned int index; u64 phys_len, req_len, pgoff, req_start; @@ -1606,8 +1606,8 @@ static int vfio_pci_mmap(struct vfio_device *core_vdev, struct vm_area_struct *v static void vfio_pci_request(struct vfio_device *core_vdev, unsigned int count) { - struct vfio_pci_device *vdev = - container_of(core_vdev, struct vfio_pci_device, vdev); + struct vfio_pci_core_device *vdev = + container_of(core_vdev, struct vfio_pci_core_device, vdev); struct pci_dev *pdev = vdev->pdev; mutex_lock(&vdev->igate); @@ -1626,7 +1626,7 @@ static void vfio_pci_request(struct vfio_device *core_vdev, unsigned int count) mutex_unlock(&vdev->igate); } -static int vfio_pci_validate_vf_token(struct vfio_pci_device *vdev, +static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev, bool vf_token, uuid_t *uuid) { /* @@ -1658,7 +1658,7 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_device *vdev, return 0; /* No VF token provided or required */ if (vdev->pdev->is_virtfn) { - struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev); + struct vfio_pci_core_device *pf_vdev = get_pf_vdev(vdev); bool match; if (!pf_vdev) { @@ -1722,8 +1722,8 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_device *vdev, static int vfio_pci_match(struct vfio_device *core_vdev, char *buf) { - struct vfio_pci_device *vdev = - container_of(core_vdev, struct vfio_pci_device, vdev); + struct vfio_pci_core_device *vdev = + container_of(core_vdev, struct vfio_pci_core_device, vdev); bool vf_token = false; uuid_t uuid; int ret; @@ -1785,8 +1785,8 @@ static const struct vfio_device_ops vfio_pci_ops = { static int vfio_pci_bus_notifier(struct notifier_block *nb, unsigned long action, void *data) { - struct vfio_pci_device *vdev = container_of(nb, - struct vfio_pci_device, nb); + struct 
vfio_pci_core_device *vdev = container_of(nb, + struct vfio_pci_core_device, nb); struct device *dev = data; struct pci_dev *pdev = to_pci_dev(dev); struct pci_dev *physfn = pci_physfn(pdev); @@ -1810,7 +1810,7 @@ static int vfio_pci_bus_notifier(struct notifier_block *nb, return 0; } -static int vfio_pci_vf_init(struct vfio_pci_device *vdev) +static int vfio_pci_vf_init(struct vfio_pci_core_device *vdev) { struct pci_dev *pdev = vdev->pdev; int ret; @@ -1834,7 +1834,7 @@ static int vfio_pci_vf_init(struct vfio_pci_device *vdev) return 0; } -static void vfio_pci_vf_uninit(struct vfio_pci_device *vdev) +static void vfio_pci_vf_uninit(struct vfio_pci_core_device *vdev) { if (!vdev->vf_token) return; @@ -1845,7 +1845,7 @@ static void vfio_pci_vf_uninit(struct vfio_pci_device *vdev) kfree(vdev->vf_token); } -static int vfio_pci_vga_init(struct vfio_pci_device *vdev) +static int vfio_pci_vga_init(struct vfio_pci_core_device *vdev) { struct pci_dev *pdev = vdev->pdev; int ret; @@ -1860,7 +1860,7 @@ static int vfio_pci_vga_init(struct vfio_pci_device *vdev) return 0; } -static void vfio_pci_vga_uninit(struct vfio_pci_device *vdev) +static void vfio_pci_vga_uninit(struct vfio_pci_core_device *vdev) { struct pci_dev *pdev = vdev->pdev; @@ -1874,7 +1874,7 @@ static void vfio_pci_vga_uninit(struct vfio_pci_device *vdev) static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { - struct vfio_pci_device *vdev; + struct vfio_pci_core_device *vdev; struct iommu_group *group; int ret; @@ -1978,7 +1978,7 @@ out_group_put: static void vfio_pci_remove(struct pci_dev *pdev) { - struct vfio_pci_device *vdev = dev_get_drvdata(&pdev->dev); + struct vfio_pci_core_device *vdev = dev_get_drvdata(&pdev->dev); pci_disable_sriov(pdev); @@ -2002,14 +2002,14 @@ static void vfio_pci_remove(struct pci_dev *pdev) static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev, pci_channel_state_t state) { - struct vfio_pci_device *vdev; + struct vfio_pci_core_device 
*vdev; struct vfio_device *device; device = vfio_device_get_from_dev(&pdev->dev); if (device == NULL) return PCI_ERS_RESULT_DISCONNECT; - vdev = container_of(device, struct vfio_pci_device, vdev); + vdev = container_of(device, struct vfio_pci_core_device, vdev); mutex_lock(&vdev->igate); @@ -2060,7 +2060,7 @@ static struct pci_driver vfio_pci_driver = { .err_handler = &vfio_err_handlers, }; -static bool vfio_dev_in_groups(struct vfio_pci_device *vdev, +static bool vfio_dev_in_groups(struct vfio_pci_core_device *vdev, struct vfio_pci_group_info *groups) { unsigned int i; @@ -2104,7 +2104,8 @@ vfio_pci_dev_set_resettable(struct vfio_device_set *dev_set) * reset, so any pci_dev will have the same outcomes for * pci_probe_reset_*() and pci_reset_bus(). */ - pdev = list_first_entry(&dev_set->device_list, struct vfio_pci_device, + pdev = list_first_entry(&dev_set->device_list, + struct vfio_pci_core_device, vdev.dev_set_list)->pdev; /* pci_reset_bus() is supported */ @@ -2126,16 +2127,17 @@ vfio_pci_dev_set_resettable(struct vfio_device_set *dev_set) static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, struct vfio_pci_group_info *groups) { - struct vfio_pci_device *cur_mem; - struct vfio_pci_device *cur_vma; - struct vfio_pci_device *cur; + struct vfio_pci_core_device *cur_mem; + struct vfio_pci_core_device *cur_vma; + struct vfio_pci_core_device *cur; struct pci_dev *pdev; bool is_mem = true; int ret; mutex_lock(&dev_set->lock); cur_mem = list_first_entry(&dev_set->device_list, - struct vfio_pci_device, vdev.dev_set_list); + struct vfio_pci_core_device, + vdev.dev_set_list); pdev = vfio_pci_dev_set_resettable(dev_set); if (!pdev) { @@ -2193,7 +2195,7 @@ err_unlock: static bool vfio_pci_dev_set_needs_reset(struct vfio_device_set *dev_set) { - struct vfio_pci_device *cur; + struct vfio_pci_core_device *cur; bool needs_reset = false; list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) { @@ -2215,7 +2217,7 @@ static bool 
vfio_pci_dev_set_needs_reset(struct vfio_device_set *dev_set) */ static bool vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set) { - struct vfio_pci_device *cur; + struct vfio_pci_core_device *cur; struct pci_dev *pdev; int ret; diff --git a/drivers/vfio/pci/vfio_pci_core.h b/drivers/vfio/pci/vfio_pci_core.h index ef26e781961d..2ceaa6e4ca25 100644 --- a/drivers/vfio/pci/vfio_pci_core.h +++ b/drivers/vfio/pci/vfio_pci_core.h @@ -33,7 +33,7 @@ struct vfio_pci_ioeventfd { struct list_head next; - struct vfio_pci_device *vdev; + struct vfio_pci_core_device *vdev; struct virqfd *virqfd; void __iomem *addr; uint64_t data; @@ -52,18 +52,18 @@ struct vfio_pci_irq_ctx { struct irq_bypass_producer producer; }; -struct vfio_pci_device; +struct vfio_pci_core_device; struct vfio_pci_region; struct vfio_pci_regops { - ssize_t (*rw)(struct vfio_pci_device *vdev, char __user *buf, + ssize_t (*rw)(struct vfio_pci_core_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite); - void (*release)(struct vfio_pci_device *vdev, + void (*release)(struct vfio_pci_core_device *vdev, struct vfio_pci_region *region); - int (*mmap)(struct vfio_pci_device *vdev, + int (*mmap)(struct vfio_pci_core_device *vdev, struct vfio_pci_region *region, struct vm_area_struct *vma); - int (*add_capability)(struct vfio_pci_device *vdev, + int (*add_capability)(struct vfio_pci_core_device *vdev, struct vfio_pci_region *region, struct vfio_info_cap *caps); }; @@ -94,7 +94,7 @@ struct vfio_pci_mmap_vma { struct list_head vma_next; }; -struct vfio_pci_device { +struct vfio_pci_core_device { struct vfio_device vdev; struct pci_dev *pdev; void __iomem *barmap[PCI_STD_NUM_BARS]; @@ -144,61 +144,61 @@ struct vfio_pci_device { #define is_irq_none(vdev) (!(is_intx(vdev) || is_msi(vdev) || is_msix(vdev))) #define irq_is(vdev, type) (vdev->irq_type == type) -extern void vfio_pci_intx_mask(struct vfio_pci_device *vdev); -extern void vfio_pci_intx_unmask(struct vfio_pci_device *vdev); +extern void 
vfio_pci_intx_mask(struct vfio_pci_core_device *vdev); +extern void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev); -extern int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, +extern int vfio_pci_set_irqs_ioctl(struct vfio_pci_core_device *vdev, uint32_t flags, unsigned index, unsigned start, unsigned count, void *data); -extern ssize_t vfio_pci_config_rw(struct vfio_pci_device *vdev, +extern ssize_t vfio_pci_config_rw(struct vfio_pci_core_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite); -extern ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, +extern ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite); -extern ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf, +extern ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite); -extern long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t offset, +extern long vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset, uint64_t data, int count, int fd); extern int vfio_pci_init_perm_bits(void); extern void vfio_pci_uninit_perm_bits(void); -extern int vfio_config_init(struct vfio_pci_device *vdev); -extern void vfio_config_free(struct vfio_pci_device *vdev); +extern int vfio_config_init(struct vfio_pci_core_device *vdev); +extern void vfio_config_free(struct vfio_pci_core_device *vdev); -extern int vfio_pci_register_dev_region(struct vfio_pci_device *vdev, +extern int vfio_pci_register_dev_region(struct vfio_pci_core_device *vdev, unsigned int type, unsigned int subtype, const struct vfio_pci_regops *ops, size_t size, u32 flags, void *data); -extern int vfio_pci_set_power_state(struct vfio_pci_device *vdev, +extern int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev, pci_power_t state); -extern bool __vfio_pci_memory_enabled(struct vfio_pci_device *vdev); -extern void 
vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_device +extern bool __vfio_pci_memory_enabled(struct vfio_pci_core_device *vdev); +extern void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device *vdev); -extern u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_device *vdev); -extern void vfio_pci_memory_unlock_and_restore(struct vfio_pci_device *vdev, +extern u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev); +extern void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev, u16 cmd); #ifdef CONFIG_VFIO_PCI_IGD -extern int vfio_pci_igd_init(struct vfio_pci_device *vdev); +extern int vfio_pci_igd_init(struct vfio_pci_core_device *vdev); #else -static inline int vfio_pci_igd_init(struct vfio_pci_device *vdev) +static inline int vfio_pci_igd_init(struct vfio_pci_core_device *vdev) { return -ENODEV; } #endif #ifdef CONFIG_S390 -extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev, +extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, struct vfio_info_cap *caps); #else -static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev, +static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, struct vfio_info_cap *caps) { return -ENODEV; diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c index d57c409b4033..a324ca7e6b5a 100644 --- a/drivers/vfio/pci/vfio_pci_igd.c +++ b/drivers/vfio/pci/vfio_pci_igd.c @@ -25,8 +25,9 @@ #define OPREGION_RVDS 0x3c2 #define OPREGION_VERSION 0x16 -static ssize_t vfio_pci_igd_rw(struct vfio_pci_device *vdev, char __user *buf, - size_t count, loff_t *ppos, bool iswrite) +static ssize_t vfio_pci_igd_rw(struct vfio_pci_core_device *vdev, + char __user *buf, size_t count, loff_t *ppos, + bool iswrite) { unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS; void *base = vdev->region[i].data; @@ -45,7 +46,7 @@ static ssize_t vfio_pci_igd_rw(struct vfio_pci_device *vdev, 
char __user *buf, return count; } -static void vfio_pci_igd_release(struct vfio_pci_device *vdev, +static void vfio_pci_igd_release(struct vfio_pci_core_device *vdev, struct vfio_pci_region *region) { memunmap(region->data); @@ -56,7 +57,7 @@ static const struct vfio_pci_regops vfio_pci_igd_regops = { .release = vfio_pci_igd_release, }; -static int vfio_pci_igd_opregion_init(struct vfio_pci_device *vdev) +static int vfio_pci_igd_opregion_init(struct vfio_pci_core_device *vdev) { __le32 *dwordp = (__le32 *)(vdev->vconfig + OPREGION_PCI_ADDR); u32 addr, size; @@ -160,7 +161,7 @@ static int vfio_pci_igd_opregion_init(struct vfio_pci_device *vdev) return ret; } -static ssize_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev, +static ssize_t vfio_pci_igd_cfg_rw(struct vfio_pci_core_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite) { @@ -253,7 +254,7 @@ static ssize_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev, return count; } -static void vfio_pci_igd_cfg_release(struct vfio_pci_device *vdev, +static void vfio_pci_igd_cfg_release(struct vfio_pci_core_device *vdev, struct vfio_pci_region *region) { struct pci_dev *pdev = region->data; @@ -266,7 +267,7 @@ static const struct vfio_pci_regops vfio_pci_igd_cfg_regops = { .release = vfio_pci_igd_cfg_release, }; -static int vfio_pci_igd_cfg_init(struct vfio_pci_device *vdev) +static int vfio_pci_igd_cfg_init(struct vfio_pci_core_device *vdev) { struct pci_dev *host_bridge, *lpc_bridge; int ret; @@ -314,7 +315,7 @@ static int vfio_pci_igd_cfg_init(struct vfio_pci_device *vdev) return 0; } -int vfio_pci_igd_init(struct vfio_pci_device *vdev) +int vfio_pci_igd_init(struct vfio_pci_core_device *vdev) { int ret; diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index df1e8c8c274c..945ddbdf4d11 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -27,13 +27,13 @@ */ static void vfio_send_intx_eventfd(void *opaque, void *unused) { - 
struct vfio_pci_device *vdev = opaque; + struct vfio_pci_core_device *vdev = opaque; if (likely(is_intx(vdev) && !vdev->virq_disabled)) eventfd_signal(vdev->ctx[0].trigger, 1); } -void vfio_pci_intx_mask(struct vfio_pci_device *vdev) +void vfio_pci_intx_mask(struct vfio_pci_core_device *vdev) { struct pci_dev *pdev = vdev->pdev; unsigned long flags; @@ -73,7 +73,7 @@ void vfio_pci_intx_mask(struct vfio_pci_device *vdev) */ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused) { - struct vfio_pci_device *vdev = opaque; + struct vfio_pci_core_device *vdev = opaque; struct pci_dev *pdev = vdev->pdev; unsigned long flags; int ret = 0; @@ -107,7 +107,7 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused) return ret; } -void vfio_pci_intx_unmask(struct vfio_pci_device *vdev) +void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev) { if (vfio_pci_intx_unmask_handler(vdev, NULL) > 0) vfio_send_intx_eventfd(vdev, NULL); @@ -115,7 +115,7 @@ void vfio_pci_intx_unmask(struct vfio_pci_device *vdev) static irqreturn_t vfio_intx_handler(int irq, void *dev_id) { - struct vfio_pci_device *vdev = dev_id; + struct vfio_pci_core_device *vdev = dev_id; unsigned long flags; int ret = IRQ_NONE; @@ -139,7 +139,7 @@ static irqreturn_t vfio_intx_handler(int irq, void *dev_id) return ret; } -static int vfio_intx_enable(struct vfio_pci_device *vdev) +static int vfio_intx_enable(struct vfio_pci_core_device *vdev) { if (!is_irq_none(vdev)) return -EINVAL; @@ -168,7 +168,7 @@ static int vfio_intx_enable(struct vfio_pci_device *vdev) return 0; } -static int vfio_intx_set_signal(struct vfio_pci_device *vdev, int fd) +static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd) { struct pci_dev *pdev = vdev->pdev; unsigned long irqflags = IRQF_SHARED; @@ -223,7 +223,7 @@ static int vfio_intx_set_signal(struct vfio_pci_device *vdev, int fd) return 0; } -static void vfio_intx_disable(struct vfio_pci_device *vdev) +static void vfio_intx_disable(struct 
vfio_pci_core_device *vdev) { vfio_virqfd_disable(&vdev->ctx[0].unmask); vfio_virqfd_disable(&vdev->ctx[0].mask); @@ -244,7 +244,7 @@ static irqreturn_t vfio_msihandler(int irq, void *arg) return IRQ_HANDLED; } -static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix) +static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msix) { struct pci_dev *pdev = vdev->pdev; unsigned int flag = msix ? PCI_IRQ_MSIX : PCI_IRQ_MSI; @@ -285,7 +285,7 @@ static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix) return 0; } -static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev, +static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev, int vector, int fd, bool msix) { struct pci_dev *pdev = vdev->pdev; @@ -364,7 +364,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev, return 0; } -static int vfio_msi_set_block(struct vfio_pci_device *vdev, unsigned start, +static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start, unsigned count, int32_t *fds, bool msix) { int i, j, ret = 0; @@ -385,7 +385,7 @@ static int vfio_msi_set_block(struct vfio_pci_device *vdev, unsigned start, return ret; } -static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix) +static void vfio_msi_disable(struct vfio_pci_core_device *vdev, bool msix) { struct pci_dev *pdev = vdev->pdev; int i; @@ -417,7 +417,7 @@ static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix) /* * IOCTL support */ -static int vfio_pci_set_intx_unmask(struct vfio_pci_device *vdev, +static int vfio_pci_set_intx_unmask(struct vfio_pci_core_device *vdev, unsigned index, unsigned start, unsigned count, uint32_t flags, void *data) { @@ -444,7 +444,7 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_device *vdev, return 0; } -static int vfio_pci_set_intx_mask(struct vfio_pci_device *vdev, +static int vfio_pci_set_intx_mask(struct vfio_pci_core_device *vdev, unsigned index, unsigned 
start, unsigned count, uint32_t flags, void *data) { @@ -464,7 +464,7 @@ static int vfio_pci_set_intx_mask(struct vfio_pci_device *vdev, return 0; } -static int vfio_pci_set_intx_trigger(struct vfio_pci_device *vdev, +static int vfio_pci_set_intx_trigger(struct vfio_pci_core_device *vdev, unsigned index, unsigned start, unsigned count, uint32_t flags, void *data) { @@ -507,7 +507,7 @@ static int vfio_pci_set_intx_trigger(struct vfio_pci_device *vdev, return 0; } -static int vfio_pci_set_msi_trigger(struct vfio_pci_device *vdev, +static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev, unsigned index, unsigned start, unsigned count, uint32_t flags, void *data) { @@ -613,7 +613,7 @@ static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx, return -EINVAL; } -static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev, +static int vfio_pci_set_err_trigger(struct vfio_pci_core_device *vdev, unsigned index, unsigned start, unsigned count, uint32_t flags, void *data) { @@ -624,7 +624,7 @@ static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev, count, flags, data); } -static int vfio_pci_set_req_trigger(struct vfio_pci_device *vdev, +static int vfio_pci_set_req_trigger(struct vfio_pci_core_device *vdev, unsigned index, unsigned start, unsigned count, uint32_t flags, void *data) { @@ -635,11 +635,11 @@ static int vfio_pci_set_req_trigger(struct vfio_pci_device *vdev, count, flags, data); } -int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags, +int vfio_pci_set_irqs_ioctl(struct vfio_pci_core_device *vdev, uint32_t flags, unsigned index, unsigned start, unsigned count, void *data) { - int (*func)(struct vfio_pci_device *vdev, unsigned index, + int (*func)(struct vfio_pci_core_device *vdev, unsigned index, unsigned start, unsigned count, uint32_t flags, void *data) = NULL; diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index 667e82726e75..8fff4689dd44 100644 --- 
a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -38,7 +38,7 @@ #define vfio_iowrite8 iowrite8 #define VFIO_IOWRITE(size) \ -static int vfio_pci_iowrite##size(struct vfio_pci_device *vdev, \ +static int vfio_pci_iowrite##size(struct vfio_pci_core_device *vdev, \ bool test_mem, u##size val, void __iomem *io) \ { \ if (test_mem) { \ @@ -65,7 +65,7 @@ VFIO_IOWRITE(64) #endif #define VFIO_IOREAD(size) \ -static int vfio_pci_ioread##size(struct vfio_pci_device *vdev, \ +static int vfio_pci_ioread##size(struct vfio_pci_core_device *vdev, \ bool test_mem, u##size *val, void __iomem *io) \ { \ if (test_mem) { \ @@ -94,7 +94,7 @@ VFIO_IOREAD(32) * reads with -1. This is intended for handling MSI-X vector tables and * leftover space for ROM BARs. */ -static ssize_t do_io_rw(struct vfio_pci_device *vdev, bool test_mem, +static ssize_t do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem, void __iomem *io, char __user *buf, loff_t off, size_t count, size_t x_start, size_t x_end, bool iswrite) @@ -200,7 +200,7 @@ static ssize_t do_io_rw(struct vfio_pci_device *vdev, bool test_mem, return done; } -static int vfio_pci_setup_barmap(struct vfio_pci_device *vdev, int bar) +static int vfio_pci_setup_barmap(struct vfio_pci_core_device *vdev, int bar) { struct pci_dev *pdev = vdev->pdev; int ret; @@ -224,7 +224,7 @@ static int vfio_pci_setup_barmap(struct vfio_pci_device *vdev, int bar) return 0; } -ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, +ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite) { struct pci_dev *pdev = vdev->pdev; @@ -288,7 +288,7 @@ out: return done; } -ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf, +ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite) { int ret; @@ -384,7 +384,7 @@ static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd 
*ioeventfd, static int vfio_pci_ioeventfd_handler(void *opaque, void *unused) { struct vfio_pci_ioeventfd *ioeventfd = opaque; - struct vfio_pci_device *vdev = ioeventfd->vdev; + struct vfio_pci_core_device *vdev = ioeventfd->vdev; if (ioeventfd->test_mem) { if (!down_read_trylock(&vdev->memory_lock)) @@ -410,7 +410,7 @@ static void vfio_pci_ioeventfd_thread(void *opaque, void *unused) vfio_pci_ioeventfd_do_write(ioeventfd, ioeventfd->test_mem); } -long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t offset, +long vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset, uint64_t data, int count, int fd) { struct pci_dev *pdev = vdev->pdev; diff --git a/drivers/vfio/pci/vfio_pci_zdev.c b/drivers/vfio/pci/vfio_pci_zdev.c index ecae0c3d95a0..2ffbdc11f089 100644 --- a/drivers/vfio/pci/vfio_pci_zdev.c +++ b/drivers/vfio/pci/vfio_pci_zdev.c @@ -114,7 +114,7 @@ static int zpci_pfip_cap(struct zpci_dev *zdev, struct vfio_info_cap *caps) /* * Add all supported capabilities to the VFIO_DEVICE_GET_INFO capability chain. */ -int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev, +int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, struct vfio_info_cap *caps) { struct zpci_dev *zdev = to_zpci(vdev->pdev); From bf9fdc9a74cf61fe9f646c43eac4823481f1e20a Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 26 Aug 2021 13:39:03 +0300 Subject: [PATCH 28/37] vfio/pci: Rename ops functions to fit core namings This is another preparation patch for separating the vfio_pci driver to a subsystem driver and a generic pci driver. This patch doesn't change any logic. 
Signed-off-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20210826103912.128972-5-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_core.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 2729479390a0..ee5c8fe2a324 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -533,7 +533,7 @@ static void vfio_pci_vf_token_user_add(struct vfio_pci_core_device *vdev, int va vfio_device_put(&pf_vdev->vdev); } -static void vfio_pci_close_device(struct vfio_device *core_vdev) +static void vfio_pci_core_close_device(struct vfio_device *core_vdev) { struct vfio_pci_core_device *vdev = container_of(core_vdev, struct vfio_pci_core_device, vdev); @@ -554,7 +554,7 @@ static void vfio_pci_close_device(struct vfio_device *core_vdev) mutex_unlock(&vdev->igate); } -static int vfio_pci_open_device(struct vfio_device *core_vdev) +static int vfio_pci_core_open_device(struct vfio_device *core_vdev) { struct vfio_pci_core_device *vdev = container_of(core_vdev, struct vfio_pci_core_device, vdev); @@ -727,7 +727,7 @@ int vfio_pci_register_dev_region(struct vfio_pci_core_device *vdev, return 0; } -static long vfio_pci_ioctl(struct vfio_device *core_vdev, +static long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd, unsigned long arg) { struct vfio_pci_core_device *vdev = @@ -1302,7 +1302,7 @@ static ssize_t vfio_pci_rw(struct vfio_pci_core_device *vdev, char __user *buf, return -EINVAL; } -static ssize_t vfio_pci_read(struct vfio_device *core_vdev, char __user *buf, +static ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *buf, size_t count, loff_t *ppos) { struct vfio_pci_core_device *vdev = @@ -1314,7 +1314,7 @@ static ssize_t vfio_pci_read(struct vfio_device *core_vdev, char __user *buf, return 
vfio_pci_rw(vdev, buf, count, ppos, false); } -static ssize_t vfio_pci_write(struct vfio_device *core_vdev, const char __user *buf, +static ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *buf, size_t count, loff_t *ppos) { struct vfio_pci_core_device *vdev = @@ -1533,7 +1533,7 @@ static const struct vm_operations_struct vfio_pci_mmap_ops = { .fault = vfio_pci_mmap_fault, }; -static int vfio_pci_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma) +static int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma) { struct vfio_pci_core_device *vdev = container_of(core_vdev, struct vfio_pci_core_device, vdev); @@ -1604,7 +1604,7 @@ static int vfio_pci_mmap(struct vfio_device *core_vdev, struct vm_area_struct *v return 0; } -static void vfio_pci_request(struct vfio_device *core_vdev, unsigned int count) +static void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count) { struct vfio_pci_core_device *vdev = container_of(core_vdev, struct vfio_pci_core_device, vdev); @@ -1720,7 +1720,7 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev, #define VF_TOKEN_ARG "vf_token=" -static int vfio_pci_match(struct vfio_device *core_vdev, char *buf) +static int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf) { struct vfio_pci_core_device *vdev = container_of(core_vdev, struct vfio_pci_core_device, vdev); @@ -1772,14 +1772,14 @@ static int vfio_pci_match(struct vfio_device *core_vdev, char *buf) static const struct vfio_device_ops vfio_pci_ops = { .name = "vfio-pci", - .open_device = vfio_pci_open_device, - .close_device = vfio_pci_close_device, - .ioctl = vfio_pci_ioctl, - .read = vfio_pci_read, - .write = vfio_pci_write, - .mmap = vfio_pci_mmap, - .request = vfio_pci_request, - .match = vfio_pci_match, + .open_device = vfio_pci_core_open_device, + .close_device = vfio_pci_core_close_device, + .ioctl = vfio_pci_core_ioctl, + .read = vfio_pci_core_read, + .write = 
vfio_pci_core_write, + .mmap = vfio_pci_core_mmap, + .request = vfio_pci_core_request, + .match = vfio_pci_core_match, }; static int vfio_pci_bus_notifier(struct notifier_block *nb, From c39f8fa76cdd0c96f82fa785a0d6c92afe8f4a77 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 26 Aug 2021 13:39:04 +0300 Subject: [PATCH 29/37] vfio/pci: Include vfio header in vfio_pci_core.h The vfio_device structure is embedded into the vfio_pci_core_device structure, so there is no reason for not including the header file in the vfio_pci_core header as well. Signed-off-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20210826103912.128972-6-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_core.c | 1 - drivers/vfio/pci/vfio_pci_core.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index ee5c8fe2a324..94f062818e0c 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/vfio/pci/vfio_pci_core.h b/drivers/vfio/pci/vfio_pci_core.h index 2ceaa6e4ca25..17ad048752b6 100644 --- a/drivers/vfio/pci/vfio_pci_core.h +++ b/drivers/vfio/pci/vfio_pci_core.h @@ -10,6 +10,7 @@ #include #include +#include #include #include #include From ff53edf6d6ab0970f86595b3f5d179df51848723 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 26 Aug 2021 13:39:05 +0300 Subject: [PATCH 30/37] vfio/pci: Split the pci_driver code out of vfio_pci_core.c Split the vfio_pci driver into two logical parts, the 'struct pci_driver' (vfio_pci.c) which implements "Generic VFIO support for any PCI device" and a library of code (vfio_pci_core.c) that helps implementing a struct vfio_device on top of a PCI device. 
vfio_pci.ko continues to present the same interface under sysfs and this change should have no functional impact. Following patches will turn vfio_pci and vfio_pci_core into separate modules. This is a preparation for allowing another module to provide the pci_driver and allow that module to customize how VFIO is set up, inject its own operations, and easily extend vendor specific functionality. At this point the vfio_pci_core still contains a lot of vfio_pci functionality mixed into it. Following patches will move more of the large scale items out, but another cleanup series will be needed to get everything. Signed-off-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe Reviewed-by: Christoph Hellwig Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20210826103912.128972-7-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/Makefile | 2 +- drivers/vfio/pci/vfio_pci.c | 223 +++++++++++++++++++++++++ drivers/vfio/pci/vfio_pci_core.c | 271 +++++++------------------------ drivers/vfio/pci/vfio_pci_core.h | 23 +++ 4 files changed, 304 insertions(+), 215 deletions(-) create mode 100644 drivers/vfio/pci/vfio_pci.c diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile index 66a40488e967..8aa517b4b671 100644 --- a/drivers/vfio/pci/Makefile +++ b/drivers/vfio/pci/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only -vfio-pci-y := vfio_pci_core.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o +vfio-pci-y := vfio_pci.o vfio_pci_core.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o vfio-pci-$(CONFIG_S390) += vfio_pci_zdev.o diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c new file mode 100644 index 000000000000..4e31bd3001ad --- /dev/null +++ b/drivers/vfio/pci/vfio_pci.c @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES.
All rights reserved + * + * Copyright (C) 2012 Red Hat, Inc. All rights reserved. + * Author: Alex Williamson + * + * Derived from original vfio: + * Copyright 2010 Cisco Systems, Inc. All rights reserved. + * Author: Tom Lyon, pugs@cisco.com + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vfio_pci_core.h" + +#define DRIVER_AUTHOR "Alex Williamson " +#define DRIVER_DESC "VFIO PCI - User Level meta-driver" + +static char ids[1024] __initdata; +module_param_string(ids, ids, sizeof(ids), 0); +MODULE_PARM_DESC(ids, "Initial PCI IDs to add to the vfio driver, format is \"vendor:device[:subvendor[:subdevice[:class[:class_mask]]]]\" and multiple comma separated entries can be specified"); + +static bool enable_sriov; +#ifdef CONFIG_PCI_IOV +module_param(enable_sriov, bool, 0644); +MODULE_PARM_DESC(enable_sriov, "Enable support for SR-IOV configuration. Enabling SR-IOV on a PF typically requires support of the userspace PF driver, enabling VFs without such support may result in non-functional VFs or PF."); +#endif + +static bool disable_denylist; +module_param(disable_denylist, bool, 0444); +MODULE_PARM_DESC(disable_denylist, "Disable use of device denylist. 
Disabling the denylist allows binding to devices with known errata that may lead to exploitable stability or security issues when accessed by untrusted users."); + +static bool vfio_pci_dev_in_denylist(struct pci_dev *pdev) +{ + switch (pdev->vendor) { + case PCI_VENDOR_ID_INTEL: + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_QAT_C3XXX: + case PCI_DEVICE_ID_INTEL_QAT_C3XXX_VF: + case PCI_DEVICE_ID_INTEL_QAT_C62X: + case PCI_DEVICE_ID_INTEL_QAT_C62X_VF: + case PCI_DEVICE_ID_INTEL_QAT_DH895XCC: + case PCI_DEVICE_ID_INTEL_QAT_DH895XCC_VF: + return true; + default: + return false; + } + } + + return false; +} + +static bool vfio_pci_is_denylisted(struct pci_dev *pdev) +{ + if (!vfio_pci_dev_in_denylist(pdev)) + return false; + + if (disable_denylist) { + pci_warn(pdev, + "device denylist disabled - allowing device %04x:%04x.\n", + pdev->vendor, pdev->device); + return false; + } + + pci_warn(pdev, "%04x:%04x exists in vfio-pci device denylist, driver probing disallowed.\n", + pdev->vendor, pdev->device); + + return true; +} + +static const struct vfio_device_ops vfio_pci_ops = { + .name = "vfio-pci", + .open_device = vfio_pci_core_open_device, + .close_device = vfio_pci_core_close_device, + .ioctl = vfio_pci_core_ioctl, + .read = vfio_pci_core_read, + .write = vfio_pci_core_write, + .mmap = vfio_pci_core_mmap, + .request = vfio_pci_core_request, + .match = vfio_pci_core_match, +}; + +static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct vfio_pci_core_device *vdev; + int ret; + + if (vfio_pci_is_denylisted(pdev)) + return -EINVAL; + + vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); + if (!vdev) + return -ENOMEM; + vfio_pci_core_init_device(vdev, pdev, &vfio_pci_ops); + + ret = vfio_pci_core_register_device(vdev); + if (ret) + goto out_free; + return 0; + +out_free: + vfio_pci_core_uninit_device(vdev); + kfree(vdev); + return ret; +} + +static void vfio_pci_remove(struct pci_dev *pdev) +{ + struct vfio_pci_core_device *vdev = 
dev_get_drvdata(&pdev->dev); + + vfio_pci_core_unregister_device(vdev); + vfio_pci_core_uninit_device(vdev); + kfree(vdev); +} + +static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn) +{ + if (!enable_sriov) + return -ENOENT; + + return vfio_pci_core_sriov_configure(pdev, nr_virtfn); +} + +static struct pci_driver vfio_pci_driver = { + .name = "vfio-pci", + .id_table = NULL, /* only dynamic ids */ + .probe = vfio_pci_probe, + .remove = vfio_pci_remove, + .sriov_configure = vfio_pci_sriov_configure, + .err_handler = &vfio_pci_core_err_handlers, +}; + +static void __init vfio_pci_fill_ids(void) +{ + char *p, *id; + int rc; + + /* no ids passed actually */ + if (ids[0] == '\0') + return; + + /* add ids specified in the module parameter */ + p = ids; + while ((id = strsep(&p, ","))) { + unsigned int vendor, device, subvendor = PCI_ANY_ID, + subdevice = PCI_ANY_ID, class = 0, class_mask = 0; + int fields; + + if (!strlen(id)) + continue; + + fields = sscanf(id, "%x:%x:%x:%x:%x:%x", + &vendor, &device, &subvendor, &subdevice, + &class, &class_mask); + + if (fields < 2) { + pr_warn("invalid id string \"%s\"\n", id); + continue; + } + + rc = pci_add_dynid(&vfio_pci_driver, vendor, device, + subvendor, subdevice, class, class_mask, 0); + if (rc) + pr_warn("failed to add dynamic id [%04x:%04x[%04x:%04x]] class %#08x/%08x (%d)\n", + vendor, device, subvendor, subdevice, + class, class_mask, rc); + else + pr_info("add [%04x:%04x[%04x:%04x]] class %#08x/%08x\n", + vendor, device, subvendor, subdevice, + class, class_mask); + } +} + +static int __init vfio_pci_init(void) +{ + int ret; + + ret = vfio_pci_core_init(); + if (ret) + return ret; + + /* Register and scan for devices */ + ret = pci_register_driver(&vfio_pci_driver); + if (ret) + goto out; + + vfio_pci_fill_ids(); + + if (disable_denylist) + pr_warn("device denylist disabled.\n"); + + return 0; + +out: + vfio_pci_core_cleanup(); + return ret; +} +module_init(vfio_pci_init); + +static void __exit 
vfio_pci_cleanup(void) +{ + pci_unregister_driver(&vfio_pci_driver); + vfio_pci_core_cleanup(); +} +module_exit(vfio_pci_cleanup); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 94f062818e0c..c0d71f72d4f1 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -8,8 +8,6 @@ * Author: Tom Lyon, pugs@cisco.com */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - #include #include #include @@ -29,14 +27,6 @@ #include "vfio_pci_core.h" -#define DRIVER_VERSION "0.2" -#define DRIVER_AUTHOR "Alex Williamson " -#define DRIVER_DESC "VFIO PCI - User Level meta-driver" - -static char ids[1024] __initdata; -module_param_string(ids, ids, sizeof(ids), 0); -MODULE_PARM_DESC(ids, "Initial PCI IDs to add to the vfio driver, format is \"vendor:device[:subvendor[:subdevice[:class[:class_mask]]]]\" and multiple comma separated entries can be specified"); - static bool nointxmask; module_param_named(nointxmask, nointxmask, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(nointxmask, @@ -53,16 +43,6 @@ module_param(disable_idle_d3, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(disable_idle_d3, "Disable using the PCI D3 low power state for idle, unused devices"); -static bool enable_sriov; -#ifdef CONFIG_PCI_IOV -module_param(enable_sriov, bool, 0644); -MODULE_PARM_DESC(enable_sriov, "Enable support for SR-IOV configuration. Enabling SR-IOV on a PF typically requires support of the userspace PF driver, enabling VFs without such support may result in non-functional VFs or PF."); -#endif - -static bool disable_denylist; -module_param(disable_denylist, bool, 0444); -MODULE_PARM_DESC(disable_denylist, "Disable use of device denylist. 
Disabling the denylist allows binding to devices with known errata that may lead to exploitable stability or security issues when accessed by untrusted users."); - static inline bool vfio_vga_disabled(void) { #ifdef CONFIG_VFIO_PCI_VGA @@ -72,44 +52,6 @@ static inline bool vfio_vga_disabled(void) #endif } -static bool vfio_pci_dev_in_denylist(struct pci_dev *pdev) -{ - switch (pdev->vendor) { - case PCI_VENDOR_ID_INTEL: - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_QAT_C3XXX: - case PCI_DEVICE_ID_INTEL_QAT_C3XXX_VF: - case PCI_DEVICE_ID_INTEL_QAT_C62X: - case PCI_DEVICE_ID_INTEL_QAT_C62X_VF: - case PCI_DEVICE_ID_INTEL_QAT_DH895XCC: - case PCI_DEVICE_ID_INTEL_QAT_DH895XCC_VF: - return true; - default: - return false; - } - } - - return false; -} - -static bool vfio_pci_is_denylisted(struct pci_dev *pdev) -{ - if (!vfio_pci_dev_in_denylist(pdev)) - return false; - - if (disable_denylist) { - pci_warn(pdev, - "device denylist disabled - allowing device %04x:%04x.\n", - pdev->vendor, pdev->device); - return false; - } - - pci_warn(pdev, "%04x:%04x exists in vfio-pci device denylist, driver probing disallowed.\n", - pdev->vendor, pdev->device); - - return true; -} - /* * Our VGA arbiter participation is limited since we don't know anything * about the device itself. 
However, if the device is the only VGA device @@ -495,8 +437,6 @@ out: vfio_pci_set_power_state(vdev, PCI_D3hot); } -static struct pci_driver vfio_pci_driver; - static struct vfio_pci_core_device *get_pf_vdev(struct vfio_pci_core_device *vdev) { struct pci_dev *physfn = pci_physfn(vdev->pdev); @@ -509,7 +449,7 @@ static struct vfio_pci_core_device *get_pf_vdev(struct vfio_pci_core_device *vde if (!pf_dev) return NULL; - if (pci_dev_driver(physfn) != &vfio_pci_driver) { + if (pci_dev_driver(physfn) != pci_dev_driver(vdev->pdev)) { vfio_device_put(pf_dev); return NULL; } @@ -532,7 +472,7 @@ static void vfio_pci_vf_token_user_add(struct vfio_pci_core_device *vdev, int va vfio_device_put(&pf_vdev->vdev); } -static void vfio_pci_core_close_device(struct vfio_device *core_vdev) +void vfio_pci_core_close_device(struct vfio_device *core_vdev) { struct vfio_pci_core_device *vdev = container_of(core_vdev, struct vfio_pci_core_device, vdev); @@ -553,7 +493,7 @@ static void vfio_pci_core_close_device(struct vfio_device *core_vdev) mutex_unlock(&vdev->igate); } -static int vfio_pci_core_open_device(struct vfio_device *core_vdev) +int vfio_pci_core_open_device(struct vfio_device *core_vdev) { struct vfio_pci_core_device *vdev = container_of(core_vdev, struct vfio_pci_core_device, vdev); @@ -726,8 +666,8 @@ int vfio_pci_register_dev_region(struct vfio_pci_core_device *vdev, return 0; } -static long vfio_pci_core_ioctl(struct vfio_device *core_vdev, - unsigned int cmd, unsigned long arg) +long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd, + unsigned long arg) { struct vfio_pci_core_device *vdev = container_of(core_vdev, struct vfio_pci_core_device, vdev); @@ -1301,8 +1241,8 @@ static ssize_t vfio_pci_rw(struct vfio_pci_core_device *vdev, char __user *buf, return -EINVAL; } -static ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *buf, - size_t count, loff_t *ppos) +ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user 
*buf, + size_t count, loff_t *ppos) { struct vfio_pci_core_device *vdev = container_of(core_vdev, struct vfio_pci_core_device, vdev); @@ -1313,8 +1253,8 @@ static ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *bu return vfio_pci_rw(vdev, buf, count, ppos, false); } -static ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *buf, - size_t count, loff_t *ppos) +ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *buf, + size_t count, loff_t *ppos) { struct vfio_pci_core_device *vdev = container_of(core_vdev, struct vfio_pci_core_device, vdev); @@ -1532,7 +1472,7 @@ static const struct vm_operations_struct vfio_pci_mmap_ops = { .fault = vfio_pci_mmap_fault, }; -static int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma) +int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma) { struct vfio_pci_core_device *vdev = container_of(core_vdev, struct vfio_pci_core_device, vdev); @@ -1603,7 +1543,7 @@ static int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_stru return 0; } -static void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count) +void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count) { struct vfio_pci_core_device *vdev = container_of(core_vdev, struct vfio_pci_core_device, vdev); @@ -1719,7 +1659,7 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev, #define VF_TOKEN_ARG "vf_token=" -static int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf) +int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf) { struct vfio_pci_core_device *vdev = container_of(core_vdev, struct vfio_pci_core_device, vdev); @@ -1769,18 +1709,6 @@ static int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf) return 1; /* Match */ } -static const struct vfio_device_ops vfio_pci_ops = { - .name = "vfio-pci", - .open_device = vfio_pci_core_open_device, - 
.close_device = vfio_pci_core_close_device, - .ioctl = vfio_pci_core_ioctl, - .read = vfio_pci_core_read, - .write = vfio_pci_core_write, - .mmap = vfio_pci_core_mmap, - .request = vfio_pci_core_request, - .match = vfio_pci_core_match, -}; - static int vfio_pci_bus_notifier(struct notifier_block *nb, unsigned long action, void *data) { @@ -1795,15 +1723,16 @@ static int vfio_pci_bus_notifier(struct notifier_block *nb, pci_info(vdev->pdev, "Captured SR-IOV VF %s driver_override\n", pci_name(pdev)); pdev->driver_override = kasprintf(GFP_KERNEL, "%s", - vfio_pci_ops.name); + vdev->vdev.ops->name); } else if (action == BUS_NOTIFY_BOUND_DRIVER && pdev->is_virtfn && physfn == vdev->pdev) { struct pci_driver *drv = pci_dev_driver(pdev); - if (drv && drv != &vfio_pci_driver) + if (drv && drv != pci_dev_driver(vdev->pdev)) pci_warn(vdev->pdev, - "VF %s bound to driver %s while PF bound to vfio-pci\n", - pci_name(pdev), drv->name); + "VF %s bound to driver %s while PF bound to driver %s\n", + pci_name(pdev), drv->name, + pci_dev_driver(vdev->pdev)->name); } return 0; @@ -1871,15 +1800,39 @@ static void vfio_pci_vga_uninit(struct vfio_pci_core_device *vdev) VGA_RSRC_LEGACY_MEM); } -static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +void vfio_pci_core_init_device(struct vfio_pci_core_device *vdev, + struct pci_dev *pdev, + const struct vfio_device_ops *vfio_pci_ops) { - struct vfio_pci_core_device *vdev; + vfio_init_group_dev(&vdev->vdev, &pdev->dev, vfio_pci_ops); + vdev->pdev = pdev; + vdev->irq_type = VFIO_PCI_NUM_IRQS; + mutex_init(&vdev->igate); + spin_lock_init(&vdev->irqlock); + mutex_init(&vdev->ioeventfds_lock); + INIT_LIST_HEAD(&vdev->dummy_resources_list); + INIT_LIST_HEAD(&vdev->ioeventfds_list); + mutex_init(&vdev->vma_lock); + INIT_LIST_HEAD(&vdev->vma_list); + init_rwsem(&vdev->memory_lock); +} + +void vfio_pci_core_uninit_device(struct vfio_pci_core_device *vdev) +{ + mutex_destroy(&vdev->igate); + 
mutex_destroy(&vdev->ioeventfds_lock); + mutex_destroy(&vdev->vma_lock); + vfio_uninit_group_dev(&vdev->vdev); + kfree(vdev->region); + kfree(vdev->pm_save); +} + +int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev) +{ + struct pci_dev *pdev = vdev->pdev; struct iommu_group *group; int ret; - if (vfio_pci_is_denylisted(pdev)) - return -EINVAL; - if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL) return -EINVAL; @@ -1900,24 +1853,6 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (!group) return -EINVAL; - vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); - if (!vdev) { - ret = -ENOMEM; - goto out_group_put; - } - - vfio_init_group_dev(&vdev->vdev, &pdev->dev, &vfio_pci_ops); - vdev->pdev = pdev; - vdev->irq_type = VFIO_PCI_NUM_IRQS; - mutex_init(&vdev->igate); - spin_lock_init(&vdev->irqlock); - mutex_init(&vdev->ioeventfds_lock); - INIT_LIST_HEAD(&vdev->dummy_resources_list); - INIT_LIST_HEAD(&vdev->ioeventfds_list); - mutex_init(&vdev->vma_lock); - INIT_LIST_HEAD(&vdev->vma_list); - init_rwsem(&vdev->memory_lock); - if (pci_is_root_bus(pdev->bus)) { ret = vfio_assign_device_set(&vdev->vdev, vdev); } else if (!pci_probe_reset_slot(pdev->slot)) { @@ -1931,10 +1866,10 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) } if (ret) - goto out_uninit; + goto out_group_put; ret = vfio_pci_vf_init(vdev); if (ret) - goto out_uninit; + goto out_group_put; ret = vfio_pci_vga_init(vdev); if (ret) goto out_vf; @@ -1966,36 +1901,26 @@ out_power: vfio_pci_set_power_state(vdev, PCI_D0); out_vf: vfio_pci_vf_uninit(vdev); -out_uninit: - vfio_uninit_group_dev(&vdev->vdev); - kfree(vdev->pm_save); - kfree(vdev); out_group_put: vfio_iommu_group_put(group, &pdev->dev); return ret; } -static void vfio_pci_remove(struct pci_dev *pdev) +void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev) { - struct vfio_pci_core_device *vdev = dev_get_drvdata(&pdev->dev); + struct pci_dev *pdev = vdev->pdev; 
pci_disable_sriov(pdev); vfio_unregister_group_dev(&vdev->vdev); vfio_pci_vf_uninit(vdev); - vfio_uninit_group_dev(&vdev->vdev); vfio_pci_vga_uninit(vdev); vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev); if (!disable_idle_d3) vfio_pci_set_power_state(vdev, PCI_D0); - - mutex_destroy(&vdev->ioeventfds_lock); - kfree(vdev->region); - kfree(vdev->pm_save); - kfree(vdev); } static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev, @@ -2022,16 +1947,11 @@ static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev, return PCI_ERS_RESULT_CAN_RECOVER; } -static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn) +int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn) { struct vfio_device *device; int ret = 0; - might_sleep(); - - if (!enable_sriov) - return -ENOENT; - device = vfio_device_get_from_dev(&pdev->dev); if (!device) return -ENODEV; @@ -2046,19 +1966,10 @@ static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn) return ret < 0 ? 
ret : nr_virtfn; } -static const struct pci_error_handlers vfio_err_handlers = { +const struct pci_error_handlers vfio_pci_core_err_handlers = { .error_detected = vfio_pci_aer_err_detected, }; -static struct pci_driver vfio_pci_driver = { - .name = "vfio-pci", - .id_table = NULL, /* only dynamic ids */ - .probe = vfio_pci_probe, - .remove = vfio_pci_remove, - .sriov_configure = vfio_pci_sriov_configure, - .err_handler = &vfio_err_handlers, -}; - static bool vfio_dev_in_groups(struct vfio_pci_core_device *vdev, struct vfio_pci_group_info *groups) { @@ -2239,83 +2150,15 @@ static bool vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set) return true; } -static void __exit vfio_pci_cleanup(void) +/* This will become the __exit function of vfio_pci_core.ko */ +void vfio_pci_core_cleanup(void) { - pci_unregister_driver(&vfio_pci_driver); vfio_pci_uninit_perm_bits(); } -static void __init vfio_pci_fill_ids(void) +/* This will become the __init function of vfio_pci_core.ko */ +int __init vfio_pci_core_init(void) { - char *p, *id; - int rc; - - /* no ids passed actually */ - if (ids[0] == '\0') - return; - - /* add ids specified in the module parameter */ - p = ids; - while ((id = strsep(&p, ","))) { - unsigned int vendor, device, subvendor = PCI_ANY_ID, - subdevice = PCI_ANY_ID, class = 0, class_mask = 0; - int fields; - - if (!strlen(id)) - continue; - - fields = sscanf(id, "%x:%x:%x:%x:%x:%x", - &vendor, &device, &subvendor, &subdevice, - &class, &class_mask); - - if (fields < 2) { - pr_warn("invalid id string \"%s\"\n", id); - continue; - } - - rc = pci_add_dynid(&vfio_pci_driver, vendor, device, - subvendor, subdevice, class, class_mask, 0); - if (rc) - pr_warn("failed to add dynamic id [%04x:%04x[%04x:%04x]] class %#08x/%08x (%d)\n", - vendor, device, subvendor, subdevice, - class, class_mask, rc); - else - pr_info("add [%04x:%04x[%04x:%04x]] class %#08x/%08x\n", - vendor, device, subvendor, subdevice, - class, class_mask); - } -} - -static int __init 
vfio_pci_init(void) -{ - int ret; - /* Allocate shared config space permission data used by all devices */ - ret = vfio_pci_init_perm_bits(); - if (ret) - return ret; - - /* Register and scan for devices */ - ret = pci_register_driver(&vfio_pci_driver); - if (ret) - goto out_driver; - - vfio_pci_fill_ids(); - - if (disable_denylist) - pr_warn("device denylist disabled.\n"); - - return 0; - -out_driver: - vfio_pci_uninit_perm_bits(); - return ret; + return vfio_pci_init_perm_bits(); } - -module_init(vfio_pci_init); -module_exit(vfio_pci_cleanup); - -MODULE_VERSION(DRIVER_VERSION); -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR(DRIVER_AUTHOR); -MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/vfio/pci/vfio_pci_core.h b/drivers/vfio/pci/vfio_pci_core.h index 17ad048752b6..7dbdd4dda5c0 100644 --- a/drivers/vfio/pci/vfio_pci_core.h +++ b/drivers/vfio/pci/vfio_pci_core.h @@ -206,4 +206,27 @@ static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, } #endif +/* Will be exported for vfio pci drivers usage */ +void vfio_pci_core_cleanup(void); +int vfio_pci_core_init(void); +void vfio_pci_core_close_device(struct vfio_device *core_vdev); +int vfio_pci_core_open_device(struct vfio_device *core_vdev); +void vfio_pci_core_init_device(struct vfio_pci_core_device *vdev, + struct pci_dev *pdev, + const struct vfio_device_ops *vfio_pci_ops); +int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev); +void vfio_pci_core_uninit_device(struct vfio_pci_core_device *vdev); +void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev); +int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn); +extern const struct pci_error_handlers vfio_pci_core_err_handlers; +long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd, + unsigned long arg); +ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *buf, + size_t count, loff_t *ppos); +ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const 
char __user *buf, + size_t count, loff_t *ppos); +int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma); +void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count); +int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf); + #endif /* VFIO_PCI_CORE_H */ From 2fb89f56a624fd74e6e15154f3e9fdceca98b784 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 26 Aug 2021 13:39:06 +0300 Subject: [PATCH 31/37] vfio/pci: Move igd initialization to vfio_pci.c igd is related to the vfio_pci pci_driver implementation, move it out of vfio_pci_core.c. This is preparation for splitting vfio_pci.ko into 2 drivers. Signed-off-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20210826103912.128972-8-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 29 +++++++++++++++++++++++- drivers/vfio/pci/vfio_pci_core.c | 39 ++++---------------------------- drivers/vfio/pci/vfio_pci_core.h | 9 +++++++- 3 files changed, 41 insertions(+), 36 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 4e31bd3001ad..2729b777a56d 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -82,9 +82,36 @@ static bool vfio_pci_is_denylisted(struct pci_dev *pdev) return true; } +static int vfio_pci_open_device(struct vfio_device *core_vdev) +{ + struct vfio_pci_core_device *vdev = + container_of(core_vdev, struct vfio_pci_core_device, vdev); + struct pci_dev *pdev = vdev->pdev; + int ret; + + ret = vfio_pci_core_enable(vdev); + if (ret) + return ret; + + if (vfio_pci_is_vga(pdev) && + pdev->vendor == PCI_VENDOR_ID_INTEL && + IS_ENABLED(CONFIG_VFIO_PCI_IGD)) { + ret = vfio_pci_igd_init(vdev); + if (ret && ret != -ENODEV) { + pci_warn(pdev, "Failed to setup Intel IGD regions\n"); + vfio_pci_core_disable(vdev); + return ret; + } + } + + vfio_pci_core_finish_enable(vdev); + + return 0; +} + static const struct 
vfio_device_ops vfio_pci_ops = { .name = "vfio-pci", - .open_device = vfio_pci_core_open_device, + .open_device = vfio_pci_open_device, .close_device = vfio_pci_core_close_device, .ioctl = vfio_pci_core_ioctl, .read = vfio_pci_core_read, diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index c0d71f72d4f1..3b3bf7445367 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -91,11 +91,6 @@ static unsigned int vfio_pci_set_vga_decode(void *opaque, bool single_vga) return decodes; } -static inline bool vfio_pci_is_vga(struct pci_dev *pdev) -{ - return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA; -} - static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev) { struct resource *res; @@ -166,7 +161,6 @@ no_mmap: struct vfio_pci_group_info; static bool vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set); -static void vfio_pci_disable(struct vfio_pci_core_device *vdev); static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, struct vfio_pci_group_info *groups); @@ -252,7 +246,7 @@ int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev, pci_power_t stat return ret; } -static int vfio_pci_enable(struct vfio_pci_core_device *vdev) +int vfio_pci_core_enable(struct vfio_pci_core_device *vdev) { struct pci_dev *pdev = vdev->pdev; int ret; @@ -321,26 +315,11 @@ static int vfio_pci_enable(struct vfio_pci_core_device *vdev) if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev)) vdev->has_vga = true; - if (vfio_pci_is_vga(pdev) && - pdev->vendor == PCI_VENDOR_ID_INTEL && - IS_ENABLED(CONFIG_VFIO_PCI_IGD)) { - ret = vfio_pci_igd_init(vdev); - if (ret && ret != -ENODEV) { - pci_warn(pdev, "Failed to setup Intel IGD regions\n"); - goto disable_exit; - } - } - - vfio_pci_probe_mmaps(vdev); return 0; - -disable_exit: - vfio_pci_disable(vdev); - return ret; } -static void vfio_pci_disable(struct vfio_pci_core_device *vdev) +void vfio_pci_core_disable(struct vfio_pci_core_device *vdev) { 
struct pci_dev *pdev = vdev->pdev; struct vfio_pci_dummy_resource *dummy_res, *tmp; @@ -479,7 +458,7 @@ void vfio_pci_core_close_device(struct vfio_device *core_vdev) vfio_pci_vf_token_user_add(vdev, -1); vfio_spapr_pci_eeh_release(vdev->pdev); - vfio_pci_disable(vdev); + vfio_pci_core_disable(vdev); mutex_lock(&vdev->igate); if (vdev->err_trigger) { @@ -493,19 +472,11 @@ void vfio_pci_core_close_device(struct vfio_device *core_vdev) mutex_unlock(&vdev->igate); } -int vfio_pci_core_open_device(struct vfio_device *core_vdev) +void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev) { - struct vfio_pci_core_device *vdev = - container_of(core_vdev, struct vfio_pci_core_device, vdev); - int ret = 0; - - ret = vfio_pci_enable(vdev); - if (ret) - return ret; - + vfio_pci_probe_mmaps(vdev); vfio_spapr_pci_eeh_open(vdev->pdev); vfio_pci_vf_token_user_add(vdev, 1); - return 0; } static int vfio_pci_get_irq_count(struct vfio_pci_core_device *vdev, int irq_type) diff --git a/drivers/vfio/pci/vfio_pci_core.h b/drivers/vfio/pci/vfio_pci_core.h index 7dbdd4dda5c0..ffaf544f35db 100644 --- a/drivers/vfio/pci/vfio_pci_core.h +++ b/drivers/vfio/pci/vfio_pci_core.h @@ -210,7 +210,6 @@ static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, void vfio_pci_core_cleanup(void); int vfio_pci_core_init(void); void vfio_pci_core_close_device(struct vfio_device *core_vdev); -int vfio_pci_core_open_device(struct vfio_device *core_vdev); void vfio_pci_core_init_device(struct vfio_pci_core_device *vdev, struct pci_dev *pdev, const struct vfio_device_ops *vfio_pci_ops); @@ -228,5 +227,13 @@ ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *bu int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma); void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count); int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf); +int vfio_pci_core_enable(struct vfio_pci_core_device *vdev); +void 
vfio_pci_core_disable(struct vfio_pci_core_device *vdev); +void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev); + +static inline bool vfio_pci_is_vga(struct pci_dev *pdev) +{ + return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA; +} #endif /* VFIO_PCI_CORE_H */ From c61302aa48f7c46b5c9d893109488af951be12e4 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 26 Aug 2021 13:39:07 +0300 Subject: [PATCH 32/37] vfio/pci: Move module parameters to vfio_pci.c This is a preparation before splitting vfio_pci.ko to 2 modules. As module parameters are a kind of uAPI they need to stay on vfio_pci.ko to avoid a user visible impact. For now continue to keep the implementation of these options in vfio_pci_core.c. Arguably they are vfio_pci functionality, but further splitting of vfio_pci_core.c will be better done in another series Signed-off-by: Yishai Hadas Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20210826103912.128972-9-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 23 +++++++++++++++++++++++ drivers/vfio/pci/vfio_pci_core.c | 20 ++++++++------------ drivers/vfio/pci/vfio_pci_core.h | 2 ++ 3 files changed, 33 insertions(+), 12 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 2729b777a56d..163e560c4495 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -34,6 +34,22 @@ static char ids[1024] __initdata; module_param_string(ids, ids, sizeof(ids), 0); MODULE_PARM_DESC(ids, "Initial PCI IDs to add to the vfio driver, format is \"vendor:device[:subvendor[:subdevice[:class[:class_mask]]]]\" and multiple comma separated entries can be specified"); +static bool nointxmask; +module_param_named(nointxmask, nointxmask, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(nointxmask, + "Disable support for PCI 2.3 style INTx masking. 
If this resolves problems for specific devices, report lspci -vvvxxx to linux-pci@vger.kernel.org so the device can be fixed automatically via the broken_intx_masking flag."); + +#ifdef CONFIG_VFIO_PCI_VGA +static bool disable_vga; +module_param(disable_vga, bool, S_IRUGO); +MODULE_PARM_DESC(disable_vga, "Disable VGA resource access through vfio-pci"); +#endif + +static bool disable_idle_d3; +module_param(disable_idle_d3, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(disable_idle_d3, + "Disable using the PCI D3 low power state for idle, unused devices"); + static bool enable_sriov; #ifdef CONFIG_PCI_IOV module_param(enable_sriov, bool, 0644); @@ -215,6 +231,13 @@ static void __init vfio_pci_fill_ids(void) static int __init vfio_pci_init(void) { int ret; + bool is_disable_vga = true; + +#ifdef CONFIG_VFIO_PCI_VGA + is_disable_vga = disable_vga; +#endif + + vfio_pci_core_set_params(nointxmask, is_disable_vga, disable_idle_d3); ret = vfio_pci_core_init(); if (ret) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 3b3bf7445367..65eafaafb2e0 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -28,20 +28,8 @@ #include "vfio_pci_core.h" static bool nointxmask; -module_param_named(nointxmask, nointxmask, bool, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(nointxmask, - "Disable support for PCI 2.3 style INTx masking. 
If this resolves problems for specific devices, report lspci -vvvxxx to linux-pci@vger.kernel.org so the device can be fixed automatically via the broken_intx_masking flag."); - -#ifdef CONFIG_VFIO_PCI_VGA static bool disable_vga; -module_param(disable_vga, bool, S_IRUGO); -MODULE_PARM_DESC(disable_vga, "Disable VGA resource access through vfio-pci"); -#endif - static bool disable_idle_d3; -module_param(disable_idle_d3, bool, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(disable_idle_d3, - "Disable using the PCI D3 low power state for idle, unused devices"); static inline bool vfio_vga_disabled(void) { @@ -2121,6 +2109,14 @@ static bool vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set) return true; } +void vfio_pci_core_set_params(bool is_nointxmask, bool is_disable_vga, + bool is_disable_idle_d3) +{ + nointxmask = is_nointxmask; + disable_vga = is_disable_vga; + disable_idle_d3 = is_disable_idle_d3; +} + /* This will become the __exit function of vfio_pci_core.ko */ void vfio_pci_core_cleanup(void) { diff --git a/drivers/vfio/pci/vfio_pci_core.h b/drivers/vfio/pci/vfio_pci_core.h index ffaf544f35db..7a2da1e14de3 100644 --- a/drivers/vfio/pci/vfio_pci_core.h +++ b/drivers/vfio/pci/vfio_pci_core.h @@ -209,6 +209,8 @@ static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, /* Will be exported for vfio pci drivers usage */ void vfio_pci_core_cleanup(void); int vfio_pci_core_init(void); +void vfio_pci_core_set_params(bool nointxmask, bool is_disable_vga, + bool is_disable_idle_d3); void vfio_pci_core_close_device(struct vfio_device *core_vdev); void vfio_pci_core_init_device(struct vfio_pci_core_device *vdev, struct pci_dev *pdev, From 343b7258687ecfbb363bfda8833a7cf641aac524 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 26 Aug 2021 13:39:08 +0300 Subject: [PATCH 33/37] PCI: Add 'override_only' field to struct pci_device_id Add 'override_only' field to struct pci_device_id to be used as part of pci_match_device(). 
When set, a driver only matches the entry when dev->driver_override is set to that driver. In addition, add a helper macro named 'PCI_DEVICE_DRIVER_OVERRIDE' to enable setting some data on it. Next patch from this series will use the above functionality. Signed-off-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20210826103912.128972-10-yishaih@nvidia.com Signed-off-by: Alex Williamson --- Documentation/PCI/pci.rst | 1 + drivers/pci/pci-driver.c | 28 +++++++++++++++++++++------- include/linux/mod_devicetable.h | 2 ++ include/linux/pci.h | 15 +++++++++++++++ 4 files changed, 39 insertions(+), 7 deletions(-) diff --git a/Documentation/PCI/pci.rst b/Documentation/PCI/pci.rst index fa651e25d98c..87c6f4a6ca32 100644 --- a/Documentation/PCI/pci.rst +++ b/Documentation/PCI/pci.rst @@ -103,6 +103,7 @@ need pass only as many optional fields as necessary: - subvendor and subdevice fields default to PCI_ANY_ID (FFFFFFFF) - class and classmask fields default to 0 - driver_data defaults to 0UL. + - override_only field defaults to 0. Note that driver_data must match the value used by any of the pci_device_id entries defined in the driver. 
This makes the driver_data field mandatory diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 3a72352aa5cf..123c590ebe1d 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -136,7 +136,7 @@ static const struct pci_device_id *pci_match_device(struct pci_driver *drv, struct pci_dev *dev) { struct pci_dynid *dynid; - const struct pci_device_id *found_id = NULL; + const struct pci_device_id *found_id = NULL, *ids; /* When driver_override is set, only bind to the matching driver */ if (dev->driver_override && strcmp(dev->driver_override, drv->name)) @@ -152,14 +152,28 @@ static const struct pci_device_id *pci_match_device(struct pci_driver *drv, } spin_unlock(&drv->dynids.lock); - if (!found_id) - found_id = pci_match_id(drv->id_table, dev); + if (found_id) + return found_id; + + for (ids = drv->id_table; (found_id = pci_match_id(ids, dev)); + ids = found_id + 1) { + /* + * The match table is split based on driver_override. + * In case override_only was set, enforce driver_override + * matching. + */ + if (found_id->override_only) { + if (dev->driver_override) + return found_id; + } else { + return found_id; + } + } /* driver_override will always match, send a dummy id */ - if (!found_id && dev->driver_override) - found_id = &pci_device_id_any; - - return found_id; + if (dev->driver_override) + return &pci_device_id_any; + return NULL; } /** diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 8e291cfdaf06..2e3ba6d9ece0 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -34,12 +34,14 @@ typedef unsigned long kernel_ulong_t; * Best practice is to use driver_data as an index * into a static list of equivalent device types, * instead of using it as a pointer. + * @override_only: Match only when dev->driver_override is this driver. 
*/ struct pci_device_id { __u32 vendor, device; /* Vendor and device ID or PCI_ANY_ID*/ __u32 subvendor, subdevice; /* Subsystem ID's or PCI_ANY_ID */ __u32 class, class_mask; /* (class,subclass,prog-if) triplet */ kernel_ulong_t driver_data; /* Data private to the driver */ + __u32 override_only; }; diff --git a/include/linux/pci.h b/include/linux/pci.h index 540b377ca8f6..0506b1a8c921 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -901,6 +901,21 @@ struct pci_driver { .vendor = (vend), .device = (dev), \ .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID +/** + * PCI_DEVICE_DRIVER_OVERRIDE - macro used to describe a PCI device with + * override_only flags. + * @vend: the 16 bit PCI Vendor ID + * @dev: the 16 bit PCI Device ID + * @driver_override: the 32 bit value stored in the override_only field + * + * This macro is used to create a struct pci_device_id that matches only a + * driver_override device. The subvendor and subdevice fields will be set to + * PCI_ANY_ID. + */ +#define PCI_DEVICE_DRIVER_OVERRIDE(vend, dev, driver_override) \ + .vendor = (vend), .device = (dev), .subvendor = PCI_ANY_ID, \ + .subdevice = PCI_ANY_ID, .override_only = (driver_override) + /** * PCI_DEVICE_SUB - macro used to describe a specific PCI device with subsystem * @vend: the 16 bit PCI Vendor ID From cc6711b0bf36de068b10490198d05ac168377989 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 26 Aug 2021 13:39:09 +0300 Subject: [PATCH 34/37] PCI / VFIO: Add 'override_only' support for VFIO PCI sub system Expose an 'override_only' helper macro (i.e. PCI_DRIVER_OVERRIDE_DEVICE_VFIO) for VFIO PCI sub system and add the required code to prefix its matching entries with "vfio_" in modules.alias file. It allows VFIO device drivers to include match entries in the modules.alias file produced by kbuild that are not used for normal driver autoprobing and module autoloading. 
Drivers using these match entries can be connected to the PCI device manually, by userspace, using the existing driver_override sysfs. For example the resulting modules.alias may have: alias pci:v000015B3d00001021sv*sd*bc*sc*i* mlx5_core alias vfio_pci:v000015B3d00001021sv*sd*bc*sc*i* mlx5_vfio_pci alias vfio_pci:v*d*sv*sd*bc*sc*i* vfio_pci In this example mlx5_core and mlx5_vfio_pci match the same PCI device. The kernel will autoload and autobind to mlx5_core but the kernel and udev mechanisms will ignore mlx5_vfio_pci. When userspace wants to change a device to the VFIO subsystem it can implement a generic algorithm: 1) Identify the sysfs path to the device: /sys/devices/pci0000:00/0000:00:01.0/0000:01:00.0 2) Get the modalias string from the kernel: $ cat /sys/bus/pci/devices/0000:01:00.0/modalias pci:v000015B3d00001021sv000015B3sd00000001bc02sc00i00 3) Prefix it with vfio_: vfio_pci:v000015B3d00001021sv000015B3sd00000001bc02sc00i00 4) Search modules.alias for the above string and select the entry that has the fewest *'s: alias vfio_pci:v000015B3d00001021sv*sd*bc*sc*i* mlx5_vfio_pci 5) modprobe the matched module name: $ modprobe mlx5_vfio_pci 6) echo the matched module name to driver_override: $ echo mlx5_vfio_pci > /sys/bus/pci/devices/0000:01:00.0/driver_override 7) unbind the device from its original driver: $ echo 0000:01:00.0 > /sys/bus/pci/devices/0000:01:00.0/driver/unbind 8) probe PCI drivers (or explicitly bind to mlx5_vfio_pci): $ echo 0000:01:00.0 > /sys/bus/pci/drivers_probe The algorithm is independent of bus type. In the future, other buses with VFIO device drivers, like platform and ACPI, can use this algorithm as well. This patch is the infrastructure to provide the information in the modules.alias to userspace. Convert the only VFIO pci_driver which results in one new line in the modules.alias: alias vfio_pci:v*d*sv*sd*bc*sc*i* vfio_pci Later series introduce additional HW specific VFIO PCI drivers, such as mlx5_vfio_pci. 
Signed-off-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe Acked-by: Bjorn Helgaas # for pci.h Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20210826103912.128972-11-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 9 ++++++++- include/linux/mod_devicetable.h | 4 ++++ include/linux/pci.h | 14 ++++++++++++++ scripts/mod/devicetable-offsets.c | 1 + scripts/mod/file2alias.c | 17 +++++++++++++++-- 5 files changed, 42 insertions(+), 3 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 163e560c4495..85fd638a5955 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -178,9 +178,16 @@ static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn) return vfio_pci_core_sriov_configure(pdev, nr_virtfn); } +static const struct pci_device_id vfio_pci_table[] = { + { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_ANY_ID, PCI_ANY_ID) }, /* match all by default */ + {} +}; + +MODULE_DEVICE_TABLE(pci, vfio_pci_table); + static struct pci_driver vfio_pci_driver = { .name = "vfio-pci", - .id_table = NULL, /* only dynamic ids */ + .id_table = vfio_pci_table, .probe = vfio_pci_probe, .remove = vfio_pci_remove, .sriov_configure = vfio_pci_sriov_configure, diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 2e3ba6d9ece0..ae2e75d15b21 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -16,6 +16,10 @@ typedef unsigned long kernel_ulong_t; #define PCI_ANY_ID (~0) +enum { + PCI_ID_F_VFIO_DRIVER_OVERRIDE = 1, +}; + /** * struct pci_device_id - PCI device ID structure * @vendor: Vendor ID to match (or PCI_ANY_ID) diff --git a/include/linux/pci.h b/include/linux/pci.h index 0506b1a8c921..527a1dfd1d06 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -916,6 +916,20 @@ struct pci_driver { .vendor = (vend), .device = (dev), .subvendor = PCI_ANY_ID, \ .subdevice = PCI_ANY_ID, .override_only = (driver_override) 
+/** + * PCI_DRIVER_OVERRIDE_DEVICE_VFIO - macro used to describe a VFIO + * "driver_override" PCI device. + * @vend: the 16 bit PCI Vendor ID + * @dev: the 16 bit PCI Device ID + * + * This macro is used to create a struct pci_device_id that matches a + * specific device. The subvendor and subdevice fields will be set to + * PCI_ANY_ID and the override_only field will be set to + * PCI_ID_F_VFIO_DRIVER_OVERRIDE. + */ +#define PCI_DRIVER_OVERRIDE_DEVICE_VFIO(vend, dev) \ + PCI_DEVICE_DRIVER_OVERRIDE(vend, dev, PCI_ID_F_VFIO_DRIVER_OVERRIDE) + /** * PCI_DEVICE_SUB - macro used to describe a specific PCI device with subsystem * @vend: the 16 bit PCI Vendor ID diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c index 9bb6c7edccc4..cc3625617a0e 100644 --- a/scripts/mod/devicetable-offsets.c +++ b/scripts/mod/devicetable-offsets.c @@ -42,6 +42,7 @@ int main(void) DEVID_FIELD(pci_device_id, subdevice); DEVID_FIELD(pci_device_id, class); DEVID_FIELD(pci_device_id, class_mask); + DEVID_FIELD(pci_device_id, override_only); DEVID(ccw_device_id); DEVID_FIELD(ccw_device_id, match_flags); diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 7c97fa8e36bc..49aba862073e 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -426,7 +426,7 @@ static int do_ieee1394_entry(const char *filename, return 1; } -/* Looks like: pci:vNdNsvNsdNbcNscNiN. */ +/* Looks like: pci:vNdNsvNsdNbcNscNiN or <prefix>_pci:vNdNsvNsdNbcNscNiN. 
*/ static int do_pci_entry(const char *filename, void *symval, char *alias) { @@ -440,8 +440,21 @@ static int do_pci_entry(const char *filename, DEF_FIELD(symval, pci_device_id, subdevice); DEF_FIELD(symval, pci_device_id, class); DEF_FIELD(symval, pci_device_id, class_mask); + DEF_FIELD(symval, pci_device_id, override_only); + + switch (override_only) { + case 0: + strcpy(alias, "pci:"); + break; + case PCI_ID_F_VFIO_DRIVER_OVERRIDE: + strcpy(alias, "vfio_pci:"); + break; + default: + warn("Unknown PCI driver_override alias %08X\n", + override_only); + return 0; + } - strcpy(alias, "pci:"); ADD(alias, "v", vendor != PCI_ANY_ID, vendor); ADD(alias, "d", device != PCI_ANY_ID, device); ADD(alias, "sv", subvendor != PCI_ANY_ID, subvendor); From ca4ddaac7fa710a250bbd650cc719425bec973a0 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 26 Aug 2021 13:39:10 +0300 Subject: [PATCH 35/37] vfio: Use select for eventfd If VFIO_VIRQFD is required then turn on eventfd automatically. The majority of kconfig users of the EVENTFD use select not depends on. 
Signed-off-by: Jason Gunthorpe Reviewed-by: Christoph Hellwig Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20210826103912.128972-12-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/Kconfig | 3 ++- drivers/vfio/fsl-mc/Kconfig | 3 ++- drivers/vfio/pci/Kconfig | 2 +- drivers/vfio/platform/Kconfig | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index e44bf736e2b2..698ca35b3f03 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -16,7 +16,8 @@ config VFIO_SPAPR_EEH config VFIO_VIRQFD tristate - depends on VFIO && EVENTFD + depends on VFIO + select EVENTFD default n menuconfig VFIO diff --git a/drivers/vfio/fsl-mc/Kconfig b/drivers/vfio/fsl-mc/Kconfig index b1a527d6b6f2..6df66813c882 100644 --- a/drivers/vfio/fsl-mc/Kconfig +++ b/drivers/vfio/fsl-mc/Kconfig @@ -1,6 +1,7 @@ config VFIO_FSL_MC tristate "VFIO support for QorIQ DPAA2 fsl-mc bus devices" - depends on VFIO && FSL_MC_BUS && EVENTFD + depends on VFIO && FSL_MC_BUS + select EVENTFD help Driver to enable support for the VFIO QorIQ DPAA2 fsl-mc (Management Complex) devices. 
This is required to passthrough diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index 5e2e1b9a9fd3..d208a95a2767 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config VFIO_PCI tristate "VFIO support for PCI devices" - depends on VFIO && PCI && EVENTFD + depends on VFIO && PCI depends on MMU select VFIO_VIRQFD select IRQ_BYPASS_MANAGER diff --git a/drivers/vfio/platform/Kconfig b/drivers/vfio/platform/Kconfig index ab341108a0be..7f78eb96a5d5 100644 --- a/drivers/vfio/platform/Kconfig +++ b/drivers/vfio/platform/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config VFIO_PLATFORM tristate "VFIO support for platform devices" - depends on VFIO && EVENTFD && (ARM || ARM64 || COMPILE_TEST) + depends on VFIO && (ARM || ARM64 || COMPILE_TEST) select VFIO_VIRQFD help Support for platform devices with VFIO. This is required to make From 85c94dcffcb775bafffd6e966db49253e1b789d9 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 26 Aug 2021 13:39:11 +0300 Subject: [PATCH 36/37] vfio: Use kconfig if XX/endif blocks instead of repeating 'depends on' This results in less kconfig wordage and a simpler understanding of the required "depends on" to create the menu structure. The next patch increases the nesting level a lot so this is a nice preparatory simplification. 
Signed-off-by: Jason Gunthorpe Reviewed-by: Christoph Hellwig Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20210826103912.128972-13-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/Kconfig | 44 ++++++++++++++--------------- drivers/vfio/fsl-mc/Kconfig | 2 +- drivers/vfio/mdev/Kconfig | 1 - drivers/vfio/pci/Kconfig | 11 ++++---- drivers/vfio/platform/Kconfig | 6 ++-- drivers/vfio/platform/reset/Kconfig | 4 +-- 6 files changed, 34 insertions(+), 34 deletions(-) diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 698ca35b3f03..6130d00252ed 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -1,25 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -config VFIO_IOMMU_TYPE1 - tristate - depends on VFIO - default n - -config VFIO_IOMMU_SPAPR_TCE - tristate - depends on VFIO && SPAPR_TCE_IOMMU - default VFIO - -config VFIO_SPAPR_EEH - tristate - depends on EEH && VFIO_IOMMU_SPAPR_TCE - default VFIO - -config VFIO_VIRQFD - tristate - depends on VFIO - select EVENTFD - default n - menuconfig VFIO tristate "VFIO Non-Privileged userspace driver framework" select IOMMU_API @@ -30,9 +9,28 @@ menuconfig VFIO If you don't know what to do here, say N. +if VFIO +config VFIO_IOMMU_TYPE1 + tristate + default n + +config VFIO_IOMMU_SPAPR_TCE + tristate + depends on SPAPR_TCE_IOMMU + default VFIO + +config VFIO_SPAPR_EEH + tristate + depends on EEH && VFIO_IOMMU_SPAPR_TCE + default VFIO + +config VFIO_VIRQFD + tristate + select EVENTFD + default n + config VFIO_NOIOMMU bool "VFIO No-IOMMU support" - depends on VFIO help VFIO is built on the ability to isolate devices using the IOMMU. 
Only with an IOMMU can userspace access to DMA capable devices be @@ -49,4 +47,6 @@ source "drivers/vfio/pci/Kconfig" source "drivers/vfio/platform/Kconfig" source "drivers/vfio/mdev/Kconfig" source "drivers/vfio/fsl-mc/Kconfig" +endif + source "virt/lib/Kconfig" diff --git a/drivers/vfio/fsl-mc/Kconfig b/drivers/vfio/fsl-mc/Kconfig index 6df66813c882..597d338c5c8a 100644 --- a/drivers/vfio/fsl-mc/Kconfig +++ b/drivers/vfio/fsl-mc/Kconfig @@ -1,6 +1,6 @@ config VFIO_FSL_MC tristate "VFIO support for QorIQ DPAA2 fsl-mc bus devices" - depends on VFIO && FSL_MC_BUS + depends on FSL_MC_BUS select EVENTFD help Driver to enable support for the VFIO QorIQ DPAA2 fsl-mc diff --git a/drivers/vfio/mdev/Kconfig b/drivers/vfio/mdev/Kconfig index 763c877a1318..646dbed44eb2 100644 --- a/drivers/vfio/mdev/Kconfig +++ b/drivers/vfio/mdev/Kconfig @@ -2,7 +2,6 @@ config VFIO_MDEV tristate "Mediated device driver framework" - depends on VFIO default n help Provides a framework to virtualize devices. diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index d208a95a2767..afdab7d71e98 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config VFIO_PCI tristate "VFIO support for PCI devices" - depends on VFIO && PCI + depends on PCI depends on MMU select VFIO_VIRQFD select IRQ_BYPASS_MANAGER @@ -11,9 +11,10 @@ config VFIO_PCI If you don't know what to do here, say N. +if VFIO_PCI config VFIO_PCI_VGA bool "VFIO PCI support for VGA devices" - depends on VFIO_PCI && X86 && VGA_ARB + depends on X86 && VGA_ARB help Support for VGA extension to VFIO PCI. This exposes an additional region on VGA devices for accessing legacy VGA addresses used by @@ -22,16 +23,14 @@ config VFIO_PCI_VGA If you don't know what to do here, say N. 
config VFIO_PCI_MMAP - depends on VFIO_PCI def_bool y if !S390 config VFIO_PCI_INTX - depends on VFIO_PCI def_bool y if !S390 config VFIO_PCI_IGD bool "VFIO PCI extensions for Intel graphics (GVT-d)" - depends on VFIO_PCI && X86 + depends on X86 default y help Support for Intel IGD specific extensions to enable direct @@ -40,3 +39,5 @@ config VFIO_PCI_IGD and LPC bridge config space. To enable Intel IGD assignment through vfio-pci, say Y. + +endif diff --git a/drivers/vfio/platform/Kconfig b/drivers/vfio/platform/Kconfig index 7f78eb96a5d5..331a5920f5ab 100644 --- a/drivers/vfio/platform/Kconfig +++ b/drivers/vfio/platform/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config VFIO_PLATFORM tristate "VFIO support for platform devices" - depends on VFIO && (ARM || ARM64 || COMPILE_TEST) + depends on ARM || ARM64 || COMPILE_TEST select VFIO_VIRQFD help Support for platform devices with VFIO. This is required to make @@ -10,9 +10,10 @@ config VFIO_PLATFORM If you don't know what to do here, say N. +if VFIO_PLATFORM config VFIO_AMBA tristate "VFIO support for AMBA devices" - depends on VFIO_PLATFORM && (ARM_AMBA || COMPILE_TEST) + depends on ARM_AMBA || COMPILE_TEST help Support for ARM AMBA devices with VFIO. This is required to make use of ARM AMBA devices present on the system using the VFIO @@ -21,3 +22,4 @@ config VFIO_AMBA If you don't know what to do here, say N. 
source "drivers/vfio/platform/reset/Kconfig" +endif diff --git a/drivers/vfio/platform/reset/Kconfig b/drivers/vfio/platform/reset/Kconfig index 1edbe9ee7356..12f5f3d80387 100644 --- a/drivers/vfio/platform/reset/Kconfig +++ b/drivers/vfio/platform/reset/Kconfig @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only config VFIO_PLATFORM_CALXEDAXGMAC_RESET tristate "VFIO support for calxeda xgmac reset" - depends on VFIO_PLATFORM help Enables the VFIO platform driver to handle reset for Calxeda xgmac @@ -9,7 +8,6 @@ config VFIO_PLATFORM_CALXEDAXGMAC_RESET config VFIO_PLATFORM_AMDXGBE_RESET tristate "VFIO support for AMD XGBE reset" - depends on VFIO_PLATFORM help Enables the VFIO platform driver to handle reset for AMD XGBE @@ -17,7 +15,7 @@ config VFIO_PLATFORM_AMDXGBE_RESET config VFIO_PLATFORM_BCMFLEXRM_RESET tristate "VFIO support for Broadcom FlexRM reset" - depends on VFIO_PLATFORM && (ARCH_BCM_IPROC || COMPILE_TEST) + depends on ARCH_BCM_IPROC || COMPILE_TEST default ARCH_BCM_IPROC help Enables the VFIO platform driver to handle reset for Broadcom FlexRM From 7fa005caa35ed92563b9e9d88d319b2623763a77 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 26 Aug 2021 13:39:12 +0300 Subject: [PATCH 37/37] vfio/pci: Introduce vfio_pci_core.ko Now that vfio_pci has been split into two source modules, one focusing on the "struct pci_driver" (vfio_pci.c) and a toolbox library of code (vfio_pci_core.c), complete the split and move them into two different kernel modules. As before vfio_pci.ko continues to present the same interface under sysfs and this change will have no functional impact. Splitting into another module and adding exports allows creating new HW specific VFIO PCI drivers that can implement device specific functionality, such as VFIO migration interfaces or specialized device requirements. 
Signed-off-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe Reviewed-by: Christoph Hellwig Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20210826103912.128972-14-yishaih@nvidia.com Signed-off-by: Alex Williamson --- MAINTAINERS | 1 + drivers/vfio/pci/Kconfig | 33 +++++++++------- drivers/vfio/pci/Makefile | 8 ++-- drivers/vfio/pci/vfio_pci.c | 14 ++----- drivers/vfio/pci/vfio_pci_config.c | 2 +- drivers/vfio/pci/vfio_pci_core.c | 39 ++++++++++++++++--- drivers/vfio/pci/vfio_pci_igd.c | 2 +- drivers/vfio/pci/vfio_pci_intrs.c | 2 +- drivers/vfio/pci/vfio_pci_rdwr.c | 2 +- drivers/vfio/pci/vfio_pci_zdev.c | 2 +- .../pci => include/linux}/vfio_pci_core.h | 2 - 11 files changed, 65 insertions(+), 42 deletions(-) rename {drivers/vfio/pci => include/linux}/vfio_pci_core.h (99%) diff --git a/MAINTAINERS b/MAINTAINERS index c9467d2839f5..7f0fcaa8ee67 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19466,6 +19466,7 @@ T: git git://github.com/awilliam/linux-vfio.git F: Documentation/driver-api/vfio.rst F: drivers/vfio/ F: include/linux/vfio.h +F: include/linux/vfio_pci_core.h F: include/uapi/linux/vfio.h VFIO FSL-MC DRIVER diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index afdab7d71e98..860424ccda1b 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -1,19 +1,28 @@ # SPDX-License-Identifier: GPL-2.0-only -config VFIO_PCI - tristate "VFIO support for PCI devices" - depends on PCI - depends on MMU +if PCI && MMU +config VFIO_PCI_CORE + tristate select VFIO_VIRQFD select IRQ_BYPASS_MANAGER + +config VFIO_PCI_MMAP + def_bool y if !S390 + +config VFIO_PCI_INTX + def_bool y if !S390 + +config VFIO_PCI + tristate "Generic VFIO support for any PCI device" + select VFIO_PCI_CORE help - Support for the PCI VFIO bus driver. This is required to make - use of PCI drivers using the VFIO framework. + Support for the generic PCI VFIO bus driver which can connect any + PCI device to the VFIO framework. 
If you don't know what to do here, say N. if VFIO_PCI config VFIO_PCI_VGA - bool "VFIO PCI support for VGA devices" + bool "Generic VFIO PCI support for VGA devices" depends on X86 && VGA_ARB help Support for VGA extension to VFIO PCI. This exposes an additional @@ -22,14 +31,8 @@ config VFIO_PCI_VGA If you don't know what to do here, say N. -config VFIO_PCI_MMAP - def_bool y if !S390 - -config VFIO_PCI_INTX - def_bool y if !S390 - config VFIO_PCI_IGD - bool "VFIO PCI extensions for Intel graphics (GVT-d)" + bool "Generic VFIO PCI extensions for Intel graphics (GVT-d)" depends on X86 default y help @@ -39,5 +42,5 @@ config VFIO_PCI_IGD and LPC bridge config space. To enable Intel IGD assignment through vfio-pci, say Y. - +endif endif diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile index 8aa517b4b671..349d68d242b4 100644 --- a/drivers/vfio/pci/Makefile +++ b/drivers/vfio/pci/Makefile @@ -1,7 +1,9 @@ # SPDX-License-Identifier: GPL-2.0-only -vfio-pci-y := vfio_pci.o vfio_pci_core.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o -vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o -vfio-pci-$(CONFIG_S390) += vfio_pci_zdev.o +vfio-pci-core-y := vfio_pci_core.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o +vfio-pci-core-$(CONFIG_S390) += vfio_pci_zdev.o +obj-$(CONFIG_VFIO_PCI_CORE) += vfio-pci-core.o +vfio-pci-y := vfio_pci.o +vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o obj-$(CONFIG_VFIO_PCI) += vfio-pci.o diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 85fd638a5955..a5ce92beb655 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -25,7 +25,7 @@ #include #include -#include "vfio_pci_core.h" +#include #define DRIVER_AUTHOR "Alex Williamson " #define DRIVER_DESC "VFIO PCI - User Level meta-driver" @@ -153,6 +153,7 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) ret = vfio_pci_core_register_device(vdev); if (ret) goto out_free; + 
dev_set_drvdata(&pdev->dev, vdev); return 0; out_free: @@ -246,14 +247,10 @@ static int __init vfio_pci_init(void) vfio_pci_core_set_params(nointxmask, is_disable_vga, disable_idle_d3); - ret = vfio_pci_core_init(); - if (ret) - return ret; - /* Register and scan for devices */ ret = pci_register_driver(&vfio_pci_driver); if (ret) - goto out; + return ret; vfio_pci_fill_ids(); @@ -261,17 +258,12 @@ static int __init vfio_pci_init(void) pr_warn("device denylist disabled.\n"); return 0; - -out: - vfio_pci_core_cleanup(); - return ret; } module_init(vfio_pci_init); static void __exit vfio_pci_cleanup(void) { pci_unregister_driver(&vfio_pci_driver); - vfio_pci_core_cleanup(); } module_exit(vfio_pci_cleanup); diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 1f034f768a27..6e58b4bf7a60 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -26,7 +26,7 @@ #include #include -#include "vfio_pci_core.h" +#include /* Fake capability ID for standard config space */ #define PCI_CAP_ID_BASIC 0 diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 65eafaafb2e0..675616e08897 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -8,6 +8,8 @@ * Author: Tom Lyon, pugs@cisco.com */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -25,7 +27,10 @@ #include #include -#include "vfio_pci_core.h" +#include + +#define DRIVER_AUTHOR "Alex Williamson " +#define DRIVER_DESC "core driver for VFIO based PCI devices" static bool nointxmask; static bool disable_vga; @@ -306,6 +311,7 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev) return 0; } +EXPORT_SYMBOL_GPL(vfio_pci_core_enable); void vfio_pci_core_disable(struct vfio_pci_core_device *vdev) { @@ -403,6 +409,7 @@ out: if (!vfio_pci_dev_set_try_reset(vdev->vdev.dev_set) && !disable_idle_d3) vfio_pci_set_power_state(vdev, PCI_D3hot); } 
+EXPORT_SYMBOL_GPL(vfio_pci_core_disable); static struct vfio_pci_core_device *get_pf_vdev(struct vfio_pci_core_device *vdev) { @@ -459,6 +466,7 @@ void vfio_pci_core_close_device(struct vfio_device *core_vdev) } mutex_unlock(&vdev->igate); } +EXPORT_SYMBOL_GPL(vfio_pci_core_close_device); void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev) { @@ -466,6 +474,7 @@ void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev) vfio_spapr_pci_eeh_open(vdev->pdev); vfio_pci_vf_token_user_add(vdev, 1); } +EXPORT_SYMBOL_GPL(vfio_pci_core_finish_enable); static int vfio_pci_get_irq_count(struct vfio_pci_core_device *vdev, int irq_type) { @@ -624,6 +633,7 @@ int vfio_pci_register_dev_region(struct vfio_pci_core_device *vdev, return 0; } +EXPORT_SYMBOL_GPL(vfio_pci_register_dev_region); long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd, unsigned long arg) @@ -1168,6 +1178,7 @@ hot_reset_release: return -ENOTTY; } +EXPORT_SYMBOL_GPL(vfio_pci_core_ioctl); static ssize_t vfio_pci_rw(struct vfio_pci_core_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite) @@ -1211,6 +1222,7 @@ ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *buf, return vfio_pci_rw(vdev, buf, count, ppos, false); } +EXPORT_SYMBOL_GPL(vfio_pci_core_read); ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *buf, size_t count, loff_t *ppos) @@ -1223,6 +1235,7 @@ ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *bu return vfio_pci_rw(vdev, (char __user *)buf, count, ppos, true); } +EXPORT_SYMBOL_GPL(vfio_pci_core_write); /* Return 1 on zap and vma_lock acquired, 0 on contention (only with @try) */ static int vfio_pci_zap_and_vma_lock(struct vfio_pci_core_device *vdev, bool try) @@ -1501,6 +1514,7 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma return 0; } +EXPORT_SYMBOL_GPL(vfio_pci_core_mmap); void vfio_pci_core_request(struct vfio_device 
*core_vdev, unsigned int count) { @@ -1523,6 +1537,7 @@ void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count) mutex_unlock(&vdev->igate); } +EXPORT_SYMBOL_GPL(vfio_pci_core_request); static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev, bool vf_token, uuid_t *uuid) @@ -1667,6 +1682,7 @@ int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf) return 1; /* Match */ } +EXPORT_SYMBOL_GPL(vfio_pci_core_match); static int vfio_pci_bus_notifier(struct notifier_block *nb, unsigned long action, void *data) @@ -1775,6 +1791,7 @@ void vfio_pci_core_init_device(struct vfio_pci_core_device *vdev, INIT_LIST_HEAD(&vdev->vma_list); init_rwsem(&vdev->memory_lock); } +EXPORT_SYMBOL_GPL(vfio_pci_core_init_device); void vfio_pci_core_uninit_device(struct vfio_pci_core_device *vdev) { @@ -1785,6 +1802,7 @@ void vfio_pci_core_uninit_device(struct vfio_pci_core_device *vdev) kfree(vdev->region); kfree(vdev->pm_save); } +EXPORT_SYMBOL_GPL(vfio_pci_core_uninit_device); int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev) { @@ -1852,7 +1870,6 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev) ret = vfio_register_group_dev(&vdev->vdev); if (ret) goto out_power; - dev_set_drvdata(&pdev->dev, vdev); return 0; out_power: @@ -1864,6 +1881,7 @@ out_group_put: vfio_iommu_group_put(group, &pdev->dev); return ret; } +EXPORT_SYMBOL_GPL(vfio_pci_core_register_device); void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev) { @@ -1881,6 +1899,7 @@ void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev) if (!disable_idle_d3) vfio_pci_set_power_state(vdev, PCI_D0); } +EXPORT_SYMBOL_GPL(vfio_pci_core_unregister_device); static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev, pci_channel_state_t state) @@ -1924,10 +1943,12 @@ int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn) return ret < 0 ? 
ret : nr_virtfn; } +EXPORT_SYMBOL_GPL(vfio_pci_core_sriov_configure); const struct pci_error_handlers vfio_pci_core_err_handlers = { .error_detected = vfio_pci_aer_err_detected, }; +EXPORT_SYMBOL_GPL(vfio_pci_core_err_handlers); static bool vfio_dev_in_groups(struct vfio_pci_core_device *vdev, struct vfio_pci_group_info *groups) @@ -2116,16 +2137,22 @@ void vfio_pci_core_set_params(bool is_nointxmask, bool is_disable_vga, disable_vga = is_disable_vga; disable_idle_d3 = is_disable_idle_d3; } +EXPORT_SYMBOL_GPL(vfio_pci_core_set_params); -/* This will become the __exit function of vfio_pci_core.ko */ -void vfio_pci_core_cleanup(void) +static void vfio_pci_core_cleanup(void) { vfio_pci_uninit_perm_bits(); } -/* This will become the __init function of vfio_pci_core.ko */ -int __init vfio_pci_core_init(void) +static int __init vfio_pci_core_init(void) { /* Allocate shared config space permission data used by all devices */ return vfio_pci_init_perm_bits(); } + +module_init(vfio_pci_core_init); +module_exit(vfio_pci_core_cleanup); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c index a324ca7e6b5a..7ca4109bba48 100644 --- a/drivers/vfio/pci/vfio_pci_igd.c +++ b/drivers/vfio/pci/vfio_pci_igd.c @@ -15,7 +15,7 @@ #include #include -#include "vfio_pci_core.h" +#include #define OPREGION_SIGNATURE "IntelGraphicsMem" #define OPREGION_SIZE (8 * 1024) diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 945ddbdf4d11..6069a11fb51a 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -20,7 +20,7 @@ #include #include -#include "vfio_pci_core.h" +#include /* * INTx diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index 8fff4689dd44..57d3b2cbbd8e 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -17,7 +17,7 @@ #include 
#include -#include "vfio_pci_core.h" +#include <linux/vfio_pci_core.h> #ifdef __LITTLE_ENDIAN #define vfio_ioread64 ioread64 diff --git a/drivers/vfio/pci/vfio_pci_zdev.c b/drivers/vfio/pci/vfio_pci_zdev.c index 2ffbdc11f089..fe4def9ffffb 100644 --- a/drivers/vfio/pci/vfio_pci_zdev.c +++ b/drivers/vfio/pci/vfio_pci_zdev.c @@ -19,7 +19,7 @@ #include #include -#include "vfio_pci_core.h" +#include <linux/vfio_pci_core.h> /* * Add the Base PCI Function information to the device info region. diff --git a/drivers/vfio/pci/vfio_pci_core.h b/include/linux/vfio_pci_core.h similarity index 99% rename from drivers/vfio/pci/vfio_pci_core.h rename to include/linux/vfio_pci_core.h index 7a2da1e14de3..ef9a44b6cf5d 100644 --- a/drivers/vfio/pci/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -207,8 +207,6 @@ static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, #endif /* Will be exported for vfio pci drivers usage */ -void vfio_pci_core_cleanup(void); -int vfio_pci_core_init(void); void vfio_pci_core_set_params(bool nointxmask, bool is_disable_vga, bool is_disable_idle_d3); void vfio_pci_core_close_device(struct vfio_device *core_vdev);