From aa7528fe3576d11f4a10237178a723a1f080a547 Mon Sep 17 00:00:00 2001 From: Jagannathan Raman Date: Tue, 6 Mar 2018 17:39:41 -0500 Subject: [PATCH 1/7] iommu/vt-d: Fix usage of force parameter in intel_ir_reconfigure_irte() It was noticed that the IRTE configured for guest OS kernel was over-written while the guest was running. As a result, vt-d Posted Interrupts configured for the guest are not being delivered directly, and instead bounce off the host. Every interrupt delivery takes a VM Exit. It was noticed that the following stack is doing the over-write: [ 147.463177] modify_irte+0x171/0x1f0 [ 147.463405] intel_ir_set_affinity+0x5c/0x80 [ 147.463641] msi_domain_set_affinity+0x32/0x90 [ 147.463881] irq_do_set_affinity+0x37/0xd0 [ 147.464125] irq_set_affinity_locked+0x9d/0xb0 [ 147.464374] __irq_set_affinity+0x42/0x70 [ 147.464627] write_irq_affinity.isra.5+0xe1/0x110 [ 147.464895] proc_reg_write+0x38/0x70 [ 147.465150] __vfs_write+0x36/0x180 [ 147.465408] ? handle_mm_fault+0xdf/0x200 [ 147.465671] ? _cond_resched+0x15/0x30 [ 147.465936] vfs_write+0xad/0x1a0 [ 147.466204] SyS_write+0x52/0xc0 [ 147.466472] do_syscall_64+0x74/0x1a0 [ 147.466744] entry_SYSCALL_64_after_hwframe+0x3d/0xa2 Reversing the sense of the force check in intel_ir_reconfigure_irte() restores proper posted interrupt functionality. Signed-off-by: Jagannathan Raman Fixes: d491bdff888e ('iommu/vt-d: Reevaluate vector configuration on activate()') Signed-off-by: Joerg Roedel --- drivers/iommu/intel_irq_remapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 66f69af2c219..3062a154a9fb 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -1136,7 +1136,7 @@ static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force) irte->dest_id = IRTE_DEST(cfg->dest_apicid); /* Update the hardware only if the interrupt is in remapped mode. 
*/ - if (!force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING) + if (force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING) modify_irte(&ir_data->irq_2_iommu, irte); } From 94c793accacdb0d33c1df66f3b324eec96d26e58 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 4 Apr 2018 12:56:59 +0200 Subject: [PATCH 2/7] iommu/amd: Hide unused iommu_table_lock The newly introduced lock is only used when CONFIG_IRQ_REMAP is enabled: drivers/iommu/amd_iommu.c:86:24: error: 'iommu_table_lock' defined but not used [-Werror=unused-variable] static DEFINE_SPINLOCK(iommu_table_lock); This moves the definition next to the user, within the #ifdef protected section of the file. Fixes: ea6166f4b83e ("iommu/amd: Split irq_lookup_table out of the amd_iommu_devtable_lock") Signed-off-by: Arnd Bergmann Acked-by: Sebastian Andrzej Siewior Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 2a99f0f14795..8fb8c737fffe 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -83,7 +83,6 @@ static DEFINE_SPINLOCK(amd_iommu_devtable_lock); static DEFINE_SPINLOCK(pd_bitmap_lock); -static DEFINE_SPINLOCK(iommu_table_lock); /* List of all available dev_data structures */ static LLIST_HEAD(dev_data_list); @@ -3562,6 +3561,7 @@ EXPORT_SYMBOL(amd_iommu_device_info); *****************************************************************************/ static struct irq_chip amd_ir_chip; +static DEFINE_SPINLOCK(iommu_table_lock); static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table) { From 2f8c7f2e76bf040fcdaa2b4f4c2b9ef3b6e4ed25 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 17 Apr 2018 14:09:15 +0200 Subject: [PATCH 3/7] iommu/rockchip: Make clock handling optional iommu clocks are optional, so the driver should not fail if they are not present. Instead just set the number of clocks to 0, which the clk-bulk APIs can handle just fine. 
Fixes: f2e3a5f557ad ("iommu/rockchip: Control clocks needed to access the IOMMU") Signed-off-by: Heiko Stuebner Reviewed-by: Robin Murphy Tested-by: Enric Balletbo i Serra Signed-off-by: Joerg Roedel --- drivers/iommu/rockchip-iommu.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 5fc8656c60f9..c0f03af34832 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -1175,8 +1175,15 @@ static int rk_iommu_probe(struct platform_device *pdev) for (i = 0; i < iommu->num_clocks; ++i) iommu->clocks[i].id = rk_iommu_clocks[i]; + /* + * iommu clocks should be present for all new devices and devicetrees + * but there are older devicetrees without clocks out in the wild. + * So treat clocks as optional for the time being. + */ err = devm_clk_bulk_get(iommu->dev, iommu->num_clocks, iommu->clocks); - if (err) + if (err == -ENOENT) + iommu->num_clocks = 0; + else if (err) return err; err = clk_bulk_prepare(iommu->num_clocks, iommu->clocks); From cd2c9fcf5c66fd21f7b83455806fdd14ab9236ee Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Wed, 18 Apr 2018 12:40:42 +0100 Subject: [PATCH 4/7] iommu/dma: Move PCI window region reservation back into dma specific path. This pretty much reverts commit 273df9635385 ("iommu/dma: Make PCI window reservation generic") by moving the PCI window region reservation back into the dma specific path so that these regions don't get exposed via the IOMMU API interface. With this change, the vfio interface will report only iommu specific reserved regions to the user space. 
Cc: Joerg Roedel Signed-off-by: Shameer Kolothum Reviewed-by: Robin Murphy Fixes: 273df9635385 ('iommu/dma: Make PCI window reservation generic') Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 54 ++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index f05f3cf90756..ddcbbdb5d658 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -167,40 +167,16 @@ EXPORT_SYMBOL(iommu_put_dma_cookie); * @list: Reserved region list from iommu_get_resv_regions() * * IOMMU drivers can use this to implement their .get_resv_regions callback - * for general non-IOMMU-specific reservations. Currently, this covers host - * bridge windows for PCI devices and GICv3 ITS region reservation on ACPI - * based ARM platforms that may require HW MSI reservation. + * for general non-IOMMU-specific reservations. Currently, this covers GICv3 + * ITS region reservation on ACPI based ARM platforms that may require HW MSI + * reservation. 
*/ void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list) { - struct pci_host_bridge *bridge; - struct resource_entry *window; - if (!is_of_node(dev->iommu_fwspec->iommu_fwnode) && - iort_iommu_msi_get_resv_regions(dev, list) < 0) - return; + if (!is_of_node(dev->iommu_fwspec->iommu_fwnode)) + iort_iommu_msi_get_resv_regions(dev, list); - if (!dev_is_pci(dev)) - return; - - bridge = pci_find_host_bridge(to_pci_dev(dev)->bus); - resource_list_for_each_entry(window, &bridge->windows) { - struct iommu_resv_region *region; - phys_addr_t start; - size_t length; - - if (resource_type(window->res) != IORESOURCE_MEM) - continue; - - start = window->res->start - window->offset; - length = window->res->end - window->res->start + 1; - region = iommu_alloc_resv_region(start, length, 0, - IOMMU_RESV_RESERVED); - if (!region) - return; - - list_add_tail(&region->list, list); - } } EXPORT_SYMBOL(iommu_dma_get_resv_regions); @@ -229,6 +205,23 @@ static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie, return 0; } +static void iova_reserve_pci_windows(struct pci_dev *dev, + struct iova_domain *iovad) +{ + struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus); + struct resource_entry *window; + unsigned long lo, hi; + + resource_list_for_each_entry(window, &bridge->windows) { + if (resource_type(window->res) != IORESOURCE_MEM) + continue; + + lo = iova_pfn(iovad, window->res->start - window->offset); + hi = iova_pfn(iovad, window->res->end - window->offset); + reserve_iova(iovad, lo, hi); + } +} + static int iova_reserve_iommu_regions(struct device *dev, struct iommu_domain *domain) { @@ -238,6 +231,9 @@ static int iova_reserve_iommu_regions(struct device *dev, LIST_HEAD(resv_regions); int ret = 0; + if (dev_is_pci(dev)) + iova_reserve_pci_windows(to_pci_dev(dev), iovad); + iommu_get_resv_regions(dev, &resv_regions); list_for_each_entry(region, &resv_regions, list) { unsigned long lo, hi; From 0dfc0c792d691f8056f38b5c30789f504be0e467 Mon Sep 
17 00:00:00 2001 From: Changbin Du Date: Fri, 20 Apr 2018 13:29:55 +0800 Subject: [PATCH 5/7] iommu/vt-d: fix shift-out-of-bounds in bug checking It is allowed to flush more than 4GB of device TLBs, so the mask should be 64 bits wide. UBSAN captured this fault as shown below. [ 3.760024] ================================================================================ [ 3.768440] UBSAN: Undefined behaviour in drivers/iommu/dmar.c:1348:3 [ 3.774864] shift exponent 64 is too large for 32-bit type 'int' [ 3.780853] CPU: 2 PID: 0 Comm: swapper/2 Tainted: G U 4.17.0-rc1+ #89 [ 3.788661] Hardware name: Dell Inc. OptiPlex 7040/0Y7WYT, BIOS 1.2.8 01/26/2016 [ 3.796034] Call Trace: [ 3.798472] [ 3.800479] dump_stack+0x90/0xfb [ 3.803787] ubsan_epilogue+0x9/0x40 [ 3.807353] __ubsan_handle_shift_out_of_bounds+0x10e/0x170 [ 3.812916] ? qi_flush_dev_iotlb+0x124/0x180 [ 3.817261] qi_flush_dev_iotlb+0x124/0x180 [ 3.821437] iommu_flush_dev_iotlb+0x94/0xf0 [ 3.825698] iommu_flush_iova+0x10b/0x1c0 [ 3.829699] ? fq_ring_free+0x1d0/0x1d0 [ 3.833527] iova_domain_flush+0x25/0x40 [ 3.837448] fq_flush_timeout+0x55/0x160 [ 3.841368] ? fq_ring_free+0x1d0/0x1d0 [ 3.845200] ? fq_ring_free+0x1d0/0x1d0 [ 3.849034] call_timer_fn+0xbe/0x310 [ 3.852696] ? fq_ring_free+0x1d0/0x1d0 [ 3.856530] run_timer_softirq+0x223/0x6e0 [ 3.860625] ? sched_clock+0x5/0x10 [ 3.864108] ? 
sched_clock+0x5/0x10 [ 3.867594] __do_softirq+0x1b5/0x6f5 [ 3.871250] irq_exit+0xd4/0x130 [ 3.874470] smp_apic_timer_interrupt+0xb8/0x2f0 [ 3.879075] apic_timer_interrupt+0xf/0x20 [ 3.883159] [ 3.885255] RIP: 0010:poll_idle+0x60/0xe7 [ 3.889252] RSP: 0018:ffffb1b201943e30 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 [ 3.896802] RAX: 0000000080200000 RBX: 000000000000008e RCX: 000000000000001f [ 3.903918] RDX: 0000000000000000 RSI: 000000002819aa06 RDI: 0000000000000000 [ 3.911031] RBP: ffff9e93c6b33280 R08: 00000010f717d567 R09: 000000000010d205 [ 3.918146] R10: ffffb1b201943df8 R11: 0000000000000001 R12: 00000000e01b169d [ 3.925260] R13: 0000000000000000 R14: ffffffffb12aa400 R15: 0000000000000000 [ 3.932382] cpuidle_enter_state+0xb4/0x470 [ 3.936558] do_idle+0x222/0x310 [ 3.939779] cpu_startup_entry+0x78/0x90 [ 3.943693] start_secondary+0x205/0x2e0 [ 3.947607] secondary_startup_64+0xa5/0xb0 [ 3.951783] ================================================================================ Signed-off-by: Changbin Du Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index accf58388bdb..e4ae6003f470 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1345,7 +1345,7 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep, struct qi_desc desc; if (mask) { - BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1)); + BUG_ON(addr & ((1ULL << (VTD_PAGE_SHIFT + mask)) - 1)); addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1; desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE; } else From a85894cd779455fab0086cfcb5c9f65c3706e1c6 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 3 May 2018 15:25:17 +0200 Subject: [PATCH 6/7] iommu/vt-d: Use WARN_ON_ONCE instead of BUG_ON in qi_flush_dev_iotlb() A misaligned address is only worth a warning, and not stopping the whole execution path with a BUG_ON(). 
Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index e4ae6003f470..460bed4fc5b1 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1345,7 +1345,7 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep, struct qi_desc desc; if (mask) { - BUG_ON(addr & ((1ULL << (VTD_PAGE_SHIFT + mask)) - 1)); + WARN_ON_ONCE(addr & ((1ULL << (VTD_PAGE_SHIFT + mask)) - 1)); addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1; desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE; } else From 40fa84e10134ef5c892b628e02382349b5db3e0c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 4 Apr 2018 12:23:53 +0200 Subject: [PATCH 7/7] iommu: rockchip: fix building without CONFIG_OF We get a build error when compiling the iommu driver without CONFIG_OF: drivers/iommu/rockchip-iommu.c: In function 'rk_iommu_of_xlate': drivers/iommu/rockchip-iommu.c:1101:2: error: implicit declaration of function 'of_dev_put'; did you mean 'of_node_put'? [-Werror=implicit-function-declaration] This replaces the of_dev_put() with the equivalent platform_device_put(). Fixes: 5fd577c3eac3 ("iommu/rockchip: Use OF_IOMMU to attach devices automatically") Signed-off-by: Arnd Bergmann Signed-off-by: Joerg Roedel --- drivers/iommu/rockchip-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index c0f03af34832..0468acfa131f 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -1098,7 +1098,7 @@ static int rk_iommu_of_xlate(struct device *dev, data->iommu = platform_get_drvdata(iommu_dev); dev->archdata.iommu = data; - of_dev_put(iommu_dev); + platform_device_put(iommu_dev); return 0; }