From ec0160891e387f4771f953b888b1fe951398e5d9 Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Tue, 21 Jul 2020 14:26:09 -0600 Subject: [PATCH 1/3] irqdomain/treewide: Free firmware node after domain removal Commit 711419e504eb ("irqdomain: Add the missing assignment of domain->fwnode for named fwnode") unintentionally caused a dangling pointer page fault issue on firmware nodes that were freed after IRQ domain allocation. Commit e3beca48a45b fixed that dangling pointer issue by only freeing the firmware node after an IRQ domain allocation failure. That fix no longer frees the firmware node immediately, but leaves the firmware node allocated after the domain is removed. The firmware node must be kept around through irq_domain_remove, but should be freed it afterwards. Add the missing free operations after domain removal where where appropriate. Fixes: e3beca48a45b ("irqdomain/treewide: Keep firmware node unconditionally allocated") Signed-off-by: Jon Derrick Signed-off-by: Thomas Gleixner Reviewed-by: Andy Shevchenko Acked-by: Bjorn Helgaas # drivers/pci Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1595363169-7157-1-git-send-email-jonathan.derrick@intel.com --- arch/mips/pci/pci-xtalk-bridge.c | 3 +++ arch/x86/kernel/apic/io_apic.c | 5 +++++ drivers/iommu/intel/irq_remapping.c | 8 ++++++++ drivers/mfd/ioc3.c | 6 ++++++ drivers/pci/controller/vmd.c | 3 +++ 5 files changed, 25 insertions(+) diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c index 5958217861b8..9b3cc775c55e 100644 --- a/arch/mips/pci/pci-xtalk-bridge.c +++ b/arch/mips/pci/pci-xtalk-bridge.c @@ -728,6 +728,7 @@ err_free_resource: pci_free_resource_list(&host->windows); err_remove_domain: irq_domain_remove(domain); + irq_domain_free_fwnode(fn); return err; } @@ -735,8 +736,10 @@ static int bridge_remove(struct platform_device *pdev) { struct pci_bus *bus = platform_get_drvdata(pdev); struct bridge_controller *bc = BRIDGE_CONTROLLER(bus); + struct fwnode_handle *fn = bc->domain->fwnode; irq_domain_remove(bc->domain); + irq_domain_free_fwnode(fn); pci_lock_rescan_remove(); pci_stop_root_bus(bus); pci_remove_root_bus(bus); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 81ffcfbfaef2..21325a4a78b9 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2335,8 +2335,13 @@ static int mp_irqdomain_create(int ioapic) static void ioapic_destroy_irqdomain(int idx) { + struct ioapic_domain_cfg *cfg = &ioapics[idx].irqdomain_cfg; + struct fwnode_handle *fn = ioapics[idx].irqdomain->fwnode; + if (ioapics[idx].irqdomain) { irq_domain_remove(ioapics[idx].irqdomain); + if (!cfg->dev) + irq_domain_free_fwnode(fn); ioapics[idx].irqdomain = NULL; } } diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 9564d23d094f..aa096b333a99 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -628,13 +628,21 @@ out_free_table: static void intel_teardown_irq_remapping(struct intel_iommu *iommu) { + struct fwnode_handle *fn; + if (iommu && iommu->ir_table) { if (iommu->ir_msi_domain) { + fn = iommu->ir_msi_domain->fwnode; + irq_domain_remove(iommu->ir_msi_domain); + irq_domain_free_fwnode(fn); iommu->ir_msi_domain = NULL; } if (iommu->ir_domain) { + fn = iommu->ir_domain->fwnode; + irq_domain_remove(iommu->ir_domain); + irq_domain_free_fwnode(fn); iommu->ir_domain = NULL; } free_pages((unsigned long)iommu->ir_table->base, diff --git a/drivers/mfd/ioc3.c b/drivers/mfd/ioc3.c index 74cee7cb0afc..d939ccc46509 100644 --- a/drivers/mfd/ioc3.c +++ b/drivers/mfd/ioc3.c @@ -616,7 +616,10 @@ static int ioc3_mfd_probe(struct pci_dev *pdev, /* Remove all already added MFD devices */ mfd_remove_devices(&ipd->pdev->dev); if (ipd->domain) { + struct fwnode_handle *fn = ipd->domain->fwnode; + irq_domain_remove(ipd->domain); + irq_domain_free_fwnode(fn); free_irq(ipd->domain_irq, (void *)ipd); } pci_iounmap(pdev, regs); @@ -643,7 +646,10 @@ static void ioc3_mfd_remove(struct pci_dev *pdev) /* Release resources */ mfd_remove_devices(&ipd->pdev->dev); if (ipd->domain) { + struct fwnode_handle *fn = ipd->domain->fwnode; + irq_domain_remove(ipd->domain); + irq_domain_free_fwnode(fn); free_irq(ipd->domain_irq, (void *)ipd); } pci_iounmap(pdev, ipd->regs); diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index 9a64cf90c291..ebec0a6e77ed 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -560,6 +560,7 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features) if (!vmd->bus) { pci_free_resource_list(&resources); irq_domain_remove(vmd->irq_domain); + irq_domain_free_fwnode(fn); return -ENODEV; } @@ -673,6 +674,7 @@ static void vmd_cleanup_srcu(struct vmd_dev *vmd) static void vmd_remove(struct pci_dev *dev) { struct vmd_dev *vmd = pci_get_drvdata(dev); + struct fwnode_handle *fn = vmd->irq_domain->fwnode; sysfs_remove_link(&vmd->dev->dev.kobj, "domain"); pci_stop_root_bus(vmd->bus); @@ -680,6 +682,7 @@ static void vmd_remove(struct pci_dev *dev) vmd_cleanup_srcu(vmd); vmd_detach_resources(vmd); irq_domain_remove(vmd->irq_domain); + irq_domain_free_fwnode(fn); } #ifdef CONFIG_PM_SLEEP From f0c7baca180046824e07fc5f1326e83a8fd150c7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 24 Jul 2020 22:44:41 +0200 Subject: [PATCH 2/3] genirq/affinity: Make affinity setting if activated opt-in John reported that on a RK3288 system the perf per CPU interrupts are all affine to CPU0 and provided the analysis: "It looks like what happens is that because the interrupts are not per-CPU in the hardware, armpmu_request_irq() calls irq_force_affinity() while the interrupt is deactivated and then request_irq() with IRQF_PERCPU | IRQF_NOBALANCING. Now when irq_startup() runs with IRQ_STARTUP_NORMAL, it calls irq_setup_affinity() which returns early because IRQF_PERCPU and IRQF_NOBALANCING are set, leaving the interrupt on its original CPU." This was broken by the recent commit which blocked interrupt affinity setting in hardware before activation of the interrupt. While this works in general, it does not work for this particular case. As contrary to the initial analysis not all interrupt chip drivers implement an activate callback, the safe cure is to make the deferred interrupt affinity setting at activation time opt-in. Implement the necessary core logic and make the two irqchip implementations for which this is required opt-in. In hindsight this would have been the right thing to do, but ... Fixes: baedb87d1b53 ("genirq/affinity: Handle affinity setting on inactive interrupts correctly") Reported-by: John Keeping Signed-off-by: Thomas Gleixner Tested-by: Marc Zyngier Acked-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/87blk4tzgm.fsf@nanos.tec.linutronix.de --- arch/x86/kernel/apic/vector.c | 4 ++++ drivers/irqchip/irq-gic-v3-its.c | 5 ++++- include/linux/irq.h | 13 +++++++++++++ kernel/irq/manage.c | 6 +++++- 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 7649da2478d8..dae32d948bf2 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -560,6 +560,10 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, * as that can corrupt the affinity move state. */ irqd_set_handle_enforce_irqctx(irqd); + + /* Don't invoke affinity setter on deactivated interrupts */ + irqd_set_affinity_on_activate(irqd); + /* * Legacy vectors are already assigned when the IOAPIC * takes them over. They stay on the same vector. This is diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index beac4caefad9..103d850b5595 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3523,6 +3523,7 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, msi_alloc_info_t *info = args; struct its_device *its_dev = info->scratchpad[0].ptr; struct its_node *its = its_dev->its; + struct irq_data *irqd; irq_hw_number_t hwirq; int err; int i; @@ -3542,7 +3543,9 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i, &its_irq_chip, its_dev); - irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(virq + i))); + irqd = irq_get_irq_data(virq + i); + irqd_set_single_target(irqd); + irqd_set_affinity_on_activate(irqd); pr_debug("ID:%d pID:%d vID:%d\n", (int)(hwirq + i - its_dev->event_map.lpi_base), (int)(hwirq + i), virq + i); diff --git a/include/linux/irq.h b/include/linux/irq.h index 8d5bc2c237d7..1b7f4dfee35b 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -213,6 +213,8 @@ struct irq_data { * required * IRQD_HANDLE_ENFORCE_IRQCTX - Enforce that handle_irq_*() is only invoked * from actual interrupt context. + * IRQD_AFFINITY_ON_ACTIVATE - Affinity is set on activation. Don't call + * irq_chip::irq_set_affinity() when deactivated. */ enum { IRQD_TRIGGER_MASK = 0xf, @@ -237,6 +239,7 @@ enum { IRQD_CAN_RESERVE = (1 << 26), IRQD_MSI_NOMASK_QUIRK = (1 << 27), IRQD_HANDLE_ENFORCE_IRQCTX = (1 << 28), + IRQD_AFFINITY_ON_ACTIVATE = (1 << 29), }; #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) @@ -421,6 +424,16 @@ static inline bool irqd_msi_nomask_quirk(struct irq_data *d) return __irqd_to_state(d) & IRQD_MSI_NOMASK_QUIRK; } +static inline void irqd_set_affinity_on_activate(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_AFFINITY_ON_ACTIVATE; +} + +static inline bool irqd_affinity_on_activate(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_AFFINITY_ON_ACTIVATE; +} + #undef __irqd_to_state static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 2a9fec53e159..48c38e09c673 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -320,12 +320,16 @@ static bool irq_set_affinity_deactivated(struct irq_data *data, struct irq_desc *desc = irq_data_to_desc(data); /* + * Handle irq chips which can handle affinity only in activated + * state correctly + * * If the interrupt is not yet activated, just store the affinity * mask and do not call the chip driver at all. On activation the * driver has to make sure anyway that the interrupt is in a * useable state so startup works. */ - if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) || irqd_is_activated(data)) + if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) || + irqd_is_activated(data) || !irqd_affinity_on_activate(data)) return false; cpumask_copy(desc->irq_common_data.affinity, mask); From aa251fc5b936d3ddb4b4c4b36427eb9aa3347c82 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 25 Jul 2020 13:30:55 +0100 Subject: [PATCH 3/3] genirq/debugfs: Add missing irqchip flags Recently introduced irqchip flags lack the corresponding printouts in debugfs. Add them. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/874kpvydxc.wl-maz@kernel.org --- kernel/irq/debugfs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c index 4f9f844074db..b95ff5d5f4bd 100644 --- a/kernel/irq/debugfs.c +++ b/kernel/irq/debugfs.c @@ -112,6 +112,7 @@ static const struct irq_bit_descr irqdata_states[] = { BIT_MASK_DESCR(IRQD_AFFINITY_SET), BIT_MASK_DESCR(IRQD_SETAFFINITY_PENDING), BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED), + BIT_MASK_DESCR(IRQD_AFFINITY_ON_ACTIVATE), BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN), BIT_MASK_DESCR(IRQD_CAN_RESERVE), BIT_MASK_DESCR(IRQD_MSI_NOMASK_QUIRK), @@ -120,6 +121,10 @@ static const struct irq_bit_descr irqdata_states[] = { BIT_MASK_DESCR(IRQD_WAKEUP_STATE), BIT_MASK_DESCR(IRQD_WAKEUP_ARMED), + + BIT_MASK_DESCR(IRQD_DEFAULT_TRIGGER_SET), + + BIT_MASK_DESCR(IRQD_HANDLE_ENFORCE_IRQCTX), }; static const struct irq_bit_descr irqdesc_states[] = {