From 23584c1ed3e15a6f4bfab8dc5a88d94ab929ee12 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 17 Nov 2021 23:22:09 +0100 Subject: [PATCH 1/3] PCI: pciehp: Fix infinite loop in IRQ handler upon power fault The Power Fault Detected bit in the Slot Status register differs from all other hotplug events in that it is sticky: It can only be cleared after turning off slot power. Per PCIe r5.0, sec. 6.7.1.8: If a power controller detects a main power fault on the hot-plug slot, it must automatically set its internal main power fault latch [...]. The main power fault latch is cleared when software turns off power to the hot-plug slot. The stickiness used to cause interrupt storms and infinite loops which were fixed in 2009 by commits 5651c48cfafe ("PCI pciehp: fix power fault interrupt storm problem") and 99f0169c17f3 ("PCI: pciehp: enable software notification on empty slots"). Unfortunately in 2020 the infinite loop issue was inadvertently reintroduced by commit 8edf5332c393 ("PCI: pciehp: Fix MSI interrupt race"): The hardirq handler pciehp_isr() clears the PFD bit until pciehp's power_fault_detected flag is set. That happens in the IRQ thread pciehp_ist(), which never learns of the event because the hardirq handler is stuck in an infinite loop. Fix by setting the power_fault_detected flag already in the hardirq handler. Link: https://bugzilla.kernel.org/show_bug.cgi?id=214989 Link: https://lore.kernel.org/linux-pci/DM8PR11MB5702255A6A92F735D90A4446868B9@DM8PR11MB5702.namprd11.prod.outlook.com Fixes: 8edf5332c393 ("PCI: pciehp: Fix MSI interrupt race") Link: https://lore.kernel.org/r/66eaeef31d4997ceea357ad93259f290ededecfd.1637187226.git.lukas@wunner.de Reported-by: Joseph Bao Tested-by: Joseph Bao Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v4.19+ Cc: Stuart Hayes --- drivers/pci/hotplug/pciehp_hpc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 83a0fa119cae..9535c61cbff3 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -642,6 +642,8 @@ read_status: */ if (ctrl->power_fault_detected) status &= ~PCI_EXP_SLTSTA_PFD; + else if (status & PCI_EXP_SLTSTA_PFD) + ctrl->power_fault_detected = true; events |= status; if (!events) { @@ -651,7 +653,7 @@ read_status: } if (status) { - pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, events); + pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, status); /* * In MSI mode, all event bits must be zero before the port @@ -725,8 +727,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) } /* Check Power Fault Detected */ - if ((events & PCI_EXP_SLTSTA_PFD) && !ctrl->power_fault_detected) { - ctrl->power_fault_detected = 1; + if (events & PCI_EXP_SLTSTA_PFD) { ctrl_err(ctrl, "Slot(%s): Power fault\n", slot_name(ctrl)); pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF, PCI_EXP_SLTCTL_ATTN_IND_ON); From e1a6333e7f89ad6acef9e818c2500c2e5393536e Mon Sep 17 00:00:00 2001 From: Vihas Mak Date: Fri, 10 Dec 2021 03:06:18 +0530 Subject: [PATCH 2/3] PCI: ibmphp: Remove commented-out functions The functions get_max_adapter_speed() and get_bus_name() in ibmphp_core.c are commented-out and the fields .get_max_adapter_speed and .get_bus_name_status are removed from struct hotplug_slot_ops in pci_hotplug.h. Remove the commented-out functions. Link: https://lore.kernel.org/r/20211209213618.20522-1-makvihas@gmail.com Signed-off-by: Vihas Mak Signed-off-by: Bjorn Helgaas --- drivers/pci/hotplug/TODO | 5 --- drivers/pci/hotplug/ibmphp_core.c | 74 ------------------------------- 2 files changed, 79 deletions(-) diff --git a/drivers/pci/hotplug/TODO b/drivers/pci/hotplug/TODO index cc6194aa24c1..88f217c82b4f 100644 --- a/drivers/pci/hotplug/TODO +++ b/drivers/pci/hotplug/TODO @@ -30,11 +30,6 @@ ibmphp: or ibmphp should store a pointer to its bus in struct slot. Probably the former. -* The functions get_max_adapter_speed() and get_bus_name() are commented out. - Can they be deleted? There are also forward declarations at the top of - ibmphp_core.c as well as pointers in ibmphp_hotplug_slot_ops, likewise - commented out. - * ibmphp_init_devno() takes a struct slot **, it could instead take a struct slot *. diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c index 17124254d897..197997e264a2 100644 --- a/drivers/pci/hotplug/ibmphp_core.c +++ b/drivers/pci/hotplug/ibmphp_core.c @@ -50,14 +50,6 @@ static int irqs[16]; /* PIC mode IRQs we're using so far (in case MPS static int init_flag; -/* -static int get_max_adapter_speed_1 (struct hotplug_slot *, u8 *, u8); - -static inline int get_max_adapter_speed (struct hotplug_slot *hs, u8 *value) -{ - return get_max_adapter_speed_1 (hs, value, 1); -} -*/ static inline int get_cur_bus_info(struct slot **sl) { int rc = 1; @@ -401,69 +393,6 @@ static int get_max_bus_speed(struct slot *slot) return rc; } -/* -static int get_max_adapter_speed_1(struct hotplug_slot *hotplug_slot, u8 *value, u8 flag) -{ - int rc = -ENODEV; - struct slot *pslot; - struct slot myslot; - - debug("get_max_adapter_speed_1 - Entry hotplug_slot[%lx] pvalue[%lx]\n", - (ulong)hotplug_slot, (ulong) value); - - if (flag) - ibmphp_lock_operations(); - - if (hotplug_slot && value) { - pslot = hotplug_slot->private; - if (pslot) { - memcpy(&myslot, pslot, sizeof(struct slot)); - rc = ibmphp_hpc_readslot(pslot, READ_SLOTSTATUS, - &(myslot.status)); - - if (!(SLOT_LATCH (myslot.status)) && - (SLOT_PRESENT (myslot.status))) { - rc = ibmphp_hpc_readslot(pslot, - READ_EXTSLOTSTATUS, - &(myslot.ext_status)); - if (!rc) - *value = SLOT_SPEED(myslot.ext_status); - } else - *value = MAX_ADAPTER_NONE; - } - } - - if (flag) - ibmphp_unlock_operations(); - - debug("get_max_adapter_speed_1 - Exit rc[%d] value[%x]\n", rc, *value); - return rc; -} - -static int get_bus_name(struct hotplug_slot *hotplug_slot, char *value) -{ - int rc = -ENODEV; - struct slot *pslot = NULL; - - debug("get_bus_name - Entry hotplug_slot[%lx]\n", (ulong)hotplug_slot); - - ibmphp_lock_operations(); - - if (hotplug_slot) { - pslot = hotplug_slot->private; - if (pslot) { - rc = 0; - snprintf(value, 100, "Bus %x", pslot->bus); - } - } else - rc = -ENODEV; - - ibmphp_unlock_operations(); - debug("get_bus_name - Exit rc[%d] value[%x]\n", rc, *value); - return rc; -} -*/ - /**************************************************************************** * This routine will initialize the ops data structure used in the validate * function. It will also power off empty slots that are powered on since BIOS @@ -1231,9 +1160,6 @@ const struct hotplug_slot_ops ibmphp_hotplug_slot_ops = { .get_attention_status = get_attention_status, .get_latch_status = get_latch_status, .get_adapter_status = get_adapter_present, -/* .get_max_adapter_speed = get_max_adapter_speed, - .get_bus_name_status = get_bus_name, -*/ }; static void ibmphp_unload(void) From 085a9f43433f30cbe8a1ade62d9d7827c3217f4d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 17 Dec 2021 15:17:09 +0100 Subject: [PATCH 3/3] PCI: pciehp: Use down_read/write_nested(reset_lock) to fix lockdep errors Use down_read_nested() and down_write_nested() when taking the ctrl->reset_lock rw-sem, passing the number of PCIe hotplug controllers in the path to the PCI root bus as lock subclass parameter. This fixes the following false-positive lockdep report when unplugging a Lenovo X1C8 from a Lenovo 2nd gen TB3 dock: pcieport 0000:06:01.0: pciehp: Slot(1): Link Down pcieport 0000:06:01.0: pciehp: Slot(1): Card not present ============================================ WARNING: possible recursive locking detected 5.16.0-rc2+ #621 Not tainted -------------------------------------------- irq/124-pciehp/86 is trying to acquire lock: ffff8e5ac4299ef8 (&ctrl->reset_lock){.+.+}-{3:3}, at: pciehp_check_presence+0x23/0x80 but task is already holding lock: ffff8e5ac4298af8 (&ctrl->reset_lock){.+.+}-{3:3}, at: pciehp_ist+0xf3/0x180 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&ctrl->reset_lock); lock(&ctrl->reset_lock); *** DEADLOCK *** May be due to missing lock nesting notation 3 locks held by irq/124-pciehp/86: #0: ffff8e5ac4298af8 (&ctrl->reset_lock){.+.+}-{3:3}, at: pciehp_ist+0xf3/0x180 #1: ffffffffa3b024e8 (pci_rescan_remove_lock){+.+.}-{3:3}, at: pciehp_unconfigure_device+0x31/0x110 #2: ffff8e5ac1ee2248 (&dev->mutex){....}-{3:3}, at: device_release_driver+0x1c/0x40 stack backtrace: CPU: 4 PID: 86 Comm: irq/124-pciehp Not tainted 5.16.0-rc2+ #621 Hardware name: LENOVO 20U90SIT19/20U90SIT19, BIOS N2WET30W (1.20 ) 08/26/2021 Call Trace: dump_stack_lvl+0x59/0x73 __lock_acquire.cold+0xc5/0x2c6 lock_acquire+0xb5/0x2b0 down_read+0x3e/0x50 pciehp_check_presence+0x23/0x80 pciehp_runtime_resume+0x5c/0xa0 device_for_each_child+0x45/0x70 pcie_port_device_runtime_resume+0x20/0x30 pci_pm_runtime_resume+0xa7/0xc0 __rpm_callback+0x41/0x110 rpm_callback+0x59/0x70 rpm_resume+0x512/0x7b0 __pm_runtime_resume+0x4a/0x90 __device_release_driver+0x28/0x240 device_release_driver+0x26/0x40 pci_stop_bus_device+0x68/0x90 pci_stop_bus_device+0x2c/0x90 pci_stop_and_remove_bus_device+0xe/0x20 pciehp_unconfigure_device+0x6c/0x110 pciehp_disable_slot+0x5b/0xe0 pciehp_handle_presence_or_link_change+0xc3/0x2f0 pciehp_ist+0x179/0x180 This lockdep warning is triggered because with Thunderbolt, hotplug ports are nested. When removing multiple devices in a daisy-chain, each hotplug port's reset_lock may be acquired recursively. It's never the same lock, so the lockdep splat is a false positive. Because locks at the same hierarchy level are never acquired recursively, a per-level lockdep class is sufficient to fix the lockdep warning. The choice to use one lockdep subclass per pcie-hotplug controller in the path to the root-bus was made to conserve class keys because their number is limited and the complexity grows quadratically with number of keys according to Documentation/locking/lockdep-design.rst. Link: https://lore.kernel.org/linux-pci/20190402021933.GA2966@mit.edu/ Link: https://lore.kernel.org/linux-pci/de684a28-9038-8fc6-27ca-3f6f2f6400d7@redhat.com/ Link: https://lore.kernel.org/r/20211217141709.379663-1-hdegoede@redhat.com Link: https://bugzilla.kernel.org/show_bug.cgi?id=208855 Reported-by: "Theodore Ts'o" Signed-off-by: Hans de Goede Signed-off-by: Bjorn Helgaas Reviewed-by: Lukas Wunner Cc: stable@vger.kernel.org --- drivers/pci/hotplug/pciehp.h | 3 +++ drivers/pci/hotplug/pciehp_core.c | 2 +- drivers/pci/hotplug/pciehp_hpc.c | 21 ++++++++++++++++++--- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 918dccbc74b6..e0a614acee05 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -75,6 +75,8 @@ extern int pciehp_poll_time; * @reset_lock: prevents access to the Data Link Layer Link Active bit in the * Link Status register and to the Presence Detect State bit in the Slot * Status register during a slot reset which may cause them to flap + * @depth: Number of additional hotplug ports in the path to the root bus, + * used as lock subclass for @reset_lock * @ist_running: flag to keep user request waiting while IRQ thread is running * @request_result: result of last user request submitted to the IRQ thread * @requester: wait queue to wake up on completion of user request, @@ -106,6 +108,7 @@ struct controller { struct hotplug_slot hotplug_slot; /* hotplug core interface */ struct rw_semaphore reset_lock; + unsigned int depth; unsigned int ist_running; int request_result; wait_queue_head_t requester; diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index f34114d45259..4042d87d539d 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -166,7 +166,7 @@ static void pciehp_check_presence(struct controller *ctrl) { int occupied; - down_read(&ctrl->reset_lock); + down_read_nested(&ctrl->reset_lock, ctrl->depth); mutex_lock(&ctrl->state_lock); occupied = pciehp_card_present_or_link_active(ctrl); diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 9535c61cbff3..1d3108e6c128 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -583,7 +583,7 @@ static void pciehp_ignore_dpc_link_change(struct controller *ctrl, * the corresponding link change may have been ignored above. * Synthesize it to ensure that it is acted on. */ - down_read(&ctrl->reset_lock); + down_read_nested(&ctrl->reset_lock, ctrl->depth); if (!pciehp_check_link_active(ctrl)) pciehp_request(ctrl, PCI_EXP_SLTSTA_DLLSC); up_read(&ctrl->reset_lock); @@ -747,7 +747,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) * Disable requests have higher priority than Presence Detect Changed * or Data Link Layer State Changed events. */ - down_read(&ctrl->reset_lock); + down_read_nested(&ctrl->reset_lock, ctrl->depth); if (events & DISABLE_SLOT) pciehp_handle_disable_request(ctrl); else if (events & (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC)) @@ -907,7 +907,7 @@ int pciehp_reset_slot(struct hotplug_slot *hotplug_slot, bool probe) if (probe) return 0; - down_write(&ctrl->reset_lock); + down_write_nested(&ctrl->reset_lock, ctrl->depth); if (!ATTN_BUTTN(ctrl)) { ctrl_mask |= PCI_EXP_SLTCTL_PDCE; @@ -963,6 +963,20 @@ static inline void dbg_ctrl(struct controller *ctrl) #define FLAG(x, y) (((x) & (y)) ? '+' : '-') +static inline int pcie_hotplug_depth(struct pci_dev *dev) +{ + struct pci_bus *bus = dev->bus; + int depth = 0; + + while (bus->parent) { + bus = bus->parent; + if (bus->self && bus->self->is_hotplug_bridge) + depth++; + } + + return depth; +} + struct controller *pcie_init(struct pcie_device *dev) { struct controller *ctrl; @@ -976,6 +990,7 @@ struct controller *pcie_init(struct pcie_device *dev) return NULL; ctrl->pcie = dev; + ctrl->depth = pcie_hotplug_depth(dev->port); pcie_capability_read_dword(pdev, PCI_EXP_SLTCAP, &slot_cap); if (pdev->hotplug_user_indicators)