2018-01-26 23:22:04 +03:00
|
|
|
// SPDX-License-Identifier: GPL-2.0+
|
2005-04-17 02:20:36 +04:00
|
|
|
/*
|
|
|
|
* PCI Express Hot Plug Controller Driver
|
|
|
|
*
|
|
|
|
* Copyright (C) 1995,2001 Compaq Computer Corporation
|
|
|
|
* Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
|
|
|
|
* Copyright (C) 2001 IBM Corp.
|
|
|
|
* Copyright (C) 2003-2004 Intel Corporation
|
|
|
|
*
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
2005-08-17 02:16:10 +04:00
|
|
|
* Send feedback to <greg@kroah.com>, <kristen.c.accardi@intel.com>
|
2005-04-17 02:20:36 +04:00
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2019-05-08 02:24:51 +03:00
|
|
|
/*
 * Prefix prepended to log format strings in this file.  Defined ahead of the
 * #include lines below -- presumably so that the kernel's dev_*() printing
 * helpers pick it up (NOTE(review): confirm against linux/dev_printk.h).
 */
#define dev_fmt(fmt) "pciehp: " fmt
|
|
|
|
|
2005-04-17 02:20:36 +04:00
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/types.h>
|
2018-07-20 01:27:56 +03:00
|
|
|
#include <linux/pm_runtime.h>
|
2005-04-17 02:20:36 +04:00
|
|
|
#include <linux/pci.h>
|
|
|
|
#include "pciehp.h"
|
|
|
|
|
2007-08-10 03:09:36 +04:00
|
|
|
/* The following routines constitute the bulk of the
|
2005-04-17 02:20:36 +04:00
|
|
|
hotplug controller logic
|
|
|
|
*/
|
|
|
|
|
PCI: pciehp: Differentiate between surprise and safe removal
When removing PCI devices below a hotplug bridge, pciehp marks them as
disconnected if the card is no longer present in the slot or it quiesces
them if the card is still present (by disabling INTx interrupts, bus
mastering and SERR# reporting).
To detect whether the card is still present, pciehp checks the Presence
Detect State bit in the Slot Status register. The problem with this
approach is that even if the card is present, the link to it may be
down, and in that case it would be better to mark the devices as
disconnected instead of trying to quiesce them. Moreover, if the card
in the slot was quickly replaced by another one, the Presence Detect
State bit would be set, yet trying to quiesce the new card's devices
would be wrong and the correct thing to do is to mark the previous
card's devices as disconnected.
Instead of looking at the Presence Detect State bit, it is better to
differentiate whether the card was surprise removed versus safely
removed (via sysfs or an Attention Button press). On surprise removal,
the devices should be marked as disconnected, whereas on safe removal it
is correct to quiesce the devices.
The knowledge whether a surprise removal or a safe removal is at hand
does exist further up in the call stack: A surprise removal is
initiated by pciehp_handle_presence_or_link_change(), a safe removal by
pciehp_handle_disable_request().
Pass that information down to pciehp_unconfigure_device() and use it in
lieu of the Presence Detect State bit. While there, add kernel-doc to
pciehp_unconfigure_device() and pciehp_configure_device().
Tested-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Keith Busch <keith.busch@intel.com>
2018-07-31 08:50:37 +03:00
|
|
|
#define SAFE_REMOVAL true
|
|
|
|
#define SURPRISE_REMOVAL false
|
|
|
|
|
2018-09-18 22:46:17 +03:00
|
|
|
static void set_slot_off(struct controller *ctrl)
|
2005-04-17 02:20:36 +04:00
|
|
|
{
|
2019-09-03 14:10:19 +03:00
|
|
|
/*
|
|
|
|
* Turn off slot, turn on attention indicator, turn off power
|
|
|
|
* indicator
|
|
|
|
*/
|
2008-04-26 01:39:06 +04:00
|
|
|
if (POWER_CTRL(ctrl)) {
|
2018-09-18 22:46:17 +03:00
|
|
|
pciehp_power_off_slot(ctrl);
|
2013-12-15 00:06:16 +04:00
|
|
|
|
2008-10-24 09:26:35 +04:00
|
|
|
/*
|
|
|
|
* After turning power off, we must wait for at least 1 second
|
|
|
|
* before taking any action that relies on power having been
|
|
|
|
* removed from the slot/adapter.
|
|
|
|
*/
|
|
|
|
msleep(1000);
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
|
2019-09-03 14:10:19 +03:00
|
|
|
pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
|
|
|
|
PCI_EXP_SLTCTL_ATTN_IND_ON);
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* board_added - Called after a board has been added to the system.
|
2018-09-18 22:46:17 +03:00
|
|
|
* @ctrl: PCIe hotplug controller where board is added
|
2005-04-17 02:20:36 +04:00
|
|
|
*
|
2007-11-28 20:04:30 +03:00
|
|
|
* Turns power on for the board.
|
|
|
|
* Configures board.
|
2005-04-17 02:20:36 +04:00
|
|
|
*/
|
2018-09-18 22:46:17 +03:00
|
|
|
static int board_added(struct controller *ctrl)
|
2005-04-17 02:20:36 +04:00
|
|
|
{
|
2006-12-22 04:01:09 +03:00
|
|
|
int retval = 0;
|
2009-09-15 12:30:14 +04:00
|
|
|
struct pci_bus *parent = ctrl->pcie->port->subordinate;
|
2005-04-17 02:20:36 +04:00
|
|
|
|
2008-04-26 01:39:06 +04:00
|
|
|
if (POWER_CTRL(ctrl)) {
|
2005-04-17 02:20:36 +04:00
|
|
|
/* Power on slot */
|
2018-09-18 22:46:17 +03:00
|
|
|
retval = pciehp_power_on_slot(ctrl);
|
2006-12-22 04:01:09 +03:00
|
|
|
if (retval)
|
|
|
|
return retval;
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
2007-08-10 03:09:36 +04:00
|
|
|
|
2019-09-03 14:10:21 +03:00
|
|
|
pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_BLINK,
|
|
|
|
INDICATOR_NOOP);
|
2005-04-17 02:20:36 +04:00
|
|
|
|
2006-12-22 04:01:09 +03:00
|
|
|
/* Check link training status */
|
2009-09-15 12:30:48 +04:00
|
|
|
retval = pciehp_check_link_status(ctrl);
|
PCI: pciehp: Reduce noisiness on hot removal
When a PCIe card is hot-removed, the Presence Detect State and Data Link
Layer Link Active bits often do not clear simultaneously. I've seen delays
of up to 244 msec between the two events with Thunderbolt.
After pciehp has brought down the slot in response to the first event, the
other bit may still be set. It's not discernible whether it's set because
a new card is already in the slot or if it will soon clear. So pciehp
tries to bring up the slot and in the latter case fails with a bunch of
messages, some of them at KERN_ERR severity. If the slot is no longer
occupied, the messages are false positives and annoy users.
Stuart Hayes reports the following splat on hot removal:
KERN_INFO pcieport 0000:3c:06.0: pciehp: Slot(180): Link Up
KERN_INFO pcieport 0000:3c:06.0: pciehp: Timeout waiting for Presence Detect
KERN_ERR pcieport 0000:3c:06.0: pciehp: link training error: status 0x0001
KERN_ERR pcieport 0000:3c:06.0: pciehp: Failed to check link status
Dongdong Liu complains about a similar splat:
KERN_INFO pciehp 0000:80:10.0:pcie004: Slot(36): Link Down
KERN_INFO iommu: Removing device 0000:87:00.0 from group 12
KERN_INFO pciehp 0000:80:10.0:pcie004: Slot(36): Card present
KERN_INFO pcieport 0000:80:10.0: Data Link Layer Link Active not set in 1000 msec
KERN_ERR pciehp 0000:80:10.0:pcie004: Failed to check link status
Users are particularly irritated to see a bringup attempt even though the
slot was explicitly brought down via sysfs. In a perfect world, we could
avoid this by setting Link Disable on slot bringdown and re-enabling it
upon a Presence Detect State change. In reality however, there are broken
hotplug ports which hardwire Presence Detect to zero, see 80696f991424
("PCI: pciehp: Tolerate Presence Detect hardwired to zero"). Conversely,
PCIe r1.0 hotplug ports hardwire Link Active to zero because Link Active
Reporting wasn't specified before PCIe r1.1. On unplug, some ports first
clear Presence then Link (see Stuart Hayes' splat) whereas others use the
inverse order (see Dongdong Liu's splat). To top it off, there are hotplug
ports which flap the Presence and Link bits on slot bringup, see
6c35a1ac3da6 ("PCI: pciehp: Tolerate initially unstable link").
pciehp is designed to work with all of these variants. Surplus attempts at
slot bringup are a lesser evil than not being able to bring up slots at
all. Although we could try to perfect the behavior for specific hotplug
controllers, we'd risk breaking others or increasing code complexity.
But we can certainly minimize annoyance by emitting only a single message
with KERN_INFO severity if bringup is unsuccessful:
* Drop the "Timeout waiting for Presence Detect" message in
pcie_wait_for_presence(). The sole caller of that function,
pciehp_check_link_status(), ignores the timeout and carries on. It emits
error messages of its own and I don't think this particular message adds
much value.
* There's a single error condition in pciehp_check_link_status() which
does not emit a message. Adding one allows dropping the "Failed to check
link status" message emitted by board_added() if
pciehp_check_link_status() returns a non-zero integer.
* Tone down all messages in pciehp_check_link_status() to KERN_INFO
severity and rephrase them to look as innocuous as possible. To this
end, move the message emitted by pcie_wait_for_link_delay() to its
callers.
As a result, Stuart Hayes' splat becomes:
KERN_INFO pcieport 0000:3c:06.0: pciehp: Slot(180): Link Up
KERN_INFO pcieport 0000:3c:06.0: pciehp: Slot(180): Cannot train link: status 0x0001
Dongdong Liu's splat becomes:
KERN_INFO pciehp 0000:80:10.0:pcie004: Slot(36): Card present
KERN_INFO pciehp 0000:80:10.0:pcie004: Slot(36): No link
The messages now merely serve as information that presence or link bits
were set a little longer than expected. Bringup failures which are not
false positives are still reported, albeit no longer at KERN_ERR severity.
Link: https://lore.kernel.org/linux-pci/20200310182100.102987-1-stuart.w.hayes@gmail.com/
Link: https://lore.kernel.org/linux-pci/1547649064-19019-1-git-send-email-liudongdong3@huawei.com/
Link: https://lore.kernel.org/r/b45e46fd8a6aa6930aaac9d7718c2e4b787a4e5e.1595935071.git.lukas@wunner.de
Reported-by: Stuart Hayes <stuart.w.hayes@gmail.com>
Reported-by: Dongdong Liu <liudongdong3@huawei.com>
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
2020-09-18 00:13:20 +03:00
|
|
|
if (retval)
|
2009-11-13 09:14:10 +03:00
|
|
|
goto err_exit;
|
2005-04-17 02:20:36 +04:00
|
|
|
|
|
|
|
/* Check for a power fault */
|
2018-09-18 22:46:17 +03:00
|
|
|
if (ctrl->power_fault_detected || pciehp_query_power_fault(ctrl)) {
|
|
|
|
ctrl_err(ctrl, "Slot(%s): Power fault\n", slot_name(ctrl));
|
2009-09-15 12:33:30 +04:00
|
|
|
retval = -EIO;
|
2005-11-01 03:20:06 +03:00
|
|
|
goto err_exit;
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
|
2018-09-18 22:46:17 +03:00
|
|
|
retval = pciehp_configure_device(ctrl);
|
2006-12-22 04:01:09 +03:00
|
|
|
if (retval) {
|
2017-10-13 21:35:46 +03:00
|
|
|
if (retval != -EEXIST) {
|
|
|
|
ctrl_err(ctrl, "Cannot add device at %04x:%02x:00\n",
|
|
|
|
pci_domain_nr(parent), parent->number);
|
2014-02-12 04:36:51 +04:00
|
|
|
goto err_exit;
|
2017-10-13 21:35:46 +03:00
|
|
|
}
|
2005-11-01 03:20:06 +03:00
|
|
|
}
|
2005-04-17 02:20:36 +04:00
|
|
|
|
2019-09-03 14:10:19 +03:00
|
|
|
pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_ON,
|
|
|
|
PCI_EXP_SLTCTL_ATTN_IND_OFF);
|
2005-04-17 02:20:36 +04:00
|
|
|
return 0;
|
2005-11-01 03:20:06 +03:00
|
|
|
|
|
|
|
err_exit:
|
2018-09-18 22:46:17 +03:00
|
|
|
set_slot_off(ctrl);
|
2006-12-22 04:01:09 +03:00
|
|
|
return retval;
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2019-09-05 23:52:24 +03:00
|
|
|
* remove_board - Turn off slot and Power Indicator
|
2018-09-18 22:46:17 +03:00
|
|
|
* @ctrl: PCIe hotplug controller where board is being removed
|
PCI: pciehp: Differentiate between surprise and safe removal
When removing PCI devices below a hotplug bridge, pciehp marks them as
disconnected if the card is no longer present in the slot or it quiesces
them if the card is still present (by disabling INTx interrupts, bus
mastering and SERR# reporting).
To detect whether the card is still present, pciehp checks the Presence
Detect State bit in the Slot Status register. The problem with this
approach is that even if the card is present, the link to it may be
down, and it that case it would be better to mark the devices as
disconnected instead of trying to quiesce them. Moreover, if the card
in the slot was quickly replaced by another one, the Presence Detect
State bit would be set, yet trying to quiesce the new card's devices
would be wrong and the correct thing to do is to mark the previous
card's devices as disconnected.
Instead of looking at the Presence Detect State bit, it is better to
differentiate whether the card was surprise removed versus safely
removed (via sysfs or an Attention Button press). On surprise removal,
the devices should be marked as disconnected, whereas on safe removal it
is correct to quiesce the devices.
The knowledge whether a surprise removal or a safe removal is at hand
does exist further up in the call stack: A surprise removal is
initiated by pciehp_handle_presence_or_link_change(), a safe removal by
pciehp_handle_disable_request().
Pass that information down to pciehp_unconfigure_device() and use it in
lieu of the Presence Detect State bit. While there, add kernel-doc to
pciehp_unconfigure_device() and pciehp_configure_device().
Tested-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Keith Busch <keith.busch@intel.com>
2018-07-31 08:50:37 +03:00
|
|
|
* @safe_removal: whether the board is safely removed (versus surprise removed)
|
2005-04-17 02:20:36 +04:00
|
|
|
*/
|
2018-09-18 22:46:17 +03:00
|
|
|
static void remove_board(struct controller *ctrl, bool safe_removal)
|
2005-04-17 02:20:36 +04:00
|
|
|
{
|
2018-09-18 22:46:17 +03:00
|
|
|
pciehp_unconfigure_device(ctrl, safe_removal);
|
2005-04-17 02:20:36 +04:00
|
|
|
|
2008-04-26 01:39:06 +04:00
|
|
|
if (POWER_CTRL(ctrl)) {
|
2018-09-18 22:46:17 +03:00
|
|
|
pciehp_power_off_slot(ctrl);
|
2013-12-15 00:06:16 +04:00
|
|
|
|
2008-10-24 09:26:35 +04:00
|
|
|
/*
|
|
|
|
* After turning power off, we must wait for at least 1 second
|
|
|
|
* before taking any action that relies on power having been
|
|
|
|
* removed from the slot/adapter.
|
|
|
|
*/
|
|
|
|
msleep(1000);
|
2019-03-12 15:05:48 +03:00
|
|
|
|
|
|
|
/* Ignore link or presence changes caused by power off */
|
|
|
|
atomic_and(~(PCI_EXP_SLTSTA_DLLSC | PCI_EXP_SLTSTA_PDC),
|
|
|
|
&ctrl->pending_events);
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
|
2019-09-03 14:10:21 +03:00
|
|
|
pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
|
|
|
|
INDICATOR_NOOP);
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
|
2018-09-18 22:46:17 +03:00
|
|
|
/* Forward declaration; the definition is outside the section shown here. */
static int pciehp_enable_slot(struct controller *ctrl);
|
|
|
|
/*
 * Forward declaration; the definition is outside the section shown here.
 * @safe_removal takes SAFE_REMOVAL or SURPRISE_REMOVAL.
 */
static int pciehp_disable_slot(struct controller *ctrl, bool safe_removal);
|
2018-07-20 01:27:47 +03:00
|
|
|
|
PCI: pciehp: Enable/disable exclusively from IRQ thread
Besides the IRQ thread, there are several other places in the driver
which enable or disable the slot:
- pciehp_probe() enables the slot if it's occupied and the pciehp_force
module parameter is used.
- pciehp_resume() enables or disables the slot after system sleep.
- pciehp_queue_pushbutton_work() enables or disables the slot after the
5 second delay following an Attention Button press.
- pciehp_sysfs_enable_slot() and pciehp_sysfs_disable_slot() enable or
disable the slot on sysfs write.
This requires locking and complicates pciehp's state machine.
A simplification can be achieved by enabling and disabling the slot
exclusively from the IRQ thread.
Amend the functions listed above to request slot enable/disablement from
the IRQ thread by either synthesizing a Presence Detect Changed event or,
in the case of a disable user request (via sysfs or an Attention Button
press), submitting a newly introduced force disable request. The latter
is needed because the slot shall be forced off despite being occupied.
For this force disable request, avoid colliding with Slot Status register
bits by using a bit number greater than 16.
For synchronous execution of requests (on sysfs write), wait for the
request to finish and retrieve the result. There can only ever be one
sysfs write in flight due to the locking in kernfs_fop_write(), hence
there is no risk of returning the result of a different sysfs request to
user space.
The POWERON_STATE and POWEROFF_STATE are now no longer entered by the
above-listed functions, but solely by the IRQ thread when it begins a
power transition. Afterwards, it moves to STATIC_STATE. The same
applies to canceling the Attention Button work, it likewise becomes an
IRQ thread only operation.
An immediate consequence is that the POWERON_STATE and POWEROFF_STATE are
never observed by the IRQ thread itself, only by functions called in a
different context, such as pciehp_sysfs_enable_slot(). So remove
handling of these states from pciehp_handle_button_press() and
pciehp_handle_link_change() which are exclusively called from the IRQ
thread.
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2018-07-20 01:27:46 +03:00
|
|
|
void pciehp_request(struct controller *ctrl, int action)
|
|
|
|
{
|
|
|
|
atomic_or(action, &ctrl->pending_events);
|
|
|
|
if (!pciehp_poll_mode)
|
|
|
|
irq_wake_thread(ctrl->pcie->irq, ctrl);
|
|
|
|
}
|
|
|
|
|
2015-10-12 22:10:12 +03:00
|
|
|
void pciehp_queue_pushbutton_work(struct work_struct *work)
|
|
|
|
{
|
2018-09-18 22:46:17 +03:00
|
|
|
struct controller *ctrl = container_of(work, struct controller,
|
2018-09-08 10:59:01 +03:00
|
|
|
button_work.work);
|
2005-04-17 02:20:36 +04:00
|
|
|
|
2018-09-08 10:59:01 +03:00
|
|
|
mutex_lock(&ctrl->state_lock);
|
2018-09-18 22:46:17 +03:00
|
|
|
switch (ctrl->state) {
|
2007-03-07 02:02:26 +03:00
|
|
|
case BLINKINGOFF_STATE:
|
PCI: pciehp: Enable/disable exclusively from IRQ thread
Besides the IRQ thread, there are several other places in the driver
which enable or disable the slot:
- pciehp_probe() enables the slot if it's occupied and the pciehp_force
module parameter is used.
- pciehp_resume() enables or disables the slot after system sleep.
- pciehp_queue_pushbutton_work() enables or disables the slot after the
5 second delay following an Attention Button press.
- pciehp_sysfs_enable_slot() and pciehp_sysfs_disable_slot() enable or
disable the slot on sysfs write.
This requires locking and complicates pciehp's state machine.
A simplification can be achieved by enabling and disabling the slot
exclusively from the IRQ thread.
Amend the functions listed above to request slot enable/disablement from
the IRQ thread by either synthesizing a Presence Detect Changed event or,
in the case of a disable user request (via sysfs or an Attention Button
press), submitting a newly introduced force disable request. The latter
is needed because the slot shall be forced off despite being occupied.
For this force disable request, avoid colliding with Slot Status register
bits by using a bit number greater than 16.
For synchronous execution of requests (on sysfs write), wait for the
request to finish and retrieve the result. There can only ever be one
sysfs write in flight due to the locking in kernfs_fop_write(), hence
there is no risk of returning the result of a different sysfs request to
user space.
The POWERON_STATE and POWEROFF_STATE is now no longer entered by the
above-listed functions, but solely by the IRQ thread when it begins a
power transition. Afterwards, it moves to STATIC_STATE. The same
applies to canceling the Attention Button work, it likewise becomes an
IRQ thread only operation.
An immediate consequence is that the POWERON_STATE and POWEROFF_STATE is
never observed by the IRQ thread itself, only by functions called in a
different context, such as pciehp_sysfs_enable_slot(). So remove
handling of these states from pciehp_handle_button_press() and
pciehp_handle_link_change() which are exclusively called from the IRQ
thread.
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2018-07-20 01:27:46 +03:00
|
|
|
pciehp_request(ctrl, DISABLE_SLOT);
|
|
|
|
break;
|
2007-03-07 02:02:26 +03:00
|
|
|
case BLINKINGON_STATE:
|
PCI: pciehp: Enable/disable exclusively from IRQ thread
Besides the IRQ thread, there are several other places in the driver
which enable or disable the slot:
- pciehp_probe() enables the slot if it's occupied and the pciehp_force
module parameter is used.
- pciehp_resume() enables or disables the slot after system sleep.
- pciehp_queue_pushbutton_work() enables or disables the slot after the
5 second delay following an Attention Button press.
- pciehp_sysfs_enable_slot() and pciehp_sysfs_disable_slot() enable or
disable the slot on sysfs write.
This requires locking and complicates pciehp's state machine.
A simplification can be achieved by enabling and disabling the slot
exclusively from the IRQ thread.
Amend the functions listed above to request slot enable/disablement from
the IRQ thread by either synthesizing a Presence Detect Changed event or,
in the case of a disable user request (via sysfs or an Attention Button
press), submitting a newly introduced force disable request. The latter
is needed because the slot shall be forced off despite being occupied.
For this force disable request, avoid colliding with Slot Status register
bits by using a bit number greater than 16.
For synchronous execution of requests (on sysfs write), wait for the
request to finish and retrieve the result. There can only ever be one
sysfs write in flight due to the locking in kernfs_fop_write(), hence
there is no risk of returning the result of a different sysfs request to
user space.
The POWERON_STATE and POWEROFF_STATE is now no longer entered by the
above-listed functions, but solely by the IRQ thread when it begins a
power transition. Afterwards, it moves to STATIC_STATE. The same
applies to canceling the Attention Button work, it likewise becomes an
IRQ thread only operation.
An immediate consequence is that the POWERON_STATE and POWEROFF_STATE is
never observed by the IRQ thread itself, only by functions called in a
different context, such as pciehp_sysfs_enable_slot(). So remove
handling of these states from pciehp_handle_button_press() and
pciehp_handle_link_change() which are exclusively called from the IRQ
thread.
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2018-07-20 01:27:46 +03:00
|
|
|
pciehp_request(ctrl, PCI_EXP_SLTSTA_PDC);
|
|
|
|
break;
|
2007-03-07 02:02:26 +03:00
|
|
|
default:
|
2015-10-12 22:10:12 +03:00
|
|
|
break;
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
2018-09-08 10:59:01 +03:00
|
|
|
mutex_unlock(&ctrl->state_lock);
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
|
2018-09-18 22:46:17 +03:00
|
|
|
void pciehp_handle_button_press(struct controller *ctrl)
|
2005-04-17 02:20:36 +04:00
|
|
|
{
|
2018-09-08 10:59:01 +03:00
|
|
|
mutex_lock(&ctrl->state_lock);
|
2018-09-18 22:46:17 +03:00
|
|
|
switch (ctrl->state) {
|
2018-07-20 01:27:45 +03:00
|
|
|
case OFF_STATE:
|
|
|
|
case ON_STATE:
|
2018-09-18 22:46:17 +03:00
|
|
|
if (ctrl->state == ON_STATE) {
|
|
|
|
ctrl->state = BLINKINGOFF_STATE;
|
2016-09-08 23:19:58 +03:00
|
|
|
ctrl_info(ctrl, "Slot(%s): Powering off due to button press\n",
|
2018-09-18 22:46:17 +03:00
|
|
|
slot_name(ctrl));
|
2007-03-07 02:02:26 +03:00
|
|
|
} else {
|
2018-09-18 22:46:17 +03:00
|
|
|
ctrl->state = BLINKINGON_STATE;
|
2016-09-08 23:19:58 +03:00
|
|
|
ctrl_info(ctrl, "Slot(%s) Powering on due to button press\n",
|
2018-09-18 22:46:17 +03:00
|
|
|
slot_name(ctrl));
|
2007-03-07 02:02:26 +03:00
|
|
|
}
|
2019-09-03 14:10:19 +03:00
|
|
|
/* blink power indicator and turn off attention */
|
|
|
|
pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_BLINK,
|
|
|
|
PCI_EXP_SLTCTL_ATTN_IND_OFF);
|
2018-09-08 10:59:01 +03:00
|
|
|
schedule_delayed_work(&ctrl->button_work, 5 * HZ);
|
2007-03-07 02:02:26 +03:00
|
|
|
break;
|
|
|
|
case BLINKINGOFF_STATE:
|
|
|
|
case BLINKINGON_STATE:
|
|
|
|
/*
|
|
|
|
* Cancel if we are still blinking; this means that we
|
|
|
|
* press the attention again before the 5 sec. limit
|
|
|
|
* expires to cancel hot-add or hot-remove
|
|
|
|
*/
|
2018-09-18 22:46:17 +03:00
|
|
|
ctrl_info(ctrl, "Slot(%s): Button cancel\n", slot_name(ctrl));
|
2018-09-08 10:59:01 +03:00
|
|
|
cancel_delayed_work(&ctrl->button_work);
|
2018-09-18 22:46:17 +03:00
|
|
|
if (ctrl->state == BLINKINGOFF_STATE) {
|
|
|
|
ctrl->state = ON_STATE;
|
2019-09-03 14:10:19 +03:00
|
|
|
pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_ON,
|
|
|
|
PCI_EXP_SLTCTL_ATTN_IND_OFF);
|
2018-07-20 01:27:45 +03:00
|
|
|
} else {
|
2018-09-18 22:46:17 +03:00
|
|
|
ctrl->state = OFF_STATE;
|
2019-09-03 14:10:19 +03:00
|
|
|
pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
|
|
|
|
PCI_EXP_SLTCTL_ATTN_IND_OFF);
|
2018-07-20 01:27:45 +03:00
|
|
|
}
|
2016-09-08 23:19:58 +03:00
|
|
|
ctrl_info(ctrl, "Slot(%s): Action canceled due to button press\n",
|
2018-09-18 22:46:17 +03:00
|
|
|
slot_name(ctrl));
|
2007-03-07 02:02:26 +03:00
|
|
|
break;
|
|
|
|
default:
|
2016-09-08 23:19:58 +03:00
|
|
|
ctrl_err(ctrl, "Slot(%s): Ignoring invalid state %#x\n",
|
2018-09-18 22:46:17 +03:00
|
|
|
slot_name(ctrl), ctrl->state);
|
2007-03-07 02:02:26 +03:00
|
|
|
break;
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
2018-09-08 10:59:01 +03:00
|
|
|
mutex_unlock(&ctrl->state_lock);
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
|
2018-09-18 22:46:17 +03:00
|
|
|
void pciehp_handle_disable_request(struct controller *ctrl)
|
PCI: pciehp: Enable/disable exclusively from IRQ thread
Besides the IRQ thread, there are several other places in the driver
which enable or disable the slot:
- pciehp_probe() enables the slot if it's occupied and the pciehp_force
module parameter is used.
- pciehp_resume() enables or disables the slot after system sleep.
- pciehp_queue_pushbutton_work() enables or disables the slot after the
5 second delay following an Attention Button press.
- pciehp_sysfs_enable_slot() and pciehp_sysfs_disable_slot() enable or
disable the slot on sysfs write.
This requires locking and complicates pciehp's state machine.
A simplification can be achieved by enabling and disabling the slot
exclusively from the IRQ thread.
Amend the functions listed above to request slot enable/disablement from
the IRQ thread by either synthesizing a Presence Detect Changed event or,
in the case of a disable user request (via sysfs or an Attention Button
press), submitting a newly introduced force disable request. The latter
is needed because the slot shall be forced off despite being occupied.
For this force disable request, avoid colliding with Slot Status register
bits by using a bit number greater than 16.
For synchronous execution of requests (on sysfs write), wait for the
request to finish and retrieve the result. There can only ever be one
sysfs write in flight due to the locking in kernfs_fop_write(), hence
there is no risk of returning the result of a different sysfs request to
user space.
The POWERON_STATE and POWEROFF_STATE is now no longer entered by the
above-listed functions, but solely by the IRQ thread when it begins a
power transition. Afterwards, it moves to STATIC_STATE. The same
applies to canceling the Attention Button work, it likewise becomes an
IRQ thread only operation.
An immediate consequence is that the POWERON_STATE and POWEROFF_STATE is
never observed by the IRQ thread itself, only by functions called in a
different context, such as pciehp_sysfs_enable_slot(). So remove
handling of these states from pciehp_handle_button_press() and
pciehp_handle_link_change() which are exclusively called from the IRQ
thread.
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2018-07-20 01:27:46 +03:00
|
|
|
{
|
2018-09-08 10:59:01 +03:00
|
|
|
mutex_lock(&ctrl->state_lock);
|
2018-09-18 22:46:17 +03:00
|
|
|
switch (ctrl->state) {
|
PCI: pciehp: Enable/disable exclusively from IRQ thread
Besides the IRQ thread, there are several other places in the driver
which enable or disable the slot:
- pciehp_probe() enables the slot if it's occupied and the pciehp_force
module parameter is used.
- pciehp_resume() enables or disables the slot after system sleep.
- pciehp_queue_pushbutton_work() enables or disables the slot after the
5 second delay following an Attention Button press.
- pciehp_sysfs_enable_slot() and pciehp_sysfs_disable_slot() enable or
disable the slot on sysfs write.
This requires locking and complicates pciehp's state machine.
A simplification can be achieved by enabling and disabling the slot
exclusively from the IRQ thread.
Amend the functions listed above to request slot enable/disablement from
the IRQ thread by either synthesizing a Presence Detect Changed event or,
in the case of a disable user request (via sysfs or an Attention Button
press), submitting a newly introduced force disable request. The latter
is needed because the slot shall be forced off despite being occupied.
For this force disable request, avoid colliding with Slot Status register
bits by using a bit number greater than 16.
For synchronous execution of requests (on sysfs write), wait for the
request to finish and retrieve the result. There can only ever be one
sysfs write in flight due to the locking in kernfs_fop_write(), hence
there is no risk of returning the result of a different sysfs request to
user space.
The POWERON_STATE and POWEROFF_STATE is now no longer entered by the
above-listed functions, but solely by the IRQ thread when it begins a
power transition. Afterwards, it moves to STATIC_STATE. The same
applies to canceling the Attention Button work, it likewise becomes an
IRQ thread only operation.
An immediate consequence is that the POWERON_STATE and POWEROFF_STATE is
never observed by the IRQ thread itself, only by functions called in a
different context, such as pciehp_sysfs_enable_slot(). So remove
handling of these states from pciehp_handle_button_press() and
pciehp_handle_link_change() which are exclusively called from the IRQ
thread.
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2018-07-20 01:27:46 +03:00
|
|
|
case BLINKINGON_STATE:
|
|
|
|
case BLINKINGOFF_STATE:
|
2018-09-08 10:59:01 +03:00
|
|
|
cancel_delayed_work(&ctrl->button_work);
|
2018-07-28 08:22:00 +03:00
|
|
|
break;
|
PCI: pciehp: Enable/disable exclusively from IRQ thread
Besides the IRQ thread, there are several other places in the driver
which enable or disable the slot:
- pciehp_probe() enables the slot if it's occupied and the pciehp_force
module parameter is used.
- pciehp_resume() enables or disables the slot after system sleep.
- pciehp_queue_pushbutton_work() enables or disables the slot after the
5 second delay following an Attention Button press.
- pciehp_sysfs_enable_slot() and pciehp_sysfs_disable_slot() enable or
disable the slot on sysfs write.
This requires locking and complicates pciehp's state machine.
A simplification can be achieved by enabling and disabling the slot
exclusively from the IRQ thread.
Amend the functions listed above to request slot enable/disablement from
the IRQ thread by either synthesizing a Presence Detect Changed event or,
in the case of a disable user request (via sysfs or an Attention Button
press), submitting a newly introduced force disable request. The latter
is needed because the slot shall be forced off despite being occupied.
For this force disable request, avoid colliding with Slot Status register
bits by using a bit number greater than 16.
For synchronous execution of requests (on sysfs write), wait for the
request to finish and retrieve the result. There can only ever be one
sysfs write in flight due to the locking in kernfs_fop_write(), hence
there is no risk of returning the result of a different sysfs request to
user space.
The POWERON_STATE and POWEROFF_STATE is now no longer entered by the
above-listed functions, but solely by the IRQ thread when it begins a
power transition. Afterwards, it moves to STATIC_STATE. The same
applies to canceling the Attention Button work, it likewise becomes an
IRQ thread only operation.
An immediate consequence is that the POWERON_STATE and POWEROFF_STATE is
never observed by the IRQ thread itself, only by functions called in a
different context, such as pciehp_sysfs_enable_slot(). So remove
handling of these states from pciehp_handle_button_press() and
pciehp_handle_link_change() which are exclusively called from the IRQ
thread.
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2018-07-20 01:27:46 +03:00
|
|
|
}
|
2018-09-18 22:46:17 +03:00
|
|
|
ctrl->state = POWEROFF_STATE;
|
2018-09-08 10:59:01 +03:00
|
|
|
mutex_unlock(&ctrl->state_lock);
|
PCI: pciehp: Enable/disable exclusively from IRQ thread
Besides the IRQ thread, there are several other places in the driver
which enable or disable the slot:
- pciehp_probe() enables the slot if it's occupied and the pciehp_force
module parameter is used.
- pciehp_resume() enables or disables the slot after system sleep.
- pciehp_queue_pushbutton_work() enables or disables the slot after the
5 second delay following an Attention Button press.
- pciehp_sysfs_enable_slot() and pciehp_sysfs_disable_slot() enable or
disable the slot on sysfs write.
This requires locking and complicates pciehp's state machine.
A simplification can be achieved by enabling and disabling the slot
exclusively from the IRQ thread.
Amend the functions listed above to request slot enable/disablement from
the IRQ thread by either synthesizing a Presence Detect Changed event or,
in the case of a disable user request (via sysfs or an Attention Button
press), submitting a newly introduced force disable request. The latter
is needed because the slot shall be forced off despite being occupied.
For this force disable request, avoid colliding with Slot Status register
bits by using a bit number greater than 16.
For synchronous execution of requests (on sysfs write), wait for the
request to finish and retrieve the result. There can only ever be one
sysfs write in flight due to the locking in kernfs_fop_write(), hence
there is no risk of returning the result of a different sysfs request to
user space.
The POWERON_STATE and POWEROFF_STATE are now no longer entered by the
above-listed functions, but solely by the IRQ thread when it begins a
power transition. Afterwards, it moves to STATIC_STATE. The same
applies to canceling the Attention Button work, it likewise becomes an
IRQ thread only operation.
An immediate consequence is that the POWERON_STATE and POWEROFF_STATE are
never observed by the IRQ thread itself, only by functions called in a
different context, such as pciehp_sysfs_enable_slot(). So remove
handling of these states from pciehp_handle_button_press() and
pciehp_handle_link_change() which are exclusively called from the IRQ
thread.
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2018-07-20 01:27:46 +03:00
|
|
|
|
2018-09-18 22:46:17 +03:00
|
|
|
ctrl->request_result = pciehp_disable_slot(ctrl, SAFE_REMOVAL);
|
PCI: pciehp: Enable/disable exclusively from IRQ thread
Besides the IRQ thread, there are several other places in the driver
which enable or disable the slot:
- pciehp_probe() enables the slot if it's occupied and the pciehp_force
module parameter is used.
- pciehp_resume() enables or disables the slot after system sleep.
- pciehp_queue_pushbutton_work() enables or disables the slot after the
5 second delay following an Attention Button press.
- pciehp_sysfs_enable_slot() and pciehp_sysfs_disable_slot() enable or
disable the slot on sysfs write.
This requires locking and complicates pciehp's state machine.
A simplification can be achieved by enabling and disabling the slot
exclusively from the IRQ thread.
Amend the functions listed above to request slot enable/disablement from
the IRQ thread by either synthesizing a Presence Detect Changed event or,
in the case of a disable user request (via sysfs or an Attention Button
press), submitting a newly introduced force disable request. The latter
is needed because the slot shall be forced off despite being occupied.
For this force disable request, avoid colliding with Slot Status register
bits by using a bit number greater than 16.
For synchronous execution of requests (on sysfs write), wait for the
request to finish and retrieve the result. There can only ever be one
sysfs write in flight due to the locking in kernfs_fop_write(), hence
there is no risk of returning the result of a different sysfs request to
user space.
The POWERON_STATE and POWEROFF_STATE are now no longer entered by the
above-listed functions, but solely by the IRQ thread when it begins a
power transition. Afterwards, it moves to STATIC_STATE. The same
applies to canceling the Attention Button work, it likewise becomes an
IRQ thread only operation.
An immediate consequence is that the POWERON_STATE and POWEROFF_STATE are
never observed by the IRQ thread itself, only by functions called in a
different context, such as pciehp_sysfs_enable_slot(). So remove
handling of these states from pciehp_handle_button_press() and
pciehp_handle_link_change() which are exclusively called from the IRQ
thread.
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2018-07-20 01:27:46 +03:00
|
|
|
}
|
|
|
|
|
2018-09-18 22:46:17 +03:00
|
|
|
void pciehp_handle_presence_or_link_change(struct controller *ctrl, u32 events)
|
2014-02-05 06:29:10 +04:00
|
|
|
{
|
2019-10-29 20:00:22 +03:00
|
|
|
int present, link_active;
|
2018-07-20 01:27:41 +03:00
|
|
|
|
PCI: pciehp: Become resilient to missed events
A hotplug port's Slot Status register does not count how often each type
of event occurred, it only records the fact *that* an event has occurred.
Previously pciehp queued a work item for each event. But if it missed
an event, e.g. removal of a card in-between two back-to-back insertions,
it queued up the wrong work item or no work item at all. Commit
fad214b0aa72 ("PCI: pciehp: Process all hotplug events before looking
for new ones") sought to improve the situation by shrinking the window
during which events may be missed.
But Stefan Roese reports unbalanced Card present and Link Up events,
suggesting that we're still missing events if they occur very rapidly.
Bjorn Helgaas responds that he considers pciehp's event handling
"baroque" and calls for its simplification and rationalization:
https://lkml.kernel.org/r/20180202192045.GA53759@bhelgaas-glaptop.roam.corp.google.com
It gets worse once a hotplug port is runtime suspended: The port can
signal an interrupt while it and its parents are in D3hot, i.e. while
it is inaccessible. By the time we've runtime resumed all parents to D0
and read the port's Slot Status register, we may have missed an arbitrary
number of events. Event handling therefore needs to be reworked to
become resilient to missed events.
Assume that a Presence Detect Changed event has occurred.
Consider the following truth table:
- Slot is in OFF_STATE and is currently empty. => Do nothing.
(The event is trailing a Link Down or we've
missed an insertion and subsequent removal.)
- Slot is in OFF_STATE and is currently occupied. => Turn the slot on.
- Slot is in ON_STATE and is currently empty. => Turn the slot off.
- Slot is in ON_STATE and is currently occupied. => Turn the slot off, then back on.
(Be cautious and assume the card in the slot isn't the same as before.)
This leads to the following simple algorithm:
1 If the slot is in ON_STATE, turn it off unconditionally.
2 If the slot is currently occupied, turn it on.
Because those actions are now carried out synchronously, rather than by
scheduled work items, pciehp reacts to the *current* situation and
missed events no longer matter.
Data Link Layer State Changed events can be handled identically to
Presence Detect Changed events. Note that in the above truth table,
a Link Up trailing a Card present event didn't have to be accounted for:
It is filtered out by pciehp_check_link_status().
As for Attention Button Pressed events, PCIe r4.0, sec 6.7.1.5 says:
"Once the Power Indicator begins blinking, a 5-second abort interval
exists during which a second depression of the Attention Button cancels
the operation." In other words, the user can only expect the system to
react to a button press after it starts blinking. Missed button presses
that occur in-between are irrelevant.
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Stefan Roese <sr@denx.de>
Cc: Mayurkumar Patel <mayurkumar.patel@intel.com>
Cc: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
2018-07-20 01:27:49 +03:00
|
|
|
/*
|
|
|
|
* If the slot is on and presence or link has changed, turn it off.
|
|
|
|
* Even if it's occupied again, we cannot assume the card is the same.
|
|
|
|
*/
|
2018-09-08 10:59:01 +03:00
|
|
|
mutex_lock(&ctrl->state_lock);
|
2018-09-18 22:46:17 +03:00
|
|
|
switch (ctrl->state) {
|
2014-02-05 06:29:10 +04:00
|
|
|
case BLINKINGOFF_STATE:
|
2018-09-08 10:59:01 +03:00
|
|
|
cancel_delayed_work(&ctrl->button_work);
|
2020-08-24 01:36:59 +03:00
|
|
|
fallthrough;
|
2018-07-20 01:27:45 +03:00
|
|
|
case ON_STATE:
|
2018-09-18 22:46:17 +03:00
|
|
|
ctrl->state = POWEROFF_STATE;
|
2018-09-08 10:59:01 +03:00
|
|
|
mutex_unlock(&ctrl->state_lock);
|
PCI: pciehp: Become resilient to missed events
A hotplug port's Slot Status register does not count how often each type
of event occurred, it only records the fact *that* an event has occurred.
Previously pciehp queued a work item for each event. But if it missed
an event, e.g. removal of a card in-between two back-to-back insertions,
it queued up the wrong work item or no work item at all. Commit
fad214b0aa72 ("PCI: pciehp: Process all hotplug events before looking
for new ones") sought to improve the situation by shrinking the window
during which events may be missed.
But Stefan Roese reports unbalanced Card present and Link Up events,
suggesting that we're still missing events if they occur very rapidly.
Bjorn Helgaas responds that he considers pciehp's event handling
"baroque" and calls for its simplification and rationalization:
https://lkml.kernel.org/r/20180202192045.GA53759@bhelgaas-glaptop.roam.corp.google.com
It gets worse once a hotplug port is runtime suspended: The port can
signal an interrupt while it and its parents are in D3hot, i.e. while
it is inaccessible. By the time we've runtime resumed all parents to D0
and read the port's Slot Status register, we may have missed an arbitrary
number of events. Event handling therefore needs to be reworked to
become resilient to missed events.
Assume that a Presence Detect Changed event has occurred.
Consider the following truth table:
- Slot is in OFF_STATE and is currently empty. => Do nothing.
(The event is trailing a Link Down or we've
missed an insertion and subsequent removal.)
- Slot is in OFF_STATE and is currently occupied. => Turn the slot on.
- Slot is in ON_STATE and is currently empty. => Turn the slot off.
- Slot is in ON_STATE and is currently occupied. => Turn the slot off, then back on.
(Be cautious and assume the card in the slot isn't the same as before.)
This leads to the following simple algorithm:
1 If the slot is in ON_STATE, turn it off unconditionally.
2 If the slot is currently occupied, turn it on.
Because those actions are now carried out synchronously, rather than by
scheduled work items, pciehp reacts to the *current* situation and
missed events no longer matter.
Data Link Layer State Changed events can be handled identically to
Presence Detect Changed events. Note that in the above truth table,
a Link Up trailing a Card present event didn't have to be accounted for:
It is filtered out by pciehp_check_link_status().
As for Attention Button Pressed events, PCIe r4.0, sec 6.7.1.5 says:
"Once the Power Indicator begins blinking, a 5-second abort interval
exists during which a second depression of the Attention Button cancels
the operation." In other words, the user can only expect the system to
react to a button press after it starts blinking. Missed button presses
that occur in-between are irrelevant.
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Stefan Roese <sr@denx.de>
Cc: Mayurkumar Patel <mayurkumar.patel@intel.com>
Cc: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
2018-07-20 01:27:49 +03:00
|
|
|
if (events & PCI_EXP_SLTSTA_DLLSC)
|
|
|
|
ctrl_info(ctrl, "Slot(%s): Link Down\n",
|
2018-09-18 22:46:17 +03:00
|
|
|
slot_name(ctrl));
|
PCI: pciehp: Become resilient to missed events
A hotplug port's Slot Status register does not count how often each type
of event occurred, it only records the fact *that* an event has occurred.
Previously pciehp queued a work item for each event. But if it missed
an event, e.g. removal of a card in-between two back-to-back insertions,
it queued up the wrong work item or no work item at all. Commit
fad214b0aa72 ("PCI: pciehp: Process all hotplug events before looking
for new ones") sought to improve the situation by shrinking the window
during which events may be missed.
But Stefan Roese reports unbalanced Card present and Link Up events,
suggesting that we're still missing events if they occur very rapidly.
Bjorn Helgaas responds that he considers pciehp's event handling
"baroque" and calls for its simplification and rationalization:
https://lkml.kernel.org/r/20180202192045.GA53759@bhelgaas-glaptop.roam.corp.google.com
It gets worse once a hotplug port is runtime suspended: The port can
signal an interrupt while it and its parents are in D3hot, i.e. while
it is inaccessible. By the time we've runtime resumed all parents to D0
and read the port's Slot Status register, we may have missed an arbitrary
number of events. Event handling therefore needs to be reworked to
become resilient to missed events.
Assume that a Presence Detect Changed event has occurred.
Consider the following truth table:
- Slot is in OFF_STATE and is currently empty. => Do nothing.
(The event is trailing a Link Down or we've
missed an insertion and subsequent removal.)
- Slot is in OFF_STATE and is currently occupied. => Turn the slot on.
- Slot is in ON_STATE and is currently empty. => Turn the slot off.
- Slot is in ON_STATE and is currently occupied. => Turn the slot off, then back on.
(Be cautious and assume the card in the slot isn't the same as before.)
This leads to the following simple algorithm:
1 If the slot is in ON_STATE, turn it off unconditionally.
2 If the slot is currently occupied, turn it on.
Because those actions are now carried out synchronously, rather than by
scheduled work items, pciehp reacts to the *current* situation and
missed events no longer matter.
Data Link Layer State Changed events can be handled identically to
Presence Detect Changed events. Note that in the above truth table,
a Link Up trailing a Card present event didn't have to be accounted for:
It is filtered out by pciehp_check_link_status().
As for Attention Button Pressed events, PCIe r4.0, sec 6.7.1.5 says:
"Once the Power Indicator begins blinking, a 5-second abort interval
exists during which a second depression of the Attention Button cancels
the operation." In other words, the user can only expect the system to
react to a button press after it starts blinking. Missed button presses
that occur in-between are irrelevant.
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Stefan Roese <sr@denx.de>
Cc: Mayurkumar Patel <mayurkumar.patel@intel.com>
Cc: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
2018-07-20 01:27:49 +03:00
|
|
|
if (events & PCI_EXP_SLTSTA_PDC)
|
|
|
|
ctrl_info(ctrl, "Slot(%s): Card not present\n",
|
2018-09-18 22:46:17 +03:00
|
|
|
slot_name(ctrl));
|
|
|
|
pciehp_disable_slot(ctrl, SURPRISE_REMOVAL);
|
2014-02-05 06:29:10 +04:00
|
|
|
break;
|
|
|
|
default:
|
2018-09-08 10:59:01 +03:00
|
|
|
mutex_unlock(&ctrl->state_lock);
|
2018-07-28 08:22:00 +03:00
|
|
|
break;
|
2014-02-05 06:29:10 +04:00
|
|
|
}
|
2018-07-20 01:27:41 +03:00
|
|
|
|
PCI: pciehp: Become resilient to missed events
A hotplug port's Slot Status register does not count how often each type
of event occurred, it only records the fact *that* an event has occurred.
Previously pciehp queued a work item for each event. But if it missed
an event, e.g. removal of a card in-between two back-to-back insertions,
it queued up the wrong work item or no work item at all. Commit
fad214b0aa72 ("PCI: pciehp: Process all hotplug events before looking
for new ones") sought to improve the situation by shrinking the window
during which events may be missed.
But Stefan Roese reports unbalanced Card present and Link Up events,
suggesting that we're still missing events if they occur very rapidly.
Bjorn Helgaas responds that he considers pciehp's event handling
"baroque" and calls for its simplification and rationalization:
https://lkml.kernel.org/r/20180202192045.GA53759@bhelgaas-glaptop.roam.corp.google.com
It gets worse once a hotplug port is runtime suspended: The port can
signal an interrupt while it and its parents are in D3hot, i.e. while
it is inaccessible. By the time we've runtime resumed all parents to D0
and read the port's Slot Status register, we may have missed an arbitrary
number of events. Event handling therefore needs to be reworked to
become resilient to missed events.
Assume that a Presence Detect Changed event has occurred.
Consider the following truth table:
- Slot is in OFF_STATE and is currently empty. => Do nothing.
(The event is trailing a Link Down or we've
missed an insertion and subsequent removal.)
- Slot is in OFF_STATE and is currently occupied. => Turn the slot on.
- Slot is in ON_STATE and is currently empty. => Turn the slot off.
- Slot is in ON_STATE and is currently occupied. => Turn the slot off, then back on.
(Be cautious and assume the card in the slot isn't the same as before.)
This leads to the following simple algorithm:
1 If the slot is in ON_STATE, turn it off unconditionally.
2 If the slot is currently occupied, turn it on.
Because those actions are now carried out synchronously, rather than by
scheduled work items, pciehp reacts to the *current* situation and
missed events no longer matter.
Data Link Layer State Changed events can be handled identically to
Presence Detect Changed events. Note that in the above truth table,
a Link Up trailing a Card present event didn't have to be accounted for:
It is filtered out by pciehp_check_link_status().
As for Attention Button Pressed events, PCIe r4.0, sec 6.7.1.5 says:
"Once the Power Indicator begins blinking, a 5-second abort interval
exists during which a second depression of the Attention Button cancels
the operation." In other words, the user can only expect the system to
react to a button press after it starts blinking. Missed button presses
that occur in-between are irrelevant.
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Stefan Roese <sr@denx.de>
Cc: Mayurkumar Patel <mayurkumar.patel@intel.com>
Cc: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
2018-07-20 01:27:49 +03:00
|
|
|
/* Turn the slot on if it's occupied or link is up */
|
2018-09-08 10:59:01 +03:00
|
|
|
mutex_lock(&ctrl->state_lock);
|
2018-09-08 10:59:01 +03:00
|
|
|
present = pciehp_card_present(ctrl);
|
PCI: pciehp: Become resilient to missed events
A hotplug port's Slot Status register does not count how often each type
of event occurred, it only records the fact *that* an event has occurred.
Previously pciehp queued a work item for each event. But if it missed
an event, e.g. removal of a card in-between two back-to-back insertions,
it queued up the wrong work item or no work item at all. Commit
fad214b0aa72 ("PCI: pciehp: Process all hotplug events before looking
for new ones") sought to improve the situation by shrinking the window
during which events may be missed.
But Stefan Roese reports unbalanced Card present and Link Up events,
suggesting that we're still missing events if they occur very rapidly.
Bjorn Helgaas responds that he considers pciehp's event handling
"baroque" and calls for its simplification and rationalization:
https://lkml.kernel.org/r/20180202192045.GA53759@bhelgaas-glaptop.roam.corp.google.com
It gets worse once a hotplug port is runtime suspended: The port can
signal an interrupt while it and its parents are in D3hot, i.e. while
it is inaccessible. By the time we've runtime resumed all parents to D0
and read the port's Slot Status register, we may have missed an arbitrary
number of events. Event handling therefore needs to be reworked to
become resilient to missed events.
Assume that a Presence Detect Changed event has occurred.
Consider the following truth table:
- Slot is in OFF_STATE and is currently empty. => Do nothing.
(The event is trailing a Link Down or we've
missed an insertion and subsequent removal.)
- Slot is in OFF_STATE and is currently occupied. => Turn the slot on.
- Slot is in ON_STATE and is currently empty. => Turn the slot off.
- Slot is in ON_STATE and is currently occupied. => Turn the slot off, then back on.
(Be cautious and assume the card in the slot isn't the same as before.)
This leads to the following simple algorithm:
1 If the slot is in ON_STATE, turn it off unconditionally.
2 If the slot is currently occupied, turn it on.
Because those actions are now carried out synchronously, rather than by
scheduled work items, pciehp reacts to the *current* situation and
missed events no longer matter.
Data Link Layer State Changed events can be handled identically to
Presence Detect Changed events. Note that in the above truth table,
a Link Up trailing a Card present event didn't have to be accounted for:
It is filtered out by pciehp_check_link_status().
As for Attention Button Pressed events, PCIe r4.0, sec 6.7.1.5 says:
"Once the Power Indicator begins blinking, a 5-second abort interval
exists during which a second depression of the Attention Button cancels
the operation." In other words, the user can only expect the system to
react to a button press after it starts blinking. Missed button presses
that occur in-between are irrelevant.
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Stefan Roese <sr@denx.de>
Cc: Mayurkumar Patel <mayurkumar.patel@intel.com>
Cc: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
2018-07-20 01:27:49 +03:00
|
|
|
link_active = pciehp_check_link_active(ctrl);
|
2019-10-29 20:00:22 +03:00
|
|
|
if (present <= 0 && link_active <= 0) {
|
2018-09-08 10:59:01 +03:00
|
|
|
mutex_unlock(&ctrl->state_lock);
|
PCI: pciehp: Become resilient to missed events
A hotplug port's Slot Status register does not count how often each type
of event occurred, it only records the fact *that* an event has occurred.
Previously pciehp queued a work item for each event. But if it missed
an event, e.g. removal of a card in-between two back-to-back insertions,
it queued up the wrong work item or no work item at all. Commit
fad214b0aa72 ("PCI: pciehp: Process all hotplug events before looking
for new ones") sought to improve the situation by shrinking the window
during which events may be missed.
But Stefan Roese reports unbalanced Card present and Link Up events,
suggesting that we're still missing events if they occur very rapidly.
Bjorn Helgaas responds that he considers pciehp's event handling
"baroque" and calls for its simplification and rationalization:
https://lkml.kernel.org/r/20180202192045.GA53759@bhelgaas-glaptop.roam.corp.google.com
It gets worse once a hotplug port is runtime suspended: The port can
signal an interrupt while it and its parents are in D3hot, i.e. while
it is inaccessible. By the time we've runtime resumed all parents to D0
and read the port's Slot Status register, we may have missed an arbitrary
number of events. Event handling therefore needs to be reworked to
become resilient to missed events.
Assume that a Presence Detect Changed event has occurred.
Consider the following truth table:
- Slot is in OFF_STATE and is currently empty. => Do nothing.
(The event is trailing a Link Down or we've
missed an insertion and subsequent removal.)
- Slot is in OFF_STATE and is currently occupied. => Turn the slot on.
- Slot is in ON_STATE and is currently empty. => Turn the slot off.
- Slot is in ON_STATE and is currently occupied. => Turn the slot off, then back on.
(Be cautious and assume the card in the slot isn't the same as before.)
This leads to the following simple algorithm:
1 If the slot is in ON_STATE, turn it off unconditionally.
2 If the slot is currently occupied, turn it on.
Because those actions are now carried out synchronously, rather than by
scheduled work items, pciehp reacts to the *current* situation and
missed events no longer matter.
Data Link Layer State Changed events can be handled identically to
Presence Detect Changed events. Note that in the above truth table,
a Link Up trailing a Card present event didn't have to be accounted for:
It is filtered out by pciehp_check_link_status().
As for Attention Button Pressed events, PCIe r4.0, sec 6.7.1.5 says:
"Once the Power Indicator begins blinking, a 5-second abort interval
exists during which a second depression of the Attention Button cancels
the operation." In other words, the user can only expect the system to
react to a button press after it starts blinking. Missed button presses
that occur in-between are irrelevant.
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Stefan Roese <sr@denx.de>
Cc: Mayurkumar Patel <mayurkumar.patel@intel.com>
Cc: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
2018-07-20 01:27:49 +03:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2018-09-18 22:46:17 +03:00
|
|
|
switch (ctrl->state) {
|
PCI: pciehp: Enable/disable exclusively from IRQ thread
Besides the IRQ thread, there are several other places in the driver
which enable or disable the slot:
- pciehp_probe() enables the slot if it's occupied and the pciehp_force
module parameter is used.
- pciehp_resume() enables or disables the slot after system sleep.
- pciehp_queue_pushbutton_work() enables or disables the slot after the
5 second delay following an Attention Button press.
- pciehp_sysfs_enable_slot() and pciehp_sysfs_disable_slot() enable or
disable the slot on sysfs write.
This requires locking and complicates pciehp's state machine.
A simplification can be achieved by enabling and disabling the slot
exclusively from the IRQ thread.
Amend the functions listed above to request slot enable/disablement from
the IRQ thread by either synthesizing a Presence Detect Changed event or,
in the case of a disable user request (via sysfs or an Attention Button
press), submitting a newly introduced force disable request. The latter
is needed because the slot shall be forced off despite being occupied.
For this force disable request, avoid colliding with Slot Status register
bits by using a bit number greater than 16.
For synchronous execution of requests (on sysfs write), wait for the
request to finish and retrieve the result. There can only ever be one
sysfs write in flight due to the locking in kernfs_fop_write(), hence
there is no risk of returning the result of a different sysfs request to
user space.
The POWERON_STATE and POWEROFF_STATE is now no longer entered by the
above-listed functions, but solely by the IRQ thread when it begins a
power transition. Afterwards, it moves to STATIC_STATE. The same
applies to canceling the Attention Button work, it likewise becomes an
IRQ thread only operation.
An immediate consequence is that the POWERON_STATE and POWEROFF_STATE is
never observed by the IRQ thread itself, only by functions called in a
different context, such as pciehp_sysfs_enable_slot(). So remove
handling of these states from pciehp_handle_button_press() and
pciehp_handle_link_change() which are exclusively called from the IRQ
thread.
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2018-07-20 01:27:46 +03:00
|
|
|
case BLINKINGON_STATE:
|
2018-09-08 10:59:01 +03:00
|
|
|
cancel_delayed_work(&ctrl->button_work);
|
2020-08-24 01:36:59 +03:00
|
|
|
fallthrough;
|
PCI: pciehp: Become resilient to missed events
A hotplug port's Slot Status register does not count how often each type
of event occurred, it only records the fact *that* an event has occurred.
Previously pciehp queued a work item for each event. But if it missed
an event, e.g. removal of a card in-between two back-to-back insertions,
it queued up the wrong work item or no work item at all. Commit
fad214b0aa72 ("PCI: pciehp: Process all hotplug events before looking
for new ones") sought to improve the situation by shrinking the window
during which events may be missed.
But Stefan Roese reports unbalanced Card present and Link Up events,
suggesting that we're still missing events if they occur very rapidly.
Bjorn Helgaas responds that he considers pciehp's event handling
"baroque" and calls for its simplification and rationalization:
https://lkml.kernel.org/r/20180202192045.GA53759@bhelgaas-glaptop.roam.corp.google.com
It gets worse once a hotplug port is runtime suspended: The port can
signal an interrupt while it and its parents are in D3hot, i.e. while
it is inaccessible. By the time we've runtime resumed all parents to D0
and read the port's Slot Status register, we may have missed an arbitrary
number of events. Event handling therefore needs to be reworked to
become resilient to missed events.
Assume that a Presence Detect Changed event has occurred.
Consider the following truth table:
- Slot is in OFF_STATE and is currently empty. => Do nothing.
(The event is trailing a Link Down or we've
missed an insertion and subsequent removal.)
- Slot is in OFF_STATE and is currently occupied. => Turn the slot on.
- Slot is in ON_STATE and is currently empty. => Turn the slot off.
- Slot is in ON_STATE and is currently occupied. => Turn the slot off, then back on.
(Be cautious and assume the card in the slot isn't the same as before.)
This leads to the following simple algorithm:
1 If the slot is in ON_STATE, turn it off unconditionally.
2 If the slot is currently occupied, turn it on.
Because those actions are now carried out synchronously, rather than by
scheduled work items, pciehp reacts to the *current* situation and
missed events no longer matter.
Data Link Layer State Changed events can be handled identically to
Presence Detect Changed events. Note that in the above truth table,
a Link Up trailing a Card present event didn't have to be accounted for:
It is filtered out by pciehp_check_link_status().
As for Attention Button Pressed events, PCIe r4.0, sec 6.7.1.5 says:
"Once the Power Indicator begins blinking, a 5-second abort interval
exists during which a second depression of the Attention Button cancels
the operation." In other words, the user can only expect the system to
react to a button press after it starts blinking. Missed button presses
that occur in-between are irrelevant.
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Stefan Roese <sr@denx.de>
Cc: Mayurkumar Patel <mayurkumar.patel@intel.com>
Cc: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
2018-07-20 01:27:49 +03:00
|
|
|
case OFF_STATE:
|
2018-09-18 22:46:17 +03:00
|
|
|
ctrl->state = POWERON_STATE;
|
2018-09-08 10:59:01 +03:00
|
|
|
mutex_unlock(&ctrl->state_lock);
|
PCI: pciehp: Become resilient to missed events
A hotplug port's Slot Status register does not count how often each type
of event occurred, it only records the fact *that* an event has occurred.
Previously pciehp queued a work item for each event. But if it missed
an event, e.g. removal of a card in-between two back-to-back insertions,
it queued up the wrong work item or no work item at all. Commit
fad214b0aa72 ("PCI: pciehp: Process all hotplug events before looking
for new ones") sought to improve the situation by shrinking the window
during which events may be missed.
But Stefan Roese reports unbalanced Card present and Link Up events,
suggesting that we're still missing events if they occur very rapidly.
Bjorn Helgaas responds that he considers pciehp's event handling
"baroque" and calls for its simplification and rationalization:
https://lkml.kernel.org/r/20180202192045.GA53759@bhelgaas-glaptop.roam.corp.google.com
It gets worse once a hotplug port is runtime suspended: The port can
signal an interrupt while it and its parents are in D3hot, i.e. while
it is inaccessible. By the time we've runtime resumed all parents to D0
and read the port's Slot Status register, we may have missed an arbitrary
number of events. Event handling therefore needs to be reworked to
become resilient to missed events.
Assume that a Presence Detect Changed event has occurred.
Consider the following truth table:
- Slot is in OFF_STATE and is currently empty. => Do nothing.
(The event is trailing a Link Down or we've
missed an insertion and subsequent removal.)
- Slot is in OFF_STATE and is currently occupied. => Turn the slot on.
- Slot is in ON_STATE and is currently empty. => Turn the slot off.
- Slot is in ON_STATE and is currently occupied. => Turn the slot off, then back on.
(Be cautious and assume the card in the slot isn't the same as before.)
This leads to the following simple algorithm:
1 If the slot is in ON_STATE, turn it off unconditionally.
2 If the slot is currently occupied, turn it on.
Because those actions are now carried out synchronously, rather than by
scheduled work items, pciehp reacts to the *current* situation and
missed events no longer matter.
Data Link Layer State Changed events can be handled identically to
Presence Detect Changed events. Note that in the above truth table,
a Link Up trailing a Card present event didn't have to be accounted for:
It is filtered out by pciehp_check_link_status().
As for Attention Button Pressed events, PCIe r4.0, sec 6.7.1.5 says:
"Once the Power Indicator begins blinking, a 5-second abort interval
exists during which a second depression of the Attention Button cancels
the operation." In other words, the user can only expect the system to
react to a button press after it starts blinking. Missed button presses
that occur in-between are irrelevant.
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Stefan Roese <sr@denx.de>
Cc: Mayurkumar Patel <mayurkumar.patel@intel.com>
Cc: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
2018-07-20 01:27:49 +03:00
|
|
|
if (present)
|
|
|
|
ctrl_info(ctrl, "Slot(%s): Card present\n",
|
2018-09-18 22:46:17 +03:00
|
|
|
slot_name(ctrl));
|
PCI: pciehp: Become resilient to missed events
A hotplug port's Slot Status register does not count how often each type
of event occurred, it only records the fact *that* an event has occurred.
Previously pciehp queued a work item for each event. But if it missed
an event, e.g. removal of a card in-between two back-to-back insertions,
it queued up the wrong work item or no work item at all. Commit
fad214b0aa72 ("PCI: pciehp: Process all hotplug events before looking
for new ones") sought to improve the situation by shrinking the window
during which events may be missed.
But Stefan Roese reports unbalanced Card present and Link Up events,
suggesting that we're still missing events if they occur very rapidly.
Bjorn Helgaas responds that he considers pciehp's event handling
"baroque" and calls for its simplification and rationalization:
https://lkml.kernel.org/r/20180202192045.GA53759@bhelgaas-glaptop.roam.corp.google.com
It gets worse once a hotplug port is runtime suspended: The port can
signal an interrupt while it and its parents are in D3hot, i.e. while
it is inaccessible. By the time we've runtime resumed all parents to D0
and read the port's Slot Status register, we may have missed an arbitrary
number of events. Event handling therefore needs to be reworked to
become resilient to missed events.
Assume that a Presence Detect Changed event has occurred.
Consider the following truth table:
- Slot is in OFF_STATE and is currently empty. => Do nothing.
(The event is trailing a Link Down or we've
missed an insertion and subsequent removal.)
- Slot is in OFF_STATE and is currently occupied. => Turn the slot on.
- Slot is in ON_STATE and is currently empty. => Turn the slot off.
- Slot is in ON_STATE and is currently occupied. => Turn the slot off, then back on.
(Be cautious and assume the card in the slot isn't the same as before.)
This leads to the following simple algorithm:
1 If the slot is in ON_STATE, turn it off unconditionally.
2 If the slot is currently occupied, turn it on.
Because those actions are now carried out synchronously, rather than by
scheduled work items, pciehp reacts to the *current* situation and
missed events no longer matter.
Data Link Layer State Changed events can be handled identically to
Presence Detect Changed events. Note that in the above truth table,
a Link Up trailing a Card present event didn't have to be accounted for:
It is filtered out by pciehp_check_link_status().
As for Attention Button Pressed events, PCIe r4.0, sec 6.7.1.5 says:
"Once the Power Indicator begins blinking, a 5-second abort interval
exists during which a second depression of the Attention Button cancels
the operation." In other words, the user can only expect the system to
react to a button press after it starts blinking. Missed button presses
that occur in-between are irrelevant.
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Stefan Roese <sr@denx.de>
Cc: Mayurkumar Patel <mayurkumar.patel@intel.com>
Cc: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
2018-07-20 01:27:49 +03:00
|
|
|
if (link_active)
|
|
|
|
ctrl_info(ctrl, "Slot(%s): Link Up\n",
|
2018-09-18 22:46:17 +03:00
|
|
|
slot_name(ctrl));
|
|
|
|
ctrl->request_result = pciehp_enable_slot(ctrl);
|
PCI: pciehp: Become resilient to missed events
A hotplug port's Slot Status register does not count how often each type
of event occurred, it only records the fact *that* an event has occurred.
Previously pciehp queued a work item for each event. But if it missed
an event, e.g. removal of a card in-between two back-to-back insertions,
it queued up the wrong work item or no work item at all. Commit
fad214b0aa72 ("PCI: pciehp: Process all hotplug events before looking
for new ones") sought to improve the situation by shrinking the window
during which events may be missed.
But Stefan Roese reports unbalanced Card present and Link Up events,
suggesting that we're still missing events if they occur very rapidly.
Bjorn Helgaas responds that he considers pciehp's event handling
"baroque" and calls for its simplification and rationalization:
https://lkml.kernel.org/r/20180202192045.GA53759@bhelgaas-glaptop.roam.corp.google.com
It gets worse once a hotplug port is runtime suspended: The port can
signal an interrupt while it and its parents are in D3hot, i.e. while
it is inaccessible. By the time we've runtime resumed all parents to D0
and read the port's Slot Status register, we may have missed an arbitrary
number of events. Event handling therefore needs to be reworked to
become resilient to missed events.
Assume that a Presence Detect Changed event has occurred.
Consider the following truth table:
- Slot is in OFF_STATE and is currently empty. => Do nothing.
(The event is trailing a Link Down or we've
missed an insertion and subsequent removal.)
- Slot is in OFF_STATE and is currently occupied. => Turn the slot on.
- Slot is in ON_STATE and is currently empty. => Turn the slot off.
- Slot is in ON_STATE and is currently occupied. => Turn the slot off, then back on.
(Be cautious and assume the card in the slot isn't the same as before.)
This leads to the following simple algorithm:
1 If the slot is in ON_STATE, turn it off unconditionally.
2 If the slot is currently occupied, turn it on.
Because those actions are now carried out synchronously, rather than by
scheduled work items, pciehp reacts to the *current* situation and
missed events no longer matter.
Data Link Layer State Changed events can be handled identically to
Presence Detect Changed events. Note that in the above truth table,
a Link Up trailing a Card present event didn't have to be accounted for:
It is filtered out by pciehp_check_link_status().
As for Attention Button Pressed events, PCIe r4.0, sec 6.7.1.5 says:
"Once the Power Indicator begins blinking, a 5-second abort interval
exists during which a second depression of the Attention Button cancels
the operation." In other words, the user can only expect the system to
react to a button press after it starts blinking. Missed button presses
that occur in-between are irrelevant.
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Stefan Roese <sr@denx.de>
Cc: Mayurkumar Patel <mayurkumar.patel@intel.com>
Cc: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
2018-07-20 01:27:49 +03:00
|
|
|
break;
|
|
|
|
default:
|
2018-09-08 10:59:01 +03:00
|
|
|
mutex_unlock(&ctrl->state_lock);
|
2018-07-28 08:22:00 +03:00
|
|
|
break;
|
2007-03-07 02:02:26 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-18 22:46:17 +03:00
|
|
|
static int __pciehp_enable_slot(struct controller *ctrl)
|
2005-04-17 02:20:36 +04:00
|
|
|
{
|
|
|
|
u8 getstatus = 0;
|
|
|
|
|
2018-09-18 22:46:17 +03:00
|
|
|
if (MRL_SENS(ctrl)) {
|
|
|
|
pciehp_get_latch_status(ctrl, &getstatus);
|
2013-12-15 00:06:16 +04:00
|
|
|
if (getstatus) {
|
2016-09-08 23:19:58 +03:00
|
|
|
ctrl_info(ctrl, "Slot(%s): Latch open\n",
|
2018-09-18 22:46:17 +03:00
|
|
|
slot_name(ctrl));
|
2006-09-20 04:04:33 +04:00
|
|
|
return -ENODEV;
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
}
|
2007-08-10 03:09:36 +04:00
|
|
|
|
2018-09-18 22:46:17 +03:00
|
|
|
if (POWER_CTRL(ctrl)) {
|
|
|
|
pciehp_get_power_status(ctrl, &getstatus);
|
2013-12-15 00:06:16 +04:00
|
|
|
if (getstatus) {
|
2016-09-08 23:19:58 +03:00
|
|
|
ctrl_info(ctrl, "Slot(%s): Already enabled\n",
|
2018-09-18 22:46:17 +03:00
|
|
|
slot_name(ctrl));
|
2016-11-19 11:32:46 +03:00
|
|
|
return 0;
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-18 22:46:17 +03:00
|
|
|
return board_added(ctrl);
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
|
2018-09-18 22:46:17 +03:00
|
|
|
static int pciehp_enable_slot(struct controller *ctrl)
|
2018-07-20 01:27:40 +03:00
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
2018-07-20 01:27:56 +03:00
|
|
|
pm_runtime_get_sync(&ctrl->pcie->port->dev);
|
2018-09-18 22:46:17 +03:00
|
|
|
ret = __pciehp_enable_slot(ctrl);
|
2018-07-20 01:27:40 +03:00
|
|
|
if (ret && ATTN_BUTTN(ctrl))
|
2019-09-03 14:10:21 +03:00
|
|
|
/* may be blinking */
|
|
|
|
pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
|
|
|
|
INDICATOR_NOOP);
|
2018-07-20 01:27:56 +03:00
|
|
|
pm_runtime_put(&ctrl->pcie->port->dev);
|
2018-07-20 01:27:40 +03:00
|
|
|
|
2018-09-08 10:59:01 +03:00
|
|
|
mutex_lock(&ctrl->state_lock);
|
2018-09-18 22:46:17 +03:00
|
|
|
ctrl->state = ret ? OFF_STATE : ON_STATE;
|
2018-09-08 10:59:01 +03:00
|
|
|
mutex_unlock(&ctrl->state_lock);
|
2018-07-20 01:27:40 +03:00
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2018-09-18 22:46:17 +03:00
|
|
|
/*
 * Remove the board from the slot.
 *
 * If the slot has a power controller and the power status reads zero, the
 * slot is reported as already disabled and -EINVAL is returned without
 * touching the board. Otherwise remove_board() is called with @safe_removal
 * passed through unchanged, and 0 is returned.
 */
static int __pciehp_disable_slot(struct controller *ctrl, bool safe_removal)
{
	bool has_power_ctrl = POWER_CTRL(ctrl);
	u8 power_status = 0;

	if (has_power_ctrl)
		pciehp_get_power_status(ctrl, &power_status);

	/* The power status is only meaningful with a power controller. */
	if (has_power_ctrl && !power_status) {
		ctrl_info(ctrl, "Slot(%s): Already disabled\n",
			  slot_name(ctrl));
		return -EINVAL;
	}

	remove_board(ctrl, safe_removal);
	return 0;
}
|
|
|
|
|
2018-09-18 22:46:17 +03:00
|
|
|
/*
 * Disable the slot while holding a runtime PM reference on the port.
 *
 * ctrl->state is set to OFF_STATE under state_lock regardless of the
 * outcome. Returns the result of __pciehp_disable_slot().
 */
static int pciehp_disable_slot(struct controller *ctrl, bool safe_removal)
{
	int rc;

	pm_runtime_get_sync(&ctrl->pcie->port->dev);
	rc = __pciehp_disable_slot(ctrl, safe_removal);
	pm_runtime_put(&ctrl->pcie->port->dev);

	/* The slot is considered off even if the disable attempt failed. */
	mutex_lock(&ctrl->state_lock);
	ctrl->state = OFF_STATE;
	mutex_unlock(&ctrl->state_lock);

	return rc;
}
|
|
|
|
|
2018-08-19 17:29:00 +03:00
|
|
|
int pciehp_sysfs_enable_slot(struct hotplug_slot *hotplug_slot)
|
2007-03-07 02:02:26 +03:00
|
|
|
{
|
2018-09-08 10:59:01 +03:00
|
|
|
struct controller *ctrl = to_ctrl(hotplug_slot);
|
2007-03-07 02:02:26 +03:00
|
|
|
|
2018-09-08 10:59:01 +03:00
|
|
|
mutex_lock(&ctrl->state_lock);
|
2018-09-18 22:46:17 +03:00
|
|
|
switch (ctrl->state) {
|
2007-03-07 02:02:26 +03:00
|
|
|
case BLINKINGON_STATE:
|
2018-07-20 01:27:45 +03:00
|
|
|
case OFF_STATE:
|
2018-09-08 10:59:01 +03:00
|
|
|
mutex_unlock(&ctrl->state_lock);
|
PCI: pciehp: Enable/disable exclusively from IRQ thread
Besides the IRQ thread, there are several other places in the driver
which enable or disable the slot:
- pciehp_probe() enables the slot if it's occupied and the pciehp_force
module parameter is used.
- pciehp_resume() enables or disables the slot after system sleep.
- pciehp_queue_pushbutton_work() enables or disables the slot after the
5 second delay following an Attention Button press.
- pciehp_sysfs_enable_slot() and pciehp_sysfs_disable_slot() enable or
disable the slot on sysfs write.
This requires locking and complicates pciehp's state machine.
A simplification can be achieved by enabling and disabling the slot
exclusively from the IRQ thread.
Amend the functions listed above to request slot enable/disablement from
the IRQ thread by either synthesizing a Presence Detect Changed event or,
in the case of a disable user request (via sysfs or an Attention Button
press), submitting a newly introduced force disable request. The latter
is needed because the slot shall be forced off despite being occupied.
For this force disable request, avoid colliding with Slot Status register
bits by using a bit number greater than 16.
For synchronous execution of requests (on sysfs write), wait for the
request to finish and retrieve the result. There can only ever be one
sysfs write in flight due to the locking in kernfs_fop_write(), hence
there is no risk of returning the result of a different sysfs request to
user space.
The POWERON_STATE and POWEROFF_STATE is now no longer entered by the
above-listed functions, but solely by the IRQ thread when it begins a
power transition. Afterwards, it moves to STATIC_STATE. The same
applies to canceling the Attention Button work, it likewise becomes an
IRQ thread only operation.
An immediate consequence is that the POWERON_STATE and POWEROFF_STATE is
never observed by the IRQ thread itself, only by functions called in a
different context, such as pciehp_sysfs_enable_slot(). So remove
handling of these states from pciehp_handle_button_press() and
pciehp_handle_link_change() which are exclusively called from the IRQ
thread.
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2018-07-20 01:27:46 +03:00
|
|
|
/*
|
|
|
|
* The IRQ thread becomes a no-op if the user pulls out the
|
|
|
|
* card before the thread wakes up, so initialize to -ENODEV.
|
|
|
|
*/
|
|
|
|
ctrl->request_result = -ENODEV;
|
|
|
|
pciehp_request(ctrl, PCI_EXP_SLTSTA_PDC);
|
|
|
|
wait_event(ctrl->requester,
|
2019-08-09 13:28:43 +03:00
|
|
|
!atomic_read(&ctrl->pending_events) &&
|
|
|
|
!ctrl->ist_running);
|
PCI: pciehp: Enable/disable exclusively from IRQ thread
Besides the IRQ thread, there are several other places in the driver
which enable or disable the slot:
- pciehp_probe() enables the slot if it's occupied and the pciehp_force
module parameter is used.
- pciehp_resume() enables or disables the slot after system sleep.
- pciehp_queue_pushbutton_work() enables or disables the slot after the
5 second delay following an Attention Button press.
- pciehp_sysfs_enable_slot() and pciehp_sysfs_disable_slot() enable or
disable the slot on sysfs write.
This requires locking and complicates pciehp's state machine.
A simplification can be achieved by enabling and disabling the slot
exclusively from the IRQ thread.
Amend the functions listed above to request slot enable/disablement from
the IRQ thread by either synthesizing a Presence Detect Changed event or,
in the case of a disable user request (via sysfs or an Attention Button
press), submitting a newly introduced force disable request. The latter
is needed because the slot shall be forced off despite being occupied.
For this force disable request, avoid colliding with Slot Status register
bits by using a bit number greater than 16.
For synchronous execution of requests (on sysfs write), wait for the
request to finish and retrieve the result. There can only ever be one
sysfs write in flight due to the locking in kernfs_fop_write(), hence
there is no risk of returning the result of a different sysfs request to
user space.
The POWERON_STATE and POWEROFF_STATE is now no longer entered by the
above-listed functions, but solely by the IRQ thread when it begins a
power transition. Afterwards, it moves to STATIC_STATE. The same
applies to canceling the Attention Button work, it likewise becomes an
IRQ thread only operation.
An immediate consequence is that the POWERON_STATE and POWEROFF_STATE is
never observed by the IRQ thread itself, only by functions called in a
different context, such as pciehp_sysfs_enable_slot(). So remove
handling of these states from pciehp_handle_button_press() and
pciehp_handle_link_change() which are exclusively called from the IRQ
thread.
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2018-07-20 01:27:46 +03:00
|
|
|
return ctrl->request_result;
|
2007-03-07 02:02:26 +03:00
|
|
|
case POWERON_STATE:
|
2016-09-08 23:19:58 +03:00
|
|
|
ctrl_info(ctrl, "Slot(%s): Already in powering on state\n",
|
2018-09-18 22:46:17 +03:00
|
|
|
slot_name(ctrl));
|
2007-03-07 02:02:26 +03:00
|
|
|
break;
|
|
|
|
case BLINKINGOFF_STATE:
|
2018-07-20 01:27:45 +03:00
|
|
|
case ON_STATE:
|
2007-03-07 02:02:26 +03:00
|
|
|
case POWEROFF_STATE:
|
2016-09-08 23:19:58 +03:00
|
|
|
ctrl_info(ctrl, "Slot(%s): Already enabled\n",
|
2018-09-18 22:46:17 +03:00
|
|
|
slot_name(ctrl));
|
2007-03-07 02:02:26 +03:00
|
|
|
break;
|
|
|
|
default:
|
2016-09-08 23:19:58 +03:00
|
|
|
ctrl_err(ctrl, "Slot(%s): Invalid state %#x\n",
|
2018-09-18 22:46:17 +03:00
|
|
|
slot_name(ctrl), ctrl->state);
|
2007-03-07 02:02:26 +03:00
|
|
|
break;
|
|
|
|
}
|
2018-09-08 10:59:01 +03:00
|
|
|
mutex_unlock(&ctrl->state_lock);
|
2007-03-07 02:02:26 +03:00
|
|
|
|
2018-07-20 01:27:40 +03:00
|
|
|
return -ENODEV;
|
2007-03-07 02:02:26 +03:00
|
|
|
}
|
|
|
|
|
2018-08-19 17:29:00 +03:00
|
|
|
int pciehp_sysfs_disable_slot(struct hotplug_slot *hotplug_slot)
|
2007-03-07 02:02:26 +03:00
|
|
|
{
|
2018-09-08 10:59:01 +03:00
|
|
|
struct controller *ctrl = to_ctrl(hotplug_slot);
|
2007-03-07 02:02:26 +03:00
|
|
|
|
2018-09-08 10:59:01 +03:00
|
|
|
mutex_lock(&ctrl->state_lock);
|
2018-09-18 22:46:17 +03:00
|
|
|
switch (ctrl->state) {
|
2007-03-07 02:02:26 +03:00
|
|
|
case BLINKINGOFF_STATE:
|
2018-07-20 01:27:45 +03:00
|
|
|
case ON_STATE:
|
2018-09-08 10:59:01 +03:00
|
|
|
mutex_unlock(&ctrl->state_lock);
|
PCI: pciehp: Enable/disable exclusively from IRQ thread
Besides the IRQ thread, there are several other places in the driver
which enable or disable the slot:
- pciehp_probe() enables the slot if it's occupied and the pciehp_force
module parameter is used.
- pciehp_resume() enables or disables the slot after system sleep.
- pciehp_queue_pushbutton_work() enables or disables the slot after the
5 second delay following an Attention Button press.
- pciehp_sysfs_enable_slot() and pciehp_sysfs_disable_slot() enable or
disable the slot on sysfs write.
This requires locking and complicates pciehp's state machine.
A simplification can be achieved by enabling and disabling the slot
exclusively from the IRQ thread.
Amend the functions listed above to request slot enable/disablement from
the IRQ thread by either synthesizing a Presence Detect Changed event or,
in the case of a disable user request (via sysfs or an Attention Button
press), submitting a newly introduced force disable request. The latter
is needed because the slot shall be forced off despite being occupied.
For this force disable request, avoid colliding with Slot Status register
bits by using a bit number greater than 16.
For synchronous execution of requests (on sysfs write), wait for the
request to finish and retrieve the result. There can only ever be one
sysfs write in flight due to the locking in kernfs_fop_write(), hence
there is no risk of returning the result of a different sysfs request to
user space.
The POWERON_STATE and POWEROFF_STATE is now no longer entered by the
above-listed functions, but solely by the IRQ thread when it begins a
power transition. Afterwards, it moves to STATIC_STATE. The same
applies to canceling the Attention Button work, it likewise becomes an
IRQ thread only operation.
An immediate consequence is that the POWERON_STATE and POWEROFF_STATE is
never observed by the IRQ thread itself, only by functions called in a
different context, such as pciehp_sysfs_enable_slot(). So remove
handling of these states from pciehp_handle_button_press() and
pciehp_handle_link_change() which are exclusively called from the IRQ
thread.
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2018-07-20 01:27:46 +03:00
|
|
|
pciehp_request(ctrl, DISABLE_SLOT);
|
|
|
|
wait_event(ctrl->requester,
|
2019-08-09 13:28:43 +03:00
|
|
|
!atomic_read(&ctrl->pending_events) &&
|
|
|
|
!ctrl->ist_running);
|
PCI: pciehp: Enable/disable exclusively from IRQ thread
Besides the IRQ thread, there are several other places in the driver
which enable or disable the slot:
- pciehp_probe() enables the slot if it's occupied and the pciehp_force
module parameter is used.
- pciehp_resume() enables or disables the slot after system sleep.
- pciehp_queue_pushbutton_work() enables or disables the slot after the
5 second delay following an Attention Button press.
- pciehp_sysfs_enable_slot() and pciehp_sysfs_disable_slot() enable or
disable the slot on sysfs write.
This requires locking and complicates pciehp's state machine.
A simplification can be achieved by enabling and disabling the slot
exclusively from the IRQ thread.
Amend the functions listed above to request slot enable/disablement from
the IRQ thread by either synthesizing a Presence Detect Changed event or,
in the case of a disable user request (via sysfs or an Attention Button
press), submitting a newly introduced force disable request. The latter
is needed because the slot shall be forced off despite being occupied.
For this force disable request, avoid colliding with Slot Status register
bits by using a bit number greater than 16.
For synchronous execution of requests (on sysfs write), wait for the
request to finish and retrieve the result. There can only ever be one
sysfs write in flight due to the locking in kernfs_fop_write(), hence
there is no risk of returning the result of a different sysfs request to
user space.
The POWERON_STATE and POWEROFF_STATE is now no longer entered by the
above-listed functions, but solely by the IRQ thread when it begins a
power transition. Afterwards, it moves to STATIC_STATE. The same
applies to canceling the Attention Button work, it likewise becomes an
IRQ thread only operation.
An immediate consequence is that the POWERON_STATE and POWEROFF_STATE is
never observed by the IRQ thread itself, only by functions called in a
different context, such as pciehp_sysfs_enable_slot(). So remove
handling of these states from pciehp_handle_button_press() and
pciehp_handle_link_change() which are exclusively called from the IRQ
thread.
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2018-07-20 01:27:46 +03:00
|
|
|
return ctrl->request_result;
|
2007-03-07 02:02:26 +03:00
|
|
|
case POWEROFF_STATE:
|
2016-09-08 23:19:58 +03:00
|
|
|
ctrl_info(ctrl, "Slot(%s): Already in powering off state\n",
|
2018-09-18 22:46:17 +03:00
|
|
|
slot_name(ctrl));
|
2007-03-07 02:02:26 +03:00
|
|
|
break;
|
|
|
|
case BLINKINGON_STATE:
|
2018-07-20 01:27:45 +03:00
|
|
|
case OFF_STATE:
|
2007-03-07 02:02:26 +03:00
|
|
|
case POWERON_STATE:
|
2016-09-08 23:19:58 +03:00
|
|
|
ctrl_info(ctrl, "Slot(%s): Already disabled\n",
|
2018-09-18 22:46:17 +03:00
|
|
|
slot_name(ctrl));
|
2007-03-07 02:02:26 +03:00
|
|
|
break;
|
|
|
|
default:
|
2016-09-08 23:19:58 +03:00
|
|
|
ctrl_err(ctrl, "Slot(%s): Invalid state %#x\n",
|
2018-09-18 22:46:17 +03:00
|
|
|
slot_name(ctrl), ctrl->state);
|
2007-03-07 02:02:26 +03:00
|
|
|
break;
|
|
|
|
}
|
2018-09-08 10:59:01 +03:00
|
|
|
mutex_unlock(&ctrl->state_lock);
|
2007-03-07 02:02:26 +03:00
|
|
|
|
2018-07-20 01:27:40 +03:00
|
|
|
return -ENODEV;
|
2007-03-07 02:02:26 +03:00
|
|
|
}
|