Merge 4.15-rc6 into char-misc-next
We want the fixes in here as well.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

This commit is contained in:
Commit b6a09416e8
@@ -109,6 +109,7 @@ parameter is applicable::
 	IPV6	IPv6 support is enabled.
 	ISAPNP	ISA PnP code is enabled.
 	ISDN	Appropriate ISDN support is enabled.
+	ISOL	CPU Isolation is enabled.
 	JOY	Appropriate joystick support is enabled.
 	KGDB	Kernel debugger support is enabled.
 	KVM	Kernel Virtual Machine support is enabled.

@@ -328,11 +328,15 @@
 			not play well with APC CPU idle - disable it if you have
 			APC and your system crashes randomly.

-	apic=		[APIC,X86-32] Advanced Programmable Interrupt Controller
+	apic=		[APIC,X86] Advanced Programmable Interrupt Controller
 			Change the output verbosity whilst booting
 			Format: { quiet (default) | verbose | debug }
 			Change the amount of debugging information output
 			when initialising the APIC and IO-APIC components.
+			For X86-32, this can also be used to specify an APIC
+			driver name.
+			Format: apic=driver_name
+			Examples: apic=bigsmp

 	apic_extnmi=	[APIC,X86] External NMI delivery setting
 			Format: { bsp (default) | all | none }

@@ -1737,7 +1741,7 @@
 	isapnp=		[ISAPNP]
 			Format: <RDP>,<reset>,<pci_scan>,<verbosity>

-	isolcpus=	[KNL,SMP] Isolate a given set of CPUs from disturbance.
+	isolcpus=	[KNL,SMP,ISOL] Isolate a given set of CPUs from disturbance.
 			[Deprecated - use cpusets instead]
 			Format: [flag-list,]<cpu-list>

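For example, appending an entry such as the following to the kernel command
line would isolate CPUs 2-7 (the CPU list here is purely illustrative):

	isolcpus=2-7
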
@@ -2662,7 +2666,7 @@
 			Valid arguments: on, off
 			Default: on

-	nohz_full=	[KNL,BOOT]
+	nohz_full=	[KNL,BOOT,SMP,ISOL]
 			The argument is a cpu list, as described above.
 			In kernels built with CONFIG_NO_HZ_FULL=y, set
 			the specified list of CPUs whose tick will be stopped

@@ -2708,6 +2712,8 @@
 			steal time is computed, but won't influence scheduler
 			behaviour

+	nopti		[X86-64] Disable kernel page table isolation
+
 	nolapic		[X86-32,APIC] Do not enable or use the local APIC.

 	nolapic_timer	[X86-32,APIC] Do not use the local APIC timer.

@@ -3282,6 +3288,12 @@
 	pt.		[PARIDE]
 			See Documentation/blockdev/paride.txt.

+	pti=		[X86_64]
+			Control user/kernel address space isolation:
+			on - enable
+			off - disable
+			auto - default setting
+
 	pty.legacy_count=
 			[KNL] Number of legacy pty's. Overwrites compiled-in
 			default number.

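As a usage sketch, the isolation-related parameters documented above could be
combined on a boot command line roughly like this (the values are illustrative
assumptions, not recommendations):

	... isolcpus=2-7 nohz_full=2-7 pti=auto
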
@@ -230,7 +230,7 @@ If supported by your machine this will be exposed by the WMI bus with
 a sysfs attribute called "force_power".

 For example the intel-wmi-thunderbolt driver exposes this attribute in:
-  /sys/devices/platform/PNP0C14:00/wmi_bus/wmi_bus-PNP0C14:00/86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power
+  /sys/bus/wmi/devices/86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power

 To force the power to on, write 1 to this attribute file.
 To disable force power, write 0 to this attribute file.

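A minimal shell session exercising the relocated attribute (assuming the
intel-wmi-thunderbolt driver is bound) might look like:

	# force controller power on, then release the override again
	echo 1 > /sys/bus/wmi/devices/86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power
	echo 0 > /sys/bus/wmi/devices/86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power
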
@@ -75,3 +75,4 @@ stable kernels.
 | Qualcomm Tech. | Falkor v1       | E1003           | QCOM_FALKOR_ERRATUM_1003    |
 | Qualcomm Tech. | Falkor v1       | E1009           | QCOM_FALKOR_ERRATUM_1009    |
 | Qualcomm Tech. | QDF2400 ITS     | E0065           | QCOM_QDF2400_ERRATUM_0065   |
+| Qualcomm Tech. | Falkor v{1,2}   | E1041           | QCOM_FALKOR_ERRATUM_1041    |

@@ -898,6 +898,13 @@ controller implements weight and absolute bandwidth limit models for
 normal scheduling policy and absolute bandwidth allocation model for
 realtime scheduling policy.

+WARNING: cgroup2 doesn't yet support control of realtime processes and
+the cpu controller can only be enabled when all RT processes are in
+the root cgroup. Be aware that system management software may already
+have placed RT processes into nonroot cgroups during the system boot
+process, and these processes may need to be moved to the root cgroup
+before the cpu controller can be enabled.
+
 CPU Interface Files
 ~~~~~~~~~~~~~~~~~~~

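In practice, the warning above translates into a shell sequence along these
lines (a sketch, assuming cgroup2 is mounted at /sys/fs/cgroup and $PID is an
RT task currently placed in a non-root cgroup):

	# move the RT task back to the root cgroup first ...
	echo $PID > /sys/fs/cgroup/cgroup.procs
	# ... then the cpu controller can be enabled for child cgroups
	echo "+cpu" > /sys/fs/cgroup/cgroup.subtree_control
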
@@ -13,7 +13,6 @@ Required properties:
                  at25df321a
                  at25df641
                  at26df081a
-                 en25s64
                  mr25h128
                  mr25h256
                  mr25h10

@@ -33,7 +32,6 @@ Required properties:
                  s25fl008k
                  s25fl064k
                  sst25vf040b
-                 sst25wf040b
                  m25p40
                  m25p80
                  m25p16

@@ -73,7 +73,7 @@ Example:
 	compatible = "dlg,da7218";
 	reg = <0x1a>;
 	interrupt-parent = <&gpio6>;
-	interrupts = <11 IRQ_TYPE_LEVEL_HIGH>;
+	interrupts = <11 IRQ_TYPE_LEVEL_LOW>;
 	wakeup-source;

 	VDD-supply = <&reg_audio>;

@@ -77,7 +77,7 @@ Example:
 	reg = <0x1a>;

 	interrupt-parent = <&gpio6>;
-	interrupts = <11 IRQ_TYPE_LEVEL_HIGH>;
+	interrupts = <11 IRQ_TYPE_LEVEL_LOW>;

 	VDD-supply = <&reg_audio>;
 	VDDMIC-supply = <&reg_audio>;

@@ -12,24 +12,30 @@ Required properties:
 - "fsl,imx53-ecspi" for SPI compatible with the one integrated on i.MX53 and later Soc
 - reg : Offset and length of the register set for the device
 - interrupts : Should contain CSPI/eCSPI interrupt
-- cs-gpios : Specifies the gpio pins to be used for chipselects.
 - clocks : Clock specifiers for both ipg and per clocks.
 - clock-names : Clock names should include both "ipg" and "per"
   See the clock consumer binding,
   Documentation/devicetree/bindings/clock/clock-bindings.txt
-- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
-  Documentation/devicetree/bindings/dma/dma.txt
-- dma-names: DMA request names should include "tx" and "rx" if present.

-Obsolete properties:
-- fsl,spi-num-chipselects : Contains the number of the chipselect
+Recommended properties:
+- cs-gpios : GPIOs to use as chip selects, see spi-bus.txt. While the native chip
+  select lines can be used, they appear to always generate a pulse between each
+  word of a transfer. Most use cases will require GPIO based chip selects to
+  generate a valid transaction.

 Optional properties:
+- num-cs : Number of total chip selects, see spi-bus.txt.
+- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
+  Documentation/devicetree/bindings/dma/dma.txt.
+- dma-names: DMA request names, if present, should include "tx" and "rx".
 - fsl,spi-rdy-drctl: Integer, representing the value of DRCTL, the register
   controlling the SPI_READY handling. Note that to enable the DRCTL consideration,
   the SPI_READY mode-flag needs to be set too.
   Valid values are: 0 (disabled), 1 (edge-triggered burst) and 2 (level-triggered burst).

+Obsolete properties:
+- fsl,spi-num-chipselects : Contains the number of the chipselect
+
 Example:

 ecspi@70010000 {

@@ -156,6 +156,40 @@ handle it in two different ways:
 root of the overlay. Finally the directory is moved to the new
 location.

+There are several ways to tune the "redirect_dir" feature.
+
+Kernel config options:
+
+- OVERLAY_FS_REDIRECT_DIR:
+    If this is enabled, then redirect_dir is turned on by default.
+- OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW:
+    If this is enabled, then redirects are always followed by default. Enabling
+    this results in a less secure configuration. Enable this option only when
+    worried about backward compatibility with kernels that have the redirect_dir
+    feature and follow redirects even if turned off.
+
+Module options (can also be changed through /sys/module/overlay/parameters/*):
+
+- "redirect_dir=BOOL":
+    See OVERLAY_FS_REDIRECT_DIR kernel config option above.
+- "redirect_always_follow=BOOL":
+    See OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW kernel config option above.
+- "redirect_max=NUM":
+    The maximum number of bytes in an absolute redirect (default is 256).
+
+Mount options:
+
+- "redirect_dir=on":
+    Redirects are enabled.
+- "redirect_dir=follow":
+    Redirects are not created, but followed.
+- "redirect_dir=off":
+    Redirects are not created and only followed if "redirect_always_follow"
+    feature is enabled in the kernel/module config.
+- "redirect_dir=nofollow":
+    Redirects are not created and not followed (equivalent to "redirect_dir=off"
+    if "redirect_always_follow" feature is not enabled).
+
 Non-directories
 ---------------

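An illustrative invocation of the new knobs (the paths and the value below are
assumptions made up for the example) could be:

	# pick the redirect behaviour at mount time ...
	mount -t overlay overlay -o lowerdir=/lower,upperdir=/upper,workdir=/work,redirect_dir=on /merged
	# ... or adjust the module defaults at runtime
	echo 1024 > /sys/module/overlay/parameters/redirect_max
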
@@ -1,874 +0,0 @@
-Crossrelease
-============
-
-Started by Byungchul Park <byungchul.park@lge.com>
-
-Contents:
-
- (*) Background
-
-     - What causes deadlock
-     - How lockdep works
-
- (*) Limitation
-
-     - Limit lockdep
-     - Pros from the limitation
-     - Cons from the limitation
-     - Relax the limitation
-
- (*) Crossrelease
-
-     - Introduce crossrelease
-     - Introduce commit
-
- (*) Implementation
-
-     - Data structures
-     - How crossrelease works
-
- (*) Optimizations
-
-     - Avoid duplication
-     - Lockless for hot paths
-
- (*) APPENDIX A: What lockdep does to work aggresively
-
- (*) APPENDIX B: How to avoid adding false dependencies
-
-
-==========
-Background
-==========
-
-What causes deadlock
---------------------
-
-A deadlock occurs when a context is waiting for an event to happen,
-which is impossible because another (or the) context who can trigger the
-event is also waiting for another (or the) event to happen, which is
-also impossible due to the same reason.
-
-For example:
-
-   A context going to trigger event C is waiting for event A to happen.
-   A context going to trigger event A is waiting for event B to happen.
-   A context going to trigger event B is waiting for event C to happen.
-
-A deadlock occurs when these three wait operations run at the same time,
-because event C cannot be triggered if event A does not happen, which in
-turn cannot be triggered if event B does not happen, which in turn
-cannot be triggered if event C does not happen. After all, no event can
-be triggered since any of them never meets its condition to wake up.
-
-A dependency might exist between two waiters and a deadlock might happen
-due to an incorrect releationship between dependencies. Thus, we must
-define what a dependency is first. A dependency exists between them if:
-
-   1. There are two waiters waiting for each event at a given time.
-   2. The only way to wake up each waiter is to trigger its event.
-   3. Whether one can be woken up depends on whether the other can.
-
-Each wait in the example creates its dependency like:
-
-   Event C depends on event A.
-   Event A depends on event B.
-   Event B depends on event C.
-
-   NOTE: Precisely speaking, a dependency is one between whether a
-   waiter for an event can be woken up and whether another waiter for
-   another event can be woken up. However from now on, we will describe
-   a dependency as if it's one between an event and another event for
-   simplicity.
-
-And they form circular dependencies like:
-
-    -> C -> A -> B -
-   /                \
-   \                /
-    ----------------
-
-   where 'A -> B' means that event A depends on event B.
-
-Such circular dependencies lead to a deadlock since no waiter can meet
-its condition to wake up as described.
-
-CONCLUSION
-
-Circular dependencies cause a deadlock.
-
-
-How lockdep works
------------------
-
-Lockdep tries to detect a deadlock by checking dependencies created by
-lock operations, acquire and release. Waiting for a lock corresponds to
-waiting for an event, and releasing a lock corresponds to triggering an
-event in the previous section.
-
-In short, lockdep does:
-
-   1. Detect a new dependency.
-   2. Add the dependency into a global graph.
-   3. Check if that makes dependencies circular.
-   4. Report a deadlock or its possibility if so.
-
-For example, consider a graph built by lockdep that looks like:
-
-   A -> B -
-           \
-            -> E
-           /
-   C -> D -
-
-   where A, B,..., E are different lock classes.
-
-Lockdep will add a dependency into the graph on detection of a new
-dependency. For example, it will add a dependency 'E -> C' when a new
-dependency between lock E and lock C is detected. Then the graph will be:
-
-   A -> B -
-           \
-            -> E -
-           /      \
-    -> C -> D -    \
-   /                \
-   \                /
-    -----------------
-
-   where A, B,..., E are different lock classes.
-
-This graph contains a subgraph which demonstrates circular dependencies:
-
-            -> E -
-           /      \
-    -> C -> D -    \
-   /                \
-   \                /
-    -----------------
-
-   where C, D and E are different lock classes.
-
-This is the condition under which a deadlock might occur. Lockdep
-reports it on detection after adding a new dependency. This is the way
-how lockdep works.
-
-CONCLUSION
-
-Lockdep detects a deadlock or its possibility by checking if circular
-dependencies were created after adding each new dependency.
-
-
-==========
-Limitation
-==========
-
-Limit lockdep
--------------
-
-Limiting lockdep to work on only typical locks e.g. spin locks and
-mutexes, which are released within the acquire context, the
-implementation becomes simple but its capacity for detection becomes
-limited. Let's check pros and cons in next section.
-
-
-Pros from the limitation
-------------------------
-
-Given the limitation, when acquiring a lock, locks in a held_locks
-cannot be released if the context cannot acquire it so has to wait to
-acquire it, which means all waiters for the locks in the held_locks are
-stuck. It's an exact case to create dependencies between each lock in
-the held_locks and the lock to acquire.
-
-For example:
-
-   CONTEXT X
-   ---------
-   acquire A
-   acquire B /* Add a dependency 'A -> B' */
-   release B
-   release A
-
-   where A and B are different lock classes.
-
-When acquiring lock A, the held_locks of CONTEXT X is empty thus no
-dependency is added. But when acquiring lock B, lockdep detects and adds
-a new dependency 'A -> B' between lock A in the held_locks and lock B.
-They can be simply added whenever acquiring each lock.
-
-And data required by lockdep exists in a local structure, held_locks
-embedded in task_struct. Forcing to access the data within the context,
-lockdep can avoid racy problems without explicit locks while handling
-the local data.
-
-Lastly, lockdep only needs to keep locks currently being held, to build
-a dependency graph. However, relaxing the limitation, it needs to keep
-even locks already released, because a decision whether they created
-dependencies might be long-deferred.
-
-To sum up, we can expect several advantages from the limitation:
-
-   1. Lockdep can easily identify a dependency when acquiring a lock.
-   2. Races are avoidable while accessing local locks in a held_locks.
-   3. Lockdep only needs to keep locks currently being held.
-
-CONCLUSION
-
-Given the limitation, the implementation becomes simple and efficient.
-
-
-Cons from the limitation
-------------------------
-
-Given the limitation, lockdep is applicable only to typical locks. For
-example, page locks for page access or completions for synchronization
-cannot work with lockdep.
-
-Can we detect deadlocks below, under the limitation?
-
-Example 1:
-
-   CONTEXT X		CONTEXT Y		CONTEXT Z
-   ---------		---------		----------
-			mutex_lock A
-   lock_page B
-			lock_page B
-						mutex_lock A /* DEADLOCK */
-						unlock_page B held by X
-			unlock_page B
-			mutex_unlock A
-						mutex_unlock A
-
-   where A and B are different lock classes.
-
-No, we cannot.
-
-Example 2:
-
-   CONTEXT X		CONTEXT Y
-   ---------		---------
-			mutex_lock A
-   mutex_lock A
-			wait_for_complete B /* DEADLOCK */
-   complete B
-			mutex_unlock A
-   mutex_unlock A
-
-   where A is a lock class and B is a completion variable.
-
-No, we cannot.
-
-CONCLUSION
-
-Given the limitation, lockdep cannot detect a deadlock or its
-possibility caused by page locks or completions.
-
-
-Relax the limitation
---------------------
-
-Under the limitation, things to create dependencies are limited to
-typical locks. However, synchronization primitives like page locks and
-completions, which are allowed to be released in any context, also
-create dependencies and can cause a deadlock. So lockdep should track
-these locks to do a better job. We have to relax the limitation for
-these locks to work with lockdep.
-
-Detecting dependencies is very important for lockdep to work because
-adding a dependency means adding an opportunity to check whether it
-causes a deadlock. The more lockdep adds dependencies, the more it
-thoroughly works. Thus Lockdep has to do its best to detect and add as
-many true dependencies into a graph as possible.
-
-For example, considering only typical locks, lockdep builds a graph like:
-
-   A -> B -
-           \
-            -> E
-           /
-   C -> D -
-
-   where A, B,..., E are different lock classes.
-
-On the other hand, under the relaxation, additional dependencies might
-be created and added. Assuming additional 'FX -> C' and 'E -> GX' are
-added thanks to the relaxation, the graph will be:
-
-   A -> B -
-           \
-            -> E -> GX
-           /
-   FX -> C -> D -
-
-   where A, B,..., E, FX and GX are different lock classes, and a suffix
-   'X' is added on non-typical locks.
-
-The latter graph gives us more chances to check circular dependencies
-than the former. However, it might suffer performance degradation since
-relaxing the limitation, with which design and implementation of lockdep
-can be efficient, might introduce inefficiency inevitably. So lockdep
-should provide two options, strong detection and efficient detection.
-
-Choosing efficient detection:
-
-   Lockdep works with only locks restricted to be released within the
-   acquire context. However, lockdep works efficiently.
-
-Choosing strong detection:
-
-   Lockdep works with all synchronization primitives. However, lockdep
-   suffers performance degradation.
-
-CONCLUSION
-
-Relaxing the limitation, lockdep can add additional dependencies giving
-additional opportunities to check circular dependencies.
-
-
-============
-Crossrelease
-============
-
-Introduce crossrelease
-----------------------
-
-In order to allow lockdep to handle additional dependencies by what
-might be released in any context, namely 'crosslock', we have to be able
-to identify those created by crosslocks. The proposed 'crossrelease'
-feature provoides a way to do that.
-
-Crossrelease feature has to do:
-
-   1. Identify dependencies created by crosslocks.
-   2. Add the dependencies into a dependency graph.
-
-That's all. Once a meaningful dependency is added into graph, then
-lockdep would work with the graph as it did. The most important thing
-crossrelease feature has to do is to correctly identify and add true
-dependencies into the global graph.
-
-A dependency e.g. 'A -> B' can be identified only in the A's release
-context because a decision required to identify the dependency can be
-made only in the release context. That is to decide whether A can be
-released so that a waiter for A can be woken up. It cannot be made in
-other than the A's release context.
-
-It's no matter for typical locks because each acquire context is same as
-its release context, thus lockdep can decide whether a lock can be
-released in the acquire context. However for crosslocks, lockdep cannot
-make the decision in the acquire context but has to wait until the
-release context is identified.
-
-Therefore, deadlocks by crosslocks cannot be detected just when it
-happens, because those cannot be identified until the crosslocks are
-released. However, deadlock possibilities can be detected and it's very
-worth. See 'APPENDIX A' section to check why.
-
-CONCLUSION
-
-Using crossrelease feature, lockdep can work with what might be released
-in any context, namely crosslock.
-
-
-Introduce commit
-----------------
-
-Since crossrelease defers the work adding true dependencies of
-crosslocks until they are actually released, crossrelease has to queue
-all acquisitions which might create dependencies with the crosslocks.
-Then it identifies dependencies using the queued data in batches at a
-proper time. We call it 'commit'.
-
-There are four types of dependencies:
-
-1. TT type: 'typical lock A -> typical lock B'
-
-   Just when acquiring B, lockdep can see it's in the A's release
-   context. So the dependency between A and B can be identified
-   immediately. Commit is unnecessary.
-
-2. TC type: 'typical lock A -> crosslock BX'
-
-   Just when acquiring BX, lockdep can see it's in the A's release
-   context. So the dependency between A and BX can be identified
-   immediately. Commit is unnecessary, too.
-
-3. CT type: 'crosslock AX -> typical lock B'
-
-   When acquiring B, lockdep cannot identify the dependency because
-   there's no way to know if it's in the AX's release context. It has
-   to wait until the decision can be made. Commit is necessary.
-
-4. CC type: 'crosslock AX -> crosslock BX'
-
-   When acquiring BX, lockdep cannot identify the dependency because
-   there's no way to know if it's in the AX's release context. It has
-   to wait until the decision can be made. Commit is necessary.
-   But, handling CC type is not implemented yet. It's a future work.
-
-Lockdep can work without commit for typical locks, but commit step is
-necessary once crosslocks are involved. Introducing commit, lockdep
-performs three steps. What lockdep does in each step is:
-
-1. Acquisition: For typical locks, lockdep does what it originally did
-   and queues the lock so that CT type dependencies can be checked using
-   it at the commit step. For crosslocks, it saves data which will be
-   used at the commit step and increases a reference count for it.
-
-2. Commit: No action is reauired for typical locks. For crosslocks,
-   lockdep adds CT type dependencies using the data saved at the
-   acquisition step.
-
-3. Release: No changes are required for typical locks. When a crosslock
-   is released, it decreases a reference count for it.
-
-CONCLUSION
-
-Crossrelease introduces commit step to handle dependencies of crosslocks
-in batches at a proper time.
-
-
-==============
-Implementation
-==============
-
-Data structures
----------------
-
-Crossrelease introduces two main data structures.
-
-1. hist_lock
-
-   This is an array embedded in task_struct, for keeping lock history so
-   that dependencies can be added using them at the commit step. Since
-   it's local data, it can be accessed locklessly in the owner context.
-   The array is filled at the acquisition step and consumed at the
-   commit step. And it's managed in circular manner.
-
-2. cross_lock
-
-   One per lockdep_map exists. This is for keeping data of crosslocks
-   and used at the commit step.
-
-
-How crossrelease works
-----------------------
-
-It's the key of how crossrelease works, to defer necessary works to an
-appropriate point in time and perform in at once at the commit step.
-Let's take a look with examples step by step, starting from how lockdep
-works without crossrelease for typical locks.
-
-   acquire A /* Push A onto held_locks */
-   acquire B /* Push B onto held_locks and add 'A -> B' */
-   acquire C /* Push C onto held_locks and add 'B -> C' */
-   release C /* Pop C from held_locks */
-   release B /* Pop B from held_locks */
-   release A /* Pop A from held_locks */
-
-   where A, B and C are different lock classes.
-
-   NOTE: This document assumes that readers already understand how
-   lockdep works without crossrelease thus omits details. But there's
-   one thing to note. Lockdep pretends to pop a lock from held_locks
-   when releasing it. But it's subtly different from the original pop
-   operation because lockdep allows other than the top to be poped.
-
-In this case, lockdep adds 'the top of held_locks -> the lock to acquire'
-dependency every time acquiring a lock.
-
-After adding 'A -> B', a dependency graph will be:
-
-   A -> B
-
-   where A and B are different lock classes.
-
-And after adding 'B -> C', the graph will be:
-
-   A -> B -> C
-
-   where A, B and C are different lock classes.
-
-Let's performs commit step even for typical locks to add dependencies.
-Of course, commit step is not necessary for them, however, it would work
-well because this is a more general way.
-
-   acquire A
-   /*
-    * Queue A into hist_locks
-    *
-    * In hist_locks: A
-    * In graph: Empty
-    */
-
-   acquire B
-   /*
-    * Queue B into hist_locks
-    *
-    * In hist_locks: A, B
-    * In graph: Empty
-    */
-
-   acquire C
-   /*
-    * Queue C into hist_locks
-    *
-    * In hist_locks: A, B, C
-    * In graph: Empty
-    */
-
-   commit C
-   /*
-    * Add 'C -> ?'
-    * Answer the following to decide '?'
-    * What has been queued since acquire C: Nothing
-    *
-    * In hist_locks: A, B, C
-    * In graph: Empty
-    */
-
-   release C
-
-   commit B
-   /*
-    * Add 'B -> ?'
-    * Answer the following to decide '?'
-    * What has been queued since acquire B: C
-    *
-    * In hist_locks: A, B, C
-    * In graph: 'B -> C'
-    */
-
-   release B
-
-   commit A
-   /*
-    * Add 'A -> ?'
-    * Answer the following to decide '?'
-    * What has been queued since acquire A: B, C
-    *
-    * In hist_locks: A, B, C
-    * In graph: 'B -> C', 'A -> B', 'A -> C'
-    */
-
-   release A
-
-   where A, B and C are different lock classes.
-
-In this case, dependencies are added at the commit step as described.
-
-After commits for A, B and C, the graph will be:
-
-   A -> B -> C
-
-   where A, B and C are different lock classes.
-
-   NOTE: A dependency 'A -> C' is optimized out.
-
-We can see the former graph built without commit step is same as the
-latter graph built using commit steps. Of course the former way leads to
-earlier finish for building the graph, which means we can detect a
-deadlock or its possibility sooner. So the former way would be prefered
-when possible. But we cannot avoid using the latter way for crosslocks.
-
-Let's look at how commit steps work for crosslocks. In this case, the
-commit step is performed only on crosslock AX as real. And it assumes
-that the AX release context is different from the AX acquire context.
-
-   BX RELEASE CONTEXT			BX ACQUIRE CONTEXT
-   ------------------			------------------
-					acquire A
-					/*
-					 * Push A onto held_locks
-					 * Queue A into hist_locks
-					 *
-					 * In held_locks: A
-					 * In hist_locks: A
-					 * In graph: Empty
-					 */
-
-					acquire BX
-					/*
-					 * Add 'the top of held_locks -> BX'
-					 *
-					 * In held_locks: A
-					 * In hist_locks: A
-					 * In graph: 'A -> BX'
-					 */
-
-   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-   It must be guaranteed that the following operations are seen after
-   acquiring BX globally. It can be done by things like barrier.
-   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-   acquire C
-   /*
-    * Push C onto held_locks
-    * Queue C into hist_locks
-    *
-    * In held_locks: C
-    * In hist_locks: C
-    * In graph: 'A -> BX'
-    */
-
-   release C
-   /*
-    * Pop C from held_locks
-    *
-    * In held_locks: Empty
-    * In hist_locks: C
-    * In graph: 'A -> BX'
-    */
-					acquire D
-					/*
-					 * Push D onto held_locks
-					 * Queue D into hist_locks
-					 * Add 'the top of held_locks -> D'
-					 *
-					 * In held_locks: A, D
-					 * In hist_locks: A, D
-					 * In graph: 'A -> BX', 'A -> D'
-					 */
-   acquire E
-   /*
-    * Push E onto held_locks
-    * Queue E into hist_locks
-    *
-    * In held_locks: E
-    * In hist_locks: C, E
-    * In graph: 'A -> BX', 'A -> D'
-    */
-
-   release E
-   /*
-    * Pop E from held_locks
-    *
-    * In held_locks: Empty
-    * In hist_locks: D, E
-    * In graph: 'A -> BX', 'A -> D'
-    */
-					release D
-					/*
-					 * Pop D from held_locks
-					 *
-					 * In held_locks: A
-					 * In hist_locks: A, D
-					 * In graph: 'A -> BX', 'A -> D'
-					 */
-   commit BX
-   /*
-    * Add 'BX -> ?'
-    * What has been queued since acquire BX: C, E
-    *
-    * In held_locks: Empty
-    * In hist_locks: D, E
-    * In graph: 'A -> BX', 'A -> D',
-    *           'BX -> C', 'BX -> E'
-    */
-
-   release BX
-   /*
-    * In held_locks: Empty
-    * In hist_locks: D, E
-    * In graph: 'A -> BX', 'A -> D',
-    *           'BX -> C', 'BX -> E'
-    */
-					release A
-					/*
-					 * Pop A from held_locks
-					 *
-					 * In held_locks: Empty
-					 * In hist_locks: A, D
-					 * In graph: 'A -> BX', 'A -> D',
-					 *           'BX -> C', 'BX -> E'
-					 */
-
-   where A, BX, C,..., E are different lock classes, and a suffix 'X' is
-   added on crosslocks.
-
-Crossrelease considers all acquisitions after acqiuring BX are
-candidates which might create dependencies with BX. True dependencies
-will be determined when identifying the release context of BX. Meanwhile,
-all typical locks are queued so that they can be used at the commit step.
-And then two dependencies 'BX -> C' and 'BX -> E' are added at the
-commit step when identifying the release context.
-
-The final graph will be, with crossrelease:
-
-            -> C
-           /
-    -> BX -
-   /       \
-   A -      -> E
-      \
-       -> D
-
-   where A, BX, C,..., E are different lock classes, and a suffix 'X' is
-   added on crosslocks.
-
-However, the final graph will be, without crossrelease:
-
-   A -> D
-
-   where A and D are different lock classes.
-
-The former graph has three more dependencies, 'A -> BX', 'BX -> C' and
-'BX -> E' giving additional opportunities to check if they cause
-deadlocks. This way lockdep can detect a deadlock or its possibility
-caused by crosslocks.
-
-CONCLUSION
-
-We checked how crossrelease works with several examples.
-
-
-=============
-Optimizations
-=============
-
-Avoid duplication
------------------
-
-Crossrelease feature uses a cache like what lockdep already uses for
-dependency chains, but this time it's for caching CT type dependencies.
-Once that dependency is cached, the same will never be added again.
-
-
-Lockless for hot paths
-----------------------
-
-To keep all locks for later use at the commit step, crossrelease adopts
-a local array embedded in task_struct, which makes access to the data
-lockless by forcing it to happen only within the owner context. It's
-like how lockdep handles held_locks. Lockless implmentation is important
-since typical locks are very frequently acquired and released.
-
-
-=================================================
-APPENDIX A: What lockdep does to work aggresively
-=================================================
-
-A deadlock actually occurs when all wait operations creating circular
-dependencies run at the same time. Even though they don't, a potential
-deadlock exists if the problematic dependencies exist. Thus it's
-meaningful to detect not only an actual deadlock but also its potential
-possibility. The latter is rather valuable. When a deadlock occurs
-actually, we can identify what happens in the system by some means or
-other even without lockdep. However, there's no way to detect possiblity
-without lockdep unless the whole code is parsed in head. It's terrible.
-Lockdep does the both, and crossrelease only focuses on the latter.
-
-Whether or not a deadlock actually occurs depends on several factors.
-For example, what order contexts are switched in is a factor. Assuming
-circular dependencies exist, a deadlock would occur when contexts are
-switched so that all wait operations creating the dependencies run
-simultaneously. Thus to detect a deadlock possibility even in the case
-that it has not occured yet, lockdep should consider all possible
-combinations of dependencies, trying to:
-
-1. Use a global dependency graph.
-
-   Lockdep combines all dependencies into one global graph and uses them,
-   regardless of which context generates them or what order contexts are
-   switched in. Aggregated dependencies are only considered so they are
-   prone to be circular if a problem exists.
-
-2. Check dependencies between classes instead of instances.
-
-   What actually causes a deadlock are instances of lock. However,
-   lockdep checks dependencies between classes instead of instances.
-   This way lockdep can detect a deadlock which has not happened but
-   might happen in future by others but the same class.
-
-3. Assume all acquisitions lead to waiting.
-
-   Although locks might be acquired without waiting which is essential
-   to create dependencies, lockdep assumes all acquisitions lead to
-   waiting since it might be true some time or another.
-
-CONCLUSION
-
-Lockdep detects not only an actual deadlock but also its possibility,
-and the latter is more valuable.
-
-
-==================================================
-APPENDIX B: How to avoid adding false dependencies
-==================================================
-
-Remind what a dependency is. A dependency exists if:
-
-   1. There are two waiters waiting for each event at a given time.
-   2. The only way to wake up each waiter is to trigger its event.
-   3. Whether one can be woken up depends on whether the other can.
-
-For example:
-
-   acquire A
-   acquire B /* A dependency 'A -> B' exists */
-   release B
-   release A
-
-   where A and B are different lock classes.
-
-A depedency 'A -> B' exists since:
-
-   1. A waiter for A and a waiter for B might exist when acquiring B.
-   2. Only way to wake up each is to release what it waits for.
-   3. Whether the waiter for A can be woken up depends on whether the
-      other can. IOW, TASK X cannot release A if it fails to acquire B.
-
-For another example:
-
-   TASK X			TASK Y
-   ------			------
-				acquire AX
-   acquire B /* A dependency 'AX -> B' exists */
-   release B
-   release AX held by Y
-
-   where AX and B are different lock classes, and a suffix 'X' is added
-   on crosslocks.
-
-Even in this case involving crosslocks, the same rule can be applied. A
-depedency 'AX -> B' exists since:
-
-   1. A waiter for AX and a waiter for B might exist when acquiring B.
-   2. Only way to wake up each is to release what it waits for.
-   3. Whether the waiter for AX can be woken up depends on whether the
-      other can. IOW, TASK X cannot release AX if it fails to acquire B.
-
-Let's take a look at more complicated example:
-
-   TASK X			TASK Y
-   ------			------
-   acquire B
-   release B
-   fork Y
-				acquire AX
-   acquire C /* A dependency 'AX -> C' exists */
-   release C
-   release AX held by Y
-
-   where AX, B and C are different lock classes, and a suffix 'X' is
-   added on crosslocks.
-
-Does a dependency 'AX -> B' exist? Nope.
-
-Two waiters are essential to create a dependency. However, waiters for
-AX and B to create 'AX -> B' cannot exist at the same time in this
-example. Thus the dependency 'AX -> B' cannot be created.
-
-It would be ideal if the full set of true ones can be considered. But
-we can ensure nothing but what actually happened. Relying on what
-actually happens at runtime, we can anyway add only true ones, though
-they might be a subset of true ones. It's similar to how lockdep works
-for typical locks. There might be more true dependencies than what
-lockdep has detected in runtime. Lockdep has no choice but to rely on
-what actually happens. Crossrelease also relies on it.
-
-CONCLUSION
-
-Relying on what actually happens, lockdep can avoid adding false
-dependencies.

@@ -98,5 +98,25 @@ request is made for a page in an old zpool, it is uncompressed using its
 original compressor. Once all pages are removed from an old zpool, the zpool
 and its compressor are freed.

+Some of the pages in zswap are same-value filled pages (i.e. contents of the
+page have same value or repetitive pattern). These pages include zero-filled
+pages and they are handled differently. During store operation, a page is
+checked if it is a same-value filled page before compressing it. If true, the
+compressed length of the page is set to zero and the pattern or same-filled
+value is stored.
+
+Same-value filled pages identification feature is enabled by default and can be
+disabled at boot time by setting the "same_filled_pages_enabled" attribute to 0,
+e.g. zswap.same_filled_pages_enabled=0. It can also be enabled and disabled at
+runtime using the sysfs "same_filled_pages_enabled" attribute, e.g.
+
+echo 1 > /sys/module/zswap/parameters/same_filled_pages_enabled
+
+When zswap same-filled page identification is disabled at runtime, it will stop
+checking for the same-value filled pages during store operation. However, the
+existing pages which are marked as same-value filled pages remain stored
+unchanged in zswap until they are either loaded or invalidated.
+
 A debugfs interface is provided for various statistic about pool size, number
-of pages stored, and various counters for the reasons pages are rejected.
+of pages stored, same-value filled pages and various counters for the reasons
+pages are rejected.

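For instance, the same-filled handling described above could be turned off at
runtime and the debugfs statistics inspected roughly like this (a sketch; the
exact set of debugfs counters varies between kernel versions):

	echo 0 > /sys/module/zswap/parameters/same_filled_pages_enabled
	grep . /sys/kernel/debug/zswap/*
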
@@ -1,6 +1,4 @@

-<previous description obsolete, deleted>
-
 Virtual memory map with 4 level page tables:

 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm

|
||||||
... unused hole ...
|
... unused hole ...
|
||||||
ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
|
ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
|
||||||
... unused hole ...
|
... unused hole ...
|
||||||
|
fffffe0000000000 - fffffe7fffffffff (=39 bits) LDT remap for PTI
|
||||||
|
fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
|
||||||
ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
|
ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
|
||||||
... unused hole ...
|
... unused hole ...
|
||||||
ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
|
ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
|
||||||
... unused hole ...
|
... unused hole ...
|
||||||
ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0
|
ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0
|
||||||
ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space (variable)
|
ffffffffa0000000 - [fixmap start] (~1526 MB) module mapping space (variable)
|
||||||
ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
|
[fixmap start] - ffffffffff5fffff kernel-internal fixmap range
|
||||||
|
ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
|
||||||
ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
|
ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
|
||||||
|
|
||||||
Virtual memory map with 5 level page tables:
|
Virtual memory map with 5 level page tables:
|
||||||
|
@@ -29,26 +30,29 @@ Virtual memory map with 5 level page tables:
 hole caused by [56:63] sign extension
 ff00000000000000 - ff0fffffffffffff (=52 bits) guard hole, reserved for hypervisor
 ff10000000000000 - ff8fffffffffffff (=55 bits) direct mapping of all phys. memory
-ff90000000000000 - ff91ffffffffffff (=49 bits) hole
-ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space
+ff90000000000000 - ff9fffffffffffff (=52 bits) LDT remap for PTI
+ffa0000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space (12800 TB)
 ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
 ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
 ... unused hole ...
 ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
 ... unused hole ...
+fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
 ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
 ... unused hole ...
 ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
 ... unused hole ...
 ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0
-ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space
-ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
+ffffffffa0000000 - [fixmap start]   (~1526 MB) module mapping space
+[fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
+ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole

 Architecture defines a 64-bit virtual address. Implementations can support
 less. Currently supported are 48- and 57-bit virtual addresses. Bits 63
-through to the most-significant implemented bit are set to either all ones
-or all zero. This causes hole between user space and kernel addresses.
+through to the most-significant implemented bit are sign extended.
+This causes hole between user space and kernel addresses if you interpret them
+as unsigned.

 The direct mapping covers all memory in the system up to the highest
 memory address (this means in some cases it can also include PCI memory

@@ -58,9 +62,6 @@ vmalloc space is lazily synchronized into the different PML4/PML5 pages of
 the processes using the page fault handler, with init_top_pgt as
 reference.

-Current X86-64 implementations support up to 46 bits of address space (64 TB),
-which is our current limit. This expands into MBZ space in the page tables.
-
 We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual
 memory window (this size is arbitrary, it can be raised later if needed).
 The mappings are not part of any other kernel PGD and are only available

@@ -72,5 +73,3 @@ following fixmap section.
 Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
 physical memory, vmalloc/ioremap space and virtual memory map are randomized.
 Their order is preserved but their base will be offset early at boot time.
-
--Andi Kleen, Jul 2004

MAINTAINERS
@@ -2621,24 +2621,22 @@ F: fs/bfs/
 F:	include/uapi/linux/bfs_fs.h

 BLACKFIN ARCHITECTURE
-M:	Steven Miao <realmz6@gmail.com>
 L:	adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 T:	git git://git.code.sf.net/p/adi-linux/code
 W:	http://blackfin.uclinux.org
-S:	Supported
+S:	Orphan
 F:	arch/blackfin/

 BLACKFIN EMAC DRIVER
 L:	adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:	http://blackfin.uclinux.org
-S:	Supported
+S:	Orphan
 F:	drivers/net/ethernet/adi/

 BLACKFIN MEDIA DRIVER
-M:	Scott Jiang <scott.jiang.linux@gmail.com>
 L:	adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:	http://blackfin.uclinux.org/
-S:	Supported
+S:	Orphan
 F:	drivers/media/platform/blackfin/
 F:	drivers/media/i2c/adv7183*
 F:	drivers/media/i2c/vs6624*

@ -2646,25 +2644,25 @@ F: drivers/media/i2c/vs6624*
|
||||||
BLACKFIN RTC DRIVER
|
BLACKFIN RTC DRIVER
|
||||||
L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
|
L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
|
||||||
W: http://blackfin.uclinux.org
|
W: http://blackfin.uclinux.org
|
||||||
S: Supported
|
S: Orphan
|
||||||
F: drivers/rtc/rtc-bfin.c
|
F: drivers/rtc/rtc-bfin.c
|
||||||
|
|
||||||
BLACKFIN SDH DRIVER
|
BLACKFIN SDH DRIVER
|
||||||
L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
|
L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
|
||||||
W: http://blackfin.uclinux.org
|
W: http://blackfin.uclinux.org
|
||||||
S: Supported
|
S: Orphan
|
||||||
F: drivers/mmc/host/bfin_sdh.c
|
F: drivers/mmc/host/bfin_sdh.c
|
||||||
|
|
||||||
BLACKFIN SERIAL DRIVER
|
BLACKFIN SERIAL DRIVER
|
||||||
L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
|
L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
|
||||||
W: http://blackfin.uclinux.org
|
W: http://blackfin.uclinux.org
|
||||||
S: Supported
|
S: Orphan
|
||||||
F: drivers/tty/serial/bfin_uart.c
|
F: drivers/tty/serial/bfin_uart.c
|
||||||
|
|
||||||
BLACKFIN WATCHDOG DRIVER
|
BLACKFIN WATCHDOG DRIVER
|
||||||
L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
|
L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
|
||||||
W: http://blackfin.uclinux.org
|
W: http://blackfin.uclinux.org
|
||||||
S: Supported
|
S: Orphan
|
||||||
F: drivers/watchdog/bfin_wdt.c
|
F: drivers/watchdog/bfin_wdt.c
|
||||||
|
|
||||||
BLINKM RGB LED DRIVER
|
BLINKM RGB LED DRIVER
|
||||||
|
@ -5431,7 +5429,7 @@ F: drivers/media/tuners/fc2580*
|
||||||
|
|
||||||
FCOE SUBSYSTEM (libfc, libfcoe, fcoe)
|
FCOE SUBSYSTEM (libfc, libfcoe, fcoe)
|
||||||
M: Johannes Thumshirn <jth@kernel.org>
|
M: Johannes Thumshirn <jth@kernel.org>
|
||||||
L: fcoe-devel@open-fcoe.org
|
L: linux-scsi@vger.kernel.org
|
||||||
W: www.Open-FCoE.org
|
W: www.Open-FCoE.org
|
||||||
S: Supported
|
S: Supported
|
||||||
F: drivers/scsi/libfc/
|
F: drivers/scsi/libfc/
|
||||||
|
@ -13141,6 +13139,7 @@ F: drivers/dma/dw/
|
||||||
|
|
||||||
SYNOPSYS DESIGNWARE ENTERPRISE ETHERNET DRIVER
|
SYNOPSYS DESIGNWARE ENTERPRISE ETHERNET DRIVER
|
||||||
M: Jie Deng <jiedeng@synopsys.com>
|
M: Jie Deng <jiedeng@synopsys.com>
|
||||||
|
M: Jose Abreu <Jose.Abreu@synopsys.com>
|
||||||
L: netdev@vger.kernel.org
|
L: netdev@vger.kernel.org
|
||||||
S: Supported
|
S: Supported
|
||||||
F: drivers/net/ethernet/synopsys/
|
F: drivers/net/ethernet/synopsys/
|
||||||
|
@ -13516,6 +13515,7 @@ M: Mika Westerberg <mika.westerberg@linux.intel.com>
|
||||||
M: Yehezkel Bernat <yehezkel.bernat@intel.com>
|
M: Yehezkel Bernat <yehezkel.bernat@intel.com>
|
||||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git
|
T: git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git
|
||||||
S: Maintained
|
S: Maintained
|
||||||
|
F: Documentation/admin-guide/thunderbolt.rst
|
||||||
F: drivers/thunderbolt/
|
F: drivers/thunderbolt/
|
||||||
F: include/linux/thunderbolt.h
|
F: include/linux/thunderbolt.h
|
||||||
|
|
||||||
 5	Makefile

@@ -2,7 +2,7 @@
 VERSION = 4
 PATCHLEVEL = 15
 SUBLEVEL = 0
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc6
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
@@ -789,6 +789,9 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign)
 # disable invalid "can't wrap" optimizations for signed / pointers
 KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow)
 
+# Make sure -fstack-check isn't enabled (like gentoo apparently did)
+KBUILD_CFLAGS += $(call cc-option,-fno-stack-check,)
+
 # conserve stack if available
 KBUILD_CFLAGS += $(call cc-option,-fconserve-stack)
 
|
|
|
@ -121,7 +121,7 @@
|
||||||
switch0port10: port@10 {
|
switch0port10: port@10 {
|
||||||
reg = <10>;
|
reg = <10>;
|
||||||
label = "dsa";
|
label = "dsa";
|
||||||
phy-mode = "xgmii";
|
phy-mode = "xaui";
|
||||||
link = <&switch1port10>;
|
link = <&switch1port10>;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
@ -208,7 +208,7 @@
|
||||||
switch1port10: port@10 {
|
switch1port10: port@10 {
|
||||||
reg = <10>;
|
reg = <10>;
|
||||||
label = "dsa";
|
label = "dsa";
|
||||||
phy-mode = "xgmii";
|
phy-mode = "xaui";
|
||||||
link = <&switch0port10>;
|
link = <&switch0port10>;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
|
@ -85,7 +85,11 @@
|
||||||
.pushsection .text.fixup,"ax"
|
.pushsection .text.fixup,"ax"
|
||||||
.align 4
|
.align 4
|
||||||
9001: mov r4, #-EFAULT
|
9001: mov r4, #-EFAULT
|
||||||
|
#ifdef CONFIG_CPU_SW_DOMAIN_PAN
|
||||||
|
ldr r5, [sp, #9*4] @ *err_ptr
|
||||||
|
#else
|
||||||
ldr r5, [sp, #8*4] @ *err_ptr
|
ldr r5, [sp, #8*4] @ *err_ptr
|
||||||
|
#endif
|
||||||
str r4, [r5]
|
str r4, [r5]
|
||||||
ldmia sp, {r1, r2} @ retrieve dst, len
|
ldmia sp, {r1, r2} @ retrieve dst, len
|
||||||
add r2, r2, r1
|
add r2, r2, r1
|
||||||
|
|
|
@@ -557,7 +557,6 @@ config QCOM_QDF2400_ERRATUM_0065
 
 	  If unsure, say Y.
 
-
 config SOCIONEXT_SYNQUACER_PREITS
 	bool "Socionext Synquacer: Workaround for GICv3 pre-ITS"
 	default y
@@ -576,6 +575,17 @@ config HISILICON_ERRATUM_161600802
 	  a 128kB offset to be applied to the target address in this commands.
 
 	  If unsure, say Y.
 
+config QCOM_FALKOR_ERRATUM_E1041
+	bool "Falkor E1041: Speculative instruction fetches might cause errant memory access"
+	default y
+	help
+	  Falkor CPU may speculatively fetch instructions from an improper
+	  memory location when MMU translation is changed from SCTLR_ELn[M]=1
+	  to SCTLR_ELn[M]=0. Prefix an ISB instruction to fix the problem.
+
+	  If unsure, say Y.
+
 endmenu
 
@@ -512,4 +512,14 @@ alternative_else_nop_endif
 #endif
 	.endm
 
+/**
+ * Errata workaround prior to disable MMU. Insert an ISB immediately prior
+ * to executing the MSR that will change SCTLR_ELn[M] from a value of 1 to 0.
+ */
+	.macro	pre_disable_mmu_workaround
+#ifdef CONFIG_QCOM_FALKOR_ERRATUM_E1041
+	isb
+#endif
+	.endm
+
 #endif	/* __ASM_ASSEMBLER_H */
@@ -60,6 +60,9 @@ enum ftr_type {
 #define FTR_VISIBLE	true	/* Feature visible to the user space */
 #define FTR_HIDDEN	false	/* Feature is hidden from the user */
 
+#define FTR_VISIBLE_IF_IS_ENABLED(config)		\
+	(IS_ENABLED(config) ? FTR_VISIBLE : FTR_HIDDEN)
+
 struct arm64_ftr_bits {
 	bool		sign;	/* Value is signed ? */
 	bool		visible;
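The new FTR_VISIBLE_IF_IS_ENABLED() helper simply folds a Kconfig switch into the visibility flag at build time, so a feature-register field (SVE in the cpufeature.c hunk further down) is only exposed to user space when the option is compiled in. A stand-alone sketch of the same pattern; the simplified IS_ENABLED() stand-in is illustrative, the real kernel macro also copes with undefined options:

#include <stdbool.h>
#include <stdio.h>

#define FTR_VISIBLE	true	/* field shown to user space    */
#define FTR_HIDDEN	false	/* field masked from user space */

/* Simplified stand-in for the kernel's IS_ENABLED(). */
#define IS_ENABLED(option)	(option)

#define FTR_VISIBLE_IF_IS_ENABLED(option) \
	(IS_ENABLED(option) ? FTR_VISIBLE : FTR_HIDDEN)

#define CONFIG_ARM64_SVE 1	/* pretend the option is enabled */

int main(void)
{
	printf("SVE field visible: %d\n",
	       FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE));
	return 0;
}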
@ -91,6 +91,7 @@
|
||||||
#define BRCM_CPU_PART_VULCAN 0x516
|
#define BRCM_CPU_PART_VULCAN 0x516
|
||||||
|
|
||||||
#define QCOM_CPU_PART_FALKOR_V1 0x800
|
#define QCOM_CPU_PART_FALKOR_V1 0x800
|
||||||
|
#define QCOM_CPU_PART_FALKOR 0xC00
|
||||||
|
|
||||||
#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
|
#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
|
||||||
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
|
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
|
||||||
|
@ -99,6 +100,7 @@
|
||||||
#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
|
#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
|
||||||
#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
|
#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
|
||||||
#define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)
|
#define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)
|
||||||
|
#define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR)
|
||||||
|
|
||||||
#ifndef __ASSEMBLY__
|
#ifndef __ASSEMBLY__
|
||||||
|
|
||||||
|
|
|
@@ -42,6 +42,8 @@
 #include <asm/cmpxchg.h>
 #include <asm/fixmap.h>
 #include <linux/mmdebug.h>
+#include <linux/mm_types.h>
+#include <linux/sched.h>
 
 extern void __pte_error(const char *file, int line, unsigned long val);
 extern void __pmd_error(const char *file, int line, unsigned long val);
@@ -149,12 +151,20 @@ static inline pte_t pte_mkwrite(pte_t pte)
 
 static inline pte_t pte_mkclean(pte_t pte)
 {
-	return clear_pte_bit(pte, __pgprot(PTE_DIRTY));
+	pte = clear_pte_bit(pte, __pgprot(PTE_DIRTY));
+	pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
+
+	return pte;
 }
 
 static inline pte_t pte_mkdirty(pte_t pte)
 {
-	return set_pte_bit(pte, __pgprot(PTE_DIRTY));
+	pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
+
+	if (pte_write(pte))
+		pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
+
+	return pte;
 }
 
 static inline pte_t pte_mkold(pte_t pte)
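The reworked helpers above keep the software dirty bit consistent with arm64 hardware dirty-bit management (DBM): with DBM, "hardware dirty" is encoded as PTE_WRITE (the DBM bit) set while PTE_RDONLY has been cleared by the MMU, and PTE_DIRTY is the separate software view. A stand-alone sketch of that encoding; the bit positions are the usual arm64 ones but appear here only for illustration:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define PTE_RDONLY	(1ULL << 7)	/* AP[2]: clear means writable        */
#define PTE_WRITE	(1ULL << 51)	/* DBM: hardware may clear PTE_RDONLY */
#define PTE_DIRTY	(1ULL << 55)	/* software dirty bit                 */

/* Hardware-dirty: DBM set and the read-only bit already cleared by the MMU. */
static bool pte_hw_dirty(uint64_t pte)
{
	return (pte & PTE_WRITE) && !(pte & PTE_RDONLY);
}

/* Dirty from either view, which is what pte_dirty() has to report. */
static bool pte_dirty(uint64_t pte)
{
	return (pte & PTE_DIRTY) || pte_hw_dirty(pte);
}

int main(void)
{
	assert(pte_dirty(PTE_WRITE));			/* hardware-dirty            */
	assert(pte_dirty(PTE_DIRTY | PTE_RDONLY));	/* software-dirty            */
	assert(!pte_dirty(PTE_WRITE | PTE_RDONLY));	/* clean, writable via DBM   */
	return 0;
}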
@ -207,9 +217,6 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct mm_struct;
|
|
||||||
struct vm_area_struct;
|
|
||||||
|
|
||||||
extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
|
extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -238,7 +245,8 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
|
||||||
* hardware updates of the pte (ptep_set_access_flags safely changes
|
* hardware updates of the pte (ptep_set_access_flags safely changes
|
||||||
* valid ptes without going through an invalid entry).
|
* valid ptes without going through an invalid entry).
|
||||||
*/
|
*/
|
||||||
if (pte_valid(*ptep) && pte_valid(pte)) {
|
if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(*ptep) && pte_valid(pte) &&
|
||||||
|
(mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) {
|
||||||
VM_WARN_ONCE(!pte_young(pte),
|
VM_WARN_ONCE(!pte_young(pte),
|
||||||
"%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
|
"%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
|
||||||
__func__, pte_val(*ptep), pte_val(pte));
|
__func__, pte_val(*ptep), pte_val(pte));
|
||||||
|
@@ -641,28 +649,23 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 /*
- * ptep_set_wrprotect - mark read-only while preserving the hardware update of
- * the Access Flag.
+ * ptep_set_wrprotect - mark read-only while trasferring potential hardware
+ * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.
  */
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
 {
 	pte_t old_pte, pte;
 
-	/*
-	 * ptep_set_wrprotect() is only called on CoW mappings which are
-	 * private (!VM_SHARED) with the pte either read-only (!PTE_WRITE &&
-	 * PTE_RDONLY) or writable and software-dirty (PTE_WRITE &&
-	 * !PTE_RDONLY && PTE_DIRTY); see is_cow_mapping() and
-	 * protection_map[]. There is no race with the hardware update of the
-	 * dirty state: clearing of PTE_RDONLY when PTE_WRITE (a.k.a. PTE_DBM)
-	 * is set.
-	 */
-	VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(*ptep),
-		     "%s: potential race with hardware DBM", __func__);
 	pte = READ_ONCE(*ptep);
 	do {
 		old_pte = pte;
+		/*
+		 * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
+		 * clear), set the PTE_DIRTY bit.
+		 */
+		if (pte_hw_dirty(pte))
+			pte = pte_mkdirty(pte);
 		pte = pte_wrprotect(pte);
 		pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
 					       pte_val(old_pte), pte_val(pte));
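The retry loop above is the standard lockless read-modify-write idiom: snapshot the entry, compute the new value, and cmpxchg it back, retrying if the entry changed underneath (for example because the MMU set the Access flag or cleared PTE_RDONLY concurrently). A self-contained sketch of the same shape using C11 atomics rather than the kernel's cmpxchg_relaxed; bit values as in the previous sketch, purely illustrative:

#include <stdatomic.h>
#include <stdint.h>

#define PTE_RDONLY	(1ULL << 7)
#define PTE_WRITE	(1ULL << 51)
#define PTE_DIRTY	(1ULL << 55)

/*
 * Write-protect an entry without a lock, folding a hardware-dirty state
 * into the software PTE_DIRTY bit before clearing write permission.
 */
static void wrprotect(_Atomic uint64_t *ptep)
{
	uint64_t old = atomic_load_explicit(ptep, memory_order_relaxed);
	uint64_t new;

	do {
		new = old;
		if ((new & PTE_WRITE) && !(new & PTE_RDONLY))
			new |= PTE_DIRTY;		/* pte_mkdirty()   */
		new &= ~PTE_WRITE;			/* pte_wrprotect() */
		new |= PTE_RDONLY;
	} while (!atomic_compare_exchange_weak_explicit(ptep, &old, new,
							memory_order_relaxed,
							memory_order_relaxed));
}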
|
@ -37,6 +37,7 @@ ENTRY(__cpu_soft_restart)
|
||||||
mrs x12, sctlr_el1
|
mrs x12, sctlr_el1
|
||||||
ldr x13, =SCTLR_ELx_FLAGS
|
ldr x13, =SCTLR_ELx_FLAGS
|
||||||
bic x12, x12, x13
|
bic x12, x12, x13
|
||||||
|
pre_disable_mmu_workaround
|
||||||
msr sctlr_el1, x12
|
msr sctlr_el1, x12
|
||||||
isb
|
isb
|
||||||
|
|
||||||
|
|
|
@ -145,7 +145,8 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
|
||||||
};
|
};
|
||||||
|
|
||||||
static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
|
static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
|
||||||
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
|
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
|
||||||
|
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
|
||||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0),
|
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0),
|
||||||
S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
|
S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
|
||||||
S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
|
S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
|
||||||
|
|
|
@ -96,6 +96,7 @@ ENTRY(entry)
|
||||||
mrs x0, sctlr_el2
|
mrs x0, sctlr_el2
|
||||||
bic x0, x0, #1 << 0 // clear SCTLR.M
|
bic x0, x0, #1 << 0 // clear SCTLR.M
|
||||||
bic x0, x0, #1 << 2 // clear SCTLR.C
|
bic x0, x0, #1 << 2 // clear SCTLR.C
|
||||||
|
pre_disable_mmu_workaround
|
||||||
msr sctlr_el2, x0
|
msr sctlr_el2, x0
|
||||||
isb
|
isb
|
||||||
b 2f
|
b 2f
|
||||||
|
@ -103,6 +104,7 @@ ENTRY(entry)
|
||||||
mrs x0, sctlr_el1
|
mrs x0, sctlr_el1
|
||||||
bic x0, x0, #1 << 0 // clear SCTLR.M
|
bic x0, x0, #1 << 0 // clear SCTLR.M
|
||||||
bic x0, x0, #1 << 2 // clear SCTLR.C
|
bic x0, x0, #1 << 2 // clear SCTLR.C
|
||||||
|
pre_disable_mmu_workaround
|
||||||
msr sctlr_el1, x0
|
msr sctlr_el1, x0
|
||||||
isb
|
isb
|
||||||
2:
|
2:
|
||||||
|
|
|
@ -1043,7 +1043,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
|
||||||
|
|
||||||
local_bh_disable();
|
local_bh_disable();
|
||||||
|
|
||||||
current->thread.fpsimd_state = *state;
|
current->thread.fpsimd_state.user_fpsimd = state->user_fpsimd;
|
||||||
if (system_supports_sve() && test_thread_flag(TIF_SVE))
|
if (system_supports_sve() && test_thread_flag(TIF_SVE))
|
||||||
fpsimd_to_sve(current);
|
fpsimd_to_sve(current);
|
||||||
|
|
||||||
|
|
|
@ -750,6 +750,7 @@ __primary_switch:
|
||||||
* to take into account by discarding the current kernel mapping and
|
* to take into account by discarding the current kernel mapping and
|
||||||
* creating a new one.
|
* creating a new one.
|
||||||
*/
|
*/
|
||||||
|
pre_disable_mmu_workaround
|
||||||
msr sctlr_el1, x20 // disable the MMU
|
msr sctlr_el1, x20 // disable the MMU
|
||||||
isb
|
isb
|
||||||
bl __create_page_tables // recreate kernel mapping
|
bl __create_page_tables // recreate kernel mapping
|
||||||
|
|
|
@ -28,6 +28,7 @@
|
||||||
#include <linux/perf_event.h>
|
#include <linux/perf_event.h>
|
||||||
#include <linux/ptrace.h>
|
#include <linux/ptrace.h>
|
||||||
#include <linux/smp.h>
|
#include <linux/smp.h>
|
||||||
|
#include <linux/uaccess.h>
|
||||||
|
|
||||||
#include <asm/compat.h>
|
#include <asm/compat.h>
|
||||||
#include <asm/current.h>
|
#include <asm/current.h>
|
||||||
|
@ -36,7 +37,6 @@
|
||||||
#include <asm/traps.h>
|
#include <asm/traps.h>
|
||||||
#include <asm/cputype.h>
|
#include <asm/cputype.h>
|
||||||
#include <asm/system_misc.h>
|
#include <asm/system_misc.h>
|
||||||
#include <asm/uaccess.h>
|
|
||||||
|
|
||||||
/* Breakpoint currently in use for each BRP. */
|
/* Breakpoint currently in use for each BRP. */
|
||||||
static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]);
|
static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]);
|
||||||
|
|
|
@ -45,6 +45,7 @@ ENTRY(arm64_relocate_new_kernel)
|
||||||
mrs x0, sctlr_el2
|
mrs x0, sctlr_el2
|
||||||
ldr x1, =SCTLR_ELx_FLAGS
|
ldr x1, =SCTLR_ELx_FLAGS
|
||||||
bic x0, x0, x1
|
bic x0, x0, x1
|
||||||
|
pre_disable_mmu_workaround
|
||||||
msr sctlr_el2, x0
|
msr sctlr_el2, x0
|
||||||
isb
|
isb
|
||||||
1:
|
1:
|
||||||
|
|
|
@ -151,6 +151,7 @@ reset:
|
||||||
mrs x5, sctlr_el2
|
mrs x5, sctlr_el2
|
||||||
ldr x6, =SCTLR_ELx_FLAGS
|
ldr x6, =SCTLR_ELx_FLAGS
|
||||||
bic x5, x5, x6 // Clear SCTL_M and etc
|
bic x5, x5, x6 // Clear SCTL_M and etc
|
||||||
|
pre_disable_mmu_workaround
|
||||||
msr sctlr_el2, x5
|
msr sctlr_el2, x5
|
||||||
isb
|
isb
|
||||||
|
|
||||||
|
|
|
@ -74,6 +74,9 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
|
||||||
{
|
{
|
||||||
u64 reg;
|
u64 reg;
|
||||||
|
|
||||||
|
/* Clear pmscr in case of early return */
|
||||||
|
*pmscr_el1 = 0;
|
||||||
|
|
||||||
/* SPE present on this CPU? */
|
/* SPE present on this CPU? */
|
||||||
if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
|
if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
|
||||||
ID_AA64DFR0_PMSVER_SHIFT))
|
ID_AA64DFR0_PMSVER_SHIFT))
|
||||||
|
|
|
@ -389,7 +389,7 @@ void ptdump_check_wx(void)
|
||||||
.check_wx = true,
|
.check_wx = true,
|
||||||
};
|
};
|
||||||
|
|
||||||
walk_pgd(&st, &init_mm, 0);
|
walk_pgd(&st, &init_mm, VA_START);
|
||||||
note_page(&st, 0, 0, 0);
|
note_page(&st, 0, 0, 0);
|
||||||
if (st.wx_pages || st.uxn_pages)
|
if (st.wx_pages || st.uxn_pages)
|
||||||
pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n",
|
pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n",
|
||||||
|
|
|
@ -574,7 +574,6 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
|
||||||
{
|
{
|
||||||
struct siginfo info;
|
struct siginfo info;
|
||||||
const struct fault_info *inf;
|
const struct fault_info *inf;
|
||||||
int ret = 0;
|
|
||||||
|
|
||||||
inf = esr_to_fault_info(esr);
|
inf = esr_to_fault_info(esr);
|
||||||
pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n",
|
pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n",
|
||||||
|
@ -589,7 +588,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
|
||||||
if (interrupts_enabled(regs))
|
if (interrupts_enabled(regs))
|
||||||
nmi_enter();
|
nmi_enter();
|
||||||
|
|
||||||
ret = ghes_notify_sea();
|
ghes_notify_sea();
|
||||||
|
|
||||||
if (interrupts_enabled(regs))
|
if (interrupts_enabled(regs))
|
||||||
nmi_exit();
|
nmi_exit();
|
||||||
|
@ -604,7 +603,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
|
||||||
info.si_addr = (void __user *)addr;
|
info.si_addr = (void __user *)addr;
|
||||||
arm64_notify_die("", regs, &info, esr);
|
arm64_notify_die("", regs, &info, esr);
|
||||||
|
|
||||||
return ret;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const struct fault_info fault_info[] = {
|
static const struct fault_info fault_info[] = {
|
||||||
|
|
|
@ -476,6 +476,8 @@ void __init arm64_memblock_init(void)
|
||||||
|
|
||||||
reserve_elfcorehdr();
|
reserve_elfcorehdr();
|
||||||
|
|
||||||
|
high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
|
||||||
|
|
||||||
dma_contiguous_reserve(arm64_dma_phys_limit);
|
dma_contiguous_reserve(arm64_dma_phys_limit);
|
||||||
|
|
||||||
memblock_allow_resize();
|
memblock_allow_resize();
|
||||||
|
@ -502,7 +504,6 @@ void __init bootmem_init(void)
|
||||||
sparse_init();
|
sparse_init();
|
||||||
zone_sizes_init(min, max);
|
zone_sizes_init(min, max);
|
||||||
|
|
||||||
high_memory = __va((max << PAGE_SHIFT) - 1) + 1;
|
|
||||||
memblock_dump_all();
|
memblock_dump_all();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -123,8 +123,8 @@ int puts(const char *s)
|
||||||
while ((nuline = strchr(s, '\n')) != NULL) {
|
while ((nuline = strchr(s, '\n')) != NULL) {
|
||||||
if (nuline != s)
|
if (nuline != s)
|
||||||
pdc_iodc_print(s, nuline - s);
|
pdc_iodc_print(s, nuline - s);
|
||||||
pdc_iodc_print("\r\n", 2);
|
pdc_iodc_print("\r\n", 2);
|
||||||
s = nuline + 1;
|
s = nuline + 1;
|
||||||
}
|
}
|
||||||
if (*s != '\0')
|
if (*s != '\0')
|
||||||
pdc_iodc_print(s, strlen(s));
|
pdc_iodc_print(s, strlen(s));
|
||||||
|
|
|
@@ -35,7 +35,12 @@ struct thread_info {
 
 /* thread information allocation */
 
+#ifdef CONFIG_IRQSTACKS
+#define THREAD_SIZE_ORDER	2 /* PA-RISC requires at least 16k stack */
+#else
 #define THREAD_SIZE_ORDER	3 /* PA-RISC requires at least 32k stack */
+#endif
+
 /* Be sure to hunt all references to this down when you change the size of
  * the kernel stack */
 #define THREAD_SIZE	(PAGE_SIZE << THREAD_SIZE_ORDER)
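With 4 KiB pages the new order works out to 16 KiB kernel stacks when CONFIG_IRQSTACKS moves interrupt handling onto separate stacks, and the previous 32 KiB otherwise. A trivial stand-alone check of that arithmetic:

#include <assert.h>

#define PAGE_SIZE 4096UL

int main(void)
{
	assert((PAGE_SIZE << 2) == 16 * 1024);	/* THREAD_SIZE_ORDER 2: 16 KiB */
	assert((PAGE_SIZE << 3) == 32 * 1024);	/* THREAD_SIZE_ORDER 3: 32 KiB */
	return 0;
}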
|
@ -878,9 +878,6 @@ ENTRY_CFI(syscall_exit_rfi)
|
||||||
STREG %r19,PT_SR7(%r16)
|
STREG %r19,PT_SR7(%r16)
|
||||||
|
|
||||||
intr_return:
|
intr_return:
|
||||||
/* NOTE: Need to enable interrupts incase we schedule. */
|
|
||||||
ssm PSW_SM_I, %r0
|
|
||||||
|
|
||||||
/* check for reschedule */
|
/* check for reschedule */
|
||||||
mfctl %cr30,%r1
|
mfctl %cr30,%r1
|
||||||
LDREG TI_FLAGS(%r1),%r19 /* sched.h: TIF_NEED_RESCHED */
|
LDREG TI_FLAGS(%r1),%r19 /* sched.h: TIF_NEED_RESCHED */
|
||||||
|
@ -907,6 +904,11 @@ intr_check_sig:
|
||||||
LDREG PT_IASQ1(%r16), %r20
|
LDREG PT_IASQ1(%r16), %r20
|
||||||
cmpib,COND(=),n 0,%r20,intr_restore /* backward */
|
cmpib,COND(=),n 0,%r20,intr_restore /* backward */
|
||||||
|
|
||||||
|
/* NOTE: We need to enable interrupts if we have to deliver
|
||||||
|
* signals. We used to do this earlier but it caused kernel
|
||||||
|
* stack overflows. */
|
||||||
|
ssm PSW_SM_I, %r0
|
||||||
|
|
||||||
copy %r0, %r25 /* long in_syscall = 0 */
|
copy %r0, %r25 /* long in_syscall = 0 */
|
||||||
#ifdef CONFIG_64BIT
|
#ifdef CONFIG_64BIT
|
||||||
ldo -16(%r30),%r29 /* Reference param save area */
|
ldo -16(%r30),%r29 /* Reference param save area */
|
||||||
|
@ -958,6 +960,10 @@ intr_do_resched:
|
||||||
cmpib,COND(=) 0, %r20, intr_do_preempt
|
cmpib,COND(=) 0, %r20, intr_do_preempt
|
||||||
nop
|
nop
|
||||||
|
|
||||||
|
/* NOTE: We need to enable interrupts if we schedule. We used
|
||||||
|
* to do this earlier but it caused kernel stack overflows. */
|
||||||
|
ssm PSW_SM_I, %r0
|
||||||
|
|
||||||
#ifdef CONFIG_64BIT
|
#ifdef CONFIG_64BIT
|
||||||
ldo -16(%r30),%r29 /* Reference param save area */
|
ldo -16(%r30),%r29 /* Reference param save area */
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -305,6 +305,7 @@ ENDPROC_CFI(os_hpmc)
|
||||||
|
|
||||||
|
|
||||||
__INITRODATA
|
__INITRODATA
|
||||||
|
.align 4
|
||||||
.export os_hpmc_size
|
.export os_hpmc_size
|
||||||
os_hpmc_size:
|
os_hpmc_size:
|
||||||
.word .os_hpmc_end-.os_hpmc
|
.word .os_hpmc_end-.os_hpmc
|
||||||
|
|
|
@ -15,7 +15,6 @@
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
#include <linux/kallsyms.h>
|
#include <linux/kallsyms.h>
|
||||||
#include <linux/sort.h>
|
#include <linux/sort.h>
|
||||||
#include <linux/sched.h>
|
|
||||||
|
|
||||||
#include <linux/uaccess.h>
|
#include <linux/uaccess.h>
|
||||||
#include <asm/assembly.h>
|
#include <asm/assembly.h>
|
||||||
|
|
|
@ -16,9 +16,7 @@
|
||||||
#include <linux/preempt.h>
|
#include <linux/preempt.h>
|
||||||
#include <linux/init.h>
|
#include <linux/init.h>
|
||||||
|
|
||||||
#include <asm/processor.h>
|
|
||||||
#include <asm/delay.h>
|
#include <asm/delay.h>
|
||||||
|
|
||||||
#include <asm/special_insns.h> /* for mfctl() */
|
#include <asm/special_insns.h> /* for mfctl() */
|
||||||
#include <asm/processor.h> /* for boot_cpu_data */
|
#include <asm/processor.h> /* for boot_cpu_data */
|
||||||
|
|
||||||
|
|
|
@ -160,9 +160,10 @@ static inline void enter_lazy_tlb(struct mm_struct *mm,
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void arch_dup_mmap(struct mm_struct *oldmm,
|
static inline int arch_dup_mmap(struct mm_struct *oldmm,
|
||||||
struct mm_struct *mm)
|
struct mm_struct *mm)
|
||||||
{
|
{
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef CONFIG_PPC_BOOK3S_64
|
#ifndef CONFIG_PPC_BOOK3S_64
|
||||||
|
|
|
@@ -1403,7 +1403,7 @@ void show_regs(struct pt_regs * regs)
 
 	printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
 	       regs->nip, regs->link, regs->ctr);
-	printk("REGS: %p TRAP: %04lx %s (%s)\n",
+	printk("REGS: %px TRAP: %04lx %s (%s)\n",
 	       regs, regs->trap, print_tainted(), init_utsname()->release);
 	printk("MSR: "REG" ", regs->msr);
 	print_msr_bits(regs->msr);
|
@ -725,7 +725,8 @@ u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu)
|
||||||
|
|
||||||
/* Return the per-cpu state for state saving/migration */
|
/* Return the per-cpu state for state saving/migration */
|
||||||
return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT |
|
return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT |
|
||||||
(u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT;
|
(u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT |
|
||||||
|
(u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT;
|
||||||
}
|
}
|
||||||
|
|
||||||
int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
|
int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
|
||||||
|
@ -1558,7 +1559,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Restore P and Q. If the interrupt was pending, we
|
* Restore P and Q. If the interrupt was pending, we
|
||||||
* force both P and Q, which will trigger a resend.
|
* force Q and !P, which will trigger a resend.
|
||||||
*
|
*
|
||||||
* That means that a guest that had both an interrupt
|
* That means that a guest that had both an interrupt
|
||||||
* pending (queued) and Q set will restore with only
|
* pending (queued) and Q set will restore with only
|
||||||
|
@ -1566,7 +1567,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
|
||||||
* is perfectly fine as coalescing interrupts that haven't
|
* is perfectly fine as coalescing interrupts that haven't
|
||||||
* been presented yet is always allowed.
|
* been presented yet is always allowed.
|
||||||
*/
|
*/
|
||||||
if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING)
|
if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING))
|
||||||
state->old_p = true;
|
state->old_p = true;
|
||||||
if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING)
|
if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING)
|
||||||
state->old_q = true;
|
state->old_q = true;
|
||||||
|
|
|
@ -763,7 +763,8 @@ emit_clear:
|
||||||
func = (u8 *) __bpf_call_base + imm;
|
func = (u8 *) __bpf_call_base + imm;
|
||||||
|
|
||||||
/* Save skb pointer if we need to re-cache skb data */
|
/* Save skb pointer if we need to re-cache skb data */
|
||||||
if (bpf_helper_changes_pkt_data(func))
|
if ((ctx->seen & SEEN_SKB) &&
|
||||||
|
bpf_helper_changes_pkt_data(func))
|
||||||
PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
|
PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
|
||||||
|
|
||||||
bpf_jit_emit_func_call(image, ctx, (u64)func);
|
bpf_jit_emit_func_call(image, ctx, (u64)func);
|
||||||
|
@ -772,7 +773,8 @@ emit_clear:
|
||||||
PPC_MR(b2p[BPF_REG_0], 3);
|
PPC_MR(b2p[BPF_REG_0], 3);
|
||||||
|
|
||||||
/* refresh skb cache */
|
/* refresh skb cache */
|
||||||
if (bpf_helper_changes_pkt_data(func)) {
|
if ((ctx->seen & SEEN_SKB) &&
|
||||||
|
bpf_helper_changes_pkt_data(func)) {
|
||||||
/* reload skb pointer to r3 */
|
/* reload skb pointer to r3 */
|
||||||
PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
|
PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
|
||||||
bpf_jit_emit_skb_loads(image, ctx);
|
bpf_jit_emit_skb_loads(image, ctx);
|
||||||
|
|
|
@ -410,8 +410,12 @@ static __u64 power_pmu_bhrb_to(u64 addr)
|
||||||
int ret;
|
int ret;
|
||||||
__u64 target;
|
__u64 target;
|
||||||
|
|
||||||
if (is_kernel_addr(addr))
|
if (is_kernel_addr(addr)) {
|
||||||
return branch_target((unsigned int *)addr);
|
if (probe_kernel_read(&instr, (void *)addr, sizeof(instr)))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return branch_target(&instr);
|
||||||
|
}
|
||||||
|
|
||||||
/* Userspace: need copy instruction here then translate it */
|
/* Userspace: need copy instruction here then translate it */
|
||||||
pagefault_disable();
|
pagefault_disable();
|
||||||
|
|
|
@ -309,6 +309,19 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu)
|
||||||
if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask))
|
if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check whether nest_imc is registered. We could end up here if the
|
||||||
|
* cpuhotplug callback registration fails. i.e, callback invokes the
|
||||||
|
* offline path for all successfully registered nodes. At this stage,
|
||||||
|
* nest_imc pmu will not be registered and we should return here.
|
||||||
|
*
|
||||||
|
* We return with a zero since this is not an offline failure. And
|
||||||
|
* cpuhp_setup_state() returns the actual failure reason to the caller,
|
||||||
|
* which in turn will call the cleanup routine.
|
||||||
|
*/
|
||||||
|
if (!nest_pmus)
|
||||||
|
return 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now that this cpu is one of the designated,
|
* Now that this cpu is one of the designated,
|
||||||
* find a next cpu a) which is online and b) in same chip.
|
* find a next cpu a) which is online and b) in same chip.
|
||||||
|
@ -1171,6 +1184,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
|
||||||
if (nest_pmus == 1) {
|
if (nest_pmus == 1) {
|
||||||
cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE);
|
cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE);
|
||||||
kfree(nest_imc_refc);
|
kfree(nest_imc_refc);
|
||||||
|
kfree(per_nest_pmu_arr);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nest_pmus > 0)
|
if (nest_pmus > 0)
|
||||||
|
@ -1195,7 +1209,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
|
||||||
kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
|
kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
|
||||||
kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
|
kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
|
||||||
kfree(pmu_ptr);
|
kfree(pmu_ptr);
|
||||||
kfree(per_nest_pmu_arr);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1309,6 +1322,8 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
|
||||||
ret = nest_pmu_cpumask_init();
|
ret = nest_pmu_cpumask_init();
|
||||||
if (ret) {
|
if (ret) {
|
||||||
mutex_unlock(&nest_init_lock);
|
mutex_unlock(&nest_init_lock);
|
||||||
|
kfree(nest_imc_refc);
|
||||||
|
kfree(per_nest_pmu_arr);
|
||||||
goto err_free;
|
goto err_free;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -354,6 +354,7 @@ static int fsl_of_msi_remove(struct platform_device *ofdev)
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct lock_class_key fsl_msi_irq_class;
|
static struct lock_class_key fsl_msi_irq_class;
|
||||||
|
static struct lock_class_key fsl_msi_irq_request_class;
|
||||||
|
|
||||||
static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,
|
static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,
|
||||||
int offset, int irq_index)
|
int offset, int irq_index)
|
||||||
|
@ -373,7 +374,8 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,
|
||||||
dev_err(&dev->dev, "No memory for MSI cascade data\n");
|
dev_err(&dev->dev, "No memory for MSI cascade data\n");
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
}
|
}
|
||||||
irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class);
|
irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class,
|
||||||
|
&fsl_msi_irq_request_class);
|
||||||
cascade_data->index = offset;
|
cascade_data->index = offset;
|
||||||
cascade_data->msi_data = msi;
|
cascade_data->msi_data = msi;
|
||||||
cascade_data->virq = virt_msir;
|
cascade_data->virq = virt_msir;
|
||||||
|
|
|
@@ -38,6 +38,25 @@
 #define smp_rmb()	RISCV_FENCE(r,r)
 #define smp_wmb()	RISCV_FENCE(w,w)
 
+/*
+ * This is a very specific barrier: it's currently only used in two places in
+ * the kernel, both in the scheduler.  See include/linux/spinlock.h for the two
+ * orderings it guarantees, but the "critical section is RCsc" guarantee
+ * mandates a barrier on RISC-V.  The sequence looks like:
+ *
+ *    lr.aq lock
+ *    sc    lock <= LOCKED
+ *    smp_mb__after_spinlock()
+ *    // critical section
+ *    lr    lock
+ *    sc.rl lock <= UNLOCKED
+ *
+ * The AQ/RL pair provides a RCpc critical section, but there's not really any
+ * way we can take advantage of that here because the ordering is only enforced
+ * on that one lock.  Thus, we're just doing a full fence.
+ */
+#define smp_mb__after_spinlock()	RISCV_FENCE(rw,rw)
+
 #include <asm-generic/barrier.h>
 
 #endif /* __ASSEMBLY__ */
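The guarantee this comment is buying is the classic store-buffering case: a store made before taking the lock must be ordered against loads performed inside the critical section on another CPU, which RCpc acquire/release alone does not give. A self-contained C11 sketch of that litmus pattern, illustrative only and not kernel code; with the seq_cst fences standing in for the full RISCV_FENCE(rw,rw), the two threads can never both read 0:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int x, y;
static int r0, r1;

static void *t0(void *arg)
{
	(void)arg;
	atomic_store_explicit(&x, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* the "full fence" */
	r0 = atomic_load_explicit(&y, memory_order_relaxed);
	return NULL;
}

static void *t1(void *arg)
{
	(void)arg;
	atomic_store_explicit(&y, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);
	r1 = atomic_load_explicit(&x, memory_order_relaxed);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, t0, NULL);
	pthread_create(&b, NULL, t1, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	/* With the fences in place, r0 == 0 && r1 == 0 is impossible. */
	printf("r0=%d r1=%d\n", r0, r1);
	return 0;
}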
|
@ -38,10 +38,6 @@
|
||||||
#include <asm/tlbflush.h>
|
#include <asm/tlbflush.h>
|
||||||
#include <asm/thread_info.h>
|
#include <asm/thread_info.h>
|
||||||
|
|
||||||
#ifdef CONFIG_HVC_RISCV_SBI
|
|
||||||
#include <asm/hvc_riscv_sbi.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef CONFIG_DUMMY_CONSOLE
|
#ifdef CONFIG_DUMMY_CONSOLE
|
||||||
struct screen_info screen_info = {
|
struct screen_info screen_info = {
|
||||||
.orig_video_lines = 30,
|
.orig_video_lines = 30,
|
||||||
|
@ -212,13 +208,6 @@ static void __init setup_bootmem(void)
|
||||||
|
|
||||||
void __init setup_arch(char **cmdline_p)
|
void __init setup_arch(char **cmdline_p)
|
||||||
{
|
{
|
||||||
#if defined(CONFIG_HVC_RISCV_SBI)
|
|
||||||
if (likely(early_console == NULL)) {
|
|
||||||
early_console = &riscv_sbi_early_console_dev;
|
|
||||||
register_console(early_console);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef CONFIG_CMDLINE_BOOL
|
#ifdef CONFIG_CMDLINE_BOOL
|
||||||
#ifdef CONFIG_CMDLINE_OVERRIDE
|
#ifdef CONFIG_CMDLINE_OVERRIDE
|
||||||
strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
|
strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
|
||||||
|
|
|
@@ -70,7 +70,7 @@ SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end,
 	bool local = (flags & SYS_RISCV_FLUSH_ICACHE_LOCAL) != 0;
 
 	/* Check the reserved flags. */
-	if (unlikely(flags & !SYS_RISCV_FLUSH_ICACHE_ALL))
+	if (unlikely(flags & ~SYS_RISCV_FLUSH_ICACHE_ALL))
 		return -EINVAL;
 
 	flush_icache_mm(mm, local);
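The one-character fix above matters because `!` is logical negation: `!SYS_RISCV_FLUSH_ICACHE_ALL` evaluates to 0, so the reserved-flag test could never reject anything, whereas `~` builds the complement mask that catches every undefined bit. A small stand-alone illustration (the flag value and the -22/EINVAL return are only for the example):

#include <assert.h>

#define FLUSH_ICACHE_ALL	1UL	/* the only defined flag */

static int check_flags_buggy(unsigned long flags)
{
	/* !FLUSH_ICACHE_ALL == 0, so this rejects nothing. */
	return (flags & !FLUSH_ICACHE_ALL) ? -22 : 0;
}

static int check_flags_fixed(unsigned long flags)
{
	/* ~FLUSH_ICACHE_ALL masks every bit except the defined one. */
	return (flags & ~FLUSH_ICACHE_ALL) ? -22 : 0;
}

int main(void)
{
	assert(check_flags_buggy(0x8) == 0);	/* reserved bit slips through   */
	assert(check_flags_fixed(0x8) == -22);	/* correctly rejected (-EINVAL) */
	assert(check_flags_fixed(0x1) == 0);	/* defined flag still accepted  */
	return 0;
}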
|
@ -1264,12 +1264,6 @@ static inline pud_t pud_mkwrite(pud_t pud)
|
||||||
return pud;
|
return pud;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define pud_write pud_write
|
|
||||||
static inline int pud_write(pud_t pud)
|
|
||||||
{
|
|
||||||
return (pud_val(pud) & _REGION3_ENTRY_WRITE) != 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline pud_t pud_mkclean(pud_t pud)
|
static inline pud_t pud_mkclean(pud_t pud)
|
||||||
{
|
{
|
||||||
if (pud_large(pud)) {
|
if (pud_large(pud)) {
|
||||||
|
|
|
@@ -263,6 +263,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis
 		return retval;
 	}
 
+	groups_sort(group_info);
 	retval = set_current_groups(group_info);
 	put_group_info(group_info);
 
|
@ -55,8 +55,7 @@ struct bpf_jit {
|
||||||
#define SEEN_LITERAL 8 /* code uses literals */
|
#define SEEN_LITERAL 8 /* code uses literals */
|
||||||
#define SEEN_FUNC 16 /* calls C functions */
|
#define SEEN_FUNC 16 /* calls C functions */
|
||||||
#define SEEN_TAIL_CALL 32 /* code uses tail calls */
|
#define SEEN_TAIL_CALL 32 /* code uses tail calls */
|
||||||
#define SEEN_SKB_CHANGE 64 /* code changes skb data */
|
#define SEEN_REG_AX 64 /* code uses constant blinding */
|
||||||
#define SEEN_REG_AX 128 /* code uses constant blinding */
|
|
||||||
#define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB)
|
#define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -448,12 +447,12 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
|
||||||
EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
|
EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
|
||||||
REG_15, 152);
|
REG_15, 152);
|
||||||
}
|
}
|
||||||
if (jit->seen & SEEN_SKB)
|
if (jit->seen & SEEN_SKB) {
|
||||||
emit_load_skb_data_hlen(jit);
|
emit_load_skb_data_hlen(jit);
|
||||||
if (jit->seen & SEEN_SKB_CHANGE)
|
|
||||||
/* stg %b1,ST_OFF_SKBP(%r0,%r15) */
|
/* stg %b1,ST_OFF_SKBP(%r0,%r15) */
|
||||||
EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
|
EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
|
||||||
STK_OFF_SKBP);
|
STK_OFF_SKBP);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -983,8 +982,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
|
||||||
EMIT2(0x0d00, REG_14, REG_W1);
|
EMIT2(0x0d00, REG_14, REG_W1);
|
||||||
/* lgr %b0,%r2: load return value into %b0 */
|
/* lgr %b0,%r2: load return value into %b0 */
|
||||||
EMIT4(0xb9040000, BPF_REG_0, REG_2);
|
EMIT4(0xb9040000, BPF_REG_0, REG_2);
|
||||||
if (bpf_helper_changes_pkt_data((void *)func)) {
|
if ((jit->seen & SEEN_SKB) &&
|
||||||
jit->seen |= SEEN_SKB_CHANGE;
|
bpf_helper_changes_pkt_data((void *)func)) {
|
||||||
/* lg %b1,ST_OFF_SKBP(%r15) */
|
/* lg %b1,ST_OFF_SKBP(%r15) */
|
||||||
EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
|
EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
|
||||||
REG_15, STK_OFF_SKBP);
|
REG_15, STK_OFF_SKBP);
|
||||||
|
|
|
@@ -44,8 +44,8 @@ EXPORT_SYMBOL(__arch_hweight32)
 	.previous
 
 ENTRY(__arch_hweight64)
-	sethi	%hi(__sw_hweight16), %g1
-	jmpl	%g1 + %lo(__sw_hweight16), %g0
+	sethi	%hi(__sw_hweight64), %g1
+	jmpl	%g1 + %lo(__sw_hweight64), %g0
 	 nop
 ENDPROC(__arch_hweight64)
 EXPORT_SYMBOL(__arch_hweight64)
|
@ -113,7 +113,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
|
||||||
if (!printk_ratelimit())
|
if (!printk_ratelimit())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x",
|
printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
|
||||||
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
|
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
|
||||||
tsk->comm, task_pid_nr(tsk), address,
|
tsk->comm, task_pid_nr(tsk), address,
|
||||||
(void *)regs->pc, (void *)regs->u_regs[UREG_I7],
|
(void *)regs->pc, (void *)regs->u_regs[UREG_I7],
|
||||||
|
|
|
@ -154,7 +154,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
|
||||||
if (!printk_ratelimit())
|
if (!printk_ratelimit())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x",
|
printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
|
||||||
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
|
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
|
||||||
tsk->comm, task_pid_nr(tsk), address,
|
tsk->comm, task_pid_nr(tsk), address,
|
||||||
(void *)regs->tpc, (void *)regs->u_regs[UREG_I7],
|
(void *)regs->tpc, (void *)regs->u_regs[UREG_I7],
|
||||||
|
|
|
@ -75,7 +75,7 @@ static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
|
||||||
if (!(pmd_val(pmd) & _PAGE_VALID))
|
if (!(pmd_val(pmd) & _PAGE_VALID))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (!pmd_access_permitted(pmd, write))
|
if (write && !pmd_write(pmd))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
refs = 0;
|
refs = 0;
|
||||||
|
@ -114,7 +114,7 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
|
||||||
if (!(pud_val(pud) & _PAGE_VALID))
|
if (!(pud_val(pud) & _PAGE_VALID))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (!pud_access_permitted(pud, write))
|
if (write && !pud_write(pud))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
refs = 0;
|
refs = 0;
|
||||||
|
|
|
@ -1245,14 +1245,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
|
||||||
u8 *func = ((u8 *)__bpf_call_base) + imm;
|
u8 *func = ((u8 *)__bpf_call_base) + imm;
|
||||||
|
|
||||||
ctx->saw_call = true;
|
ctx->saw_call = true;
|
||||||
|
if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
|
||||||
|
emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx);
|
||||||
|
|
||||||
emit_call((u32 *)func, ctx);
|
emit_call((u32 *)func, ctx);
|
||||||
emit_nop(ctx);
|
emit_nop(ctx);
|
||||||
|
|
||||||
emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
|
emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
|
||||||
|
|
||||||
if (bpf_helper_changes_pkt_data(func) && ctx->saw_ld_abs_ind)
|
if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
|
||||||
load_skb_regs(ctx, bpf2sparc[BPF_REG_6]);
|
load_skb_regs(ctx, L7);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
generic-y += barrier.h
|
generic-y += barrier.h
|
||||||
|
generic-y += bpf_perf_event.h
|
||||||
generic-y += bug.h
|
generic-y += bug.h
|
||||||
generic-y += clkdev.h
|
generic-y += clkdev.h
|
||||||
generic-y += current.h
|
generic-y += current.h
|
||||||
|
|
|
@ -15,9 +15,10 @@ extern void uml_setup_stubs(struct mm_struct *mm);
|
||||||
/*
|
/*
|
||||||
* Needed since we do not use the asm-generic/mm_hooks.h:
|
* Needed since we do not use the asm-generic/mm_hooks.h:
|
||||||
*/
|
*/
|
||||||
static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
|
static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
|
||||||
{
|
{
|
||||||
uml_setup_stubs(mm);
|
uml_setup_stubs(mm);
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
extern void arch_exit_mmap(struct mm_struct *mm);
|
extern void arch_exit_mmap(struct mm_struct *mm);
|
||||||
static inline void arch_unmap(struct mm_struct *mm,
|
static inline void arch_unmap(struct mm_struct *mm,
|
||||||
|
|
|
@ -150,7 +150,7 @@ static void show_segv_info(struct uml_pt_regs *regs)
|
||||||
if (!printk_ratelimit())
|
if (!printk_ratelimit())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
printk("%s%s[%d]: segfault at %lx ip %p sp %p error %x",
|
printk("%s%s[%d]: segfault at %lx ip %px sp %px error %x",
|
||||||
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
|
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
|
||||||
tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi),
|
tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi),
|
||||||
(void *)UPT_IP(regs), (void *)UPT_SP(regs),
|
(void *)UPT_IP(regs), (void *)UPT_SP(regs),
|
||||||
|
|
|
@ -81,9 +81,10 @@ do { \
|
||||||
} \
|
} \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
static inline void arch_dup_mmap(struct mm_struct *oldmm,
|
static inline int arch_dup_mmap(struct mm_struct *oldmm,
|
||||||
struct mm_struct *mm)
|
struct mm_struct *mm)
|
||||||
{
|
{
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void arch_unmap(struct mm_struct *mm,
|
static inline void arch_unmap(struct mm_struct *mm,
|
||||||
|
|
|
@ -926,7 +926,8 @@ config MAXSMP
|
||||||
config NR_CPUS
|
config NR_CPUS
|
||||||
int "Maximum number of CPUs" if SMP && !MAXSMP
|
int "Maximum number of CPUs" if SMP && !MAXSMP
|
||||||
range 2 8 if SMP && X86_32 && !X86_BIGSMP
|
range 2 8 if SMP && X86_32 && !X86_BIGSMP
|
||||||
range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK
|
range 2 64 if SMP && X86_32 && X86_BIGSMP
|
||||||
|
range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
|
||||||
range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
|
range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
|
||||||
default "1" if !SMP
|
default "1" if !SMP
|
||||||
default "8192" if MAXSMP
|
default "8192" if MAXSMP
|
||||||
|
|
|
@ -400,6 +400,7 @@ config UNWINDER_FRAME_POINTER
|
||||||
config UNWINDER_GUESS
|
config UNWINDER_GUESS
|
||||||
bool "Guess unwinder"
|
bool "Guess unwinder"
|
||||||
depends on EXPERT
|
depends on EXPERT
|
||||||
|
depends on !STACKDEPOT
|
||||||
---help---
|
---help---
|
||||||
This option enables the "guess" unwinder for unwinding kernel stack
|
This option enables the "guess" unwinder for unwinding kernel stack
|
||||||
traces. It scans the stack and reports every kernel text address it
|
traces. It scans the stack and reports every kernel text address it
|
||||||
|
|
|
@ -80,6 +80,7 @@ vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
|
||||||
ifdef CONFIG_X86_64
|
ifdef CONFIG_X86_64
|
||||||
vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o
|
vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o
|
||||||
vmlinux-objs-y += $(obj)/mem_encrypt.o
|
vmlinux-objs-y += $(obj)/mem_encrypt.o
|
||||||
|
vmlinux-objs-y += $(obj)/pgtable_64.o
|
||||||
endif
|
endif
|
||||||
|
|
||||||
$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
|
$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
|
||||||
|
|
|
@ -305,10 +305,18 @@ ENTRY(startup_64)
|
||||||
leaq boot_stack_end(%rbx), %rsp
|
leaq boot_stack_end(%rbx), %rsp
|
||||||
|
|
||||||
#ifdef CONFIG_X86_5LEVEL
|
#ifdef CONFIG_X86_5LEVEL
|
||||||
/* Check if 5-level paging has already enabled */
|
/*
|
||||||
movq %cr4, %rax
|
* Check if we need to enable 5-level paging.
|
||||||
testl $X86_CR4_LA57, %eax
|
* RSI holds real mode data and need to be preserved across
|
||||||
jnz lvl5
|
* a function call.
|
||||||
|
*/
|
||||||
|
pushq %rsi
|
||||||
|
call l5_paging_required
|
||||||
|
popq %rsi
|
||||||
|
|
||||||
|
/* If l5_paging_required() returned zero, we're done here. */
|
||||||
|
cmpq $0, %rax
|
||||||
|
je lvl5
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* At this point we are in long mode with 4-level paging enabled,
|
* At this point we are in long mode with 4-level paging enabled,
|
||||||
|
|
|
@ -169,6 +169,16 @@ void __puthex(unsigned long value)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool l5_supported(void)
|
||||||
|
{
|
||||||
|
/* Check if leaf 7 is supported. */
|
||||||
|
if (native_cpuid_eax(0) < 7)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* Check if la57 is supported. */
|
||||||
|
return native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31));
|
||||||
|
}
|
||||||
|
|
||||||
#if CONFIG_X86_NEED_RELOCS
|
#if CONFIG_X86_NEED_RELOCS
|
||||||
static void handle_relocations(void *output, unsigned long output_len,
|
static void handle_relocations(void *output, unsigned long output_len,
|
||||||
unsigned long virt_addr)
|
unsigned long virt_addr)
|
||||||
|
@ -362,6 +372,12 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
|
||||||
console_init();
|
console_init();
|
||||||
debug_putstr("early console in extract_kernel\n");
|
debug_putstr("early console in extract_kernel\n");
|
||||||
|
|
||||||
|
if (IS_ENABLED(CONFIG_X86_5LEVEL) && !l5_supported()) {
|
||||||
|
error("This linux kernel as configured requires 5-level paging\n"
|
||||||
|
"This CPU does not support the required 'cr4.la57' feature\n"
|
||||||
|
"Unable to boot - please use a kernel appropriate for your CPU\n");
|
||||||
|
}
|
||||||
|
|
||||||
free_mem_ptr = heap; /* Heap */
|
free_mem_ptr = heap; /* Heap */
|
||||||
free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
|
free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
|
||||||
|
|
||||||
|
|
|
@ -23,6 +23,9 @@
|
||||||
*/
|
*/
|
||||||
#undef CONFIG_AMD_MEM_ENCRYPT
|
#undef CONFIG_AMD_MEM_ENCRYPT
|
||||||
|
|
||||||
|
/* No PAGE_TABLE_ISOLATION support needed either: */
|
||||||
|
#undef CONFIG_PAGE_TABLE_ISOLATION
|
||||||
|
|
||||||
#include "misc.h"
|
#include "misc.h"
|
||||||
|
|
||||||
/* These actually do the work of building the kernel identity maps. */
|
/* These actually do the work of building the kernel identity maps. */
|
||||||
|
|
|
@@ -0,0 +1,28 @@
+#include <asm/processor.h>
+
+/*
+ * __force_order is used by special_insns.h asm code to force instruction
+ * serialization.
+ *
+ * It is not referenced from the code, but GCC < 5 with -fPIE would fail
+ * due to an undefined symbol. Define it to make these ancient GCCs work.
+ */
+unsigned long __force_order;
+
+int l5_paging_required(void)
+{
+	/* Check if leaf 7 is supported. */
+
+	if (native_cpuid_eax(0) < 7)
+		return 0;
+
+	/* Check if la57 is supported. */
+	if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
+		return 0;
+
+	/* Check if 5-level paging has already been enabled. */
+	if (native_read_cr4() & X86_CR4_LA57)
+		return 0;
+
+	return 1;
+}
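The same probe can be reproduced from user space with the compiler's cpuid.h helpers: X86_FEATURE_LA57 & 31 works out to 16, i.e. ECX bit 16 of CPUID leaf 7, subleaf 0. A stand-alone sketch (it only reports CPU support; whether 5-level paging is actually enabled is the separate CR4.LA57 test above):

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* Leaf 7, subleaf 0: structured extended feature flags. */
	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
		puts("CPUID leaf 7 not supported");
		return 0;
	}

	/* ECX bit 16 is LA57 (57-bit virtual addresses / 5-level paging). */
	printf("LA57 %ssupported by this CPU\n",
	       (ecx & (1u << 16)) ? "" : "not ");
	return 0;
}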
|
@ -44,9 +44,9 @@ FDINITRD=$6
|
||||||
|
|
||||||
# Make sure the files actually exist
|
# Make sure the files actually exist
|
||||||
verify "$FBZIMAGE"
|
verify "$FBZIMAGE"
|
||||||
verify "$MTOOLSRC"
|
|
||||||
|
|
||||||
genbzdisk() {
|
genbzdisk() {
|
||||||
|
verify "$MTOOLSRC"
|
||||||
mformat a:
|
mformat a:
|
||||||
syslinux $FIMAGE
|
syslinux $FIMAGE
|
||||||
echo "$KCMDLINE" | mcopy - a:syslinux.cfg
|
echo "$KCMDLINE" | mcopy - a:syslinux.cfg
|
||||||
|
@ -57,6 +57,7 @@ genbzdisk() {
|
||||||
}
|
}
|
||||||
|
|
||||||
genfdimage144() {
|
genfdimage144() {
|
||||||
|
verify "$MTOOLSRC"
|
||||||
dd if=/dev/zero of=$FIMAGE bs=1024 count=1440 2> /dev/null
|
dd if=/dev/zero of=$FIMAGE bs=1024 count=1440 2> /dev/null
|
||||||
mformat v:
|
mformat v:
|
||||||
syslinux $FIMAGE
|
syslinux $FIMAGE
|
||||||
|
@ -68,6 +69,7 @@ genfdimage144() {
|
||||||
}
|
}
|
||||||
|
|
||||||
genfdimage288() {
|
genfdimage288() {
|
||||||
|
verify "$MTOOLSRC"
|
||||||
dd if=/dev/zero of=$FIMAGE bs=1024 count=2880 2> /dev/null
|
dd if=/dev/zero of=$FIMAGE bs=1024 count=2880 2> /dev/null
|
||||||
mformat w:
|
mformat w:
|
||||||
syslinux $FIMAGE
|
syslinux $FIMAGE
|
||||||
|
@ -78,39 +80,43 @@ genfdimage288() {
|
||||||
mcopy $FBZIMAGE w:linux
|
mcopy $FBZIMAGE w:linux
|
||||||
}
|
}
|
||||||
|
|
||||||
genisoimage() {
|
geniso() {
|
||||||
tmp_dir=`dirname $FIMAGE`/isoimage
|
tmp_dir=`dirname $FIMAGE`/isoimage
|
||||||
rm -rf $tmp_dir
|
rm -rf $tmp_dir
|
||||||
mkdir $tmp_dir
|
mkdir $tmp_dir
|
||||||
for i in lib lib64 share end ; do
|
for i in lib lib64 share ; do
|
||||||
for j in syslinux ISOLINUX ; do
|
for j in syslinux ISOLINUX ; do
|
||||||
if [ -f /usr/$i/$j/isolinux.bin ] ; then
|
if [ -f /usr/$i/$j/isolinux.bin ] ; then
|
||||||
isolinux=/usr/$i/$j/isolinux.bin
|
isolinux=/usr/$i/$j/isolinux.bin
|
||||||
cp $isolinux $tmp_dir
|
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
for j in syslinux syslinux/modules/bios ; do
|
for j in syslinux syslinux/modules/bios ; do
|
||||||
if [ -f /usr/$i/$j/ldlinux.c32 ]; then
|
if [ -f /usr/$i/$j/ldlinux.c32 ]; then
|
||||||
ldlinux=/usr/$i/$j/ldlinux.c32
|
ldlinux=/usr/$i/$j/ldlinux.c32
|
||||||
cp $ldlinux $tmp_dir
|
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
if [ -n "$isolinux" -a -n "$ldlinux" ] ; then
|
if [ -n "$isolinux" -a -n "$ldlinux" ] ; then
|
||||||
break
|
break
|
||||||
fi
|
fi
|
||||||
if [ $i = end -a -z "$isolinux" ] ; then
|
|
||||||
echo 'Need an isolinux.bin file, please install syslinux/isolinux.'
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
done
|
done
|
||||||
|
if [ -z "$isolinux" ] ; then
|
||||||
|
echo 'Need an isolinux.bin file, please install syslinux/isolinux.'
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
if [ -z "$ldlinux" ] ; then
|
||||||
|
echo 'Need an ldlinux.c32 file, please install syslinux/isolinux.'
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
cp $isolinux $tmp_dir
|
||||||
|
cp $ldlinux $tmp_dir
|
||||||
cp $FBZIMAGE $tmp_dir/linux
|
cp $FBZIMAGE $tmp_dir/linux
|
||||||
echo "$KCMDLINE" > $tmp_dir/isolinux.cfg
|
echo "$KCMDLINE" > $tmp_dir/isolinux.cfg
|
||||||
if [ -f "$FDINITRD" ] ; then
|
if [ -f "$FDINITRD" ] ; then
|
||||||
cp "$FDINITRD" $tmp_dir/initrd.img
|
cp "$FDINITRD" $tmp_dir/initrd.img
|
||||||
fi
|
fi
|
||||||
mkisofs -J -r -input-charset=utf-8 -quiet -o $FIMAGE -b isolinux.bin \
|
genisoimage -J -r -input-charset=utf-8 -quiet -o $FIMAGE \
|
||||||
-c boot.cat -no-emul-boot -boot-load-size 4 -boot-info-table \
|
-b isolinux.bin -c boot.cat -no-emul-boot -boot-load-size 4 \
|
||||||
$tmp_dir
|
-boot-info-table $tmp_dir
|
||||||
isohybrid $FIMAGE 2>/dev/null || true
|
isohybrid $FIMAGE 2>/dev/null || true
|
||||||
rm -rf $tmp_dir
|
rm -rf $tmp_dir
|
||||||
}
|
}
|
||||||
|
@ -119,6 +125,6 @@ case $1 in
|
||||||
bzdisk) genbzdisk;;
|
bzdisk) genbzdisk;;
|
||||||
fdimage144) genfdimage144;;
|
fdimage144) genfdimage144;;
|
||||||
fdimage288) genfdimage288;;
|
fdimage288) genfdimage288;;
|
||||||
isoimage) genisoimage;;
|
isoimage) geniso;;
|
||||||
*) echo 'Unknown image format'; exit 1;
|
*) echo 'Unknown image format'; exit 1;
|
||||||
esac
|
esac
|
@@ -59,13 +59,6 @@ static int encrypt(struct blkcipher_desc *desc,
 
 	salsa20_ivsetup(ctx, walk.iv);
 
-	if (likely(walk.nbytes == nbytes))
-	{
-		salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
-				      walk.dst.virt.addr, nbytes);
-		return blkcipher_walk_done(desc, &walk, 0);
-	}
-
 	while (walk.nbytes >= 64) {
 		salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
 				      walk.dst.virt.addr,
@@ -1,6 +1,11 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #include <linux/jump_label.h>
 #include <asm/unwind_hints.h>
+#include <asm/cpufeatures.h>
+#include <asm/page_types.h>
+#include <asm/percpu.h>
+#include <asm/asm-offsets.h>
+#include <asm/processor-flags.h>
 
 /*
@@ -187,6 +192,146 @@ For 32-bit we have the following conventions - kernel is built with
 #endif
 .endm
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+
+/*
+ * PAGE_TABLE_ISOLATION PGDs are 8k.  Flip bit 12 to switch between the two
+ * halves:
+ */
+#define PTI_SWITCH_PGTABLES_MASK	(1<<PAGE_SHIFT)
+#define PTI_SWITCH_MASK		(PTI_SWITCH_PGTABLES_MASK|(1<<X86_CR3_PTI_SWITCH_BIT))
+
+.macro SET_NOFLUSH_BIT	reg:req
+	bts	$X86_CR3_PCID_NOFLUSH_BIT, \reg
+.endm
+
+.macro ADJUST_KERNEL_CR3 reg:req
+	ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
+	/* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
+	andq	$(~PTI_SWITCH_MASK), \reg
+.endm
+
+.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
+	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+	mov	%cr3, \scratch_reg
+	ADJUST_KERNEL_CR3 \scratch_reg
+	mov	\scratch_reg, %cr3
+.Lend_\@:
+.endm
+
+#define THIS_CPU_user_pcid_flush_mask	\
+	PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask
+
+.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
+	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+	mov	%cr3, \scratch_reg
+
+	ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
+
+	/*
+	 * Test if the ASID needs a flush.
+	 */
+	movq	\scratch_reg, \scratch_reg2
+	andq	$(0x7FF), \scratch_reg		/* mask ASID */
+	bt	\scratch_reg, THIS_CPU_user_pcid_flush_mask
+	jnc	.Lnoflush_\@
+
+	/* Flush needed, clear the bit */
+	btr	\scratch_reg, THIS_CPU_user_pcid_flush_mask
+	movq	\scratch_reg2, \scratch_reg
+	jmp	.Lwrcr3_\@
+
+.Lnoflush_\@:
+	movq	\scratch_reg2, \scratch_reg
+	SET_NOFLUSH_BIT \scratch_reg
+
+.Lwrcr3_\@:
+	/* Flip the PGD and ASID to the user version */
+	orq	$(PTI_SWITCH_MASK), \scratch_reg
+	mov	\scratch_reg, %cr3
+.Lend_\@:
+.endm
+
+.macro SWITCH_TO_USER_CR3_STACK	scratch_reg:req
+	pushq	%rax
+	SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax
+	popq	%rax
+.endm
+
+.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
+	ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI
+	movq	%cr3, \scratch_reg
+	movq	\scratch_reg, \save_reg
+	/*
+	 * Is the "switch mask" all zero?  That means that both of
+	 * these are zero:
+	 *
+	 *	1. The user/kernel PCID bit, and
+	 *	2. The user/kernel "bit" that points CR3 to the
+	 *	   bottom half of the 8k PGD
+	 *
+	 * That indicates a kernel CR3 value, not a user CR3.
+	 */
+	testq	$(PTI_SWITCH_MASK), \scratch_reg
+	jz	.Ldone_\@
+
+	ADJUST_KERNEL_CR3 \scratch_reg
+	movq	\scratch_reg, %cr3
+
+.Ldone_\@:
+.endm
+
+.macro RESTORE_CR3 scratch_reg:req save_reg:req
+	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+
+	ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
+
+	/*
+	 * KERNEL pages can always resume with NOFLUSH as we do
+	 * explicit flushes.
+	 */
+	bt	$X86_CR3_PTI_SWITCH_BIT, \save_reg
+	jnc	.Lnoflush_\@
+
+	/*
+	 * Check if there's a pending flush for the user ASID we're
+	 * about to set.
+	 */
+	movq	\save_reg, \scratch_reg
+	andq	$(0x7FF), \scratch_reg
+	bt	\scratch_reg, THIS_CPU_user_pcid_flush_mask
+	jnc	.Lnoflush_\@
+
+	btr	\scratch_reg, THIS_CPU_user_pcid_flush_mask
+	jmp	.Lwrcr3_\@
+
+.Lnoflush_\@:
+	SET_NOFLUSH_BIT \save_reg
+
+.Lwrcr3_\@:
+	/*
+	 * The CR3 write could be avoided when not changing its value,
+	 * but would require a CR3 read *and* a scratch register.
+	 */
+	movq	\save_reg, %cr3
+.Lend_\@:
+.endm
+
+#else /* CONFIG_PAGE_TABLE_ISOLATION=n: */
+
+.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
+.endm
+.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
+.endm
+.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
+.endm
+.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
+.endm
+.macro RESTORE_CR3 scratch_reg:req save_reg:req
+.endm
+
+#endif
+
 #endif /* CONFIG_X86_64 */
 
 /*
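The SWITCH_TO_*_CR3 macros above never load a different pgd pointer; they flip two bits inside the CR3 value itself: one bit selects the user half of the 8k PGD and the X86_CR3_PTI_SWITCH_BIT selects the user ASID. The following is only an illustrative userspace sketch of that arithmetic, not code from this merge; the bit positions (12 for the PGD half, 11 for the ASID switch) are assumptions taken from PAGE_SHIFT and the mask definition in the hunk above.

#include <stdint.h>
#include <stdio.h>

/* Assumed layout mirroring PTI_SWITCH_MASK in the hunk above. */
#define TOY_PAGE_SHIFT            12
#define TOY_PGTABLES_MASK         (1ULL << TOY_PAGE_SHIFT)  /* second 4k half of the 8k PGD */
#define TOY_PTI_SWITCH_BIT        11                        /* assumed X86_CR3_PTI_SWITCH_BIT */
#define TOY_PTI_SWITCH_MASK       (TOY_PGTABLES_MASK | (1ULL << TOY_PTI_SWITCH_BIT))

static uint64_t to_user_cr3(uint64_t cr3)   { return cr3 |  TOY_PTI_SWITCH_MASK; }
static uint64_t to_kernel_cr3(uint64_t cr3) { return cr3 & ~TOY_PTI_SWITCH_MASK; }

int main(void)
{
	uint64_t kernel_cr3 = 0x1000000;                 /* made-up kernel PGD address */
	uint64_t user_cr3   = to_user_cr3(kernel_cr3);

	printf("kernel CR3 %#llx -> user CR3 %#llx\n",
	       (unsigned long long)kernel_cr3, (unsigned long long)user_cr3);
	printf("round trip ok: %d\n", to_kernel_cr3(user_cr3) == kernel_cr3);
	return 0;
}

Because only these two bits differ, the switch is a single read-modify-write of CR3 on entry and exit, which is why the macros can run this early with just one scratch register.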
@@ -941,9 +941,10 @@ ENTRY(debug)
 	movl	%esp, %eax			# pt_regs pointer
 
 	/* Are we currently on the SYSENTER stack? */
-	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
-	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
-	cmpl	$SIZEOF_SYSENTER_stack, %ecx
+	movl	PER_CPU_VAR(cpu_entry_area), %ecx
+	addl	$CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
+	subl	%eax, %ecx	/* ecx = (end of entry_stack) - esp */
+	cmpl	$SIZEOF_entry_stack, %ecx
 	jb	.Ldebug_from_sysenter_stack
 
 	TRACE_IRQS_OFF
@@ -984,9 +985,10 @@ ENTRY(nmi)
 	movl	%esp, %eax			# pt_regs pointer
 
 	/* Are we currently on the SYSENTER stack? */
-	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
-	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
-	cmpl	$SIZEOF_SYSENTER_stack, %ecx
+	movl	PER_CPU_VAR(cpu_entry_area), %ecx
+	addl	$CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
+	subl	%eax, %ecx	/* ecx = (end of entry_stack) - esp */
+	cmpl	$SIZEOF_entry_stack, %ecx
 	jb	.Lnmi_from_sysenter_stack
 
 	/* Not on SYSENTER stack. */
@@ -23,7 +23,6 @@
 #include <asm/segment.h>
 #include <asm/cache.h>
 #include <asm/errno.h>
-#include "calling.h"
 #include <asm/asm-offsets.h>
 #include <asm/msr.h>
 #include <asm/unistd.h>
@@ -40,6 +39,8 @@
 #include <asm/frame.h>
 #include <linux/err.h>
 
+#include "calling.h"
+
 .code64
 .section .entry.text, "ax"
 
@@ -140,6 +141,67 @@ END(native_usergs_sysret64)
  * with them due to bugs in both AMD and Intel CPUs.
  */
 
+	.pushsection .entry_trampoline, "ax"
+
+/*
+ * The code in here gets remapped into cpu_entry_area's trampoline.  This means
+ * that the assembler and linker have the wrong idea as to where this code
+ * lives (and, in fact, it's mapped more than once, so it's not even at a
+ * fixed address).  So we can't reference any symbols outside the entry
+ * trampoline and expect it to work.
+ *
+ * Instead, we carefully abuse %rip-relative addressing.
+ * _entry_trampoline(%rip) refers to the start of the remapped) entry
+ * trampoline.  We can thus find cpu_entry_area with this macro:
+ */
+
+#define CPU_ENTRY_AREA \
+	_entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
+
+/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
+#define RSP_SCRATCH	CPU_ENTRY_AREA_entry_stack + \
+			SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA
+
+ENTRY(entry_SYSCALL_64_trampoline)
+	UNWIND_HINT_EMPTY
+	swapgs
+
+	/* Stash the user RSP. */
+	movq	%rsp, RSP_SCRATCH
+
+	/* Note: using %rsp as a scratch reg. */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+
+	/* Load the top of the task stack into RSP */
+	movq	CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
+
+	/* Start building the simulated IRET frame. */
+	pushq	$__USER_DS			/* pt_regs->ss */
+	pushq	RSP_SCRATCH			/* pt_regs->sp */
+	pushq	%r11				/* pt_regs->flags */
+	pushq	$__USER_CS			/* pt_regs->cs */
+	pushq	%rcx				/* pt_regs->ip */
+
+	/*
+	 * x86 lacks a near absolute jump, and we can't jump to the real
+	 * entry text with a relative jump.  We could push the target
+	 * address and then use retq, but this destroys the pipeline on
+	 * many CPUs (wasting over 20 cycles on Sandy Bridge).  Instead,
+	 * spill RDI and restore it in a second-stage trampoline.
+	 */
+	pushq	%rdi
+	movq	$entry_SYSCALL_64_stage2, %rdi
+	jmp	*%rdi
+END(entry_SYSCALL_64_trampoline)
+
+	.popsection
+
+ENTRY(entry_SYSCALL_64_stage2)
+	UNWIND_HINT_EMPTY
+	popq	%rdi
+	jmp	entry_SYSCALL_64_after_hwframe
+END(entry_SYSCALL_64_stage2)
+
 ENTRY(entry_SYSCALL_64)
 	UNWIND_HINT_EMPTY
 	/*
@@ -149,6 +211,10 @@ ENTRY(entry_SYSCALL_64)
 	 */
 
 	swapgs
+	/*
+	 * This path is not taken when PAGE_TABLE_ISOLATION is disabled so it
+	 * is not required to switch CR3.
+	 */
 	movq	%rsp, PER_CPU_VAR(rsp_scratch)
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
@@ -330,8 +396,25 @@ syscall_return_via_sysret:
 	popq	%rsi	/* skip rcx */
 	popq	%rdx
 	popq	%rsi
+
+	/*
+	 * Now all regs are restored except RSP and RDI.
+	 * Save old stack pointer and switch to trampoline stack.
+	 */
+	movq	%rsp, %rdi
+	movq	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+
+	pushq	RSP-RDI(%rdi)	/* RSP */
+	pushq	(%rdi)		/* RDI */
+
+	/*
+	 * We are on the trampoline stack.  All regs except RDI are live.
+	 * We can do future final exit work right here.
+	 */
+	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+
 	popq	%rdi
-	movq	RSP-ORIG_RAX(%rsp), %rsp
+	popq	%rsp
 	USERGS_SYSRET64
 END(entry_SYSCALL_64)
@@ -466,12 +549,13 @@ END(irq_entries_start)
 
 .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
 #ifdef CONFIG_DEBUG_ENTRY
-	pushfq
-	testl $X86_EFLAGS_IF, (%rsp)
+	pushq %rax
+	SAVE_FLAGS(CLBR_RAX)
+	testl $X86_EFLAGS_IF, %eax
 	jz .Lokay_\@
 	ud2
 .Lokay_\@:
-	addq $8, %rsp
+	popq %rax
 #endif
 .endm
@@ -563,6 +647,13 @@ END(irq_entries_start)
 /* 0(%rsp): ~(interrupt number) */
 .macro interrupt func
 	cld
+
+	testb	$3, CS-ORIG_RAX(%rsp)
+	jz	1f
+	SWAPGS
+	call	switch_to_thread_stack
+1:
+
 	ALLOC_PT_GPREGS_ON_STACK
 	SAVE_C_REGS
 	SAVE_EXTRA_REGS
@@ -572,12 +663,8 @@ END(irq_entries_start)
 	jz	1f
 
 	/*
-	 * IRQ from user mode.  Switch to kernel gsbase and inform context
-	 * tracking that we're in kernel mode.
-	 */
-	SWAPGS
-
-	/*
+	 * IRQ from user mode.
+	 *
 	 * We need to tell lockdep that IRQs are off.  We can't do this until
 	 * we fix gsbase, and we should do it before enter_from_user_mode
 	 * (which can take locks).  Since TRACE_IRQS_OFF idempotent,
@@ -630,10 +717,43 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
 	ud2
 1:
 #endif
-	SWAPGS
 	POP_EXTRA_REGS
-	POP_C_REGS
-	addq	$8, %rsp	/* skip regs->orig_ax */
+	popq	%r11
+	popq	%r10
+	popq	%r9
+	popq	%r8
+	popq	%rax
+	popq	%rcx
+	popq	%rdx
+	popq	%rsi
+
+	/*
+	 * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
+	 * Save old stack pointer and switch to trampoline stack.
+	 */
+	movq	%rsp, %rdi
+	movq	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+
+	/* Copy the IRET frame to the trampoline stack. */
+	pushq	6*8(%rdi)	/* SS */
+	pushq	5*8(%rdi)	/* RSP */
+	pushq	4*8(%rdi)	/* EFLAGS */
+	pushq	3*8(%rdi)	/* CS */
+	pushq	2*8(%rdi)	/* RIP */
+
+	/* Push user RDI on the trampoline stack. */
+	pushq	(%rdi)
+
+	/*
+	 * We are on the trampoline stack.  All regs except RDI are live.
+	 * We can do future final exit work right here.
+	 */
+
+	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+
+	/* Restore RDI. */
+	popq	%rdi
+	SWAPGS
 	INTERRUPT_RETURN
@@ -713,7 +833,9 @@ native_irq_return_ldt:
 	 */
 
 	pushq	%rdi				/* Stash user RDI */
-	SWAPGS
+	SWAPGS					/* to kernel GS */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi	/* to kernel CR3 */
+
 	movq	PER_CPU_VAR(espfix_waddr), %rdi
 	movq	%rax, (0*8)(%rdi)		/* user RAX */
 	movq	(1*8)(%rsp), %rax		/* user RIP */
@@ -729,7 +851,6 @@ native_irq_return_ldt:
 	/* Now RAX == RSP. */
 
 	andl	$0xffff0000, %eax		/* RAX = (RSP & 0xffff0000) */
-	popq	%rdi				/* Restore user RDI */
 
 	/*
 	 * espfix_stack[31:16] == 0.  The page tables are set up such that
@@ -740,7 +861,11 @@ native_irq_return_ldt:
 	 * still points to an RO alias of the ESPFIX stack.
 	 */
 	orq	PER_CPU_VAR(espfix_stack), %rax
-	SWAPGS
+
+	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+	SWAPGS					/* to user GS */
+	popq	%rdi				/* Restore user RDI */
+
 	movq	%rax, %rsp
 	UNWIND_HINT_IRET_REGS offset=8
@@ -829,7 +954,35 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
 /*
  * Exception entry points.
  */
-#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
+#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
+
+/*
+ * Switch to the thread stack.  This is called with the IRET frame and
+ * orig_ax on the stack.  (That is, RDI..R12 are not on the stack and
+ * space has not been allocated for them.)
+ */
+ENTRY(switch_to_thread_stack)
+	UNWIND_HINT_FUNC
+
+	pushq	%rdi
+	/* Need to switch before accessing the thread stack. */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+	movq	%rsp, %rdi
+	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
+
+	pushq	7*8(%rdi)		/* regs->ss */
+	pushq	6*8(%rdi)		/* regs->rsp */
+	pushq	5*8(%rdi)		/* regs->eflags */
+	pushq	4*8(%rdi)		/* regs->cs */
+	pushq	3*8(%rdi)		/* regs->ip */
+	pushq	2*8(%rdi)		/* regs->orig_ax */
+	pushq	8(%rdi)			/* return address */
+	UNWIND_HINT_FUNC
+
+	movq	(%rdi), %rdi
+	ret
+END(switch_to_thread_stack)
+
 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
 ENTRY(\sym)
@@ -848,11 +1001,12 @@ ENTRY(\sym)
 
 	ALLOC_PT_GPREGS_ON_STACK
 
-	.if \paranoid
-	.if \paranoid == 1
+	.if \paranoid < 2
 	testb	$3, CS(%rsp)		/* If coming from userspace, switch stacks */
-	jnz	1f
+	jnz	.Lfrom_usermode_switch_stack_\@
 	.endif
+
+	.if \paranoid
 	call	paranoid_entry
 	.else
 	call	error_entry
@@ -894,20 +1048,15 @@ ENTRY(\sym)
 	jmp	error_exit
 	.endif
 
-	.if \paranoid == 1
+	.if \paranoid < 2
 	/*
-	 * Paranoid entry from userspace.  Switch stacks and treat it
+	 * Entry from userspace.  Switch stacks and treat it
 	 * as a normal entry.  This means that paranoid handlers
 	 * run in real process context if user_mode(regs).
 	 */
-1:
+.Lfrom_usermode_switch_stack_\@:
 	call	error_entry
 
-	movq	%rsp, %rdi			/* pt_regs pointer */
-	call	sync_regs
-	movq	%rax, %rsp			/* switch stack */
-
 	movq	%rsp, %rdi			/* pt_regs pointer */
 
 	.if \has_error_code
@@ -1119,7 +1268,11 @@ ENTRY(paranoid_entry)
 	js	1f				/* negative -> in kernel */
 	SWAPGS
 	xorl	%ebx, %ebx
-1:	ret
+
+1:
+	SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
+
+	ret
 END(paranoid_entry)
 
 /*
@@ -1141,6 +1294,7 @@ ENTRY(paranoid_exit)
 	testl	%ebx, %ebx			/* swapgs needed? */
 	jnz	.Lparanoid_exit_no_swapgs
 	TRACE_IRQS_IRETQ
+	RESTORE_CR3	scratch_reg=%rbx save_reg=%r14
 	SWAPGS_UNSAFE_STACK
 	jmp	.Lparanoid_exit_restore
 .Lparanoid_exit_no_swapgs:
@@ -1168,8 +1322,18 @@ ENTRY(error_entry)
 	 * from user mode due to an IRET fault.
 	 */
 	SWAPGS
+	/* We have user CR3.  Change to kernel CR3. */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 
 .Lerror_entry_from_usermode_after_swapgs:
+	/* Put us onto the real thread stack. */
+	popq	%r12				/* save return addr in %12 */
+	movq	%rsp, %rdi			/* arg0 = pt_regs pointer */
+	call	sync_regs
+	movq	%rax, %rsp			/* switch stack */
+	ENCODE_FRAME_POINTER
+	pushq	%r12
+
 	/*
 	 * We need to tell lockdep that IRQs are off.  We can't do this until
 	 * we fix gsbase, and we should do it before enter_from_user_mode
@@ -1206,6 +1370,7 @@ ENTRY(error_entry)
 	 * .Lgs_change's error handler with kernel gsbase.
 	 */
 	SWAPGS
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 	jmp .Lerror_entry_done
 
 .Lbstep_iret:
@@ -1215,10 +1380,11 @@ ENTRY(error_entry)
 
 .Lerror_bad_iret:
 	/*
-	 * We came from an IRET to user mode, so we have user gsbase.
-	 * Switch to kernel gsbase:
+	 * We came from an IRET to user mode, so we have user
+	 * gsbase and CR3.  Switch to kernel gsbase and CR3:
 	 */
 	SWAPGS
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 
 	/*
 	 * Pretend that the exception came from user mode: set up pt_regs
@@ -1250,6 +1416,10 @@ END(error_exit)
 /*
  * Runs on exception stack.  Xen PV does not go through this path at all,
  * so we can use real assembly here.
+ *
+ * Registers:
+ *	%r14: Used to save/restore the CR3 of the interrupted context
+ *	      when PAGE_TABLE_ISOLATION is in use.  Do not clobber.
  */
 ENTRY(nmi)
 	UNWIND_HINT_IRET_REGS
@@ -1313,6 +1483,7 @@ ENTRY(nmi)
 
 	swapgs
 	cld
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
 	movq	%rsp, %rdx
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 	UNWIND_HINT_IRET_REGS base=%rdx offset=8
@@ -1565,6 +1736,8 @@ end_repeat_nmi:
 	movq	$-1, %rsi
 	call	do_nmi
 
+	RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
+
 	testl	%ebx, %ebx			/* swapgs needed? */
 	jnz	nmi_restore
 nmi_swapgs:
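The exit paths above copy the five-word IRET frame (SS, RSP, EFLAGS, CS, RIP) plus the saved user RDI from the task stack onto the per-CPU trampoline stack before the CR3 switch. The C program below is only a sketch of that copy order for readers who prefer C to the pushq sequence; the slot layout is taken from the comments in the hunk and nothing here is part of the merge itself.

#include <stdint.h>
#include <stdio.h>

/* Slots as the asm above addresses them: (%rdi), 2*8(%rdi) .. 6*8(%rdi). */
enum { F_RDI, F_ORIG_AX, F_RIP, F_CS, F_EFLAGS, F_RSP, F_SS, F_WORDS };

/* Push SS, RSP, EFLAGS, CS, RIP, then user RDI onto a downward-growing stack. */
static uint64_t *copy_to_trampoline(const uint64_t *task, uint64_t *tramp_top)
{
	*--tramp_top = task[F_SS];
	*--tramp_top = task[F_RSP];
	*--tramp_top = task[F_EFLAGS];
	*--tramp_top = task[F_CS];
	*--tramp_top = task[F_RIP];
	*--tramp_top = task[F_RDI];
	return tramp_top;   /* new stack pointer; user RDI sits on top, as in the asm */
}

int main(void)
{
	uint64_t task[F_WORDS] = { 0xd1, 0, 0x401000, 0x33, 0x202, 0x7ffff000, 0x2b };
	uint64_t tramp[8];
	uint64_t *sp = copy_to_trampoline(task, tramp + 8);

	printf("top of trampoline stack (user RDI): %#llx\n", (unsigned long long)*sp);
	return 0;
}

Once the frame lives on the trampoline stack, nothing after SWITCH_TO_USER_CR3_STACK has to touch the (now unmapped) task stack again, which is the whole point of the copy.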
@@ -48,7 +48,11 @@
  */
 ENTRY(entry_SYSENTER_compat)
 	/* Interrupts are off on entry. */
-	SWAPGS_UNSAFE_STACK
+	SWAPGS
+
+	/* We are about to clobber %rsp anyway, clobbering here is OK */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
 	/*
@@ -215,6 +219,12 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
 	pushq	$0			/* pt_regs->r14 = 0 */
 	pushq	$0			/* pt_regs->r15 = 0 */
 
+	/*
+	 * We just saved %rdi so it is safe to clobber.  It is not
+	 * preserved during the C calls inside TRACE_IRQS_OFF anyway.
+	 */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+
 	/*
 	 * User mode is traced as though IRQs are on, and SYSENTER
 	 * turned them off.
@@ -256,10 +266,22 @@ sysret32_from_system_call:
 	 * when the system call started, which is already known to user
 	 * code.  We zero R8-R10 to avoid info leaks.
 	 */
+	movq	RSP-ORIG_RAX(%rsp), %rsp
+
+	/*
+	 * The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored
+	 * on the process stack which is not mapped to userspace and
+	 * not readable after we SWITCH_TO_USER_CR3.  Delay the CR3
+	 * switch until after after the last reference to the process
+	 * stack.
+	 *
+	 * %r8/%r9 are zeroed before the sysret, thus safe to clobber.
+	 */
+	SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
+
 	xorq	%r8, %r8
 	xorq	%r9, %r9
 	xorq	%r10, %r10
-	movq	RSP-ORIG_RAX(%rsp), %rsp
 	swapgs
 	sysretl
 END(entry_SYSCALL_compat)
@@ -306,8 +328,11 @@ ENTRY(entry_INT80_compat)
 	 */
 	movl	%eax, %eax
 
-	/* Construct struct pt_regs on stack (iret frame is already on stack) */
 	pushq	%rax			/* pt_regs->orig_ax */
+
+	/* switch to thread stack expects orig_ax to be pushed */
+	call	switch_to_thread_stack
+
 	pushq	%rdi			/* pt_regs->di */
 	pushq	%rsi			/* pt_regs->si */
 	pushq	%rdx			/* pt_regs->dx */
@@ -37,6 +37,7 @@
 #include <asm/unistd.h>
 #include <asm/fixmap.h>
 #include <asm/traps.h>
+#include <asm/paravirt.h>
 
 #define CREATE_TRACE_POINTS
 #include "vsyscall_trace.h"
@@ -138,6 +139,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 
 	WARN_ON_ONCE(address != regs->ip);
 
+	/* This should be unreachable in NATIVE mode. */
+	if (WARN_ON(vsyscall_mode == NATIVE))
+		return false;
+
 	if (vsyscall_mode == NONE) {
 		warn_bad_vsyscall(KERN_INFO, regs,
 				  "vsyscall attempted with vsyscall=none");
@@ -329,16 +334,47 @@ int in_gate_area_no_mm(unsigned long addr)
 	return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
 }
 
+/*
+ * The VSYSCALL page is the only user-accessible page in the kernel address
+ * range.  Normally, the kernel page tables can have _PAGE_USER clear, but
+ * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls
+ * are enabled.
+ *
+ * Some day we may create a "minimal" vsyscall mode in which we emulate
+ * vsyscalls but leave the page not present.  If so, we skip calling
+ * this.
+ */
+void __init set_vsyscall_pgtable_user_bits(pgd_t *root)
+{
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	pgd = pgd_offset_pgd(root, VSYSCALL_ADDR);
+	set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
+	p4d = p4d_offset(pgd, VSYSCALL_ADDR);
+#if CONFIG_PGTABLE_LEVELS >= 5
+	p4d->p4d |= _PAGE_USER;
+#endif
+	pud = pud_offset(p4d, VSYSCALL_ADDR);
+	set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER));
+	pmd = pmd_offset(pud, VSYSCALL_ADDR);
+	set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER));
+}
+
 void __init map_vsyscall(void)
 {
 	extern char __vsyscall_page;
 	unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
 
-	if (vsyscall_mode != NONE)
+	if (vsyscall_mode != NONE) {
 		__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
 			     vsyscall_mode == NATIVE
 			     ? PAGE_KERNEL_VSYSCALL
 			     : PAGE_KERNEL_VVAR);
+		set_vsyscall_pgtable_user_bits(swapper_pg_dir);
+	}
 
 	BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
 		     (unsigned long)VSYSCALL_ADDR);
@@ -3847,6 +3847,8 @@ static struct attribute *intel_pmu_attrs[] = {
 
 __init int intel_pmu_init(void)
 {
+	struct attribute **extra_attr = NULL;
+	struct attribute **to_free = NULL;
 	union cpuid10_edx edx;
 	union cpuid10_eax eax;
 	union cpuid10_ebx ebx;
@@ -3854,7 +3856,6 @@ __init int intel_pmu_init(void)
 	unsigned int unused;
 	struct extra_reg *er;
 	int version, i;
-	struct attribute **extra_attr = NULL;
 	char *name;
 
 	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
@@ -4294,6 +4295,7 @@ __init int intel_pmu_init(void)
 		extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
 			hsw_format_attr : nhm_format_attr;
 		extra_attr = merge_attr(extra_attr, skl_format_attr);
+		to_free = extra_attr;
 		x86_pmu.cpu_events = get_hsw_events_attrs();
 		intel_pmu_pebs_data_source_skl(
 			boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
@@ -4401,6 +4403,7 @@ __init int intel_pmu_init(void)
 		pr_cont("full-width counters, ");
 	}
 
+	kfree(to_free);
 	return 0;
 }
@@ -3,16 +3,18 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 
+#include <asm/cpu_entry_area.h>
 #include <asm/perf_event.h>
 #include <asm/insn.h>
 
 #include "../perf_event.h"
 
+/* Waste a full page so it can be mapped into the cpu_entry_area */
+DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
+
 /* The size of a BTS record in bytes: */
 #define BTS_RECORD_SIZE		24
 
-#define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
-#define PEBS_BUFFER_SIZE	(PAGE_SIZE << 4)
 #define PEBS_FIXUP_SIZE		PAGE_SIZE
 
 /*
@@ -279,17 +281,52 @@ void fini_debug_store_on_cpu(int cpu)
 
 static DEFINE_PER_CPU(void *, insn_buffer);
 
+static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
+{
+	phys_addr_t pa;
+	size_t msz = 0;
+
+	pa = virt_to_phys(addr);
+	for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
+		cea_set_pte(cea, pa, prot);
+}
+
+static void ds_clear_cea(void *cea, size_t size)
+{
+	size_t msz = 0;
+
+	for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
+		cea_set_pte(cea, 0, PAGE_NONE);
+}
+
+static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
+{
+	unsigned int order = get_order(size);
+	int node = cpu_to_node(cpu);
+	struct page *page;
+
+	page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
+	return page ? page_address(page) : NULL;
+}
+
+static void dsfree_pages(const void *buffer, size_t size)
+{
+	if (buffer)
+		free_pages((unsigned long)buffer, get_order(size));
+}
+
 static int alloc_pebs_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-	int node = cpu_to_node(cpu);
-	int max;
-	void *buffer, *ibuffer;
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	size_t bsiz = x86_pmu.pebs_buffer_size;
+	int max, node = cpu_to_node(cpu);
+	void *buffer, *ibuffer, *cea;
 
 	if (!x86_pmu.pebs)
 		return 0;
 
-	buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
+	buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
 	if (unlikely(!buffer))
 		return -ENOMEM;
 
@@ -300,25 +337,27 @@ static int alloc_pebs_buffer(int cpu)
 	if (x86_pmu.intel_cap.pebs_format < 2) {
 		ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
 		if (!ibuffer) {
-			kfree(buffer);
+			dsfree_pages(buffer, bsiz);
 			return -ENOMEM;
 		}
 		per_cpu(insn_buffer, cpu) = ibuffer;
 	}
-	max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;
-	ds->pebs_buffer_base = (u64)(unsigned long)buffer;
+	hwev->ds_pebs_vaddr = buffer;
+	/* Update the cpu entry area mapping */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
+	ds->pebs_buffer_base = (unsigned long) cea;
+	ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
 	ds->pebs_index = ds->pebs_buffer_base;
-	ds->pebs_absolute_maximum = ds->pebs_buffer_base +
-		max * x86_pmu.pebs_record_size;
+	max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
+	ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
 
 	return 0;
 }
 
 static void release_pebs_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	void *cea;
 
 	if (!ds || !x86_pmu.pebs)
 		return;
@@ -326,73 +365,70 @@ static void release_pebs_buffer(int cpu)
 	kfree(per_cpu(insn_buffer, cpu));
 	per_cpu(insn_buffer, cpu) = NULL;
 
-	kfree((void *)(unsigned long)ds->pebs_buffer_base);
+	/* Clear the fixmap */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
+	ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
 	ds->pebs_buffer_base = 0;
+	dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
+	hwev->ds_pebs_vaddr = NULL;
 }
 
 static int alloc_bts_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-	int node = cpu_to_node(cpu);
-	int max, thresh;
-	void *buffer;
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	void *buffer, *cea;
+	int max;
 
 	if (!x86_pmu.bts)
 		return 0;
 
-	buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+	buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
 	if (unlikely(!buffer)) {
 		WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
 		return -ENOMEM;
 	}
-
-	max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
-	thresh = max / 16;
-
-	ds->bts_buffer_base = (u64)(unsigned long)buffer;
+	hwev->ds_bts_vaddr = buffer;
+	/* Update the fixmap */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
+	ds->bts_buffer_base = (unsigned long) cea;
+	ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
 	ds->bts_index = ds->bts_buffer_base;
-	ds->bts_absolute_maximum = ds->bts_buffer_base +
-		max * BTS_RECORD_SIZE;
-	ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
-		thresh * BTS_RECORD_SIZE;
+	max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE);
+	ds->bts_absolute_maximum = ds->bts_buffer_base + max;
+	ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16);
 
 	return 0;
 }
 
 static void release_bts_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	void *cea;
 
 	if (!ds || !x86_pmu.bts)
 		return;
 
-	kfree((void *)(unsigned long)ds->bts_buffer_base);
+	/* Clear the fixmap */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
+	ds_clear_cea(cea, BTS_BUFFER_SIZE);
 	ds->bts_buffer_base = 0;
+	dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
+	hwev->ds_bts_vaddr = NULL;
 }
 
 static int alloc_ds_buffer(int cpu)
 {
-	int node = cpu_to_node(cpu);
-	struct debug_store *ds;
-
-	ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
-	if (unlikely(!ds))
-		return -ENOMEM;
+	struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
 
+	memset(ds, 0, sizeof(*ds));
 	per_cpu(cpu_hw_events, cpu).ds = ds;
 
 	return 0;
 }
 
 static void release_ds_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-	if (!ds)
-		return;
-
 	per_cpu(cpu_hw_events, cpu).ds = NULL;
-	kfree(ds);
 }
 
 void release_ds_buffers(void)
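The reworked alloc_pebs_buffer()/alloc_bts_buffer() code rounds the buffer size down to a whole number of records and derives the interrupt threshold from that rounded size. A small standalone sketch of the same arithmetic follows; the sizes are just the example values used in the hunk above, nothing new is being specified.

#include <stddef.h>
#include <stdio.h>

int main(void)
{
	size_t page_size  = 4096;
	size_t bts_buf_sz = page_size << 4;   /* BTS_BUFFER_SIZE in the hunk above */
	size_t bts_record = 24;               /* BTS_RECORD_SIZE */

	/* max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE) */
	size_t max    = bts_record * (bts_buf_sz / bts_record);
	size_t thresh = max - (max / 16);     /* interrupt threshold at 15/16 of the buffer */

	printf("usable bytes: %zu (of %zu), threshold offset: %zu\n",
	       max, bts_buf_sz, thresh);
	return 0;
}

Expressing max in bytes (rather than in records, as the old code did) lets the absolute maximum and threshold be computed with plain additions on the cpu_entry_area alias address.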
@@ -14,6 +14,8 @@
 
 #include <linux/perf_event.h>
 
+#include <asm/intel_ds.h>
+
 /* To enable MSR tracing please use the generic trace points. */
 
 /*
@@ -77,8 +79,6 @@ struct amd_nb {
 	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
 };
 
-/* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS		8
 #define PEBS_COUNTER_MASK	((1ULL << MAX_PEBS_EVENTS) - 1)
 
 /*
@@ -95,23 +95,6 @@ struct amd_nb {
 	PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \
 	PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)
 
-/*
- * A debug store configuration.
- *
- * We only support architectures that use 64bit fields.
- */
-struct debug_store {
-	u64	bts_buffer_base;
-	u64	bts_index;
-	u64	bts_absolute_maximum;
-	u64	bts_interrupt_threshold;
-	u64	pebs_buffer_base;
-	u64	pebs_index;
-	u64	pebs_absolute_maximum;
-	u64	pebs_interrupt_threshold;
-	u64	pebs_event_reset[MAX_PEBS_EVENTS];
-};
-
 #define PEBS_REGS \
 	(PERF_REG_X86_AX | \
 	 PERF_REG_X86_BX | \
@@ -216,6 +199,8 @@ struct cpu_hw_events {
 	 * Intel DebugStore bits
 	 */
 	struct debug_store	*ds;
+	void			*ds_pebs_vaddr;
+	void			*ds_bts_vaddr;
 	u64			pebs_enabled;
 	int			n_pebs;
 	int			n_large_pebs;
@@ -136,6 +136,7 @@
 #endif
 
 #ifndef __ASSEMBLY__
+#ifndef __BPF__
 /*
  * This output constraint should be used for any inline asm which has a "call"
  * instruction.  Otherwise the asm may be inserted before the frame pointer
@@ -145,5 +146,6 @@
 register unsigned long current_stack_pointer asm(_ASM_SP);
 #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
 #endif
+#endif
 
 #endif /* _ASM_X86_ASM_H */
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef _ASM_X86_CPU_ENTRY_AREA_H
+#define _ASM_X86_CPU_ENTRY_AREA_H
+
+#include <linux/percpu-defs.h>
+#include <asm/processor.h>
+#include <asm/intel_ds.h>
+
+/*
+ * cpu_entry_area is a percpu region that contains things needed by the CPU
+ * and early entry/exit code.  Real types aren't used for all fields here
+ * to avoid circular header dependencies.
+ *
+ * Every field is a virtual alias of some other allocated backing store.
+ * There is no direct allocation of a struct cpu_entry_area.
+ */
+struct cpu_entry_area {
+	char gdt[PAGE_SIZE];
+
+	/*
+	 * The GDT is just below entry_stack and thus serves (on x86_64) as
+	 * a a read-only guard page.
+	 */
+	struct entry_stack_page entry_stack_page;
+
+	/*
+	 * On x86_64, the TSS is mapped RO.  On x86_32, it's mapped RW because
+	 * we need task switches to work, and task switches write to the TSS.
+	 */
+	struct tss_struct tss;
+
+	char entry_trampoline[PAGE_SIZE];
+
+#ifdef CONFIG_X86_64
+	/*
+	 * Exception stacks used for IST entries.
+	 *
+	 * In the future, this should have a separate slot for each stack
+	 * with guard pages between them.
+	 */
+	char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
+#endif
+#ifdef CONFIG_CPU_SUP_INTEL
+	/*
+	 * Per CPU debug store for Intel performance monitoring. Wastes a
+	 * full page at the moment.
+	 */
+	struct debug_store cpu_debug_store;
+	/*
+	 * The actual PEBS/BTS buffers must be mapped to user space
+	 * Reserve enough fixmap PTEs.
+	 */
+	struct debug_store_buffers cpu_debug_buffers;
+#endif
+};
+
+#define CPU_ENTRY_AREA_SIZE	(sizeof(struct cpu_entry_area))
+#define CPU_ENTRY_AREA_TOT_SIZE	(CPU_ENTRY_AREA_SIZE * NR_CPUS)
+
+DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
+
+extern void setup_cpu_entry_areas(void);
+extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
+
+#define	CPU_ENTRY_AREA_RO_IDT		CPU_ENTRY_AREA_BASE
+#define CPU_ENTRY_AREA_PER_CPU		(CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
+
+#define CPU_ENTRY_AREA_RO_IDT_VADDR	((void *)CPU_ENTRY_AREA_RO_IDT)
+
+#define CPU_ENTRY_AREA_MAP_SIZE			\
+	(CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)
+
+extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
+
+static inline struct entry_stack *cpu_entry_stack(int cpu)
+{
+	return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
+}
+
+#endif
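Since every CPU gets one fixed-size struct cpu_entry_area mapped starting at CPU_ENTRY_AREA_PER_CPU, finding a given CPU's copy is plain array arithmetic on a virtual base. The toy userspace model below illustrates only that indexing; the struct layout and base address are stand-ins, not the kernel's actual layout.

#include <stdint.h>
#include <stdio.h>

struct toy_entry_area {                       /* stand-in for struct cpu_entry_area */
	char gdt[4096];
	char entry_stack_page[4096];
	char tss[4096];
	char entry_trampoline[4096];
};

#define TOY_AREA_BASE 0xfffffe0000001000ULL   /* made-up CPU_ENTRY_AREA_PER_CPU */

static uint64_t toy_get_cpu_entry_area(int cpu)
{
	return TOY_AREA_BASE + (uint64_t)cpu * sizeof(struct toy_entry_area);
}

int main(void)
{
	printf("cpu0 area: %#llx\n", (unsigned long long)toy_get_cpu_entry_area(0));
	printf("cpu3 area: %#llx\n", (unsigned long long)toy_get_cpu_entry_area(3));
	return 0;
}

Each field in the real structure is a virtual alias of separately allocated backing pages, which is why the header stresses that nothing ever allocates a struct cpu_entry_area directly.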
@@ -135,6 +135,8 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
 	set_bit(bit, (unsigned long *)cpu_caps_set);	\
 } while (0)
 
+#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
+
 #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
 /*
  * Static testing of CPU features. Used the same as boot_cpu_has().
@@ -197,11 +197,12 @@
 #define X86_FEATURE_CAT_L3		( 7*32+ 4) /* Cache Allocation Technology L3 */
 #define X86_FEATURE_CAT_L2		( 7*32+ 5) /* Cache Allocation Technology L2 */
 #define X86_FEATURE_CDP_L3		( 7*32+ 6) /* Code and Data Prioritization L3 */
+#define X86_FEATURE_INVPCID_SINGLE	( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
 
 #define X86_FEATURE_HW_PSTATE		( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK	( 7*32+ 9) /* AMD ProcFeedbackInterface */
 #define X86_FEATURE_SME			( 7*32+10) /* AMD Secure Memory Encryption */
+#define X86_FEATURE_PTI			( 7*32+11) /* Kernel Page Table Isolation enabled */
 #define X86_FEATURE_INTEL_PPIN		( 7*32+14) /* Intel Processor Inventory Number */
 #define X86_FEATURE_INTEL_PT		( 7*32+15) /* Intel Processor Trace */
 #define X86_FEATURE_AVX512_4VNNIW	( 7*32+16) /* AVX-512 Neural Network Instructions */
@@ -340,5 +341,6 @@
 #define X86_BUG_SWAPGS_FENCE		X86_BUG(11) /* SWAPGS without input dep on GS */
 #define X86_BUG_MONITOR			X86_BUG(12) /* IPI required to wake up remote CPU */
 #define X86_BUG_AMD_E400		X86_BUG(13) /* CPU is among the affected by Erratum 400 */
+#define X86_BUG_CPU_INSECURE		X86_BUG(14) /* CPU is insecure and needs kernel page table isolation */
 
 #endif /* _ASM_X86_CPUFEATURES_H */
@@ -7,6 +7,7 @@
 #include <asm/mmu.h>
 #include <asm/fixmap.h>
 #include <asm/irq_vectors.h>
+#include <asm/cpu_entry_area.h>
 
 #include <linux/smp.h>
 #include <linux/percpu.h>
@@ -20,6 +21,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
 
 	desc->type		= (info->read_exec_only ^ 1) << 1;
 	desc->type	       |= info->contents << 2;
+	/* Set the ACCESS bit so it can be mapped RO */
+	desc->type	       |= 1;
 
 	desc->s			= 1;
 	desc->dpl		= 0x3;
@@ -60,17 +63,10 @@ static inline struct desc_struct *get_current_gdt_rw(void)
 	return this_cpu_ptr(&gdt_page)->gdt;
 }
 
-/* Get the fixmap index for a specific processor */
-static inline unsigned int get_cpu_gdt_ro_index(int cpu)
-{
-	return FIX_GDT_REMAP_BEGIN + cpu;
-}
-
 /* Provide the fixmap address of the remapped GDT */
 static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
 {
-	unsigned int idx = get_cpu_gdt_ro_index(cpu);
-	return (struct desc_struct *)__fix_to_virt(idx);
+	return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt;
 }
 
 /* Provide the current read-only GDT */
@@ -185,7 +181,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr,
 #endif
 }
 
-static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
+static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr)
 {
 	struct desc_struct *d = get_cpu_gdt_rw(cpu);
 	tss_desc tss;
@@ -50,6 +50,12 @@
 # define DISABLE_LA57	(1<<(X86_FEATURE_LA57 & 31))
 #endif
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+# define DISABLE_PTI		0
+#else
+# define DISABLE_PTI		(1 << (X86_FEATURE_PTI & 31))
+#endif
+
 /*
  * Make sure to add features to the correct mask
  */
@@ -60,7 +66,7 @@
 #define DISABLED_MASK4	(DISABLE_PCID)
 #define DISABLED_MASK5	0
 #define DISABLED_MASK6	0
-#define DISABLED_MASK7	0
+#define DISABLED_MASK7	(DISABLE_PTI)
 #define DISABLED_MASK8	0
 #define DISABLED_MASK9	(DISABLE_MPX)
 #define DISABLED_MASK10	0
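DISABLE_PTI works like the other DISABLED_MASK entries: the feature's bit number modulo 32 becomes a mask in its 32-bit word, and the compile-time feature test treats a set mask bit as "this feature can never be present", so a kernel built without CONFIG_PAGE_TABLE_ISOLATION can drop the PTI paths entirely. The following is a hedged, standalone sketch of just that bit arithmetic; word 7 and bit 11 are taken from the hunks above, and the helper name is made up for illustration.

#include <stdio.h>

#define FEATURE_PTI      (7 * 32 + 11)               /* X86_FEATURE_PTI from the diff */
#define DISABLE_PTI_SET  (1u << (FEATURE_PTI & 31))  /* value when CONFIG_PAGE_TABLE_ISOLATION=n */

/* Compile-time style test: a feature is dead if its bit is set in its word's disabled mask. */
static int feature_disabled(unsigned int feature, unsigned int mask_word7)
{
	if (feature / 32 != 7)
		return 0;                            /* only word 7 is modelled here */
	return !!(mask_word7 & (1u << (feature & 31)));
}

int main(void)
{
	printf("PTI disabled (mask = 0):           %d\n", feature_disabled(FEATURE_PTI, 0));
	printf("PTI disabled (mask = DISABLE_PTI): %d\n", feature_disabled(FEATURE_PTI, DISABLE_PTI_SET));
	return 0;
}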
@@ -2,7 +2,7 @@
 #ifndef _ASM_X86_ESPFIX_H
 #define _ASM_X86_ESPFIX_H
 
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_ESPFIX64
 
 #include <asm/percpu.h>
 
@@ -11,7 +11,8 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);
 
 extern void init_espfix_bsp(void);
 extern void init_espfix_ap(int cpu);
-
-#endif /* CONFIG_X86_64 */
+#else
+static inline void init_espfix_ap(int cpu) { }
+#endif
 
 #endif /* _ASM_X86_ESPFIX_H */
@@ -44,7 +44,6 @@ extern unsigned long __FIXADDR_TOP;
 			 PAGE_SIZE)
 #endif
 
-
 /*
  * Here we define all the compile-time 'special' virtual
  * addresses. The point is to have a constant address at
@@ -84,7 +83,6 @@ enum fixed_addresses {
 	FIX_IO_APIC_BASE_0,
 	FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
 #endif
-	FIX_RO_IDT,	/* Virtual mapping for read-only IDT */
 #ifdef CONFIG_X86_32
 	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings */
 	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
@@ -100,9 +98,6 @@ enum fixed_addresses {
 #ifdef CONFIG_X86_INTEL_MID
 	FIX_LNW_VRTC,
 #endif
-	/* Fixmap entries to remap the GDTs, one per processor. */
-	FIX_GDT_REMAP_BEGIN,
-	FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
 
 #ifdef CONFIG_ACPI_APEI_GHES
 	/* Used for GHES mapping from assorted contexts */
@@ -143,7 +138,7 @@ enum fixed_addresses {
 extern void reserve_top_address(unsigned long reserve);
 
 #define FIXADDR_SIZE	(__end_of_permanent_fixed_addresses << PAGE_SHIFT)
-#define FIXADDR_START		(FIXADDR_TOP - FIXADDR_SIZE)
+#define FIXADDR_START	(FIXADDR_TOP - FIXADDR_SIZE)
 
 extern int fixmaps_set;
@@ -20,16 +20,7 @@
 #ifndef _ASM_X86_HYPERVISOR_H
 #define _ASM_X86_HYPERVISOR_H
 
-#ifdef CONFIG_HYPERVISOR_GUEST
-
-#include <asm/kvm_para.h>
-#include <asm/x86_init.h>
-#include <asm/xen/hypervisor.h>
-
-/*
- * x86 hypervisor information
- */
-
+/* x86 hypervisor types */
 enum x86_hypervisor_type {
 	X86_HYPER_NATIVE = 0,
 	X86_HYPER_VMWARE,
@@ -39,6 +30,12 @@ enum x86_hypervisor_type {
 	X86_HYPER_KVM,
 };
 
+#ifdef CONFIG_HYPERVISOR_GUEST
+
+#include <asm/kvm_para.h>
+#include <asm/x86_init.h>
+#include <asm/xen/hypervisor.h>
+
 struct hypervisor_x86 {
 	/* Hypervisor name */
 	const char *name;
@@ -58,7 +55,15 @@ struct hypervisor_x86 {
 
 extern enum x86_hypervisor_type x86_hyper_type;
 extern void init_hypervisor_platform(void);
+static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
+{
+	return x86_hyper_type == type;
+}
 #else
 static inline void init_hypervisor_platform(void) { }
+static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
+{
+	return type == X86_HYPER_NATIVE;
+}
 #endif /* CONFIG_HYPERVISOR_GUEST */
 #endif /* _ASM_X86_HYPERVISOR_H */
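The two hypervisor_is_type() definitions above let callers look identical whether or not CONFIG_HYPERVISOR_GUEST is set: with guest support the check compares against the detected x86_hyper_type, and without it only X86_HYPER_NATIVE can ever match. A small standalone sketch of that call pattern follows; the x86_hyper_type variable here is a stand-in for the value the kernel would detect at boot.

    #include <stdbool.h>
    #include <stdio.h>

    enum x86_hypervisor_type {
        X86_HYPER_NATIVE = 0,
        X86_HYPER_VMWARE,
        X86_HYPER_KVM,
    };

    /* Stand-in for the kernel's detected hypervisor type. */
    static enum x86_hypervisor_type x86_hyper_type = X86_HYPER_KVM;

    static bool hypervisor_is_type(enum x86_hypervisor_type type)
    {
        return x86_hyper_type == type;
    }

    int main(void)
    {
        /* A caller can branch on the platform without #ifdefs of its own. */
        if (hypervisor_is_type(X86_HYPER_KVM))
            printf("running as a KVM guest (in this demo)\n");
        else
            printf("not KVM\n");
        return 0;
    }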
@@ -0,0 +1,36 @@
+#ifndef _ASM_INTEL_DS_H
+#define _ASM_INTEL_DS_H
+
+#include <linux/percpu-defs.h>
+
+#define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
+#define PEBS_BUFFER_SIZE	(PAGE_SIZE << 4)
+
+/* The maximal number of PEBS events: */
+#define MAX_PEBS_EVENTS		8
+
+/*
+ * A debug store configuration.
+ *
+ * We only support architectures that use 64bit fields.
+ */
+struct debug_store {
+	u64	bts_buffer_base;
+	u64	bts_index;
+	u64	bts_absolute_maximum;
+	u64	bts_interrupt_threshold;
+	u64	pebs_buffer_base;
+	u64	pebs_index;
+	u64	pebs_absolute_maximum;
+	u64	pebs_interrupt_threshold;
+	u64	pebs_event_reset[MAX_PEBS_EVENTS];
+} __aligned(PAGE_SIZE);
+
+DECLARE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
+
+struct debug_store_buffers {
+	char	bts_buffer[BTS_BUFFER_SIZE];
+	char	pebs_buffer[PEBS_BUFFER_SIZE];
+};
+
+#endif
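As a quick check of the sizes in this new header, BTS_BUFFER_SIZE and PEBS_BUFFER_SIZE are both PAGE_SIZE << 4, i.e. sixteen pages each, and struct debug_store is forced to page alignment. A tiny sketch of that arithmetic, assuming the usual 4 KiB page size (an assumption, not stated in the hunk):

    #include <stdio.h>

    #define PAGE_SIZE_DEMO		4096UL	/* assumption: 4 KiB pages */
    #define BTS_BUFFER_SIZE_DEMO	(PAGE_SIZE_DEMO << 4)
    #define PEBS_BUFFER_SIZE_DEMO	(PAGE_SIZE_DEMO << 4)

    int main(void)
    {
        printf("BTS buffer:  %lu KiB\n", BTS_BUFFER_SIZE_DEMO / 1024);   /* 64 KiB */
        printf("PEBS buffer: %lu KiB\n", PEBS_BUFFER_SIZE_DEMO / 1024);  /* 64 KiB */
        printf("per-CPU debug_store_buffers: %lu KiB\n",
               (BTS_BUFFER_SIZE_DEMO + PEBS_BUFFER_SIZE_DEMO) / 1024);   /* 128 KiB */
        return 0;
    }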
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_INVPCID
+#define _ASM_X86_INVPCID
+
+static inline void __invpcid(unsigned long pcid, unsigned long addr,
+			     unsigned long type)
+{
+	struct { u64 d[2]; } desc = { { pcid, addr } };
+
+	/*
+	 * The memory clobber is because the whole point is to invalidate
+	 * stale TLB entries and, especially if we're flushing global
+	 * mappings, we don't want the compiler to reorder any subsequent
+	 * memory accesses before the TLB flush.
+	 *
+	 * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
+	 * invpcid (%rcx), %rax in long mode.
+	 */
+	asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
+		      : : "m" (desc), "a" (type), "c" (&desc) : "memory");
+}
+
+#define INVPCID_TYPE_INDIV_ADDR		0
+#define INVPCID_TYPE_SINGLE_CTXT	1
+#define INVPCID_TYPE_ALL_INCL_GLOBAL	2
+#define INVPCID_TYPE_ALL_NON_GLOBAL	3
+
+/* Flush all mappings for a given pcid and addr, not including globals. */
+static inline void invpcid_flush_one(unsigned long pcid,
+				     unsigned long addr)
+{
+	__invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invpcid_flush_single_context(unsigned long pcid)
+{
+	__invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invpcid_flush_all(void)
+{
+	__invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invpcid_flush_all_nonglobals(void)
+{
+	__invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
+}
+
+#endif /* _ASM_X86_INVPCID */
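The four helpers above all funnel into __invpcid() with one of the four INVPCID type codes (0 = single address, 1 = single PCID, 2 = everything including globals, 3 = everything except globals). Since INVPCID is a privileged instruction, here is a userspace-runnable sketch of the same dispatch with the instruction replaced by a logging stub; it only illustrates the call pattern, not the real TLB effect.

    #include <stdio.h>

    #define INVPCID_TYPE_INDIV_ADDR		0
    #define INVPCID_TYPE_SINGLE_CTXT		1
    #define INVPCID_TYPE_ALL_INCL_GLOBAL	2
    #define INVPCID_TYPE_ALL_NON_GLOBAL		3

    /* Stub standing in for the privileged instruction in the real header. */
    static void __invpcid(unsigned long pcid, unsigned long addr, unsigned long type)
    {
        printf("invpcid: type=%lu pcid=%lu addr=%#lx\n", type, pcid, addr);
    }

    static void invpcid_flush_one(unsigned long pcid, unsigned long addr)
    {
        __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
    }

    static void invpcid_flush_single_context(unsigned long pcid)
    {
        __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
    }

    int main(void)
    {
        invpcid_flush_one(5, 0x7f0000001000UL);  /* one address in PCID 5 */
        invpcid_flush_single_context(5);         /* everything tagged PCID 5 */
        return 0;
    }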
@@ -44,7 +44,7 @@ extern int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
 extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
 			      unsigned int nr_irqs);
 extern int mp_irqdomain_activate(struct irq_domain *domain,
-				 struct irq_data *irq_data, bool early);
+				 struct irq_data *irq_data, bool reserve);
 extern void mp_irqdomain_deactivate(struct irq_domain *domain,
 				    struct irq_data *irq_data);
 extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain);
@@ -142,6 +142,9 @@ static inline notrace unsigned long arch_local_irq_save(void)
 	swapgs;					\
 	sysretl
 
+#ifdef CONFIG_DEBUG_ENTRY
+#define SAVE_FLAGS(x)		pushfq; popq %rax
+#endif
 #else
 #define INTERRUPT_RETURN		iret
 #define ENABLE_INTERRUPTS_SYSEXIT	sti; sysexit
@@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long);
 extern int __must_check __die(const char *, struct pt_regs *, long);
 extern void show_stack_regs(struct pt_regs *regs);
 extern void __show_regs(struct pt_regs *regs, int all);
+extern void show_iret_regs(struct pt_regs *regs);
 extern unsigned long oops_begin(void);
 extern void oops_end(unsigned long, struct pt_regs *, int signr);
 
@@ -3,6 +3,7 @@
 #define _ASM_X86_MMU_H
 
 #include <linux/spinlock.h>
+#include <linux/rwsem.h>
 #include <linux/mutex.h>
 #include <linux/atomic.h>
 
@@ -27,7 +28,8 @@ typedef struct {
 	atomic64_t tlb_gen;
 
 #ifdef CONFIG_MODIFY_LDT_SYSCALL
-	struct ldt_struct *ldt;
+	struct rw_semaphore	ldt_usr_sem;
+	struct ldt_struct	*ldt;
 #endif
 
 #ifdef CONFIG_X86_64
@@ -50,22 +50,53 @@ struct ldt_struct {
 	 * call gates.  On native, we could merge the ldt_struct and LDT
 	 * allocations, but it's not worth trying to optimize.
 	 */
 	struct desc_struct	*entries;
 	unsigned int		nr_entries;
+
+	/*
+	 * If PTI is in use, then the entries array is not mapped while we're
+	 * in user mode.  The whole array will be aliased at the addressed
+	 * given by ldt_slot_va(slot).  We use two slots so that we can allocate
+	 * and map, and enable a new LDT without invalidating the mapping
+	 * of an older, still-in-use LDT.
+	 *
+	 * slot will be -1 if this LDT doesn't have an alias mapping.
+	 */
+	int			slot;
 };
 
+/* This is a multiple of PAGE_SIZE. */
+#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
+
+static inline void *ldt_slot_va(int slot)
+{
+#ifdef CONFIG_X86_64
+	return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
+#else
+	BUG();
+#endif
+}
+
 /*
  * Used for LDT copy/destruction.
  */
-int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm);
+static inline void init_new_context_ldt(struct mm_struct *mm)
+{
+	mm->context.ldt = NULL;
+	init_rwsem(&mm->context.ldt_usr_sem);
+}
+int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm);
 void destroy_context_ldt(struct mm_struct *mm);
+void ldt_arch_exit_mmap(struct mm_struct *mm);
 #else	/* CONFIG_MODIFY_LDT_SYSCALL */
-static inline int init_new_context_ldt(struct task_struct *tsk,
-				       struct mm_struct *mm)
+static inline void init_new_context_ldt(struct mm_struct *mm) { }
+static inline int ldt_dup_context(struct mm_struct *oldmm,
+				  struct mm_struct *mm)
 {
 	return 0;
 }
-static inline void destroy_context_ldt(struct mm_struct *mm) {}
+static inline void destroy_context_ldt(struct mm_struct *mm) { }
+static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
 #endif
 
 static inline void load_mm_ldt(struct mm_struct *mm)
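The new slot field and ldt_slot_va() implement the double-mapping scheme described in the comment above: with PTI, the LDT is aliased into one of two fixed slots in a region the user page tables can see, and the alias address is just a base plus slot times a fixed stride. A userspace sketch of that address arithmetic follows; LDT_BASE_ADDR, LDT_ENTRIES and LDT_ENTRY_SIZE are placeholder values here, since the real constants are defined elsewhere in the tree.

    #include <stdio.h>

    /* Placeholder values; the kernel defines the real ones elsewhere. */
    #define LDT_BASE_ADDR_DEMO	0xfffffe0000000000UL
    #define LDT_ENTRIES_DEMO	8192UL
    #define LDT_ENTRY_SIZE_DEMO	8UL

    #define LDT_SLOT_STRIDE_DEMO	(LDT_ENTRIES_DEMO * LDT_ENTRY_SIZE_DEMO)

    static void *ldt_slot_va_demo(int slot)
    {
        return (void *)(LDT_BASE_ADDR_DEMO + LDT_SLOT_STRIDE_DEMO * slot);
    }

    int main(void)
    {
        /* Two slots, so a new LDT can be mapped while the old one stays live. */
        printf("slot 0 alias: %p\n", ldt_slot_va_demo(0));
        printf("slot 1 alias: %p\n", ldt_slot_va_demo(1));
        printf("stride: %lu KiB\n", LDT_SLOT_STRIDE_DEMO / 1024);
        return 0;
    }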
@@ -90,10 +121,31 @@ static inline void load_mm_ldt(struct mm_struct *mm)
 	 * that we can see.
 	 */
 
-	if (unlikely(ldt))
-		set_ldt(ldt->entries, ldt->nr_entries);
-	else
+	if (unlikely(ldt)) {
+		if (static_cpu_has(X86_FEATURE_PTI)) {
+			if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
+				/*
+				 * Whoops -- either the new LDT isn't mapped
+				 * (if slot == -1) or is mapped into a bogus
+				 * slot (if slot > 1).
+				 */
+				clear_LDT();
+				return;
+			}
+
+			/*
+			 * If page table isolation is enabled, ldt->entries
+			 * will not be mapped in the userspace pagetables.
+			 * Tell the CPU to access the LDT through the alias
+			 * at ldt_slot_va(ldt->slot).
+			 */
+			set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
+		} else {
+			set_ldt(ldt->entries, ldt->nr_entries);
+		}
+	} else {
 		clear_LDT();
+	}
 #else
 	clear_LDT();
 #endif
@@ -132,18 +184,21 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
 static inline int init_new_context(struct task_struct *tsk,
 				   struct mm_struct *mm)
 {
+	mutex_init(&mm->context.lock);
+
 	mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
 	atomic64_set(&mm->context.tlb_gen, 0);
 
 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
 	if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
 		/* pkey 0 is the default and always allocated */
 		mm->context.pkey_allocation_map = 0x1;
 		/* -1 means unallocated or invalid */
 		mm->context.execute_only_pkey = -1;
 	}
 #endif
-	return init_new_context_ldt(tsk, mm);
+	init_new_context_ldt(mm);
+	return 0;
 }
 static inline void destroy_context(struct mm_struct *mm)
 {
@@ -176,15 +231,16 @@ do { \
 } while (0)
 #endif
 
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
-				 struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
 {
 	paravirt_arch_dup_mmap(oldmm, mm);
+	return ldt_dup_context(oldmm, mm);
 }
 
 static inline void arch_exit_mmap(struct mm_struct *mm)
 {
 	paravirt_arch_exit_mmap(mm);
+	ldt_arch_exit_mmap(mm);
 }
 
 #ifdef CONFIG_X86_64
@@ -281,33 +337,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
 	return __pkru_allows_pkey(vma_pkey(vma), write);
 }
 
-/*
- * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
- * bits.  This serves two purposes.  It prevents a nasty situation in
- * which PCID-unaware code saves CR3, loads some other value (with PCID
- * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
- * the saved ASID was nonzero.  It also means that any bugs involving
- * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
- * deterministically.
- */
-
-static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
-{
-	if (static_cpu_has(X86_FEATURE_PCID)) {
-		VM_WARN_ON_ONCE(asid > 4094);
-		return __sme_pa(mm->pgd) | (asid + 1);
-	} else {
-		VM_WARN_ON_ONCE(asid != 0);
-		return __sme_pa(mm->pgd);
-	}
-}
-
-static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
-{
-	VM_WARN_ON_ONCE(asid > 4094);
-	return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
-}
-
 /*
  * This can be used from process context to figure out what the value of
  * CR3 is without needing to do a (slow) __read_cr3().
|
||||||
*/
|
*/
|
||||||
static inline unsigned long __get_current_cr3_fast(void)
|
static inline unsigned long __get_current_cr3_fast(void)
|
||||||
{
|
{
|
||||||
unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
|
unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
|
||||||
this_cpu_read(cpu_tlbstate.loaded_mm_asid));
|
this_cpu_read(cpu_tlbstate.loaded_mm_asid));
|
||||||
|
|
||||||
/* For now, be very restrictive about when this can be called. */
|
/* For now, be very restrictive about when this can be called. */
|
||||||
|
|
|
@@ -927,6 +927,15 @@ extern void default_banner(void);
 	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),	\
 		  CLBR_NONE,						\
 		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
+
+#ifdef CONFIG_DEBUG_ENTRY
+#define SAVE_FLAGS(clobbers)                                        \
+	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
+		  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);        \
+		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl);    \
+		  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
+#endif
+
 #endif	/* CONFIG_X86_32 */
 
 #endif /* __ASSEMBLY__ */
@@ -30,6 +30,17 @@ static inline void paravirt_release_p4d(unsigned long pfn) {}
  */
 extern gfp_t __userpte_alloc_gfp;
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * Instead of one PGD, we acquire two PGDs.  Being order-1, it is
+ * both 8k in size and 8k-aligned.  That lets us just flip bit 12
+ * in a pointer to swap between the two 4k halves.
+ */
+#define PGD_ALLOCATION_ORDER 1
+#else
+#define PGD_ALLOCATION_ORDER 0
+#endif
+
 /*
  * Allocate and free page tables.
  */
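PGD_ALLOCATION_ORDER above is what makes the pointer tricks later in this series possible: with isolation enabled, the top-level page table is an order-1 (two-page, 8 KiB, 8 KiB-aligned) allocation, so the kernel half and the user half differ only in bit 12 of their addresses. A small sketch of that size and offset arithmetic, assuming 4 KiB pages:

    #include <stdio.h>

    #define PAGE_SHIFT		12		/* assumption: 4 KiB pages */
    #define PAGE_SIZE		(1UL << PAGE_SHIFT)
    #define PGD_ALLOCATION_ORDER	1		/* as defined above for PTI */

    int main(void)
    {
        unsigned long bytes = PAGE_SIZE << PGD_ALLOCATION_ORDER;

        /* Order-1 means two contiguous pages, naturally 8 KiB-aligned. */
        printf("PGD allocation: %lu bytes (%lu pages)\n",
               bytes, bytes / PAGE_SIZE);
        printf("kernel half at offset 0x0, user half at offset 0x%lx\n",
               PAGE_SIZE);	/* i.e. flip bit 12 to reach the other half */
        return 0;
    }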
@@ -28,6 +28,7 @@ extern pgd_t early_top_pgt[PTRS_PER_PGD];
 int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
 
 void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
+void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user);
 void ptdump_walk_pgd_level_checkwx(void);
 
 #ifdef CONFIG_DEBUG_WX
@@ -841,7 +842,12 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
 
 static inline int p4d_bad(p4d_t p4d)
 {
-	return (p4d_flags(p4d) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0;
+	unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER;
+
+	if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+		ignore_flags |= _PAGE_NX;
+
+	return (p4d_flags(p4d) & ~ignore_flags) != 0;
 }
 #endif  /* CONFIG_PGTABLE_LEVELS > 3 */
 
@@ -875,7 +881,12 @@ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
 
 static inline int pgd_bad(pgd_t pgd)
 {
-	return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE;
+	unsigned long ignore_flags = _PAGE_USER;
+
+	if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+		ignore_flags |= _PAGE_NX;
+
+	return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
 }
 
 static inline int pgd_none(pgd_t pgd)
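Both p4d_bad() and pgd_bad() now build an ignore_flags mask and fold _PAGE_NX into it when CONFIG_PAGE_TABLE_ISOLATION is enabled, presumably so an NX bit that PTI sets on some top-level entries does not make an otherwise sane entry look corrupt. A userspace sketch of the same masking logic, with illustrative flag values standing in for the real page-table bits:

    #include <stdio.h>
    #include <stdbool.h>

    /* Illustrative bit values; the real ones come from pgtable_types.h. */
    #define _PAGE_USER	(1UL << 2)
    #define _PAGE_NX	(1UL << 63)
    #define _KERNPG_TABLE	0x63UL	/* present, rw, accessed, dirty (demo value) */

    static bool pgd_bad_demo(unsigned long pgd_flags, bool pti_enabled)
    {
        unsigned long ignore_flags = _PAGE_USER;

        if (pti_enabled)
            ignore_flags |= _PAGE_NX;	/* NX alone should not flag the entry */

        return (pgd_flags & ~ignore_flags) != _KERNPG_TABLE;
    }

    int main(void)
    {
        unsigned long flags = _KERNPG_TABLE | _PAGE_USER | _PAGE_NX;

        printf("without PTI: %s\n", pgd_bad_demo(flags, false) ? "bad" : "ok");
        printf("with PTI:    %s\n", pgd_bad_demo(flags, true)  ? "bad" : "ok");
        return 0;
    }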
@@ -904,7 +915,11 @@ static inline int pgd_none(pgd_t pgd)
  * pgd_offset() returns a (pgd_t *)
  * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
  */
-#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
+#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address)))
+/*
+ * a shortcut to get a pgd_t in a given mm
+ */
+#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address))
 /*
  * a shortcut which implies the use of the kernel's pgd, instead
  * of a process's
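pgd_offset_pgd() factors the old pgd_offset() into a form that can be pointed at either copy of a PTI page table: the caller supplies the PGD base explicitly and the macro only adds pgd_index(address). A userspace sketch of the index arithmetic, assuming the usual 4-level x86-64 constants (PGDIR_SHIFT of 39 and 512 entries per table, neither of which appears in this hunk):

    #include <stdio.h>

    /* Assumed 4-level x86-64 layout; not taken from this hunk. */
    #define PGDIR_SHIFT	39
    #define PTRS_PER_PGD	512

    #define pgd_index(address)		(((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
    #define pgd_offset_pgd(pgd, address)	((pgd) + pgd_index(address))

    int main(void)
    {
        unsigned long pgd_base[PTRS_PER_PGD] = { 0 };	/* stand-in PGD page */
        unsigned long addr = 0x00007f1234567000UL;	/* a user-space address */

        /* The same entry is found whichever PGD copy the caller passes in. */
        printf("pgd_index  = %lu\n", (unsigned long)pgd_index(addr));
        printf("entry addr = %p\n", (void *)pgd_offset_pgd(pgd_base, addr));
        return 0;
    }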
@@ -1106,7 +1121,14 @@ static inline int pud_write(pud_t pud)
  */
 static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
 {
 	memcpy(dst, src, count * sizeof(pgd_t));
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		return;
+	/* Clone the user space pgd as well */
+	memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src),
+	       count * sizeof(pgd_t));
+#endif
 }
 
 #define PTE_SHIFT ilog2(PTRS_PER_PTE)
@@ -38,13 +38,22 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
 #define LAST_PKMAP 1024
 #endif
 
-#define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE * (LAST_PKMAP + 1))	\
-		    & PMD_MASK)
+/*
+ * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
+ * to avoid include recursion hell
+ */
+#define CPU_ENTRY_AREA_PAGES	(NR_CPUS * 40)
+
+#define CPU_ENTRY_AREA_BASE				\
+	((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK)
+
+#define PKMAP_BASE		\
+	((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
 
 #ifdef CONFIG_HIGHMEM
 # define VMALLOC_END	(PKMAP_BASE - 2 * PAGE_SIZE)
 #else
-# define VMALLOC_END	(FIXADDR_START - 2 * PAGE_SIZE)
+# define VMALLOC_END	(CPU_ENTRY_AREA_BASE - 2 * PAGE_SIZE)
 #endif
 
 #define MODULES_VADDR	VMALLOC_START
@@ -131,9 +131,97 @@ static inline pud_t native_pudp_get_and_clear(pud_t *xp)
 #endif
 }
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages
+ * (8k-aligned and 8k in size).  The kernel one is at the beginning 4k and
+ * the user one is in the last 4k.  To switch between them, you
+ * just need to flip the 12th bit in their addresses.
+ */
+#define PTI_PGTABLE_SWITCH_BIT	PAGE_SHIFT
+
+/*
+ * This generates better code than the inline assembly in
+ * __set_bit().
+ */
+static inline void *ptr_set_bit(void *ptr, int bit)
+{
+	unsigned long __ptr = (unsigned long)ptr;
+
+	__ptr |= BIT(bit);
+	return (void *)__ptr;
+}
+static inline void *ptr_clear_bit(void *ptr, int bit)
+{
+	unsigned long __ptr = (unsigned long)ptr;
+
+	__ptr &= ~BIT(bit);
+	return (void *)__ptr;
+}
+
+static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp)
+{
+	return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp)
+{
+	return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp)
+{
+	return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
+{
+	return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
+}
+#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+
+/*
+ * Page table pages are page-aligned.  The lower half of the top
+ * level is used for userspace and the top half for the kernel.
+ *
+ * Returns true for parts of the PGD that map userspace and
+ * false for the parts that map the kernel.
+ */
+static inline bool pgdp_maps_userspace(void *__ptr)
+{
+	unsigned long ptr = (unsigned long)__ptr;
+
+	return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2);
+}
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd);
+
+/*
+ * Take a PGD location (pgdp) and a pgd value that needs to be set there.
+ * Populates the user and returns the resulting PGD that must be set in
+ * the kernel copy of the page tables.
+ */
+static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		return pgd;
+	return __pti_set_user_pgd(pgdp, pgd);
+}
+#else
+static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+	return pgd;
+}
+#endif
+
 static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
 {
+#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL)
+	p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd);
+#else
 	*p4dp = p4d;
+#endif
 }
 
 static inline void native_p4d_clear(p4d_t *p4d)
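Two conventions above carry most of the PTI bookkeeping: kernel_to_user_pgdp()/user_to_kernel_pgdp() flip bit PAGE_SHIFT (bit 12) of a PGD pointer to hop between the two 4 KiB halves of the order-1 allocation, and pgdp_maps_userspace() classifies an entry by whether its offset within the page falls in the lower half. A runnable userspace sketch of both, using an aligned 8 KiB buffer in place of a real PGD pair:

    #include <stdio.h>
    #include <stdlib.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define PAGE_SHIFT	12
    #define PAGE_SIZE	(1UL << PAGE_SHIFT)
    #define PAGE_MASK	(~(PAGE_SIZE - 1))
    #define PTI_PGTABLE_SWITCH_BIT	PAGE_SHIFT

    static void *ptr_set_bit(void *ptr, int bit)
    {
        return (void *)((uintptr_t)ptr | (1UL << bit));
    }

    static void *ptr_clear_bit(void *ptr, int bit)
    {
        return (void *)((uintptr_t)ptr & ~(1UL << bit));
    }

    static bool pgdp_maps_userspace(void *__ptr)
    {
        uintptr_t ptr = (uintptr_t)__ptr;

        return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2);
    }

    int main(void)
    {
        /* 8 KiB, 8 KiB-aligned: stands in for the order-1 PGD pair. */
        void *kernel_pgd = aligned_alloc(2 * PAGE_SIZE, 2 * PAGE_SIZE);
        void *user_pgd;

        if (!kernel_pgd)
            return 1;
        user_pgd = ptr_set_bit(kernel_pgd, PTI_PGTABLE_SWITCH_BIT);

        printf("kernel half: %p\n", kernel_pgd);
        printf("user half:   %p\n", user_pgd);
        printf("back again:  %p\n", ptr_clear_bit(user_pgd, PTI_PGTABLE_SWITCH_BIT));

        /* Offset 0 within the page is in the userspace half of the entries. */
        printf("entry 0 maps userspace? %s\n",
               pgdp_maps_userspace(kernel_pgd) ? "yes" : "no");

        free(kernel_pgd);
        return 0;
    }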
@@ -147,7 +235,11 @@ static inline void native_p4d_clear(p4d_t *p4d)
 
 static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
 {
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	*pgdp = pti_set_user_pgd(pgdp, pgd);
+#else
 	*pgdp = pgd;
+#endif
 }
 
 static inline void native_pgd_clear(pgd_t *pgd)
Some files were not shown because too many files changed in this diff.