Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 asm updates from Ingo Molnar: "This is another big update. Main changes are: - lots of x86 system call (and other traps/exceptions) entry code enhancements. In particular the complex parts of the 64-bit entry code have been migrated to C code as well, and a number of dusty corners have been refreshed. (Andy Lutomirski) - vDSO special mapping robustification and general cleanups (Andy Lutomirski) - cpufeature refactoring, cleanups and speedups (Borislav Petkov) - lots of other changes ..." * 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (64 commits) x86/cpufeature: Enable new AVX-512 features x86/entry/traps: Show unhandled signal for i386 in do_trap() x86/entry: Call enter_from_user_mode() with IRQs off x86/entry/32: Change INT80 to be an interrupt gate x86/entry: Improve system call entry comments x86/entry: Remove TIF_SINGLESTEP entry work x86/entry/32: Add and check a stack canary for the SYSENTER stack x86/entry/32: Simplify and fix up the SYSENTER stack #DB/NMI fixup x86/entry: Only allocate space for tss_struct::SYSENTER_stack if needed x86/entry: Vastly simplify SYSENTER TF (single-step) handling x86/entry/traps: Clear DR6 early in do_debug() and improve the comment x86/entry/traps: Clear TIF_BLOCKSTEP on all debug exceptions x86/entry/32: Restore FLAGS on SYSEXIT x86/entry/32: Filter NT and speed up AC filtering in SYSENTER x86/entry/compat: In SYSENTER, sink AC clearing below the existing FLAGS test selftests/x86: In syscall_nt, test NT|TF as well x86/asm-offsets: Remove PARAVIRT_enabled x86/entry/32: Introduce and use X86_BUG_ESPFIX instead of paravirt_enabled uprobes: __create_xol_area() must nullify xol_mapping.fault x86/cpufeature: Create a new synthetic cpu capability for machine check recovery ...
This commit is contained in:
Коммит
ba33ea811e
|
@ -666,7 +666,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
|
|||
|
||||
clearcpuid=BITNUM [X86]
|
||||
Disable CPUID feature X for the kernel. See
|
||||
arch/x86/include/asm/cpufeature.h for the valid bit
|
||||
arch/x86/include/asm/cpufeatures.h for the valid bit
|
||||
numbers. Note the Linux specific bits are not necessarily
|
||||
stable over kernel options, but the vendor specific
|
||||
ones should be.
|
||||
|
|
|
@ -60,6 +60,8 @@ Machine check
|
|||
threshold to 1. Enabling this may make memory predictive failure
|
||||
analysis less effective if the bios sets thresholds for memory
|
||||
errors since we will not see details for all errors.
|
||||
mce=recovery
|
||||
Force-enable recoverable machine check code paths
|
||||
|
||||
nomce (for compatibility with i386): same as mce=off
|
||||
|
||||
|
|
|
@ -338,16 +338,6 @@ config DEBUG_IMR_SELFTEST
|
|||
|
||||
If unsure say N here.
|
||||
|
||||
config X86_DEBUG_STATIC_CPU_HAS
|
||||
bool "Debug alternatives"
|
||||
depends on DEBUG_KERNEL
|
||||
---help---
|
||||
This option causes additional code to be generated which
|
||||
fails if static_cpu_has() is used before alternatives have
|
||||
run.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config X86_DEBUG_FPU
|
||||
bool "Debug the x86 FPU code"
|
||||
depends on DEBUG_KERNEL
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#ifndef BOOT_CPUFLAGS_H
|
||||
#define BOOT_CPUFLAGS_H
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/cpufeatures.h>
|
||||
#include <asm/processor-flags.h>
|
||||
|
||||
struct cpu_features {
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
|
||||
#include "../include/asm/required-features.h"
|
||||
#include "../include/asm/disabled-features.h"
|
||||
#include "../include/asm/cpufeature.h"
|
||||
#include "../include/asm/cpufeatures.h"
|
||||
#include "../kernel/cpu/capflags.c"
|
||||
|
||||
int main(void)
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
#include <linux/crc32.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/cpufeatures.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
#include <asm/fpu/api.h>
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@
|
|||
#include <linux/kernel.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/cpufeatures.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
#include <asm/fpu/internal.h>
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@
|
|||
#include <linux/string.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <asm/fpu/api.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/cpufeatures.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
|
||||
asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf,
|
||||
|
|
|
@ -201,37 +201,6 @@ For 32-bit we have the following conventions - kernel is built with
|
|||
.byte 0xf1
|
||||
.endm
|
||||
|
||||
#else /* CONFIG_X86_64 */
|
||||
|
||||
/*
|
||||
* For 32bit only simplified versions of SAVE_ALL/RESTORE_ALL. These
|
||||
* are different from the entry_32.S versions in not changing the segment
|
||||
* registers. So only suitable for in kernel use, not when transitioning
|
||||
* from or to user space. The resulting stack frame is not a standard
|
||||
* pt_regs frame. The main use case is calling C code from assembler
|
||||
* when all the registers need to be preserved.
|
||||
*/
|
||||
|
||||
.macro SAVE_ALL
|
||||
pushl %eax
|
||||
pushl %ebp
|
||||
pushl %edi
|
||||
pushl %esi
|
||||
pushl %edx
|
||||
pushl %ecx
|
||||
pushl %ebx
|
||||
.endm
|
||||
|
||||
.macro RESTORE_ALL
|
||||
popl %ebx
|
||||
popl %ecx
|
||||
popl %edx
|
||||
popl %esi
|
||||
popl %edi
|
||||
popl %ebp
|
||||
popl %eax
|
||||
.endm
|
||||
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
/*
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include <asm/traps.h>
|
||||
#include <asm/vdso.h>
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/cpufeature.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/syscalls.h>
|
||||
|
@ -44,6 +45,8 @@ __visible void enter_from_user_mode(void)
|
|||
CT_WARN_ON(ct_state() != CONTEXT_USER);
|
||||
user_exit();
|
||||
}
|
||||
#else
|
||||
static inline void enter_from_user_mode(void) {}
|
||||
#endif
|
||||
|
||||
static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
|
||||
|
@ -84,17 +87,6 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
|
|||
|
||||
work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
|
||||
|
||||
#ifdef CONFIG_CONTEXT_TRACKING
|
||||
/*
|
||||
* If TIF_NOHZ is set, we are required to call user_exit() before
|
||||
* doing anything that could touch RCU.
|
||||
*/
|
||||
if (work & _TIF_NOHZ) {
|
||||
enter_from_user_mode();
|
||||
work &= ~_TIF_NOHZ;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SECCOMP
|
||||
/*
|
||||
* Do seccomp first -- it should minimize exposure of other
|
||||
|
@ -171,16 +163,6 @@ long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
|
|||
if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
|
||||
BUG_ON(regs != task_pt_regs(current));
|
||||
|
||||
/*
|
||||
* If we stepped into a sysenter/syscall insn, it trapped in
|
||||
* kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
|
||||
* If user-mode had set TF itself, then it's still clear from
|
||||
* do_debug() and we need to set it again to restore the user
|
||||
* state. If we entered on the slow path, TF was already set.
|
||||
*/
|
||||
if (work & _TIF_SINGLESTEP)
|
||||
regs->flags |= X86_EFLAGS_TF;
|
||||
|
||||
#ifdef CONFIG_SECCOMP
|
||||
/*
|
||||
* Call seccomp_phase2 before running the other hooks so that
|
||||
|
@ -268,6 +250,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
|
|||
/* Called with IRQs disabled. */
|
||||
__visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
|
||||
{
|
||||
struct thread_info *ti = pt_regs_to_thread_info(regs);
|
||||
u32 cached_flags;
|
||||
|
||||
if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
|
||||
|
@ -275,12 +258,22 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
|
|||
|
||||
lockdep_sys_exit();
|
||||
|
||||
cached_flags =
|
||||
READ_ONCE(pt_regs_to_thread_info(regs)->flags);
|
||||
cached_flags = READ_ONCE(ti->flags);
|
||||
|
||||
if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
|
||||
exit_to_usermode_loop(regs, cached_flags);
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
/*
|
||||
* Compat syscalls set TS_COMPAT. Make sure we clear it before
|
||||
* returning to user mode. We need to clear it *after* signal
|
||||
* handling, because syscall restart has a fixup for compat
|
||||
* syscalls. The fixup is exercised by the ptrace_syscall_32
|
||||
* selftest.
|
||||
*/
|
||||
ti->status &= ~TS_COMPAT;
|
||||
#endif
|
||||
|
||||
user_enter();
|
||||
}
|
||||
|
||||
|
@ -332,33 +325,45 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs)
|
|||
if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS))
|
||||
syscall_slow_exit_work(regs, cached_flags);
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
/*
|
||||
* Compat syscalls set TS_COMPAT. Make sure we clear it before
|
||||
* returning to user mode.
|
||||
*/
|
||||
ti->status &= ~TS_COMPAT;
|
||||
#endif
|
||||
|
||||
local_irq_disable();
|
||||
prepare_exit_to_usermode(regs);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
__visible void do_syscall_64(struct pt_regs *regs)
|
||||
{
|
||||
struct thread_info *ti = pt_regs_to_thread_info(regs);
|
||||
unsigned long nr = regs->orig_ax;
|
||||
|
||||
enter_from_user_mode();
|
||||
local_irq_enable();
|
||||
|
||||
if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
|
||||
nr = syscall_trace_enter(regs);
|
||||
|
||||
/*
|
||||
* NB: Native and x32 syscalls are dispatched from the same
|
||||
* table. The only functional difference is the x32 bit in
|
||||
* regs->orig_ax, which changes the behavior of some syscalls.
|
||||
*/
|
||||
if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) {
|
||||
regs->ax = sys_call_table[nr & __SYSCALL_MASK](
|
||||
regs->di, regs->si, regs->dx,
|
||||
regs->r10, regs->r8, regs->r9);
|
||||
}
|
||||
|
||||
syscall_return_slowpath(regs);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
|
||||
/*
|
||||
* Does a 32-bit syscall. Called with IRQs on and does all entry and
|
||||
* exit work and returns with IRQs off. This function is extremely hot
|
||||
* in workloads that use it, and it's usually called from
|
||||
* Does a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. Does
|
||||
* all entry and exit work and returns with IRQs off. This function is
|
||||
* extremely hot in workloads that use it, and it's usually called from
|
||||
* do_fast_syscall_32, so forcibly inline it to improve performance.
|
||||
*/
|
||||
#ifdef CONFIG_X86_32
|
||||
/* 32-bit kernels use a trap gate for INT80, and the asm code calls here. */
|
||||
__visible
|
||||
#else
|
||||
/* 64-bit kernels use do_syscall_32_irqs_off() instead. */
|
||||
static
|
||||
#endif
|
||||
__always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
|
||||
static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
|
||||
{
|
||||
struct thread_info *ti = pt_regs_to_thread_info(regs);
|
||||
unsigned int nr = (unsigned int)regs->orig_ax;
|
||||
|
@ -393,14 +398,13 @@ __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
|
|||
syscall_return_slowpath(regs);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/* Handles INT80 on 64-bit kernels */
|
||||
__visible void do_syscall_32_irqs_off(struct pt_regs *regs)
|
||||
/* Handles int $0x80 */
|
||||
__visible void do_int80_syscall_32(struct pt_regs *regs)
|
||||
{
|
||||
enter_from_user_mode();
|
||||
local_irq_enable();
|
||||
do_syscall_32_irqs_on(regs);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
|
||||
__visible long do_fast_syscall_32(struct pt_regs *regs)
|
||||
|
@ -420,12 +424,11 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
|
|||
*/
|
||||
regs->ip = landing_pad;
|
||||
|
||||
/*
|
||||
* Fetch EBP from where the vDSO stashed it.
|
||||
*
|
||||
* WARNING: We are in CONTEXT_USER and RCU isn't paying attention!
|
||||
*/
|
||||
enter_from_user_mode();
|
||||
|
||||
local_irq_enable();
|
||||
|
||||
/* Fetch EBP from where the vDSO stashed it. */
|
||||
if (
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
|
@ -443,9 +446,6 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
|
|||
/* User code screwed up. */
|
||||
local_irq_disable();
|
||||
regs->ax = -EFAULT;
|
||||
#ifdef CONFIG_CONTEXT_TRACKING
|
||||
enter_from_user_mode();
|
||||
#endif
|
||||
prepare_exit_to_usermode(regs);
|
||||
return 0; /* Keep it simple: use IRET. */
|
||||
}
|
||||
|
|
|
@ -40,7 +40,7 @@
|
|||
#include <asm/processor-flags.h>
|
||||
#include <asm/ftrace.h>
|
||||
#include <asm/irq_vectors.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/cpufeatures.h>
|
||||
#include <asm/alternative-asm.h>
|
||||
#include <asm/asm.h>
|
||||
#include <asm/smap.h>
|
||||
|
@ -287,20 +287,93 @@ need_resched:
|
|||
END(resume_kernel)
|
||||
#endif
|
||||
|
||||
# SYSENTER call handler stub
|
||||
GLOBAL(__begin_SYSENTER_singlestep_region)
|
||||
/*
|
||||
* All code from here through __end_SYSENTER_singlestep_region is subject
|
||||
* to being single-stepped if a user program sets TF and executes SYSENTER.
|
||||
* There is absolutely nothing that we can do to prevent this from happening
|
||||
* (thanks Intel!). To keep our handling of this situation as simple as
|
||||
* possible, we handle TF just like AC and NT, except that our #DB handler
|
||||
* will ignore all of the single-step traps generated in this range.
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_XEN
|
||||
/*
|
||||
* Xen doesn't set %esp to be precisely what the normal SYSENTER
|
||||
* entry point expects, so fix it up before using the normal path.
|
||||
*/
|
||||
ENTRY(xen_sysenter_target)
|
||||
addl $5*4, %esp /* remove xen-provided frame */
|
||||
jmp sysenter_past_esp
|
||||
#endif
|
||||
|
||||
/*
|
||||
* 32-bit SYSENTER entry.
|
||||
*
|
||||
* 32-bit system calls through the vDSO's __kernel_vsyscall enter here
|
||||
* if X86_FEATURE_SEP is available. This is the preferred system call
|
||||
* entry on 32-bit systems.
|
||||
*
|
||||
* The SYSENTER instruction, in principle, should *only* occur in the
|
||||
* vDSO. In practice, a small number of Android devices were shipped
|
||||
* with a copy of Bionic that inlined a SYSENTER instruction. This
|
||||
* never happened in any of Google's Bionic versions -- it only happened
|
||||
* in a narrow range of Intel-provided versions.
|
||||
*
|
||||
* SYSENTER loads SS, ESP, CS, and EIP from previously programmed MSRs.
|
||||
* IF and VM in RFLAGS are cleared (IOW: interrupts are off).
|
||||
* SYSENTER does not save anything on the stack,
|
||||
* and does not save old EIP (!!!), ESP, or EFLAGS.
|
||||
*
|
||||
* To avoid losing track of EFLAGS.VM (and thus potentially corrupting
|
||||
* user and/or vm86 state), we explicitly disable the SYSENTER
|
||||
* instruction in vm86 mode by reprogramming the MSRs.
|
||||
*
|
||||
* Arguments:
|
||||
* eax system call number
|
||||
* ebx arg1
|
||||
* ecx arg2
|
||||
* edx arg3
|
||||
* esi arg4
|
||||
* edi arg5
|
||||
* ebp user stack
|
||||
* 0(%ebp) arg6
|
||||
*/
|
||||
ENTRY(entry_SYSENTER_32)
|
||||
movl TSS_sysenter_sp0(%esp), %esp
|
||||
sysenter_past_esp:
|
||||
pushl $__USER_DS /* pt_regs->ss */
|
||||
pushl %ebp /* pt_regs->sp (stashed in bp) */
|
||||
pushfl /* pt_regs->flags (except IF = 0) */
|
||||
ASM_CLAC /* Clear AC after saving FLAGS */
|
||||
orl $X86_EFLAGS_IF, (%esp) /* Fix IF */
|
||||
pushl $__USER_CS /* pt_regs->cs */
|
||||
pushl $0 /* pt_regs->ip = 0 (placeholder) */
|
||||
pushl %eax /* pt_regs->orig_ax */
|
||||
SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */
|
||||
|
||||
/*
|
||||
* SYSENTER doesn't filter flags, so we need to clear NT, AC
|
||||
* and TF ourselves. To save a few cycles, we can check whether
|
||||
* either was set instead of doing an unconditional popfq.
|
||||
* This needs to happen before enabling interrupts so that
|
||||
* we don't get preempted with NT set.
|
||||
*
|
||||
* If TF is set, we will single-step all the way to here -- do_debug
|
||||
* will ignore all the traps. (Yes, this is slow, but so is
|
||||
* single-stepping in general. This allows us to avoid having
|
||||
* a more complicated code to handle the case where a user program
|
||||
* forces us to single-step through the SYSENTER entry code.)
|
||||
*
|
||||
* NB.: .Lsysenter_fix_flags is a label with the code under it moved
|
||||
* out-of-line as an optimization: NT is unlikely to be set in the
|
||||
* majority of the cases and instead of polluting the I$ unnecessarily,
|
||||
* we're keeping that code behind a branch which will predict as
|
||||
* not-taken and therefore its instructions won't be fetched.
|
||||
*/
|
||||
testl $X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, PT_EFLAGS(%esp)
|
||||
jnz .Lsysenter_fix_flags
|
||||
.Lsysenter_flags_fixed:
|
||||
|
||||
/*
|
||||
* User mode is traced as though IRQs are on, and SYSENTER
|
||||
* turned them off.
|
||||
|
@ -326,6 +399,15 @@ sysenter_past_esp:
|
|||
popl %ebp /* pt_regs->bp */
|
||||
popl %eax /* pt_regs->ax */
|
||||
|
||||
/*
|
||||
* Restore all flags except IF. (We restore IF separately because
|
||||
* STI gives a one-instruction window in which we won't be interrupted,
|
||||
* whereas POPF does not.)
|
||||
*/
|
||||
addl $PT_EFLAGS-PT_DS, %esp /* point esp at pt_regs->flags */
|
||||
btr $X86_EFLAGS_IF_BIT, (%esp)
|
||||
popfl
|
||||
|
||||
/*
|
||||
* Return back to the vDSO, which will pop ecx and edx.
|
||||
* Don't bother with DS and ES (they already contain __USER_DS).
|
||||
|
@ -339,28 +421,63 @@ sysenter_past_esp:
|
|||
.popsection
|
||||
_ASM_EXTABLE(1b, 2b)
|
||||
PTGS_TO_GS_EX
|
||||
|
||||
.Lsysenter_fix_flags:
|
||||
pushl $X86_EFLAGS_FIXED
|
||||
popfl
|
||||
jmp .Lsysenter_flags_fixed
|
||||
GLOBAL(__end_SYSENTER_singlestep_region)
|
||||
ENDPROC(entry_SYSENTER_32)
|
||||
|
||||
# system call handler stub
|
||||
/*
|
||||
* 32-bit legacy system call entry.
|
||||
*
|
||||
* 32-bit x86 Linux system calls traditionally used the INT $0x80
|
||||
* instruction. INT $0x80 lands here.
|
||||
*
|
||||
* This entry point can be used by any 32-bit perform system calls.
|
||||
* Instances of INT $0x80 can be found inline in various programs and
|
||||
* libraries. It is also used by the vDSO's __kernel_vsyscall
|
||||
* fallback for hardware that doesn't support a faster entry method.
|
||||
* Restarted 32-bit system calls also fall back to INT $0x80
|
||||
* regardless of what instruction was originally used to do the system
|
||||
* call. (64-bit programs can use INT $0x80 as well, but they can
|
||||
* only run on 64-bit kernels and therefore land in
|
||||
* entry_INT80_compat.)
|
||||
*
|
||||
* This is considered a slow path. It is not used by most libc
|
||||
* implementations on modern hardware except during process startup.
|
||||
*
|
||||
* Arguments:
|
||||
* eax system call number
|
||||
* ebx arg1
|
||||
* ecx arg2
|
||||
* edx arg3
|
||||
* esi arg4
|
||||
* edi arg5
|
||||
* ebp arg6
|
||||
*/
|
||||
ENTRY(entry_INT80_32)
|
||||
ASM_CLAC
|
||||
pushl %eax /* pt_regs->orig_ax */
|
||||
SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */
|
||||
|
||||
/*
|
||||
* User mode is traced as though IRQs are on. Unlike the 64-bit
|
||||
* case, INT80 is a trap gate on 32-bit kernels, so interrupts
|
||||
* are already on (unless user code is messing around with iopl).
|
||||
* User mode is traced as though IRQs are on, and the interrupt gate
|
||||
* turned them off.
|
||||
*/
|
||||
TRACE_IRQS_OFF
|
||||
|
||||
movl %esp, %eax
|
||||
call do_syscall_32_irqs_on
|
||||
call do_int80_syscall_32
|
||||
.Lsyscall_32_done:
|
||||
|
||||
restore_all:
|
||||
TRACE_IRQS_IRET
|
||||
restore_all_notrace:
|
||||
#ifdef CONFIG_X86_ESPFIX32
|
||||
ALTERNATIVE "jmp restore_nocheck", "", X86_BUG_ESPFIX
|
||||
|
||||
movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
|
||||
/*
|
||||
* Warning: PT_OLDSS(%esp) contains the wrong/random values if we
|
||||
|
@ -387,19 +504,6 @@ ENTRY(iret_exc )
|
|||
|
||||
#ifdef CONFIG_X86_ESPFIX32
|
||||
ldt_ss:
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
/*
|
||||
* The kernel can't run on a non-flat stack if paravirt mode
|
||||
* is active. Rather than try to fixup the high bits of
|
||||
* ESP, bypass this code entirely. This may break DOSemu
|
||||
* and/or Wine support in a paravirt VM, although the option
|
||||
* is still available to implement the setting of the high
|
||||
* 16-bits in the INTERRUPT_RETURN paravirt-op.
|
||||
*/
|
||||
cmpl $0, pv_info+PARAVIRT_enabled
|
||||
jne restore_nocheck
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Setup and switch to ESPFIX stack
|
||||
*
|
||||
|
@ -632,14 +736,6 @@ ENTRY(spurious_interrupt_bug)
|
|||
END(spurious_interrupt_bug)
|
||||
|
||||
#ifdef CONFIG_XEN
|
||||
/*
|
||||
* Xen doesn't set %esp to be precisely what the normal SYSENTER
|
||||
* entry point expects, so fix it up before using the normal path.
|
||||
*/
|
||||
ENTRY(xen_sysenter_target)
|
||||
addl $5*4, %esp /* remove xen-provided frame */
|
||||
jmp sysenter_past_esp
|
||||
|
||||
ENTRY(xen_hypervisor_callback)
|
||||
pushl $-1 /* orig_ax = -1 => not a system call */
|
||||
SAVE_ALL
|
||||
|
@ -939,51 +1035,48 @@ error_code:
|
|||
jmp ret_from_exception
|
||||
END(page_fault)
|
||||
|
||||
/*
|
||||
* Debug traps and NMI can happen at the one SYSENTER instruction
|
||||
* that sets up the real kernel stack. Check here, since we can't
|
||||
* allow the wrong stack to be used.
|
||||
*
|
||||
* "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
|
||||
* already pushed 3 words if it hits on the sysenter instruction:
|
||||
* eflags, cs and eip.
|
||||
*
|
||||
* We just load the right stack, and push the three (known) values
|
||||
* by hand onto the new stack - while updating the return eip past
|
||||
* the instruction that would have done it for sysenter.
|
||||
*/
|
||||
.macro FIX_STACK offset ok label
|
||||
cmpw $__KERNEL_CS, 4(%esp)
|
||||
jne \ok
|
||||
\label:
|
||||
movl TSS_sysenter_sp0 + \offset(%esp), %esp
|
||||
pushfl
|
||||
pushl $__KERNEL_CS
|
||||
pushl $sysenter_past_esp
|
||||
.endm
|
||||
|
||||
ENTRY(debug)
|
||||
/*
|
||||
* #DB can happen at the first instruction of
|
||||
* entry_SYSENTER_32 or in Xen's SYSENTER prologue. If this
|
||||
* happens, then we will be running on a very small stack. We
|
||||
* need to detect this condition and switch to the thread
|
||||
* stack before calling any C code at all.
|
||||
*
|
||||
* If you edit this code, keep in mind that NMIs can happen in here.
|
||||
*/
|
||||
ASM_CLAC
|
||||
cmpl $entry_SYSENTER_32, (%esp)
|
||||
jne debug_stack_correct
|
||||
FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
|
||||
debug_stack_correct:
|
||||
pushl $-1 # mark this as an int
|
||||
SAVE_ALL
|
||||
TRACE_IRQS_OFF
|
||||
xorl %edx, %edx # error code 0
|
||||
movl %esp, %eax # pt_regs pointer
|
||||
|
||||
/* Are we currently on the SYSENTER stack? */
|
||||
PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
|
||||
subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
|
||||
cmpl $SIZEOF_SYSENTER_stack, %ecx
|
||||
jb .Ldebug_from_sysenter_stack
|
||||
|
||||
TRACE_IRQS_OFF
|
||||
call do_debug
|
||||
jmp ret_from_exception
|
||||
|
||||
.Ldebug_from_sysenter_stack:
|
||||
/* We're on the SYSENTER stack. Switch off. */
|
||||
movl %esp, %ebp
|
||||
movl PER_CPU_VAR(cpu_current_top_of_stack), %esp
|
||||
TRACE_IRQS_OFF
|
||||
call do_debug
|
||||
movl %ebp, %esp
|
||||
jmp ret_from_exception
|
||||
END(debug)
|
||||
|
||||
/*
|
||||
* NMI is doubly nasty. It can happen _while_ we're handling
|
||||
* a debug fault, and the debug fault hasn't yet been able to
|
||||
* clear up the stack. So we first check whether we got an
|
||||
* NMI on the sysenter entry path, but after that we need to
|
||||
* check whether we got an NMI on the debug path where the debug
|
||||
* fault happened on the sysenter path.
|
||||
* NMI is doubly nasty. It can happen on the first instruction of
|
||||
* entry_SYSENTER_32 (just like #DB), but it can also interrupt the beginning
|
||||
* of the #DB handler even if that #DB in turn hit before entry_SYSENTER_32
|
||||
* switched stacks. We handle both conditions by simply checking whether we
|
||||
* interrupted kernel code running on the SYSENTER stack.
|
||||
*/
|
||||
ENTRY(nmi)
|
||||
ASM_CLAC
|
||||
|
@ -994,41 +1087,32 @@ ENTRY(nmi)
|
|||
popl %eax
|
||||
je nmi_espfix_stack
|
||||
#endif
|
||||
cmpl $entry_SYSENTER_32, (%esp)
|
||||
je nmi_stack_fixup
|
||||
pushl %eax
|
||||
movl %esp, %eax
|
||||
/*
|
||||
* Do not access memory above the end of our stack page,
|
||||
* it might not exist.
|
||||
*/
|
||||
andl $(THREAD_SIZE-1), %eax
|
||||
cmpl $(THREAD_SIZE-20), %eax
|
||||
popl %eax
|
||||
jae nmi_stack_correct
|
||||
cmpl $entry_SYSENTER_32, 12(%esp)
|
||||
je nmi_debug_stack_check
|
||||
nmi_stack_correct:
|
||||
pushl %eax
|
||||
|
||||
pushl %eax # pt_regs->orig_ax
|
||||
SAVE_ALL
|
||||
xorl %edx, %edx # zero error code
|
||||
movl %esp, %eax # pt_regs pointer
|
||||
|
||||
/* Are we currently on the SYSENTER stack? */
|
||||
PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
|
||||
subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
|
||||
cmpl $SIZEOF_SYSENTER_stack, %ecx
|
||||
jb .Lnmi_from_sysenter_stack
|
||||
|
||||
/* Not on SYSENTER stack. */
|
||||
call do_nmi
|
||||
jmp restore_all_notrace
|
||||
|
||||
nmi_stack_fixup:
|
||||
FIX_STACK 12, nmi_stack_correct, 1
|
||||
jmp nmi_stack_correct
|
||||
|
||||
nmi_debug_stack_check:
|
||||
cmpw $__KERNEL_CS, 16(%esp)
|
||||
jne nmi_stack_correct
|
||||
cmpl $debug, (%esp)
|
||||
jb nmi_stack_correct
|
||||
cmpl $debug_esp_fix_insn, (%esp)
|
||||
ja nmi_stack_correct
|
||||
FIX_STACK 24, nmi_stack_correct, 1
|
||||
jmp nmi_stack_correct
|
||||
.Lnmi_from_sysenter_stack:
|
||||
/*
|
||||
* We're on the SYSENTER stack. Switch off. No one (not even debug)
|
||||
* is using the thread stack right now, so it's safe for us to use it.
|
||||
*/
|
||||
movl %esp, %ebp
|
||||
movl PER_CPU_VAR(cpu_current_top_of_stack), %esp
|
||||
call do_nmi
|
||||
movl %ebp, %esp
|
||||
jmp restore_all_notrace
|
||||
|
||||
#ifdef CONFIG_X86_ESPFIX32
|
||||
nmi_espfix_stack:
|
||||
|
|
|
@ -103,6 +103,16 @@ ENDPROC(native_usergs_sysret64)
|
|||
/*
|
||||
* 64-bit SYSCALL instruction entry. Up to 6 arguments in registers.
|
||||
*
|
||||
* This is the only entry point used for 64-bit system calls. The
|
||||
* hardware interface is reasonably well designed and the register to
|
||||
* argument mapping Linux uses fits well with the registers that are
|
||||
* available when SYSCALL is used.
|
||||
*
|
||||
* SYSCALL instructions can be found inlined in libc implementations as
|
||||
* well as some other programs and libraries. There are also a handful
|
||||
* of SYSCALL instructions in the vDSO used, for example, as a
|
||||
* clock_gettimeofday fallback.
|
||||
*
|
||||
* 64-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
|
||||
* then loads new ss, cs, and rip from previously programmed MSRs.
|
||||
* rflags gets masked by a value from another MSR (so CLD and CLAC
|
||||
|
@ -145,17 +155,11 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
|
|||
movq %rsp, PER_CPU_VAR(rsp_scratch)
|
||||
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
|
||||
|
||||
TRACE_IRQS_OFF
|
||||
|
||||
/* Construct struct pt_regs on stack */
|
||||
pushq $__USER_DS /* pt_regs->ss */
|
||||
pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */
|
||||
/*
|
||||
* Re-enable interrupts.
|
||||
* We use 'rsp_scratch' as a scratch space, hence irq-off block above
|
||||
* must execute atomically in the face of possible interrupt-driven
|
||||
* task preemption. We must enable interrupts only after we're done
|
||||
* with using rsp_scratch:
|
||||
*/
|
||||
ENABLE_INTERRUPTS(CLBR_NONE)
|
||||
pushq %r11 /* pt_regs->flags */
|
||||
pushq $__USER_CS /* pt_regs->cs */
|
||||
pushq %rcx /* pt_regs->ip */
|
||||
|
@ -171,9 +175,21 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
|
|||
pushq %r11 /* pt_regs->r11 */
|
||||
sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
|
||||
|
||||
testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
|
||||
jnz tracesys
|
||||
/*
|
||||
* If we need to do entry work or if we guess we'll need to do
|
||||
* exit work, go straight to the slow path.
|
||||
*/
|
||||
testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
|
||||
jnz entry_SYSCALL64_slow_path
|
||||
|
||||
entry_SYSCALL_64_fastpath:
|
||||
/*
|
||||
* Easy case: enable interrupts and issue the syscall. If the syscall
|
||||
* needs pt_regs, we'll call a stub that disables interrupts again
|
||||
* and jumps to the slow path.
|
||||
*/
|
||||
TRACE_IRQS_ON
|
||||
ENABLE_INTERRUPTS(CLBR_NONE)
|
||||
#if __SYSCALL_MASK == ~0
|
||||
cmpq $__NR_syscall_max, %rax
|
||||
#else
|
||||
|
@ -182,103 +198,56 @@ entry_SYSCALL_64_fastpath:
|
|||
#endif
|
||||
ja 1f /* return -ENOSYS (already in pt_regs->ax) */
|
||||
movq %r10, %rcx
|
||||
|
||||
/*
|
||||
* This call instruction is handled specially in stub_ptregs_64.
|
||||
* It might end up jumping to the slow path. If it jumps, RAX
|
||||
* and all argument registers are clobbered.
|
||||
*/
|
||||
call *sys_call_table(, %rax, 8)
|
||||
.Lentry_SYSCALL_64_after_fastpath_call:
|
||||
|
||||
movq %rax, RAX(%rsp)
|
||||
1:
|
||||
/*
|
||||
* Syscall return path ending with SYSRET (fast path).
|
||||
* Has incompletely filled pt_regs.
|
||||
*/
|
||||
LOCKDEP_SYS_EXIT
|
||||
|
||||
/*
|
||||
* We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
|
||||
* it is too small to ever cause noticeable irq latency.
|
||||
* If we get here, then we know that pt_regs is clean for SYSRET64.
|
||||
* If we see that no exit work is required (which we are required
|
||||
* to check with IRQs off), then we can go straight to SYSRET64.
|
||||
*/
|
||||
DISABLE_INTERRUPTS(CLBR_NONE)
|
||||
|
||||
/*
|
||||
* We must check ti flags with interrupts (or at least preemption)
|
||||
* off because we must *never* return to userspace without
|
||||
* processing exit work that is enqueued if we're preempted here.
|
||||
* In particular, returning to userspace with any of the one-shot
|
||||
* flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is
|
||||
* very bad.
|
||||
*/
|
||||
TRACE_IRQS_OFF
|
||||
testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
|
||||
jnz int_ret_from_sys_call_irqs_off /* Go to the slow path */
|
||||
jnz 1f
|
||||
|
||||
RESTORE_C_REGS_EXCEPT_RCX_R11
|
||||
LOCKDEP_SYS_EXIT
|
||||
TRACE_IRQS_ON /* user mode is traced as IRQs on */
|
||||
movq RIP(%rsp), %rcx
|
||||
movq EFLAGS(%rsp), %r11
|
||||
RESTORE_C_REGS_EXCEPT_RCX_R11
|
||||
movq RSP(%rsp), %rsp
|
||||
/*
|
||||
* 64-bit SYSRET restores rip from rcx,
|
||||
* rflags from r11 (but RF and VM bits are forced to 0),
|
||||
* cs and ss are loaded from MSRs.
|
||||
* Restoration of rflags re-enables interrupts.
|
||||
*
|
||||
* NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss
|
||||
* descriptor is not reinitialized. This means that we should
|
||||
* avoid SYSRET with SS == NULL, which could happen if we schedule,
|
||||
* exit the kernel, and re-enter using an interrupt vector. (All
|
||||
* interrupt entries on x86_64 set SS to NULL.) We prevent that
|
||||
* from happening by reloading SS in __switch_to. (Actually
|
||||
* detecting the failure in 64-bit userspace is tricky but can be
|
||||
* done.)
|
||||
*/
|
||||
USERGS_SYSRET64
|
||||
|
||||
GLOBAL(int_ret_from_sys_call_irqs_off)
|
||||
1:
|
||||
/*
|
||||
* The fast path looked good when we started, but something changed
|
||||
* along the way and we need to switch to the slow path. Calling
|
||||
* raise(3) will trigger this, for example. IRQs are off.
|
||||
*/
|
||||
TRACE_IRQS_ON
|
||||
ENABLE_INTERRUPTS(CLBR_NONE)
|
||||
jmp int_ret_from_sys_call
|
||||
|
||||
/* Do syscall entry tracing */
|
||||
tracesys:
|
||||
movq %rsp, %rdi
|
||||
movl $AUDIT_ARCH_X86_64, %esi
|
||||
call syscall_trace_enter_phase1
|
||||
test %rax, %rax
|
||||
jnz tracesys_phase2 /* if needed, run the slow path */
|
||||
RESTORE_C_REGS_EXCEPT_RAX /* else restore clobbered regs */
|
||||
movq ORIG_RAX(%rsp), %rax
|
||||
jmp entry_SYSCALL_64_fastpath /* and return to the fast path */
|
||||
|
||||
tracesys_phase2:
|
||||
SAVE_EXTRA_REGS
|
||||
movq %rsp, %rdi
|
||||
movl $AUDIT_ARCH_X86_64, %esi
|
||||
movq %rax, %rdx
|
||||
call syscall_trace_enter_phase2
|
||||
|
||||
/*
|
||||
* Reload registers from stack in case ptrace changed them.
|
||||
* We don't reload %rax because syscall_trace_entry_phase2() returned
|
||||
* the value it wants us to use in the table lookup.
|
||||
*/
|
||||
RESTORE_C_REGS_EXCEPT_RAX
|
||||
RESTORE_EXTRA_REGS
|
||||
#if __SYSCALL_MASK == ~0
|
||||
cmpq $__NR_syscall_max, %rax
|
||||
#else
|
||||
andl $__SYSCALL_MASK, %eax
|
||||
cmpl $__NR_syscall_max, %eax
|
||||
#endif
|
||||
ja 1f /* return -ENOSYS (already in pt_regs->ax) */
|
||||
movq %r10, %rcx /* fixup for C */
|
||||
call *sys_call_table(, %rax, 8)
|
||||
movq %rax, RAX(%rsp)
|
||||
1:
|
||||
/* Use IRET because user could have changed pt_regs->foo */
|
||||
|
||||
/*
|
||||
* Syscall return path ending with IRET.
|
||||
* Has correct iret frame.
|
||||
*/
|
||||
GLOBAL(int_ret_from_sys_call)
|
||||
SAVE_EXTRA_REGS
|
||||
movq %rsp, %rdi
|
||||
call syscall_return_slowpath /* returns with IRQs disabled */
|
||||
jmp return_from_SYSCALL_64
|
||||
|
||||
entry_SYSCALL64_slow_path:
|
||||
/* IRQs are off. */
|
||||
SAVE_EXTRA_REGS
|
||||
movq %rsp, %rdi
|
||||
call do_syscall_64 /* returns with IRQs disabled */
|
||||
|
||||
return_from_SYSCALL_64:
|
||||
RESTORE_EXTRA_REGS
|
||||
TRACE_IRQS_IRETQ /* we're about to change IF */
|
||||
|
||||
|
@ -355,83 +324,45 @@ opportunistic_sysret_failed:
|
|||
jmp restore_c_regs_and_iret
|
||||
END(entry_SYSCALL_64)
|
||||
|
||||
|
||||
.macro FORK_LIKE func
|
||||
ENTRY(stub_\func)
|
||||
SAVE_EXTRA_REGS 8
|
||||
jmp sys_\func
|
||||
END(stub_\func)
|
||||
.endm
|
||||
|
||||
FORK_LIKE clone
|
||||
FORK_LIKE fork
|
||||
FORK_LIKE vfork
|
||||
|
||||
ENTRY(stub_execve)
|
||||
call sys_execve
|
||||
return_from_execve:
|
||||
testl %eax, %eax
|
||||
jz 1f
|
||||
/* exec failed, can use fast SYSRET code path in this case */
|
||||
ret
|
||||
1:
|
||||
/* must use IRET code path (pt_regs->cs may have changed) */
|
||||
addq $8, %rsp
|
||||
ZERO_EXTRA_REGS
|
||||
movq %rax, RAX(%rsp)
|
||||
jmp int_ret_from_sys_call
|
||||
END(stub_execve)
|
||||
/*
|
||||
* Remaining execve stubs are only 7 bytes long.
|
||||
* ENTRY() often aligns to 16 bytes, which in this case has no benefits.
|
||||
*/
|
||||
.align 8
|
||||
GLOBAL(stub_execveat)
|
||||
call sys_execveat
|
||||
jmp return_from_execve
|
||||
END(stub_execveat)
|
||||
|
||||
#if defined(CONFIG_X86_X32_ABI)
|
||||
.align 8
|
||||
GLOBAL(stub_x32_execve)
|
||||
call compat_sys_execve
|
||||
jmp return_from_execve
|
||||
END(stub_x32_execve)
|
||||
.align 8
|
||||
GLOBAL(stub_x32_execveat)
|
||||
call compat_sys_execveat
|
||||
jmp return_from_execve
|
||||
END(stub_x32_execveat)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* sigreturn is special because it needs to restore all registers on return.
|
||||
* This cannot be done with SYSRET, so use the IRET return path instead.
|
||||
*/
|
||||
ENTRY(stub_rt_sigreturn)
|
||||
ENTRY(stub_ptregs_64)
|
||||
/*
|
||||
* SAVE_EXTRA_REGS result is not normally needed:
|
||||
* sigreturn overwrites all pt_regs->GPREGS.
|
||||
* But sigreturn can fail (!), and there is no easy way to detect that.
|
||||
* To make sure RESTORE_EXTRA_REGS doesn't restore garbage on error,
|
||||
* we SAVE_EXTRA_REGS here.
|
||||
* Syscalls marked as needing ptregs land here.
|
||||
* If we are on the fast path, we need to save the extra regs,
|
||||
* which we achieve by trying again on the slow path. If we are on
|
||||
* the slow path, the extra regs are already saved.
|
||||
*
|
||||
* RAX stores a pointer to the C function implementing the syscall.
|
||||
* IRQs are on.
|
||||
*/
|
||||
SAVE_EXTRA_REGS 8
|
||||
call sys_rt_sigreturn
|
||||
return_from_stub:
|
||||
addq $8, %rsp
|
||||
RESTORE_EXTRA_REGS
|
||||
movq %rax, RAX(%rsp)
|
||||
jmp int_ret_from_sys_call
|
||||
END(stub_rt_sigreturn)
|
||||
cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
|
||||
jne 1f
|
||||
|
||||
#ifdef CONFIG_X86_X32_ABI
|
||||
ENTRY(stub_x32_rt_sigreturn)
|
||||
SAVE_EXTRA_REGS 8
|
||||
call sys32_x32_rt_sigreturn
|
||||
jmp return_from_stub
|
||||
END(stub_x32_rt_sigreturn)
|
||||
#endif
|
||||
/*
|
||||
* Called from fast path -- disable IRQs again, pop return address
|
||||
* and jump to slow path
|
||||
*/
|
||||
DISABLE_INTERRUPTS(CLBR_NONE)
|
||||
TRACE_IRQS_OFF
|
||||
popq %rax
|
||||
jmp entry_SYSCALL64_slow_path
|
||||
|
||||
1:
|
||||
/* Called from C */
|
||||
jmp *%rax /* called from C */
|
||||
END(stub_ptregs_64)
|
||||
|
||||
.macro ptregs_stub func
|
||||
ENTRY(ptregs_\func)
|
||||
leaq \func(%rip), %rax
|
||||
jmp stub_ptregs_64
|
||||
END(ptregs_\func)
|
||||
.endm
|
||||
|
||||
/* Instantiate ptregs_stub for each ptregs-using syscall */
|
||||
#define __SYSCALL_64_QUAL_(sym)
|
||||
#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
|
||||
#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
|
||||
#include <asm/syscalls_64.h>
|
||||
|
||||
/*
|
||||
* A newly forked process directly context switches into this address.
|
||||
|
@ -439,7 +370,6 @@ END(stub_x32_rt_sigreturn)
|
|||
* rdi: prev task we switched from
|
||||
*/
|
||||
ENTRY(ret_from_fork)
|
||||
|
||||
LOCK ; btr $TIF_FORK, TI_flags(%r8)
|
||||
|
||||
pushq $0x0002
|
||||
|
@ -447,28 +377,32 @@ ENTRY(ret_from_fork)
|
|||
|
||||
call schedule_tail /* rdi: 'prev' task parameter */
|
||||
|
||||
RESTORE_EXTRA_REGS
|
||||
|
||||
testb $3, CS(%rsp) /* from kernel_thread? */
|
||||
jnz 1f
|
||||
|
||||
/*
|
||||
* By the time we get here, we have no idea whether our pt_regs,
|
||||
* ti flags, and ti status came from the 64-bit SYSCALL fast path,
|
||||
* the slow path, or one of the 32-bit compat paths.
|
||||
* Use IRET code path to return, since it can safely handle
|
||||
* all of the above.
|
||||
* We came from kernel_thread. This code path is quite twisted, and
|
||||
* someone should clean it up.
|
||||
*
|
||||
* copy_thread_tls stashes the function pointer in RBX and the
|
||||
* parameter to be passed in RBP. The called function is permitted
|
||||
* to call do_execve and thereby jump to user mode.
|
||||
*/
|
||||
jnz int_ret_from_sys_call
|
||||
|
||||
/*
|
||||
* We came from kernel_thread
|
||||
* nb: we depend on RESTORE_EXTRA_REGS above
|
||||
*/
|
||||
movq %rbp, %rdi
|
||||
call *%rbx
|
||||
movq RBP(%rsp), %rdi
|
||||
call *RBX(%rsp)
|
||||
movl $0, RAX(%rsp)
|
||||
RESTORE_EXTRA_REGS
|
||||
jmp int_ret_from_sys_call
|
||||
|
||||
/*
|
||||
* Fall through as though we're exiting a syscall. This makes a
|
||||
* twisted sort of sense if we just called do_execve.
|
||||
*/
|
||||
|
||||
1:
|
||||
movq %rsp, %rdi
|
||||
call syscall_return_slowpath /* returns with IRQs disabled */
|
||||
TRACE_IRQS_ON /* user mode is traced as IRQS on */
|
||||
SWAPGS
|
||||
jmp restore_regs_and_iret
|
||||
END(ret_from_fork)
|
||||
|
||||
/*
|
||||
|
|
|
@ -19,12 +19,21 @@
|
|||
.section .entry.text, "ax"
|
||||
|
||||
/*
|
||||
* 32-bit SYSENTER instruction entry.
|
||||
* 32-bit SYSENTER entry.
|
||||
*
|
||||
* SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
|
||||
* IF and VM in rflags are cleared (IOW: interrupts are off).
|
||||
* 32-bit system calls through the vDSO's __kernel_vsyscall enter here
|
||||
* on 64-bit kernels running on Intel CPUs.
|
||||
*
|
||||
* The SYSENTER instruction, in principle, should *only* occur in the
|
||||
* vDSO. In practice, a small number of Android devices were shipped
|
||||
* with a copy of Bionic that inlined a SYSENTER instruction. This
|
||||
* never happened in any of Google's Bionic versions -- it only happened
|
||||
* in a narrow range of Intel-provided versions.
|
||||
*
|
||||
* SYSENTER loads SS, RSP, CS, and RIP from previously programmed MSRs.
|
||||
* IF and VM in RFLAGS are cleared (IOW: interrupts are off).
|
||||
* SYSENTER does not save anything on the stack,
|
||||
* and does not save old rip (!!!) and rflags.
|
||||
* and does not save old RIP (!!!), RSP, or RFLAGS.
|
||||
*
|
||||
* Arguments:
|
||||
* eax system call number
|
||||
|
@ -35,10 +44,6 @@
|
|||
* edi arg5
|
||||
* ebp user stack
|
||||
* 0(%ebp) arg6
|
||||
*
|
||||
* This is purely a fast path. For anything complicated we use the int 0x80
|
||||
* path below. We set up a complete hardware stack frame to share code
|
||||
* with the int 0x80 path.
|
||||
*/
|
||||
ENTRY(entry_SYSENTER_compat)
|
||||
/* Interrupts are off on entry. */
|
||||
|
@ -66,8 +71,6 @@ ENTRY(entry_SYSENTER_compat)
|
|||
*/
|
||||
pushfq /* pt_regs->flags (except IF = 0) */
|
||||
orl $X86_EFLAGS_IF, (%rsp) /* Fix saved flags */
|
||||
ASM_CLAC /* Clear AC after saving FLAGS */
|
||||
|
||||
pushq $__USER32_CS /* pt_regs->cs */
|
||||
xorq %r8,%r8
|
||||
pushq %r8 /* pt_regs->ip = 0 (placeholder) */
|
||||
|
@ -90,19 +93,25 @@ ENTRY(entry_SYSENTER_compat)
|
|||
cld
|
||||
|
||||
/*
|
||||
* Sysenter doesn't filter flags, so we need to clear NT
|
||||
* SYSENTER doesn't filter flags, so we need to clear NT and AC
|
||||
* ourselves. To save a few cycles, we can check whether
|
||||
* NT was set instead of doing an unconditional popfq.
|
||||
* either was set instead of doing an unconditional popfq.
|
||||
* This needs to happen before enabling interrupts so that
|
||||
* we don't get preempted with NT set.
|
||||
*
|
||||
* If TF is set, we will single-step all the way to here -- do_debug
|
||||
* will ignore all the traps. (Yes, this is slow, but so is
|
||||
* single-stepping in general. This allows us to avoid having
|
||||
* a more complicated code to handle the case where a user program
|
||||
* forces us to single-step through the SYSENTER entry code.)
|
||||
*
|
||||
* NB.: .Lsysenter_fix_flags is a label with the code under it moved
|
||||
* out-of-line as an optimization: NT is unlikely to be set in the
|
||||
* majority of the cases and instead of polluting the I$ unnecessarily,
|
||||
* we're keeping that code behind a branch which will predict as
|
||||
* not-taken and therefore its instructions won't be fetched.
|
||||
*/
|
||||
testl $X86_EFLAGS_NT, EFLAGS(%rsp)
|
||||
testl $X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, EFLAGS(%rsp)
|
||||
jnz .Lsysenter_fix_flags
|
||||
.Lsysenter_flags_fixed:
|
||||
|
||||
|
@ -123,20 +132,42 @@ ENTRY(entry_SYSENTER_compat)
|
|||
pushq $X86_EFLAGS_FIXED
|
||||
popfq
|
||||
jmp .Lsysenter_flags_fixed
|
||||
GLOBAL(__end_entry_SYSENTER_compat)
|
||||
ENDPROC(entry_SYSENTER_compat)
|
||||
|
||||
/*
|
||||
* 32-bit SYSCALL instruction entry.
|
||||
* 32-bit SYSCALL entry.
|
||||
*
|
||||
* 32-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
|
||||
* then loads new ss, cs, and rip from previously programmed MSRs.
|
||||
* rflags gets masked by a value from another MSR (so CLD and CLAC
|
||||
* are not needed). SYSCALL does not save anything on the stack
|
||||
* and does not change rsp.
|
||||
* 32-bit system calls through the vDSO's __kernel_vsyscall enter here
|
||||
* on 64-bit kernels running on AMD CPUs.
|
||||
*
|
||||
* Note: rflags saving+masking-with-MSR happens only in Long mode
|
||||
* The SYSCALL instruction, in principle, should *only* occur in the
|
||||
* vDSO. In practice, it appears that this really is the case.
|
||||
* As evidence:
|
||||
*
|
||||
* - The calling convention for SYSCALL has changed several times without
|
||||
* anyone noticing.
|
||||
*
|
||||
* - Prior to the in-kernel X86_BUG_SYSRET_SS_ATTRS fixup, anything
|
||||
* user task that did SYSCALL without immediately reloading SS
|
||||
* would randomly crash.
|
||||
*
|
||||
* - Most programmers do not directly target AMD CPUs, and the 32-bit
|
||||
* SYSCALL instruction does not exist on Intel CPUs. Even on AMD
|
||||
* CPUs, Linux disables the SYSCALL instruction on 32-bit kernels
|
||||
* because the SYSCALL instruction in legacy/native 32-bit mode (as
|
||||
* opposed to compat mode) is sufficiently poorly designed as to be
|
||||
* essentially unusable.
|
||||
*
|
||||
* 32-bit SYSCALL saves RIP to RCX, clears RFLAGS.RF, then saves
|
||||
* RFLAGS to R11, then loads new SS, CS, and RIP from previously
|
||||
* programmed MSRs. RFLAGS gets masked by a value from another MSR
|
||||
* (so CLD and CLAC are not needed). SYSCALL does not save anything on
|
||||
* the stack and does not change RSP.
|
||||
*
|
||||
* Note: RFLAGS saving+masking-with-MSR happens only in Long mode
|
||||
* (in legacy 32-bit mode, IF, RF and VM bits are cleared and that's it).
|
||||
* Don't get confused: rflags saving+masking depends on Long Mode Active bit
|
||||
* Don't get confused: RFLAGS saving+masking depends on Long Mode Active bit
|
||||
* (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
|
||||
* or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
|
||||
*
|
||||
|
@ -236,7 +267,21 @@ sysret32_from_system_call:
|
|||
END(entry_SYSCALL_compat)
|
||||
|
||||
/*
|
||||
* Emulated IA32 system calls via int 0x80.
|
||||
* 32-bit legacy system call entry.
|
||||
*
|
||||
* 32-bit x86 Linux system calls traditionally used the INT $0x80
|
||||
* instruction. INT $0x80 lands here.
|
||||
*
|
||||
* This entry point can be used by 32-bit and 64-bit programs to perform
|
||||
* 32-bit system calls. Instances of INT $0x80 can be found inline in
|
||||
* various programs and libraries. It is also used by the vDSO's
|
||||
* __kernel_vsyscall fallback for hardware that doesn't support a faster
|
||||
* entry method. Restarted 32-bit system calls also fall back to INT
|
||||
* $0x80 regardless of what instruction was originally used to do the
|
||||
* system call.
|
||||
*
|
||||
* This is considered a slow path. It is not used by most libc
|
||||
* implementations on modern hardware except during process startup.
|
||||
*
|
||||
* Arguments:
|
||||
* eax system call number
|
||||
|
@ -245,17 +290,8 @@ END(entry_SYSCALL_compat)
|
|||
* edx arg3
|
||||
* esi arg4
|
||||
* edi arg5
|
||||
* ebp arg6 (note: not saved in the stack frame, should not be touched)
|
||||
*
|
||||
* Notes:
|
||||
* Uses the same stack frame as the x86-64 version.
|
||||
* All registers except eax must be saved (but ptrace may violate that).
|
||||
* Arguments are zero extended. For system calls that want sign extension and
|
||||
* take long arguments a wrapper is needed. Most calls can just be called
|
||||
* directly.
|
||||
* Assumes it is only called from user space and entered with interrupts off.
|
||||
* ebp arg6
|
||||
*/
|
||||
|
||||
ENTRY(entry_INT80_compat)
|
||||
/*
|
||||
* Interrupts are off on entry.
|
||||
|
@ -300,7 +336,7 @@ ENTRY(entry_INT80_compat)
|
|||
TRACE_IRQS_OFF
|
||||
|
||||
movq %rsp, %rdi
|
||||
call do_syscall_32_irqs_off
|
||||
call do_int80_syscall_32
|
||||
.Lsyscall_32_done:
|
||||
|
||||
/* Go back to user mode. */
|
||||
|
|
|
@ -6,17 +6,11 @@
|
|||
#include <asm/asm-offsets.h>
|
||||
#include <asm/syscall.h>
|
||||
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
#define SYM(sym, compat) compat
|
||||
#else
|
||||
#define SYM(sym, compat) sym
|
||||
#endif
|
||||
|
||||
#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage long SYM(sym, compat)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
|
||||
#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
|
||||
#include <asm/syscalls_32.h>
|
||||
#undef __SYSCALL_I386
|
||||
|
||||
#define __SYSCALL_I386(nr, sym, compat) [nr] = SYM(sym, compat),
|
||||
#define __SYSCALL_I386(nr, sym, qual) [nr] = sym,
|
||||
|
||||
extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
|
||||
|
||||
|
|
|
@ -6,19 +6,14 @@
|
|||
#include <asm/asm-offsets.h>
|
||||
#include <asm/syscall.h>
|
||||
|
||||
#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
|
||||
#define __SYSCALL_64_QUAL_(sym) sym
|
||||
#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym
|
||||
|
||||
#ifdef CONFIG_X86_X32_ABI
|
||||
# define __SYSCALL_X32(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
|
||||
#else
|
||||
# define __SYSCALL_X32(nr, sym, compat) /* nothing */
|
||||
#endif
|
||||
|
||||
#define __SYSCALL_64(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
|
||||
#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
|
||||
#include <asm/syscalls_64.h>
|
||||
#undef __SYSCALL_64
|
||||
|
||||
#define __SYSCALL_64(nr, sym, compat) [nr] = sym,
|
||||
#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym),
|
||||
|
||||
extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
|
||||
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
12 common brk sys_brk
|
||||
13 64 rt_sigaction sys_rt_sigaction
|
||||
14 common rt_sigprocmask sys_rt_sigprocmask
|
||||
15 64 rt_sigreturn stub_rt_sigreturn
|
||||
15 64 rt_sigreturn sys_rt_sigreturn/ptregs
|
||||
16 64 ioctl sys_ioctl
|
||||
17 common pread64 sys_pread64
|
||||
18 common pwrite64 sys_pwrite64
|
||||
|
@ -62,10 +62,10 @@
|
|||
53 common socketpair sys_socketpair
|
||||
54 64 setsockopt sys_setsockopt
|
||||
55 64 getsockopt sys_getsockopt
|
||||
56 common clone stub_clone
|
||||
57 common fork stub_fork
|
||||
58 common vfork stub_vfork
|
||||
59 64 execve stub_execve
|
||||
56 common clone sys_clone/ptregs
|
||||
57 common fork sys_fork/ptregs
|
||||
58 common vfork sys_vfork/ptregs
|
||||
59 64 execve sys_execve/ptregs
|
||||
60 common exit sys_exit
|
||||
61 common wait4 sys_wait4
|
||||
62 common kill sys_kill
|
||||
|
@ -178,7 +178,7 @@
|
|||
169 common reboot sys_reboot
|
||||
170 common sethostname sys_sethostname
|
||||
171 common setdomainname sys_setdomainname
|
||||
172 common iopl sys_iopl
|
||||
172 common iopl sys_iopl/ptregs
|
||||
173 common ioperm sys_ioperm
|
||||
174 64 create_module
|
||||
175 common init_module sys_init_module
|
||||
|
@ -328,7 +328,7 @@
|
|||
319 common memfd_create sys_memfd_create
|
||||
320 common kexec_file_load sys_kexec_file_load
|
||||
321 common bpf sys_bpf
|
||||
322 64 execveat stub_execveat
|
||||
322 64 execveat sys_execveat/ptregs
|
||||
323 common userfaultfd sys_userfaultfd
|
||||
324 common membarrier sys_membarrier
|
||||
325 common mlock2 sys_mlock2
|
||||
|
@ -339,14 +339,14 @@
|
|||
# for native 64-bit operation.
|
||||
#
|
||||
512 x32 rt_sigaction compat_sys_rt_sigaction
|
||||
513 x32 rt_sigreturn stub_x32_rt_sigreturn
|
||||
513 x32 rt_sigreturn sys32_x32_rt_sigreturn
|
||||
514 x32 ioctl compat_sys_ioctl
|
||||
515 x32 readv compat_sys_readv
|
||||
516 x32 writev compat_sys_writev
|
||||
517 x32 recvfrom compat_sys_recvfrom
|
||||
518 x32 sendmsg compat_sys_sendmsg
|
||||
519 x32 recvmsg compat_sys_recvmsg
|
||||
520 x32 execve stub_x32_execve
|
||||
520 x32 execve compat_sys_execve/ptregs
|
||||
521 x32 ptrace compat_sys_ptrace
|
||||
522 x32 rt_sigpending compat_sys_rt_sigpending
|
||||
523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait
|
||||
|
@ -371,4 +371,4 @@
|
|||
542 x32 getsockopt compat_sys_getsockopt
|
||||
543 x32 io_setup compat_sys_io_setup
|
||||
544 x32 io_submit compat_sys_io_submit
|
||||
545 x32 execveat stub_x32_execveat
|
||||
545 x32 execveat compat_sys_execveat/ptregs
|
||||
|
|
|
@ -3,13 +3,63 @@
|
|||
in="$1"
|
||||
out="$2"
|
||||
|
||||
syscall_macro() {
|
||||
abi="$1"
|
||||
nr="$2"
|
||||
entry="$3"
|
||||
|
||||
# Entry can be either just a function name or "function/qualifier"
|
||||
real_entry="${entry%%/*}"
|
||||
qualifier="${entry:${#real_entry}}" # Strip the function name
|
||||
qualifier="${qualifier:1}" # Strip the slash, if any
|
||||
|
||||
echo "__SYSCALL_${abi}($nr, $real_entry, $qualifier)"
|
||||
}
|
||||
|
||||
emit() {
|
||||
abi="$1"
|
||||
nr="$2"
|
||||
entry="$3"
|
||||
compat="$4"
|
||||
|
||||
if [ "$abi" == "64" -a -n "$compat" ]; then
|
||||
echo "a compat entry for a 64-bit syscall makes no sense" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -z "$compat" ]; then
|
||||
if [ -n "$entry" ]; then
|
||||
syscall_macro "$abi" "$nr" "$entry"
|
||||
fi
|
||||
else
|
||||
echo "#ifdef CONFIG_X86_32"
|
||||
if [ -n "$entry" ]; then
|
||||
syscall_macro "$abi" "$nr" "$entry"
|
||||
fi
|
||||
echo "#else"
|
||||
syscall_macro "$abi" "$nr" "$compat"
|
||||
echo "#endif"
|
||||
fi
|
||||
}
|
||||
|
||||
grep '^[0-9]' "$in" | sort -n | (
|
||||
while read nr abi name entry compat; do
|
||||
abi=`echo "$abi" | tr '[a-z]' '[A-Z]'`
|
||||
if [ -n "$compat" ]; then
|
||||
echo "__SYSCALL_${abi}($nr, $entry, $compat)"
|
||||
elif [ -n "$entry" ]; then
|
||||
echo "__SYSCALL_${abi}($nr, $entry, $entry)"
|
||||
if [ "$abi" == "COMMON" -o "$abi" == "64" ]; then
|
||||
# COMMON is the same as 64, except that we don't expect X32
|
||||
# programs to use it. Our expectation has nothing to do with
|
||||
# any generated code, so treat them the same.
|
||||
emit 64 "$nr" "$entry" "$compat"
|
||||
elif [ "$abi" == "X32" ]; then
|
||||
# X32 is equivalent to 64 on an X32-compatible kernel.
|
||||
echo "#ifdef CONFIG_X86_X32_ABI"
|
||||
emit 64 "$nr" "$entry" "$compat"
|
||||
echo "#endif"
|
||||
elif [ "$abi" == "I386" ]; then
|
||||
emit "$abi" "$nr" "$entry" "$compat"
|
||||
else
|
||||
echo "Unknown abi $abi" >&2
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
) > "$out"
|
||||
|
|
|
@ -150,16 +150,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
|
|||
}
|
||||
fprintf(outfile, "\n};\n\n");
|
||||
|
||||
fprintf(outfile, "static struct page *pages[%lu];\n\n",
|
||||
mapping_size / 4096);
|
||||
|
||||
fprintf(outfile, "const struct vdso_image %s = {\n", name);
|
||||
fprintf(outfile, "\t.data = raw_data,\n");
|
||||
fprintf(outfile, "\t.size = %lu,\n", mapping_size);
|
||||
fprintf(outfile, "\t.text_mapping = {\n");
|
||||
fprintf(outfile, "\t\t.name = \"[vdso]\",\n");
|
||||
fprintf(outfile, "\t\t.pages = pages,\n");
|
||||
fprintf(outfile, "\t},\n");
|
||||
if (alt_sec) {
|
||||
fprintf(outfile, "\t.alt = %lu,\n",
|
||||
(unsigned long)GET_LE(&alt_sec->sh_offset));
|
||||
|
|
|
@ -11,7 +11,6 @@
|
|||
#include <linux/kernel.h>
|
||||
#include <linux/mm_types.h>
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/vdso.h>
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
*/
|
||||
|
||||
#include <asm/dwarf2.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/cpufeatures.h>
|
||||
#include <asm/alternative-asm.h>
|
||||
|
||||
/*
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include <asm/page.h>
|
||||
#include <asm/hpet.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/cpufeature.h>
|
||||
|
||||
#if defined(CONFIG_X86_64)
|
||||
unsigned int __read_mostly vdso64_enabled = 1;
|
||||
|
@ -27,13 +28,7 @@ unsigned int __read_mostly vdso64_enabled = 1;
|
|||
|
||||
void __init init_vdso_image(const struct vdso_image *image)
|
||||
{
|
||||
int i;
|
||||
int npages = (image->size) / PAGE_SIZE;
|
||||
|
||||
BUG_ON(image->size % PAGE_SIZE != 0);
|
||||
for (i = 0; i < npages; i++)
|
||||
image->text_mapping.pages[i] =
|
||||
virt_to_page(image->data + i*PAGE_SIZE);
|
||||
|
||||
apply_alternatives((struct alt_instr *)(image->data + image->alt),
|
||||
(struct alt_instr *)(image->data + image->alt +
|
||||
|
@ -90,18 +85,87 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
|
|||
#endif
|
||||
}
|
||||
|
||||
static int vdso_fault(const struct vm_special_mapping *sm,
|
||||
struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
{
|
||||
const struct vdso_image *image = vma->vm_mm->context.vdso_image;
|
||||
|
||||
if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size)
|
||||
return VM_FAULT_SIGBUS;
|
||||
|
||||
vmf->page = virt_to_page(image->data + (vmf->pgoff << PAGE_SHIFT));
|
||||
get_page(vmf->page);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct vm_special_mapping text_mapping = {
|
||||
.name = "[vdso]",
|
||||
.fault = vdso_fault,
|
||||
};
|
||||
|
||||
static int vvar_fault(const struct vm_special_mapping *sm,
|
||||
struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
{
|
||||
const struct vdso_image *image = vma->vm_mm->context.vdso_image;
|
||||
long sym_offset;
|
||||
int ret = -EFAULT;
|
||||
|
||||
if (!image)
|
||||
return VM_FAULT_SIGBUS;
|
||||
|
||||
sym_offset = (long)(vmf->pgoff << PAGE_SHIFT) +
|
||||
image->sym_vvar_start;
|
||||
|
||||
/*
|
||||
* Sanity check: a symbol offset of zero means that the page
|
||||
* does not exist for this vdso image, not that the page is at
|
||||
* offset zero relative to the text mapping. This should be
|
||||
* impossible here, because sym_offset should only be zero for
|
||||
* the page past the end of the vvar mapping.
|
||||
*/
|
||||
if (sym_offset == 0)
|
||||
return VM_FAULT_SIGBUS;
|
||||
|
||||
if (sym_offset == image->sym_vvar_page) {
|
||||
ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address,
|
||||
__pa_symbol(&__vvar_page) >> PAGE_SHIFT);
|
||||
} else if (sym_offset == image->sym_hpet_page) {
|
||||
#ifdef CONFIG_HPET_TIMER
|
||||
if (hpet_address && vclock_was_used(VCLOCK_HPET)) {
|
||||
ret = vm_insert_pfn_prot(
|
||||
vma,
|
||||
(unsigned long)vmf->virtual_address,
|
||||
hpet_address >> PAGE_SHIFT,
|
||||
pgprot_noncached(PAGE_READONLY));
|
||||
}
|
||||
#endif
|
||||
} else if (sym_offset == image->sym_pvclock_page) {
|
||||
struct pvclock_vsyscall_time_info *pvti =
|
||||
pvclock_pvti_cpu0_va();
|
||||
if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) {
|
||||
ret = vm_insert_pfn(
|
||||
vma,
|
||||
(unsigned long)vmf->virtual_address,
|
||||
__pa(pvti) >> PAGE_SHIFT);
|
||||
}
|
||||
}
|
||||
|
||||
if (ret == 0 || ret == -EBUSY)
|
||||
return VM_FAULT_NOPAGE;
|
||||
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
|
||||
static int map_vdso(const struct vdso_image *image, bool calculate_addr)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long addr, text_start;
|
||||
int ret = 0;
|
||||
static struct page *no_pages[] = {NULL};
|
||||
static struct vm_special_mapping vvar_mapping = {
|
||||
static const struct vm_special_mapping vvar_mapping = {
|
||||
.name = "[vvar]",
|
||||
.pages = no_pages,
|
||||
.fault = vvar_fault,
|
||||
};
|
||||
struct pvclock_vsyscall_time_info *pvti;
|
||||
|
||||
if (calculate_addr) {
|
||||
addr = vdso_addr(current->mm->start_stack,
|
||||
|
@ -121,6 +185,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
|
|||
|
||||
text_start = addr - image->sym_vvar_start;
|
||||
current->mm->context.vdso = (void __user *)text_start;
|
||||
current->mm->context.vdso_image = image;
|
||||
|
||||
/*
|
||||
* MAYWRITE to allow gdb to COW and set breakpoints
|
||||
|
@ -130,7 +195,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
|
|||
image->size,
|
||||
VM_READ|VM_EXEC|
|
||||
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
|
||||
&image->text_mapping);
|
||||
&text_mapping);
|
||||
|
||||
if (IS_ERR(vma)) {
|
||||
ret = PTR_ERR(vma);
|
||||
|
@ -140,7 +205,8 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
|
|||
vma = _install_special_mapping(mm,
|
||||
addr,
|
||||
-image->sym_vvar_start,
|
||||
VM_READ|VM_MAYREAD,
|
||||
VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
|
||||
VM_PFNMAP,
|
||||
&vvar_mapping);
|
||||
|
||||
if (IS_ERR(vma)) {
|
||||
|
@ -148,41 +214,6 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
|
|||
goto up_fail;
|
||||
}
|
||||
|
||||
if (image->sym_vvar_page)
|
||||
ret = remap_pfn_range(vma,
|
||||
text_start + image->sym_vvar_page,
|
||||
__pa_symbol(&__vvar_page) >> PAGE_SHIFT,
|
||||
PAGE_SIZE,
|
||||
PAGE_READONLY);
|
||||
|
||||
if (ret)
|
||||
goto up_fail;
|
||||
|
||||
#ifdef CONFIG_HPET_TIMER
|
||||
if (hpet_address && image->sym_hpet_page) {
|
||||
ret = io_remap_pfn_range(vma,
|
||||
text_start + image->sym_hpet_page,
|
||||
hpet_address >> PAGE_SHIFT,
|
||||
PAGE_SIZE,
|
||||
pgprot_noncached(PAGE_READONLY));
|
||||
|
||||
if (ret)
|
||||
goto up_fail;
|
||||
}
|
||||
#endif
|
||||
|
||||
pvti = pvclock_pvti_cpu0_va();
|
||||
if (pvti && image->sym_pvclock_page) {
|
||||
ret = remap_pfn_range(vma,
|
||||
text_start + image->sym_pvclock_page,
|
||||
__pa(pvti) >> PAGE_SHIFT,
|
||||
PAGE_SIZE,
|
||||
PAGE_READONLY);
|
||||
|
||||
if (ret)
|
||||
goto up_fail;
|
||||
}
|
||||
|
||||
up_fail:
|
||||
if (ret)
|
||||
current->mm->context.vdso = NULL;
|
||||
|
@ -254,7 +285,7 @@ static void vgetcpu_cpu_init(void *arg)
|
|||
#ifdef CONFIG_NUMA
|
||||
node = cpu_to_node(cpu);
|
||||
#endif
|
||||
if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
|
||||
if (static_cpu_has(X86_FEATURE_RDTSCP))
|
||||
write_rdtscp_aux((node << 12) | cpu);
|
||||
|
||||
/*
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
#include <asm/vgtod.h>
|
||||
#include <asm/vvar.h>
|
||||
|
||||
int vclocks_used __read_mostly;
|
||||
|
||||
DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
|
||||
|
||||
void update_vsyscall_tz(void)
|
||||
|
@ -26,12 +28,17 @@ void update_vsyscall_tz(void)
|
|||
|
||||
void update_vsyscall(struct timekeeper *tk)
|
||||
{
|
||||
int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
|
||||
struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
|
||||
|
||||
/* Mark the new vclock used. */
|
||||
BUILD_BUG_ON(VCLOCK_MAX >= 32);
|
||||
WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode));
|
||||
|
||||
gtod_write_begin(vdata);
|
||||
|
||||
/* copy vsyscall data */
|
||||
vdata->vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
|
||||
vdata->vclock_mode = vclock_mode;
|
||||
vdata->cycle_last = tk->tkr_mono.cycle_last;
|
||||
vdata->mask = tk->tkr_mono.mask;
|
||||
vdata->mult = tk->tkr_mono.mult;
|
||||
|
|
|
@ -151,12 +151,6 @@ static inline int alternatives_text_reserved(void *start, void *end)
|
|||
ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
|
||||
".popsection"
|
||||
|
||||
/*
|
||||
* This must be included *after* the definition of ALTERNATIVE due to
|
||||
* <asm/arch_hweight.h>
|
||||
*/
|
||||
#include <asm/cpufeature.h>
|
||||
|
||||
/*
|
||||
* Alternative instructions for different CPU types or capabilities.
|
||||
*
|
||||
|
|
|
@ -6,7 +6,6 @@
|
|||
|
||||
#include <asm/alternative.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/apicdef.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <asm/fixmap.h>
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
#ifndef _ASM_X86_HWEIGHT_H
|
||||
#define _ASM_X86_HWEIGHT_H
|
||||
|
||||
#include <asm/cpufeatures.h>
|
||||
|
||||
#ifdef CONFIG_64BIT
|
||||
/* popcnt %edi, %eax -- redundant REX prefix for alignment */
|
||||
#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7"
|
||||
|
|
|
@ -91,7 +91,7 @@ set_bit(long nr, volatile unsigned long *addr)
|
|||
* If it's called on the same region of memory simultaneously, the effect
|
||||
* may be that only one operation succeeds.
|
||||
*/
|
||||
static inline void __set_bit(long nr, volatile unsigned long *addr)
|
||||
static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
|
||||
}
|
||||
|
@ -128,13 +128,13 @@ clear_bit(long nr, volatile unsigned long *addr)
|
|||
* clear_bit() is atomic and implies release semantics before the memory
|
||||
* operation. It can be used for an unlock.
|
||||
*/
|
||||
static inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
|
||||
static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
barrier();
|
||||
clear_bit(nr, addr);
|
||||
}
|
||||
|
||||
static inline void __clear_bit(long nr, volatile unsigned long *addr)
|
||||
static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
|
||||
}
|
||||
|
@ -151,7 +151,7 @@ static inline void __clear_bit(long nr, volatile unsigned long *addr)
|
|||
* No memory barrier is required here, because x86 cannot reorder stores past
|
||||
* older loads. Same principle as spin_unlock.
|
||||
*/
|
||||
static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
|
||||
static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
barrier();
|
||||
__clear_bit(nr, addr);
|
||||
|
@ -166,7 +166,7 @@ static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
|
|||
* If it's called on the same region of memory simultaneously, the effect
|
||||
* may be that only one operation succeeds.
|
||||
*/
|
||||
static inline void __change_bit(long nr, volatile unsigned long *addr)
|
||||
static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
|
||||
}
|
||||
|
@ -180,7 +180,7 @@ static inline void __change_bit(long nr, volatile unsigned long *addr)
|
|||
* Note that @nr may be almost arbitrarily large; this function is not
|
||||
* restricted to acting on a single-word quantity.
|
||||
*/
|
||||
static inline void change_bit(long nr, volatile unsigned long *addr)
|
||||
static __always_inline void change_bit(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
if (IS_IMMEDIATE(nr)) {
|
||||
asm volatile(LOCK_PREFIX "xorb %1,%0"
|
||||
|
@ -201,7 +201,7 @@ static inline void change_bit(long nr, volatile unsigned long *addr)
|
|||
* This operation is atomic and cannot be reordered.
|
||||
* It also implies a memory barrier.
|
||||
*/
|
||||
static inline int test_and_set_bit(long nr, volatile unsigned long *addr)
|
||||
static __always_inline int test_and_set_bit(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c");
|
||||
}
|
||||
|
@ -228,7 +228,7 @@ test_and_set_bit_lock(long nr, volatile unsigned long *addr)
|
|||
* If two examples of this operation race, one can appear to succeed
|
||||
* but actually fail. You must protect multiple accesses with a lock.
|
||||
*/
|
||||
static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
|
||||
static __always_inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
int oldbit;
|
||||
|
||||
|
@ -247,7 +247,7 @@ static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
|
|||
* This operation is atomic and cannot be reordered.
|
||||
* It also implies a memory barrier.
|
||||
*/
|
||||
static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
|
||||
static __always_inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c");
|
||||
}
|
||||
|
@ -268,7 +268,7 @@ static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
|
|||
* accessed from a hypervisor on the same CPU if running in a VM: don't change
|
||||
* this without also updating arch/x86/kernel/kvm.c
|
||||
*/
|
||||
static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
|
||||
static __always_inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
int oldbit;
|
||||
|
||||
|
@ -280,7 +280,7 @@ static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
|
|||
}
|
||||
|
||||
/* WARNING: non atomic and it can be reordered! */
|
||||
static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
|
||||
static __always_inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
int oldbit;
|
||||
|
||||
|
@ -300,7 +300,7 @@ static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
|
|||
* This operation is atomic and cannot be reordered.
|
||||
* It also implies a memory barrier.
|
||||
*/
|
||||
static inline int test_and_change_bit(long nr, volatile unsigned long *addr)
|
||||
static __always_inline int test_and_change_bit(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c");
|
||||
}
|
||||
|
@ -311,7 +311,7 @@ static __always_inline int constant_test_bit(long nr, const volatile unsigned lo
|
|||
(addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
|
||||
}
|
||||
|
||||
static inline int variable_test_bit(long nr, volatile const unsigned long *addr)
|
||||
static __always_inline int variable_test_bit(long nr, volatile const unsigned long *addr)
|
||||
{
|
||||
int oldbit;
|
||||
|
||||
|
@ -343,7 +343,7 @@ static int test_bit(int nr, const volatile unsigned long *addr);
|
|||
*
|
||||
* Undefined if no bit exists, so code should check against 0 first.
|
||||
*/
|
||||
static inline unsigned long __ffs(unsigned long word)
|
||||
static __always_inline unsigned long __ffs(unsigned long word)
|
||||
{
|
||||
asm("rep; bsf %1,%0"
|
||||
: "=r" (word)
|
||||
|
@ -357,7 +357,7 @@ static inline unsigned long __ffs(unsigned long word)
|
|||
*
|
||||
* Undefined if no zero exists, so code should check against ~0UL first.
|
||||
*/
|
||||
static inline unsigned long ffz(unsigned long word)
|
||||
static __always_inline unsigned long ffz(unsigned long word)
|
||||
{
|
||||
asm("rep; bsf %1,%0"
|
||||
: "=r" (word)
|
||||
|
@ -371,7 +371,7 @@ static inline unsigned long ffz(unsigned long word)
|
|||
*
|
||||
* Undefined if no set bit exists, so code should check against 0 first.
|
||||
*/
|
||||
static inline unsigned long __fls(unsigned long word)
|
||||
static __always_inline unsigned long __fls(unsigned long word)
|
||||
{
|
||||
asm("bsr %1,%0"
|
||||
: "=r" (word)
|
||||
|
@ -393,7 +393,7 @@ static inline unsigned long __fls(unsigned long word)
|
|||
* set bit if value is nonzero. The first (least significant) bit
|
||||
* is at position 1.
|
||||
*/
|
||||
static inline int ffs(int x)
|
||||
static __always_inline int ffs(int x)
|
||||
{
|
||||
int r;
|
||||
|
||||
|
@ -434,7 +434,7 @@ static inline int ffs(int x)
|
|||
* set bit if value is nonzero. The last (most significant) bit is
|
||||
* at position 32.
|
||||
*/
|
||||
static inline int fls(int x)
|
||||
static __always_inline int fls(int x)
|
||||
{
|
||||
int r;
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
|
||||
#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */
|
||||
#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */
|
||||
#define VCLOCK_MAX 3
|
||||
|
||||
struct arch_clocksource_data {
|
||||
int vclock_mode;
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
#define ASM_X86_CMPXCHG_H
|
||||
|
||||
#include <linux/compiler.h>
|
||||
#include <asm/cpufeatures.h>
|
||||
#include <asm/alternative.h> /* Provides LOCK_PREFIX */
|
||||
|
||||
/*
|
||||
|
|
|
@ -1,288 +1,7 @@
|
|||
/*
|
||||
* Defines x86 CPU feature bits
|
||||
*/
|
||||
#ifndef _ASM_X86_CPUFEATURE_H
|
||||
#define _ASM_X86_CPUFEATURE_H
|
||||
|
||||
#ifndef _ASM_X86_REQUIRED_FEATURES_H
|
||||
#include <asm/required-features.h>
|
||||
#endif
|
||||
|
||||
#ifndef _ASM_X86_DISABLED_FEATURES_H
|
||||
#include <asm/disabled-features.h>
|
||||
#endif
|
||||
|
||||
#define NCAPINTS 16 /* N 32-bit words worth of info */
|
||||
#define NBUGINTS 1 /* N 32-bit bug flags */
|
||||
|
||||
/*
|
||||
* Note: If the comment begins with a quoted string, that string is used
|
||||
* in /proc/cpuinfo instead of the macro name. If the string is "",
|
||||
* this feature bit is not displayed in /proc/cpuinfo at all.
|
||||
*/
|
||||
|
||||
/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
|
||||
#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
|
||||
#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
|
||||
#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
|
||||
#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
|
||||
#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
|
||||
#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
|
||||
#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
|
||||
#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
|
||||
#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
|
||||
#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
|
||||
#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
|
||||
#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
|
||||
#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
|
||||
#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
|
||||
#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */
|
||||
/* (plus FCMOVcc, FCOMI with FPU) */
|
||||
#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
|
||||
#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
|
||||
#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
|
||||
#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
|
||||
#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
|
||||
#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
|
||||
#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
|
||||
#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
|
||||
#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
|
||||
#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
|
||||
#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
|
||||
#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
|
||||
#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
|
||||
#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
|
||||
#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
|
||||
|
||||
/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
|
||||
/* Don't duplicate feature flags which are redundant with Intel! */
|
||||
#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
|
||||
#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */
|
||||
#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
|
||||
#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
|
||||
#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
|
||||
#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
|
||||
#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
|
||||
#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */
|
||||
#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */
|
||||
#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */
|
||||
|
||||
/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
|
||||
#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
|
||||
#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
|
||||
#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */
|
||||
|
||||
/* Other features, Linux-defined mapping, word 3 */
|
||||
/* This range is used for feature bits which conflict or are synthesized */
|
||||
#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
|
||||
#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
|
||||
#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
|
||||
#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
|
||||
/* cpu types for specific tunings: */
|
||||
#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
|
||||
#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
|
||||
#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
|
||||
#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
|
||||
#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
|
||||
#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */
|
||||
/* free, was #define X86_FEATURE_FXSAVE_LEAK ( 3*32+10) * "" FXSAVE leaks FOP/FIP/FOP */
|
||||
#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
|
||||
#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
|
||||
#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
|
||||
#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */
|
||||
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */
|
||||
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */
|
||||
#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
|
||||
#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
|
||||
/* free, was #define X86_FEATURE_11AP ( 3*32+19) * "" Bad local APIC aka 11AP */
|
||||
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
|
||||
#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
|
||||
#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */
|
||||
#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
|
||||
#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
|
||||
/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */
|
||||
#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
|
||||
#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
|
||||
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
|
||||
#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
|
||||
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
|
||||
|
||||
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
|
||||
#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
|
||||
#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
|
||||
#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
|
||||
#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */
|
||||
#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
|
||||
#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
|
||||
#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */
|
||||
#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
|
||||
#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
|
||||
#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
|
||||
#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
|
||||
#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
|
||||
#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
|
||||
#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */
|
||||
#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
|
||||
#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */
|
||||
#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
|
||||
#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
|
||||
#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
|
||||
#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
|
||||
#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */
|
||||
#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
|
||||
#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
|
||||
#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */
|
||||
#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
|
||||
#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
|
||||
#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */
|
||||
#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
|
||||
#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */
|
||||
#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */
|
||||
#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */
|
||||
|
||||
/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
|
||||
#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
|
||||
#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
|
||||
#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
|
||||
#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
|
||||
#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
|
||||
#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
|
||||
#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
|
||||
#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
|
||||
#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
|
||||
#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */
|
||||
|
||||
/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
|
||||
#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
|
||||
#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */
|
||||
#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */
|
||||
#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
|
||||
#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
|
||||
#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
|
||||
#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
|
||||
#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
|
||||
#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
|
||||
#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
|
||||
#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
|
||||
#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
|
||||
#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
|
||||
#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
|
||||
#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
|
||||
#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
|
||||
#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */
|
||||
#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
|
||||
#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */
|
||||
#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */
|
||||
#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
|
||||
#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
|
||||
#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */
|
||||
#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */
|
||||
#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
|
||||
|
||||
/*
|
||||
* Auxiliary flags: Linux defined - For features scattered in various
|
||||
* CPUID levels like 0x6, 0xA etc, word 7.
|
||||
*
|
||||
* Reuse free bits when adding new feature flags!
|
||||
*/
|
||||
|
||||
#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
|
||||
#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
|
||||
|
||||
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
|
||||
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
|
||||
|
||||
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
|
||||
|
||||
/* Virtualization flags: Linux defined, word 8 */
|
||||
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
|
||||
#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
|
||||
#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
|
||||
#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
|
||||
#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
|
||||
|
||||
#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */
|
||||
#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
|
||||
|
||||
|
||||
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
|
||||
#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
|
||||
#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */
|
||||
#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
|
||||
#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
|
||||
#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
|
||||
#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
|
||||
#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
|
||||
#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
|
||||
#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
|
||||
#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
|
||||
#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
|
||||
#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
|
||||
#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
|
||||
#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
|
||||
#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
|
||||
#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
|
||||
#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */
|
||||
#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
|
||||
#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
|
||||
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
|
||||
#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
|
||||
#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
|
||||
#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
|
||||
|
||||
/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
|
||||
#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */
|
||||
#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */
|
||||
#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */
|
||||
#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */
|
||||
|
||||
/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
|
||||
#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
|
||||
|
||||
/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
|
||||
#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
|
||||
|
||||
/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
|
||||
#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
|
||||
|
||||
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
|
||||
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
|
||||
#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
|
||||
#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
|
||||
#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
|
||||
#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
|
||||
#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
|
||||
#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
|
||||
#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
|
||||
#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
|
||||
#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
|
||||
|
||||
/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
|
||||
#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
|
||||
#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
|
||||
#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
|
||||
#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
|
||||
#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
|
||||
#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
|
||||
#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
|
||||
#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
|
||||
#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
|
||||
#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
|
||||
|
||||
/*
|
||||
* BUG word(s)
|
||||
*/
|
||||
#define X86_BUG(x) (NCAPINTS*32 + (x))
|
||||
|
||||
#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
|
||||
#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
|
||||
#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
|
||||
#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
|
||||
#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
|
||||
#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
|
||||
#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
|
||||
#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
|
||||
#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
|
||||
#include <asm/processor.h>
|
||||
|
||||
#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
|
||||
|
||||
|
@ -369,8 +88,7 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
|
|||
* is not relevant.
|
||||
*/
|
||||
#define cpu_feature_enabled(bit) \
|
||||
(__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : \
|
||||
cpu_has(&boot_cpu_data, bit))
|
||||
(__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : static_cpu_has(bit))
|
||||
|
||||
#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)
|
||||
|
||||
|
@ -406,106 +124,19 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
|
|||
#define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE)
|
||||
#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
|
||||
/*
|
||||
* Do not add any more of those clumsy macros - use static_cpu_has_safe() for
|
||||
* Do not add any more of those clumsy macros - use static_cpu_has() for
|
||||
* fast paths and boot_cpu_has() otherwise!
|
||||
*/
|
||||
|
||||
#if __GNUC__ >= 4 && defined(CONFIG_X86_FAST_FEATURE_TESTS)
|
||||
extern void warn_pre_alternatives(void);
|
||||
extern bool __static_cpu_has_safe(u16 bit);
|
||||
|
||||
#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
|
||||
/*
|
||||
* Static testing of CPU features. Used the same as boot_cpu_has().
|
||||
* These are only valid after alternatives have run, but will statically
|
||||
* patch the target code for additional performance.
|
||||
* These will statically patch the target code for additional
|
||||
* performance.
|
||||
*/
|
||||
static __always_inline __pure bool __static_cpu_has(u16 bit)
|
||||
static __always_inline __pure bool _static_cpu_has(u16 bit)
|
||||
{
|
||||
#ifdef CC_HAVE_ASM_GOTO
|
||||
|
||||
#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
|
||||
|
||||
/*
|
||||
* Catch too early usage of this before alternatives
|
||||
* have run.
|
||||
*/
|
||||
asm_volatile_goto("1: jmp %l[t_warn]\n"
|
||||
"2:\n"
|
||||
".section .altinstructions,\"a\"\n"
|
||||
" .long 1b - .\n"
|
||||
" .long 0\n" /* no replacement */
|
||||
" .word %P0\n" /* 1: do replace */
|
||||
" .byte 2b - 1b\n" /* source len */
|
||||
" .byte 0\n" /* replacement len */
|
||||
" .byte 0\n" /* pad len */
|
||||
".previous\n"
|
||||
/* skipping size check since replacement size = 0 */
|
||||
: : "i" (X86_FEATURE_ALWAYS) : : t_warn);
|
||||
|
||||
#endif
|
||||
|
||||
asm_volatile_goto("1: jmp %l[t_no]\n"
|
||||
"2:\n"
|
||||
".section .altinstructions,\"a\"\n"
|
||||
" .long 1b - .\n"
|
||||
" .long 0\n" /* no replacement */
|
||||
" .word %P0\n" /* feature bit */
|
||||
" .byte 2b - 1b\n" /* source len */
|
||||
" .byte 0\n" /* replacement len */
|
||||
" .byte 0\n" /* pad len */
|
||||
".previous\n"
|
||||
/* skipping size check since replacement size = 0 */
|
||||
: : "i" (bit) : : t_no);
|
||||
return true;
|
||||
t_no:
|
||||
return false;
|
||||
|
||||
#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
|
||||
t_warn:
|
||||
warn_pre_alternatives();
|
||||
return false;
|
||||
#endif
|
||||
|
||||
#else /* CC_HAVE_ASM_GOTO */
|
||||
|
||||
u8 flag;
|
||||
/* Open-coded due to __stringify() in ALTERNATIVE() */
|
||||
asm volatile("1: movb $0,%0\n"
|
||||
"2:\n"
|
||||
".section .altinstructions,\"a\"\n"
|
||||
" .long 1b - .\n"
|
||||
" .long 3f - .\n"
|
||||
" .word %P1\n" /* feature bit */
|
||||
" .byte 2b - 1b\n" /* source len */
|
||||
" .byte 4f - 3f\n" /* replacement len */
|
||||
" .byte 0\n" /* pad len */
|
||||
".previous\n"
|
||||
".section .discard,\"aw\",@progbits\n"
|
||||
" .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
|
||||
".previous\n"
|
||||
".section .altinstr_replacement,\"ax\"\n"
|
||||
"3: movb $1,%0\n"
|
||||
"4:\n"
|
||||
".previous\n"
|
||||
: "=qm" (flag) : "i" (bit));
|
||||
return flag;
|
||||
|
||||
#endif /* CC_HAVE_ASM_GOTO */
|
||||
}
|
||||
|
||||
#define static_cpu_has(bit) \
|
||||
( \
|
||||
__builtin_constant_p(boot_cpu_has(bit)) ? \
|
||||
boot_cpu_has(bit) : \
|
||||
__builtin_constant_p(bit) ? \
|
||||
__static_cpu_has(bit) : \
|
||||
boot_cpu_has(bit) \
|
||||
)
|
||||
|
||||
static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
|
||||
{
|
||||
#ifdef CC_HAVE_ASM_GOTO
|
||||
asm_volatile_goto("1: jmp %l[t_dynamic]\n"
|
||||
asm_volatile_goto("1: jmp 6f\n"
|
||||
"2:\n"
|
||||
".skip -(((5f-4f) - (2b-1b)) > 0) * "
|
||||
"((5f-4f) - (2b-1b)),0x90\n"
|
||||
|
@ -530,66 +161,34 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
|
|||
" .byte 0\n" /* repl len */
|
||||
" .byte 0\n" /* pad len */
|
||||
".previous\n"
|
||||
: : "i" (bit), "i" (X86_FEATURE_ALWAYS)
|
||||
: : t_dynamic, t_no);
|
||||
".section .altinstr_aux,\"ax\"\n"
|
||||
"6:\n"
|
||||
" testb %[bitnum],%[cap_byte]\n"
|
||||
" jnz %l[t_yes]\n"
|
||||
" jmp %l[t_no]\n"
|
||||
".previous\n"
|
||||
: : "i" (bit), "i" (X86_FEATURE_ALWAYS),
|
||||
[bitnum] "i" (1 << (bit & 7)),
|
||||
[cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
|
||||
: : t_yes, t_no);
|
||||
t_yes:
|
||||
return true;
|
||||
t_no:
|
||||
return false;
|
||||
t_dynamic:
|
||||
return __static_cpu_has_safe(bit);
|
||||
#else
|
||||
u8 flag;
|
||||
/* Open-coded due to __stringify() in ALTERNATIVE() */
|
||||
asm volatile("1: movb $2,%0\n"
|
||||
"2:\n"
|
||||
".section .altinstructions,\"a\"\n"
|
||||
" .long 1b - .\n" /* src offset */
|
||||
" .long 3f - .\n" /* repl offset */
|
||||
" .word %P2\n" /* always replace */
|
||||
" .byte 2b - 1b\n" /* source len */
|
||||
" .byte 4f - 3f\n" /* replacement len */
|
||||
" .byte 0\n" /* pad len */
|
||||
".previous\n"
|
||||
".section .discard,\"aw\",@progbits\n"
|
||||
" .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
|
||||
".previous\n"
|
||||
".section .altinstr_replacement,\"ax\"\n"
|
||||
"3: movb $0,%0\n"
|
||||
"4:\n"
|
||||
".previous\n"
|
||||
".section .altinstructions,\"a\"\n"
|
||||
" .long 1b - .\n" /* src offset */
|
||||
" .long 5f - .\n" /* repl offset */
|
||||
" .word %P1\n" /* feature bit */
|
||||
" .byte 4b - 3b\n" /* src len */
|
||||
" .byte 6f - 5f\n" /* repl len */
|
||||
" .byte 0\n" /* pad len */
|
||||
".previous\n"
|
||||
".section .discard,\"aw\",@progbits\n"
|
||||
" .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */
|
||||
".previous\n"
|
||||
".section .altinstr_replacement,\"ax\"\n"
|
||||
"5: movb $1,%0\n"
|
||||
"6:\n"
|
||||
".previous\n"
|
||||
: "=qm" (flag)
|
||||
: "i" (bit), "i" (X86_FEATURE_ALWAYS));
|
||||
return (flag == 2 ? __static_cpu_has_safe(bit) : flag);
|
||||
#endif /* CC_HAVE_ASM_GOTO */
|
||||
}
|
||||
|
||||
#define static_cpu_has_safe(bit) \
|
||||
#define static_cpu_has(bit) \
|
||||
( \
|
||||
__builtin_constant_p(boot_cpu_has(bit)) ? \
|
||||
boot_cpu_has(bit) : \
|
||||
_static_cpu_has_safe(bit) \
|
||||
_static_cpu_has(bit) \
|
||||
)
|
||||
#else
|
||||
/*
|
||||
* gcc 3.x is too stupid to do the static test; fall back to dynamic.
|
||||
* Fall back to dynamic for gcc versions which don't support asm goto. Should be
|
||||
* a minority now anyway.
|
||||
*/
|
||||
#define static_cpu_has(bit) boot_cpu_has(bit)
|
||||
#define static_cpu_has_safe(bit) boot_cpu_has(bit)
|
||||
#endif
|
||||
|
||||
#define cpu_has_bug(c, bit) cpu_has(c, (bit))
|
||||
|
@ -597,7 +196,6 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
|
|||
#define clear_cpu_bug(c, bit) clear_cpu_cap(c, (bit))
|
||||
|
||||
#define static_cpu_has_bug(bit) static_cpu_has((bit))
|
||||
#define static_cpu_has_bug_safe(bit) static_cpu_has_safe((bit))
|
||||
#define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit))
|
||||
|
||||
#define MAX_CPU_FEATURES (NCAPINTS * 32)
|
||||
|
|
|
@ -0,0 +1,300 @@
|
|||
#ifndef _ASM_X86_CPUFEATURES_H
|
||||
#define _ASM_X86_CPUFEATURES_H
|
||||
|
||||
#ifndef _ASM_X86_REQUIRED_FEATURES_H
|
||||
#include <asm/required-features.h>
|
||||
#endif
|
||||
|
||||
#ifndef _ASM_X86_DISABLED_FEATURES_H
|
||||
#include <asm/disabled-features.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Defines x86 CPU feature bits
|
||||
*/
|
||||
#define NCAPINTS 16 /* N 32-bit words worth of info */
|
||||
#define NBUGINTS 1 /* N 32-bit bug flags */
|
||||
|
||||
/*
|
||||
* Note: If the comment begins with a quoted string, that string is used
|
||||
* in /proc/cpuinfo instead of the macro name. If the string is "",
|
||||
* this feature bit is not displayed in /proc/cpuinfo at all.
|
||||
*/
|
||||
|
||||
/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
|
||||
#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
|
||||
#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
|
||||
#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
|
||||
#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
|
||||
#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
|
||||
#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
|
||||
#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
|
||||
#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
|
||||
#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
|
||||
#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
|
||||
#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
|
||||
#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
|
||||
#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
|
||||
#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
|
||||
#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */
|
||||
/* (plus FCMOVcc, FCOMI with FPU) */
|
||||
#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
|
||||
#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
|
||||
#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
|
||||
#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
|
||||
#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
|
||||
#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
|
||||
#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
|
||||
#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
|
||||
#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
|
||||
#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
|
||||
#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
|
||||
#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
|
||||
#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
|
||||
#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
|
||||
#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
|
||||
|
||||
/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
|
||||
/* Don't duplicate feature flags which are redundant with Intel! */
|
||||
#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
|
||||
#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */
|
||||
#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
|
||||
#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
|
||||
#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
|
||||
#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
|
||||
#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
|
||||
#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */
|
||||
#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */
|
||||
#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */
|
||||
|
||||
/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
|
||||
#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
|
||||
#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
|
||||
#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */
|
||||
|
||||
/* Other features, Linux-defined mapping, word 3 */
|
||||
/* This range is used for feature bits which conflict or are synthesized */
|
||||
#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
|
||||
#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
|
||||
#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
|
||||
#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
|
||||
/* cpu types for specific tunings: */
|
||||
#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
|
||||
#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
|
||||
#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
|
||||
#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
|
||||
#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
|
||||
#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */
|
||||
/* free, was #define X86_FEATURE_FXSAVE_LEAK ( 3*32+10) * "" FXSAVE leaks FOP/FIP/FOP */
|
||||
#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
|
||||
#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
|
||||
#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
|
||||
#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */
|
||||
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */
|
||||
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */
|
||||
#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
|
||||
#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
|
||||
/* free, was #define X86_FEATURE_11AP ( 3*32+19) * "" Bad local APIC aka 11AP */
|
||||
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
|
||||
#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
|
||||
#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */
|
||||
#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
|
||||
#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
|
||||
/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */
|
||||
#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
|
||||
#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
|
||||
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
|
||||
#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
|
||||
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
|
||||
#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */
|
||||
|
||||
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
|
||||
#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
|
||||
#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
|
||||
#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
|
||||
#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */
|
||||
#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
|
||||
#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
|
||||
#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */
|
||||
#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
|
||||
#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
|
||||
#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
|
||||
#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
|
||||
#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
|
||||
#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
|
||||
#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */
|
||||
#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
|
||||
#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */
|
||||
#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
|
||||
#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
|
||||
#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
|
||||
#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
|
||||
#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */
|
||||
#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
|
||||
#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
|
||||
#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */
|
||||
#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
|
||||
#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
|
||||
#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */
|
||||
#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
|
||||
#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */
|
||||
#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */
|
||||
#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */
|
||||
|
||||
/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
|
||||
#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
|
||||
#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
|
||||
#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
|
||||
#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
|
||||
#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
|
||||
#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
|
||||
#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
|
||||
#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
|
||||
#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
|
||||
#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */
|
||||
|
||||
/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
|
||||
#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
|
||||
#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */
|
||||
#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */
|
||||
#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
|
||||
#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
|
||||
#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
|
||||
#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
|
||||
#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
|
||||
#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
|
||||
#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
|
||||
#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
|
||||
#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
|
||||
#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
|
||||
#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
|
||||
#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
|
||||
#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
|
||||
#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */
|
||||
#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
|
||||
#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */
|
||||
#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */
|
||||
#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
|
||||
#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
|
||||
#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */
|
||||
#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */
|
||||
#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
|
||||
|
||||
/*
|
||||
* Auxiliary flags: Linux defined - For features scattered in various
|
||||
* CPUID levels like 0x6, 0xA etc, word 7.
|
||||
*
|
||||
* Reuse free bits when adding new feature flags!
|
||||
*/
|
||||
|
||||
#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
|
||||
#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
|
||||
|
||||
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
|
||||
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
|
||||
|
||||
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
|
||||
|
||||
/* Virtualization flags: Linux defined, word 8 */
|
||||
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
|
||||
#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
|
||||
#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
|
||||
#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
|
||||
#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
|
||||
|
||||
#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */
|
||||
#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
|
||||
|
||||
|
||||
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
|
||||
#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
|
||||
#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */
|
||||
#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
|
||||
#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
|
||||
#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
|
||||
#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
|
||||
#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
|
||||
#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
|
||||
#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
|
||||
#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
|
||||
#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
|
||||
#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
|
||||
#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
|
||||
#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
|
||||
#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
|
||||
#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
|
||||
#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
|
||||
#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */
|
||||
#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
|
||||
#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
|
||||
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
|
||||
#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
|
||||
#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
|
||||
#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
|
||||
#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
|
||||
#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
|
||||
|
||||
/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
|
||||
#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */
|
||||
#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */
|
||||
#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */
|
||||
#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */
|
||||
|
||||
/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
|
||||
#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
|
||||
|
||||
/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
|
||||
#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
|
||||
|
||||
/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
|
||||
#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
|
||||
|
||||
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
|
||||
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
|
||||
#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
|
||||
#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
|
||||
#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
|
||||
#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
|
||||
#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
|
||||
#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
|
||||
#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
|
||||
#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
|
||||
#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
|
||||
|
||||
/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
|
||||
#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
|
||||
#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
|
||||
#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
|
||||
#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
|
||||
#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
|
||||
#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
|
||||
#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
|
||||
#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
|
||||
#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
|
||||
#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
|
||||
#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
|
||||
|
||||
/*
|
||||
* BUG word(s)
|
||||
*/
|
||||
#define X86_BUG(x) (NCAPINTS*32 + (x))
|
||||
|
||||
#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
|
||||
#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
|
||||
#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
|
||||
#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
|
||||
#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
|
||||
#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
|
||||
#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
|
||||
#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
|
||||
#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/*
|
||||
* 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional
|
||||
* to avoid confusion.
|
||||
*/
|
||||
#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
|
||||
#endif
|
||||
|
||||
#endif /* _ASM_X86_CPUFEATURES_H */
|
|
@ -98,4 +98,27 @@ struct desc_ptr {
|
|||
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
|
||||
/* Access rights as returned by LAR */
|
||||
#define AR_TYPE_RODATA (0 * (1 << 9))
|
||||
#define AR_TYPE_RWDATA (1 * (1 << 9))
|
||||
#define AR_TYPE_RODATA_EXPDOWN (2 * (1 << 9))
|
||||
#define AR_TYPE_RWDATA_EXPDOWN (3 * (1 << 9))
|
||||
#define AR_TYPE_XOCODE (4 * (1 << 9))
|
||||
#define AR_TYPE_XRCODE (5 * (1 << 9))
|
||||
#define AR_TYPE_XOCODE_CONF (6 * (1 << 9))
|
||||
#define AR_TYPE_XRCODE_CONF (7 * (1 << 9))
|
||||
#define AR_TYPE_MASK (7 * (1 << 9))
|
||||
|
||||
#define AR_DPL0 (0 * (1 << 13))
|
||||
#define AR_DPL3 (3 * (1 << 13))
|
||||
#define AR_DPL_MASK (3 * (1 << 13))
|
||||
|
||||
#define AR_A (1 << 8) /* "Accessed" */
|
||||
#define AR_S (1 << 12) /* If clear, "System" segment */
|
||||
#define AR_P (1 << 15) /* "Present" */
|
||||
#define AR_AVL (1 << 20) /* "AVaiLable" (no HW effect) */
|
||||
#define AR_L (1 << 21) /* "Long mode" for code segments */
|
||||
#define AR_DB (1 << 22) /* D/B, effect depends on type */
|
||||
#define AR_G (1 << 23) /* "Granularity" (limit in pages) */
|
||||
|
||||
#endif /* _ASM_X86_DESC_DEFS_H */
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include <asm/user.h>
|
||||
#include <asm/fpu/api.h>
|
||||
#include <asm/fpu/xstate.h>
|
||||
#include <asm/cpufeature.h>
|
||||
|
||||
/*
|
||||
* High level FPU state handling functions:
|
||||
|
@ -58,22 +59,22 @@ extern u64 fpu__get_supported_xfeatures_mask(void);
|
|||
*/
|
||||
static __always_inline __pure bool use_eager_fpu(void)
|
||||
{
|
||||
return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
|
||||
return static_cpu_has(X86_FEATURE_EAGER_FPU);
|
||||
}
|
||||
|
||||
static __always_inline __pure bool use_xsaveopt(void)
|
||||
{
|
||||
return static_cpu_has_safe(X86_FEATURE_XSAVEOPT);
|
||||
return static_cpu_has(X86_FEATURE_XSAVEOPT);
|
||||
}
|
||||
|
||||
static __always_inline __pure bool use_xsave(void)
|
||||
{
|
||||
return static_cpu_has_safe(X86_FEATURE_XSAVE);
|
||||
return static_cpu_has(X86_FEATURE_XSAVE);
|
||||
}
|
||||
|
||||
static __always_inline __pure bool use_fxsr(void)
|
||||
{
|
||||
return static_cpu_has_safe(X86_FEATURE_FXSR);
|
||||
return static_cpu_has(X86_FEATURE_FXSR);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -300,7 +301,7 @@ static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
|
|||
|
||||
WARN_ON(system_state != SYSTEM_BOOTING);
|
||||
|
||||
if (static_cpu_has_safe(X86_FEATURE_XSAVES))
|
||||
if (static_cpu_has(X86_FEATURE_XSAVES))
|
||||
XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
|
||||
else
|
||||
XSTATE_OP(XSAVE, xstate, lmask, hmask, err);
|
||||
|
@ -322,7 +323,7 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
|
|||
|
||||
WARN_ON(system_state != SYSTEM_BOOTING);
|
||||
|
||||
if (static_cpu_has_safe(X86_FEATURE_XSAVES))
|
||||
if (static_cpu_has(X86_FEATURE_XSAVES))
|
||||
XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
|
||||
else
|
||||
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
|
||||
|
@ -460,7 +461,7 @@ static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate)
|
|||
* pending. Clear the x87 state here by setting it to fixed values.
|
||||
* "m" is a random variable that should be in L1.
|
||||
*/
|
||||
if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) {
|
||||
if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) {
|
||||
asm volatile(
|
||||
"fnclex\n\t"
|
||||
"emms\n\t"
|
||||
|
|
|
@ -1,23 +1,44 @@
|
|||
#ifdef __ASSEMBLY__
|
||||
#ifndef _ASM_X86_FRAME_H
|
||||
#define _ASM_X86_FRAME_H
|
||||
|
||||
#include <asm/asm.h>
|
||||
|
||||
/* The annotation hides the frame from the unwinder and makes it look
|
||||
like a ordinary ebp save/restore. This avoids some special cases for
|
||||
frame pointer later */
|
||||
/*
|
||||
* These are stack frame creation macros. They should be used by every
|
||||
* callable non-leaf asm function to make kernel stack traces more reliable.
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_FRAME_POINTER
|
||||
.macro FRAME
|
||||
__ASM_SIZE(push,) %__ASM_REG(bp)
|
||||
__ASM_SIZE(mov) %__ASM_REG(sp), %__ASM_REG(bp)
|
||||
.endm
|
||||
.macro ENDFRAME
|
||||
__ASM_SIZE(pop,) %__ASM_REG(bp)
|
||||
.endm
|
||||
#else
|
||||
.macro FRAME
|
||||
.endm
|
||||
.macro ENDFRAME
|
||||
.endm
|
||||
#endif
|
||||
|
||||
#ifdef __ASSEMBLY__
|
||||
|
||||
.macro FRAME_BEGIN
|
||||
push %_ASM_BP
|
||||
_ASM_MOV %_ASM_SP, %_ASM_BP
|
||||
.endm
|
||||
|
||||
.macro FRAME_END
|
||||
pop %_ASM_BP
|
||||
.endm
|
||||
|
||||
#else /* !__ASSEMBLY__ */
|
||||
|
||||
#define FRAME_BEGIN \
|
||||
"push %" _ASM_BP "\n" \
|
||||
_ASM_MOV "%" _ASM_SP ", %" _ASM_BP "\n"
|
||||
|
||||
#define FRAME_END "pop %" _ASM_BP "\n"
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
#define FRAME_OFFSET __ASM_SEL(4, 8)
|
||||
|
||||
#else /* !CONFIG_FRAME_POINTER */
|
||||
|
||||
#define FRAME_BEGIN
|
||||
#define FRAME_END
|
||||
#define FRAME_OFFSET 0
|
||||
|
||||
#endif /* CONFIG_FRAME_POINTER */
|
||||
|
||||
#endif /* _ASM_X86_FRAME_H */
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#ifndef _ASM_IRQ_WORK_H
|
||||
#define _ASM_IRQ_WORK_H
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/cpufeature.h>
|
||||
|
||||
static inline bool arch_irq_work_has_interrupt(void)
|
||||
{
|
||||
|
|
|
@ -135,6 +135,7 @@ struct mca_config {
|
|||
bool ignore_ce;
|
||||
bool disabled;
|
||||
bool ser;
|
||||
bool recovery;
|
||||
bool bios_cmci_threshold;
|
||||
u8 banks;
|
||||
s8 bootlog;
|
||||
|
|
|
@ -19,7 +19,8 @@ typedef struct {
|
|||
#endif
|
||||
|
||||
struct mutex lock;
|
||||
void __user *vdso;
|
||||
void __user *vdso; /* vdso base address */
|
||||
const struct vdso_image *vdso_image; /* vdso image in use */
|
||||
|
||||
atomic_t perf_rdpmc_allowed; /* nonzero if rdpmc is allowed */
|
||||
} mm_context_t;
|
||||
|
|
|
@ -1,7 +1,12 @@
|
|||
#ifndef _ASM_X86_MSR_INDEX_H
|
||||
#define _ASM_X86_MSR_INDEX_H
|
||||
|
||||
/* CPU model specific register (MSR) numbers */
|
||||
/*
|
||||
* CPU model specific register (MSR) numbers.
|
||||
*
|
||||
* Do not add new entries to this file unless the definitions are shared
|
||||
* between multiple compilation units.
|
||||
*/
|
||||
|
||||
/* x86-64 specific MSRs */
|
||||
#define MSR_EFER 0xc0000080 /* extended feature register */
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
|
||||
#include <linux/sched.h>
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
|
||||
#define MWAIT_SUBSTATE_MASK 0xf
|
||||
#define MWAIT_CSTATE_MASK 0xf
|
||||
#define MWAIT_SUBSTATE_SIZE 4
|
||||
|
|
|
@ -13,7 +13,7 @@ struct vm86;
|
|||
#include <asm/types.h>
|
||||
#include <uapi/asm/sigcontext.h>
|
||||
#include <asm/current.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/cpufeatures.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/pgtable_types.h>
|
||||
#include <asm/percpu.h>
|
||||
|
@ -24,7 +24,6 @@ struct vm86;
|
|||
#include <asm/fpu/types.h>
|
||||
|
||||
#include <linux/personality.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/cache.h>
|
||||
#include <linux/threads.h>
|
||||
#include <linux/math64.h>
|
||||
|
@ -300,10 +299,13 @@ struct tss_struct {
|
|||
*/
|
||||
unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/*
|
||||
* Space for the temporary SYSENTER stack:
|
||||
* Space for the temporary SYSENTER stack.
|
||||
*/
|
||||
unsigned long SYSENTER_stack_canary;
|
||||
unsigned long SYSENTER_stack[64];
|
||||
#endif
|
||||
|
||||
} ____cacheline_aligned;
|
||||
|
||||
|
|
|
@ -7,12 +7,23 @@
|
|||
|
||||
void syscall_init(void);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
void entry_SYSCALL_64(void);
|
||||
void entry_SYSCALL_compat(void);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
void entry_INT80_32(void);
|
||||
void entry_INT80_compat(void);
|
||||
void entry_SYSENTER_32(void);
|
||||
void __begin_SYSENTER_singlestep_region(void);
|
||||
void __end_SYSENTER_singlestep_region(void);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
void entry_SYSENTER_compat(void);
|
||||
void __end_entry_SYSENTER_compat(void);
|
||||
void entry_SYSCALL_compat(void);
|
||||
void entry_INT80_compat(void);
|
||||
#endif
|
||||
|
||||
void x86_configure_nx(void);
|
||||
void x86_report_nx(void);
|
||||
|
|
|
@ -13,7 +13,6 @@
|
|||
X86_EFLAGS_CF | X86_EFLAGS_RF)
|
||||
|
||||
void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
|
||||
int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc);
|
||||
int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
|
||||
struct pt_regs *regs, unsigned long mask);
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
|
||||
#include <linux/stringify.h>
|
||||
#include <asm/nops.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/cpufeatures.h>
|
||||
|
||||
/* "Raw" instruction opcodes */
|
||||
#define __ASM_CLAC .byte 0x0f,0x01,0xca
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
#endif
|
||||
#include <asm/thread_info.h>
|
||||
#include <asm/cpumask.h>
|
||||
#include <asm/cpufeature.h>
|
||||
|
||||
extern int smp_num_siblings;
|
||||
extern unsigned int num_processors;
|
||||
|
|
|
@ -49,7 +49,7 @@
|
|||
*/
|
||||
#ifndef __ASSEMBLY__
|
||||
struct task_struct;
|
||||
#include <asm/processor.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <linux/atomic.h>
|
||||
|
||||
struct thread_info {
|
||||
|
@ -134,10 +134,13 @@ struct thread_info {
|
|||
#define _TIF_ADDR32 (1 << TIF_ADDR32)
|
||||
#define _TIF_X32 (1 << TIF_X32)
|
||||
|
||||
/* work to do in syscall_trace_enter() */
|
||||
/*
|
||||
* work to do in syscall_trace_enter(). Also includes TIF_NOHZ for
|
||||
* enter_from_user_mode()
|
||||
*/
|
||||
#define _TIF_WORK_SYSCALL_ENTRY \
|
||||
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \
|
||||
_TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \
|
||||
_TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
|
||||
_TIF_NOHZ)
|
||||
|
||||
/* work to do on any return to user space */
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#include <linux/sched.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/special_insns.h>
|
||||
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
#include <linux/errno.h>
|
||||
#include <linux/lockdep.h>
|
||||
#include <asm/alternative.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/cpufeatures.h>
|
||||
#include <asm/page.h>
|
||||
|
||||
/*
|
||||
|
|
|
@ -13,9 +13,6 @@ struct vdso_image {
|
|||
void *data;
|
||||
unsigned long size; /* Always a multiple of PAGE_SIZE */
|
||||
|
||||
/* text_mapping.pages is big enough for data/size page pointers */
|
||||
struct vm_special_mapping text_mapping;
|
||||
|
||||
unsigned long alt, alt_len;
|
||||
|
||||
long sym_vvar_start; /* Negative offset to the vvar area */
|
||||
|
|
|
@ -37,6 +37,12 @@ struct vsyscall_gtod_data {
|
|||
};
|
||||
extern struct vsyscall_gtod_data vsyscall_gtod_data;
|
||||
|
||||
extern int vclocks_used;
|
||||
static inline bool vclock_was_used(int vclock)
|
||||
{
|
||||
return READ_ONCE(vclocks_used) & (1 << vclock);
|
||||
}
|
||||
|
||||
static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s)
|
||||
{
|
||||
unsigned ret;
|
||||
|
|
|
@ -256,7 +256,7 @@ struct sigcontext_64 {
|
|||
__u16 cs;
|
||||
__u16 gs;
|
||||
__u16 fs;
|
||||
__u16 __pad0;
|
||||
__u16 ss;
|
||||
__u64 err;
|
||||
__u64 trapno;
|
||||
__u64 oldmask;
|
||||
|
@ -341,9 +341,37 @@ struct sigcontext {
|
|||
__u64 rip;
|
||||
__u64 eflags; /* RFLAGS */
|
||||
__u16 cs;
|
||||
|
||||
/*
|
||||
* Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"),
|
||||
* Linux saved and restored fs and gs in these slots. This
|
||||
* was counterproductive, as fsbase and gsbase were never
|
||||
* saved, so arch_prctl was presumably unreliable.
|
||||
*
|
||||
* These slots should never be reused without extreme caution:
|
||||
*
|
||||
* - Some DOSEMU versions stash fs and gs in these slots manually,
|
||||
* thus overwriting anything the kernel expects to be preserved
|
||||
* in these slots.
|
||||
*
|
||||
* - If these slots are ever needed for any other purpose,
|
||||
* there is some risk that very old 64-bit binaries could get
|
||||
* confused. I doubt that many such binaries still work,
|
||||
* though, since the same patch in 2.5.64 also removed the
|
||||
* 64-bit set_thread_area syscall, so it appears that there
|
||||
* is no TLS API beyond modify_ldt that works in both pre-
|
||||
* and post-2.5.64 kernels.
|
||||
*
|
||||
* If the kernel ever adds explicit fs, gs, fsbase, and gsbase
|
||||
* save/restore, it will most likely need to be opt-in and use
|
||||
* different context slots.
|
||||
*/
|
||||
__u16 gs;
|
||||
__u16 fs;
|
||||
__u16 __pad0;
|
||||
union {
|
||||
__u16 ss; /* If UC_SIGCONTEXT_SS */
|
||||
__u16 __pad0; /* Alias name for old (!UC_SIGCONTEXT_SS) user-space */
|
||||
};
|
||||
__u64 err;
|
||||
__u64 trapno;
|
||||
__u64 oldmask;
|
||||
|
|
|
@ -1,11 +1,54 @@
|
|||
#ifndef _ASM_X86_UCONTEXT_H
|
||||
#define _ASM_X86_UCONTEXT_H
|
||||
|
||||
#define UC_FP_XSTATE 0x1 /* indicates the presence of extended state
|
||||
* information in the memory layout pointed
|
||||
* by the fpstate pointer in the ucontext's
|
||||
* sigcontext struct (uc_mcontext).
|
||||
/*
|
||||
* Indicates the presence of extended state information in the memory
|
||||
* layout pointed by the fpstate pointer in the ucontext's sigcontext
|
||||
* struct (uc_mcontext).
|
||||
*/
|
||||
#define UC_FP_XSTATE 0x1
|
||||
|
||||
#ifdef __x86_64__
|
||||
/*
|
||||
* UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on
|
||||
* kernels that save SS in the sigcontext. All kernels that set
|
||||
* UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp
|
||||
* regardless of SS (i.e. they implement espfix).
|
||||
*
|
||||
* Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS
|
||||
* when delivering a signal that came from 64-bit code.
|
||||
*
|
||||
* Sigreturn restores SS as follows:
|
||||
*
|
||||
* if (saved SS is valid || UC_STRICT_RESTORE_SS is set ||
|
||||
* saved CS is not 64-bit)
|
||||
* new SS = saved SS (will fail IRET and signal if invalid)
|
||||
* else
|
||||
* new SS = a flat 32-bit data segment
|
||||
*
|
||||
* This behavior serves three purposes:
|
||||
*
|
||||
* - Legacy programs that construct a 64-bit sigcontext from scratch
|
||||
* with zero or garbage in the SS slot (e.g. old CRIU) and call
|
||||
* sigreturn will still work.
|
||||
*
|
||||
* - Old DOSEMU versions sometimes catch a signal from a segmented
|
||||
* context, delete the old SS segment (with modify_ldt), and change
|
||||
* the saved CS to a 64-bit segment. These DOSEMU versions expect
|
||||
* sigreturn to send them back to 64-bit mode without killing them,
|
||||
* despite the fact that the SS selector when the signal was raised is
|
||||
* no longer valid. UC_STRICT_RESTORE_SS will be clear, so the kernel
|
||||
* will fix up SS for these DOSEMU versions.
|
||||
*
|
||||
* - Old and new programs that catch a signal and return without
|
||||
* modifying the saved context will end up in exactly the state they
|
||||
* started in, even if they were running in a segmented context when
|
||||
* the signal was raised.. Old kernels would lose track of the
|
||||
* previous SS value.
|
||||
*/
|
||||
#define UC_SIGCONTEXT_SS 0x2
|
||||
#define UC_STRICT_RESTORE_SS 0x4
|
||||
#endif
|
||||
|
||||
#include <asm-generic/ucontext.h>
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ static unsigned int numachip1_get_apic_id(unsigned long x)
|
|||
unsigned long value;
|
||||
unsigned int id = (x >> 24) & 0xff;
|
||||
|
||||
if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
|
||||
if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
|
||||
rdmsrl(MSR_FAM10H_NODE_ID, value);
|
||||
id |= (value << 2) & 0xff00;
|
||||
}
|
||||
|
@ -178,7 +178,7 @@ static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
|
|||
this_cpu_write(cpu_llc_id, node);
|
||||
|
||||
/* Account for nodes per socket in multi-core-module processors */
|
||||
if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
|
||||
if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
|
||||
rdmsrl(MSR_FAM10H_NODE_ID, val);
|
||||
nodes = ((val >> 3) & 7) + 1;
|
||||
}
|
||||
|
|
|
@ -59,7 +59,6 @@ void common(void) {
|
|||
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
BLANK();
|
||||
OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
|
||||
OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
|
||||
OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
|
||||
OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
#include <linux/lguest.h>
|
||||
#include "../../../drivers/lguest/lg.h"
|
||||
|
||||
#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
|
||||
#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
|
||||
static char syscalls[] = {
|
||||
#include <asm/syscalls_32.h>
|
||||
};
|
||||
|
@ -52,6 +52,11 @@ void foo(void)
|
|||
DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
|
||||
offsetofend(struct tss_struct, SYSENTER_stack));
|
||||
|
||||
/* Offset from cpu_tss to SYSENTER_stack */
|
||||
OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
|
||||
/* Size of SYSENTER_stack */
|
||||
DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
|
||||
|
||||
#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
|
||||
BLANK();
|
||||
OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
|
||||
|
|
|
@ -4,17 +4,11 @@
|
|||
|
||||
#include <asm/ia32.h>
|
||||
|
||||
#define __SYSCALL_64(nr, sym, compat) [nr] = 1,
|
||||
#define __SYSCALL_COMMON(nr, sym, compat) [nr] = 1,
|
||||
#ifdef CONFIG_X86_X32_ABI
|
||||
# define __SYSCALL_X32(nr, sym, compat) [nr] = 1,
|
||||
#else
|
||||
# define __SYSCALL_X32(nr, sym, compat) /* nothing */
|
||||
#endif
|
||||
#define __SYSCALL_64(nr, sym, qual) [nr] = 1,
|
||||
static char syscalls_64[] = {
|
||||
#include <asm/syscalls_64.h>
|
||||
};
|
||||
#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
|
||||
#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
|
||||
static char syscalls_ia32[] = {
|
||||
#include <asm/syscalls_32.h>
|
||||
};
|
||||
|
|
|
@ -42,7 +42,7 @@ ifdef CONFIG_X86_FEATURE_NAMES
|
|||
quiet_cmd_mkcapflags = MKCAP $@
|
||||
cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@
|
||||
|
||||
cpufeature = $(src)/../../include/asm/cpufeature.h
|
||||
cpufeature = $(src)/../../include/asm/cpufeatures.h
|
||||
|
||||
targets += capflags.c
|
||||
$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#include <linux/bitops.h>
|
||||
#include <linux/kernel.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/e820.h>
|
||||
#include <asm/mtrr.h>
|
||||
#include <asm/msr.h>
|
||||
|
|
|
@ -800,6 +800,31 @@ static void detect_nopl(struct cpuinfo_x86 *c)
|
|||
clear_cpu_cap(c, X86_FEATURE_NOPL);
|
||||
#else
|
||||
set_cpu_cap(c, X86_FEATURE_NOPL);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* ESPFIX is a strange bug. All real CPUs have it. Paravirt
|
||||
* systems that run Linux at CPL > 0 may or may not have the
|
||||
* issue, but, even if they have the issue, there's absolutely
|
||||
* nothing we can do about it because we can't use the real IRET
|
||||
* instruction.
|
||||
*
|
||||
* NB: For the time being, only 32-bit kernels support
|
||||
* X86_BUG_ESPFIX as such. 64-bit kernels directly choose
|
||||
* whether to apply espfix using paravirt hooks. If any
|
||||
* non-paravirt system ever shows up that does *not* have the
|
||||
* ESPFIX issue, we can change this.
|
||||
*/
|
||||
#ifdef CONFIG_X86_32
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
do {
|
||||
extern void native_iret(void);
|
||||
if (pv_cpu_ops.iret == native_iret)
|
||||
set_cpu_bug(c, X86_BUG_ESPFIX);
|
||||
} while (0);
|
||||
#else
|
||||
set_cpu_bug(c, X86_BUG_ESPFIX);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -1475,20 +1500,6 @@ void cpu_init(void)
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
|
||||
void warn_pre_alternatives(void)
|
||||
{
|
||||
WARN(1, "You're using static_cpu_has before alternatives have run!\n");
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(warn_pre_alternatives);
|
||||
#endif
|
||||
|
||||
inline bool __static_cpu_has_safe(u16 bit)
|
||||
{
|
||||
return boot_cpu_has(bit);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__static_cpu_has_safe);
|
||||
|
||||
static void bsp_resume(void)
|
||||
{
|
||||
if (this_cpu->c_bsp_resume)
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include <linux/timer.h>
|
||||
#include <asm/pci-direct.h>
|
||||
#include <asm/tsc.h>
|
||||
#include <asm/cpufeature.h>
|
||||
|
||||
#include "cpu.h"
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
#include <linux/module.h>
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/bugs.h>
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
#include <linux/sysfs.h>
|
||||
#include <linux/pci.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/amd_nb.h>
|
||||
#include <asm/smp.h>
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#include <asm/cpu_device_id.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
|
|
|
@ -1578,6 +1578,17 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
|
|||
|
||||
if (c->x86 == 6 && c->x86_model == 45)
|
||||
quirk_no_way_out = quirk_sandybridge_ifu;
|
||||
/*
|
||||
* MCG_CAP.MCG_SER_P is necessary but not sufficient to know
|
||||
* whether this processor will actually generate recoverable
|
||||
* machine checks. Check to see if this is an E7 model Xeon.
|
||||
* We can't do a model number check because E5 and E7 use the
|
||||
* same model number. E5 doesn't support recovery, E7 does.
|
||||
*/
|
||||
if (mca_cfg.recovery || (mca_cfg.ser &&
|
||||
!strncmp(c->x86_model_id,
|
||||
"Intel(R) Xeon(R) CPU E7-", 24)))
|
||||
set_cpu_cap(c, X86_FEATURE_MCE_RECOVERY);
|
||||
}
|
||||
if (cfg->monarch_timeout < 0)
|
||||
cfg->monarch_timeout = 0;
|
||||
|
@ -2030,6 +2041,8 @@ static int __init mcheck_enable(char *str)
|
|||
cfg->bootlog = (str[0] == 'b');
|
||||
else if (!strcmp(str, "bios_cmci_threshold"))
|
||||
cfg->bios_cmci_threshold = true;
|
||||
else if (!strcmp(str, "recovery"))
|
||||
cfg->recovery = true;
|
||||
else if (isdigit(str[0])) {
|
||||
if (get_option(&str, &cfg->tolerant) == 2)
|
||||
get_option(&str, &(cfg->monarch_timeout));
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#!/bin/sh
|
||||
#
|
||||
# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeature.h
|
||||
# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeatures.h
|
||||
#
|
||||
|
||||
IN=$1
|
||||
|
@ -49,8 +49,8 @@ dump_array()
|
|||
trap 'rm "$OUT"' EXIT
|
||||
|
||||
(
|
||||
echo "#ifndef _ASM_X86_CPUFEATURE_H"
|
||||
echo "#include <asm/cpufeature.h>"
|
||||
echo "#ifndef _ASM_X86_CPUFEATURES_H"
|
||||
echo "#include <asm/cpufeatures.h>"
|
||||
echo "#endif"
|
||||
echo ""
|
||||
|
||||
|
|
|
@ -47,7 +47,7 @@
|
|||
#include <linux/smp.h>
|
||||
#include <linux/syscore_ops.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/e820.h>
|
||||
#include <asm/mtrr.h>
|
||||
#include <asm/msr.h>
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#include <linux/kernel.h>
|
||||
#include <linux/mm.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/msr.h>
|
||||
#include "cpu.h"
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include <asm/e820.h>
|
||||
#include <asm/proto.h>
|
||||
#include <asm/setup.h>
|
||||
#include <asm/cpufeature.h>
|
||||
|
||||
/*
|
||||
* The e820 map is the map that gets modified e.g. with command line parameters
|
||||
|
|
|
@ -51,6 +51,9 @@ void fpu__xstate_clear_all_cpu_caps(void)
|
|||
setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX512DQ);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX512BW);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
|
||||
setup_clear_cpu_cap(X86_FEATURE_MPX);
|
||||
setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
|
||||
}
|
||||
|
|
|
@ -697,9 +697,8 @@ static inline void tramp_free(void *tramp) { }
|
|||
#endif
|
||||
|
||||
/* Defined as markers to the end of the ftrace default trampolines */
|
||||
extern void ftrace_caller_end(void);
|
||||
extern void ftrace_regs_caller_end(void);
|
||||
extern void ftrace_return(void);
|
||||
extern void ftrace_epilogue(void);
|
||||
extern void ftrace_caller_op_ptr(void);
|
||||
extern void ftrace_regs_caller_op_ptr(void);
|
||||
|
||||
|
@ -746,7 +745,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
|
|||
op_offset = (unsigned long)ftrace_regs_caller_op_ptr;
|
||||
} else {
|
||||
start_offset = (unsigned long)ftrace_caller;
|
||||
end_offset = (unsigned long)ftrace_caller_end;
|
||||
end_offset = (unsigned long)ftrace_epilogue;
|
||||
op_offset = (unsigned long)ftrace_caller_op_ptr;
|
||||
}
|
||||
|
||||
|
@ -754,7 +753,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
|
|||
|
||||
/*
|
||||
* Allocate enough size to store the ftrace_caller code,
|
||||
* the jmp to ftrace_return, as well as the address of
|
||||
* the jmp to ftrace_epilogue, as well as the address of
|
||||
* the ftrace_ops this trampoline is used for.
|
||||
*/
|
||||
trampoline = alloc_tramp(size + MCOUNT_INSN_SIZE + sizeof(void *));
|
||||
|
@ -772,8 +771,8 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
|
|||
|
||||
ip = (unsigned long)trampoline + size;
|
||||
|
||||
/* The trampoline ends with a jmp to ftrace_return */
|
||||
jmp = ftrace_jmp_replace(ip, (unsigned long)ftrace_return);
|
||||
/* The trampoline ends with a jmp to ftrace_epilogue */
|
||||
jmp = ftrace_jmp_replace(ip, (unsigned long)ftrace_epilogue);
|
||||
memcpy(trampoline + size, jmp, MCOUNT_INSN_SIZE);
|
||||
|
||||
/*
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
#include <asm/setup.h>
|
||||
#include <asm/processor-flags.h>
|
||||
#include <asm/msr-index.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/cpufeatures.h>
|
||||
#include <asm/percpu.h>
|
||||
#include <asm/nops.h>
|
||||
#include <asm/bootparam.h>
|
||||
|
|
|
@ -38,7 +38,6 @@
|
|||
#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
|
||||
|
||||
L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET)
|
||||
L3_PAGE_OFFSET = pud_index(__PAGE_OFFSET)
|
||||
L4_START_KERNEL = pgd_index(__START_KERNEL_map)
|
||||
L3_START_KERNEL = pud_index(__START_KERNEL_map)
|
||||
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include <linux/pm.h>
|
||||
#include <linux/io.h>
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/irqdomain.h>
|
||||
#include <asm/fixmap.h>
|
||||
#include <asm/hpet.h>
|
||||
|
|
|
@ -168,12 +168,14 @@ GLOBAL(ftrace_call)
|
|||
restore_mcount_regs
|
||||
|
||||
/*
|
||||
* The copied trampoline must call ftrace_return as it
|
||||
* The copied trampoline must call ftrace_epilogue as it
|
||||
* still may need to call the function graph tracer.
|
||||
*
|
||||
* The code up to this label is copied into trampolines so
|
||||
* think twice before adding any new code or changing the
|
||||
* layout here.
|
||||
*/
|
||||
GLOBAL(ftrace_caller_end)
|
||||
|
||||
GLOBAL(ftrace_return)
|
||||
GLOBAL(ftrace_epilogue)
|
||||
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
GLOBAL(ftrace_graph_call)
|
||||
|
@ -244,14 +246,14 @@ GLOBAL(ftrace_regs_call)
|
|||
popfq
|
||||
|
||||
/*
|
||||
* As this jmp to ftrace_return can be a short jump
|
||||
* As this jmp to ftrace_epilogue can be a short jump
|
||||
* it must not be copied into the trampoline.
|
||||
* The trampoline will add the code to jump
|
||||
* to the return.
|
||||
*/
|
||||
GLOBAL(ftrace_regs_caller_end)
|
||||
|
||||
jmp ftrace_return
|
||||
jmp ftrace_epilogue
|
||||
|
||||
END(ftrace_regs_caller)
|
||||
|
||||
|
|
|
@ -40,7 +40,7 @@
|
|||
#include <linux/uaccess.h>
|
||||
#include <linux/gfp.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/msr.h>
|
||||
|
||||
static struct class *msr_class;
|
||||
|
|
|
@ -57,6 +57,9 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
|
|||
*/
|
||||
.io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
|
||||
#endif
|
||||
#ifdef CONFIG_X86_32
|
||||
.SYSENTER_stack_canary = STACK_END_MAGIC,
|
||||
#endif
|
||||
};
|
||||
EXPORT_PER_CPU_SYMBOL(cpu_tss);
|
||||
|
||||
|
|
|
@ -61,7 +61,38 @@
|
|||
regs->seg = GET_SEG(seg) | 3; \
|
||||
} while (0)
|
||||
|
||||
int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
* If regs->ss will cause an IRET fault, change it. Otherwise leave it
|
||||
* alone. Using this generally makes no sense unless
|
||||
* user_64bit_mode(regs) would return true.
|
||||
*/
|
||||
static void force_valid_ss(struct pt_regs *regs)
|
||||
{
|
||||
u32 ar;
|
||||
asm volatile ("lar %[old_ss], %[ar]\n\t"
|
||||
"jz 1f\n\t" /* If invalid: */
|
||||
"xorl %[ar], %[ar]\n\t" /* set ar = 0 */
|
||||
"1:"
|
||||
: [ar] "=r" (ar)
|
||||
: [old_ss] "rm" ((u16)regs->ss));
|
||||
|
||||
/*
|
||||
* For a valid 64-bit user context, we need DPL 3, type
|
||||
* read-write data or read-write exp-down data, and S and P
|
||||
* set. We can't use VERW because VERW doesn't check the
|
||||
* P bit.
|
||||
*/
|
||||
ar &= AR_DPL_MASK | AR_S | AR_P | AR_TYPE_MASK;
|
||||
if (ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA) &&
|
||||
ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA_EXPDOWN))
|
||||
regs->ss = __USER_DS;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int restore_sigcontext(struct pt_regs *regs,
|
||||
struct sigcontext __user *sc,
|
||||
unsigned long uc_flags)
|
||||
{
|
||||
unsigned long buf_val;
|
||||
void __user *buf;
|
||||
|
@ -94,15 +125,18 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
|
|||
COPY(r15);
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
COPY_SEG_CPL3(cs);
|
||||
COPY_SEG_CPL3(ss);
|
||||
#else /* !CONFIG_X86_32 */
|
||||
/* Kernel saves and restores only the CS segment register on signals,
|
||||
* which is the bare minimum needed to allow mixed 32/64-bit code.
|
||||
* App's signal handler can save/restore other segments if needed. */
|
||||
COPY_SEG_CPL3(cs);
|
||||
#endif /* CONFIG_X86_32 */
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
* Fix up SS if needed for the benefit of old DOSEMU and
|
||||
* CRIU.
|
||||
*/
|
||||
if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) &&
|
||||
user_64bit_mode(regs)))
|
||||
force_valid_ss(regs);
|
||||
#endif
|
||||
|
||||
get_user_ex(tmpflags, &sc->flags);
|
||||
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
|
||||
|
@ -165,6 +199,7 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
|
|||
put_user_ex(regs->cs, &sc->cs);
|
||||
put_user_ex(0, &sc->gs);
|
||||
put_user_ex(0, &sc->fs);
|
||||
put_user_ex(regs->ss, &sc->ss);
|
||||
#endif /* CONFIG_X86_32 */
|
||||
|
||||
put_user_ex(fpstate, &sc->fpstate);
|
||||
|
@ -403,6 +438,21 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
|
|||
return 0;
|
||||
}
|
||||
#else /* !CONFIG_X86_32 */
|
||||
static unsigned long frame_uc_flags(struct pt_regs *regs)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
if (cpu_has_xsave)
|
||||
flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS;
|
||||
else
|
||||
flags = UC_SIGCONTEXT_SS;
|
||||
|
||||
if (likely(user_64bit_mode(regs)))
|
||||
flags |= UC_STRICT_RESTORE_SS;
|
||||
|
||||
return flags;
|
||||
}
|
||||
|
||||
static int __setup_rt_frame(int sig, struct ksignal *ksig,
|
||||
sigset_t *set, struct pt_regs *regs)
|
||||
{
|
||||
|
@ -422,10 +472,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
|
|||
|
||||
put_user_try {
|
||||
/* Create the ucontext. */
|
||||
if (cpu_has_xsave)
|
||||
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
|
||||
else
|
||||
put_user_ex(0, &frame->uc.uc_flags);
|
||||
put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags);
|
||||
put_user_ex(0, &frame->uc.uc_link);
|
||||
save_altstack_ex(&frame->uc.uc_stack, regs->sp);
|
||||
|
||||
|
@ -459,10 +506,28 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
|
|||
|
||||
regs->sp = (unsigned long)frame;
|
||||
|
||||
/* Set up the CS register to run signal handlers in 64-bit mode,
|
||||
even if the handler happens to be interrupting 32-bit code. */
|
||||
/*
|
||||
* Set up the CS and SS registers to run signal handlers in
|
||||
* 64-bit mode, even if the handler happens to be interrupting
|
||||
* 32-bit or 16-bit code.
|
||||
*
|
||||
* SS is subtle. In 64-bit mode, we don't need any particular
|
||||
* SS descriptor, but we do need SS to be valid. It's possible
|
||||
* that the old SS is entirely bogus -- this can happen if the
|
||||
* signal we're trying to deliver is #GP or #SS caused by a bad
|
||||
* SS value. We also have a compatbility issue here: DOSEMU
|
||||
* relies on the contents of the SS register indicating the
|
||||
* SS value at the time of the signal, even though that code in
|
||||
* DOSEMU predates sigreturn's ability to restore SS. (DOSEMU
|
||||
* avoids relying on sigreturn to restore SS; instead it uses
|
||||
* a trampoline.) So we do our best: if the old SS was valid,
|
||||
* we keep it. Otherwise we replace it.
|
||||
*/
|
||||
regs->cs = __USER_CS;
|
||||
|
||||
if (unlikely(regs->ss != __USER_DS))
|
||||
force_valid_ss(regs);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_X86_32 */
|
||||
|
@ -489,10 +554,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
|
|||
|
||||
put_user_try {
|
||||
/* Create the ucontext. */
|
||||
if (cpu_has_xsave)
|
||||
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
|
||||
else
|
||||
put_user_ex(0, &frame->uc.uc_flags);
|
||||
put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags);
|
||||
put_user_ex(0, &frame->uc.uc_link);
|
||||
compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp);
|
||||
put_user_ex(0, &frame->uc.uc__pad0);
|
||||
|
@ -554,7 +616,11 @@ asmlinkage unsigned long sys_sigreturn(void)
|
|||
|
||||
set_current_blocked(&set);
|
||||
|
||||
if (restore_sigcontext(regs, &frame->sc))
|
||||
/*
|
||||
* x86_32 has no uc_flags bits relevant to restore_sigcontext.
|
||||
* Save a few cycles by skipping the __get_user.
|
||||
*/
|
||||
if (restore_sigcontext(regs, &frame->sc, 0))
|
||||
goto badframe;
|
||||
return regs->ax;
|
||||
|
||||
|
@ -570,16 +636,19 @@ asmlinkage long sys_rt_sigreturn(void)
|
|||
struct pt_regs *regs = current_pt_regs();
|
||||
struct rt_sigframe __user *frame;
|
||||
sigset_t set;
|
||||
unsigned long uc_flags;
|
||||
|
||||
frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
|
||||
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
|
||||
goto badframe;
|
||||
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
|
||||
goto badframe;
|
||||
if (__get_user(uc_flags, &frame->uc.uc_flags))
|
||||
goto badframe;
|
||||
|
||||
set_current_blocked(&set);
|
||||
|
||||
if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
|
||||
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
|
||||
goto badframe;
|
||||
|
||||
if (restore_altstack(&frame->uc.uc_stack))
|
||||
|
@ -692,12 +761,15 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
|
|||
|
||||
static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
|
||||
{
|
||||
#if defined(CONFIG_X86_32) || !defined(CONFIG_X86_64)
|
||||
#ifdef CONFIG_X86_64
|
||||
if (is_ia32_task())
|
||||
return __NR_ia32_restart_syscall;
|
||||
#endif
|
||||
#ifdef CONFIG_X86_X32_ABI
|
||||
return __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT);
|
||||
#else
|
||||
return __NR_restart_syscall;
|
||||
#else /* !CONFIG_X86_32 && CONFIG_X86_64 */
|
||||
return test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall :
|
||||
__NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT);
|
||||
#endif /* CONFIG_X86_32 || !CONFIG_X86_64 */
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -763,6 +835,7 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
|
|||
struct pt_regs *regs = current_pt_regs();
|
||||
struct rt_sigframe_x32 __user *frame;
|
||||
sigset_t set;
|
||||
unsigned long uc_flags;
|
||||
|
||||
frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
|
||||
|
||||
|
@ -770,10 +843,12 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
|
|||
goto badframe;
|
||||
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
|
||||
goto badframe;
|
||||
if (__get_user(uc_flags, &frame->uc.uc_flags))
|
||||
goto badframe;
|
||||
|
||||
set_current_blocked(&set);
|
||||
|
||||
if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
|
||||
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
|
||||
goto badframe;
|
||||
|
||||
if (compat_restore_altstack(&frame->uc.uc_stack))
|
||||
|
|
|
@ -83,32 +83,18 @@ gate_desc idt_table[NR_VECTORS] __page_aligned_bss;
|
|||
DECLARE_BITMAP(used_vectors, NR_VECTORS);
|
||||
EXPORT_SYMBOL_GPL(used_vectors);
|
||||
|
||||
static inline void conditional_sti(struct pt_regs *regs)
|
||||
static inline void cond_local_irq_enable(struct pt_regs *regs)
|
||||
{
|
||||
if (regs->flags & X86_EFLAGS_IF)
|
||||
local_irq_enable();
|
||||
}
|
||||
|
||||
static inline void preempt_conditional_sti(struct pt_regs *regs)
|
||||
{
|
||||
preempt_count_inc();
|
||||
if (regs->flags & X86_EFLAGS_IF)
|
||||
local_irq_enable();
|
||||
}
|
||||
|
||||
static inline void conditional_cli(struct pt_regs *regs)
|
||||
static inline void cond_local_irq_disable(struct pt_regs *regs)
|
||||
{
|
||||
if (regs->flags & X86_EFLAGS_IF)
|
||||
local_irq_disable();
|
||||
}
|
||||
|
||||
static inline void preempt_conditional_cli(struct pt_regs *regs)
|
||||
{
|
||||
if (regs->flags & X86_EFLAGS_IF)
|
||||
local_irq_disable();
|
||||
preempt_count_dec();
|
||||
}
|
||||
|
||||
void ist_enter(struct pt_regs *regs)
|
||||
{
|
||||
if (user_mode(regs)) {
|
||||
|
@ -262,7 +248,6 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
|
|||
tsk->thread.error_code = error_code;
|
||||
tsk->thread.trap_nr = trapnr;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
|
||||
printk_ratelimit()) {
|
||||
pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx",
|
||||
|
@ -271,7 +256,6 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
|
|||
print_vma_addr(" in ", regs->ip);
|
||||
pr_cont("\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk);
|
||||
}
|
||||
|
@ -286,7 +270,7 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
|
|||
|
||||
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
|
||||
NOTIFY_STOP) {
|
||||
conditional_sti(regs);
|
||||
cond_local_irq_enable(regs);
|
||||
do_trap(trapnr, signr, str, regs, error_code,
|
||||
fill_trap_info(regs, signr, trapnr, &info));
|
||||
}
|
||||
|
@ -368,7 +352,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
|
|||
if (notify_die(DIE_TRAP, "bounds", regs, error_code,
|
||||
X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP)
|
||||
return;
|
||||
conditional_sti(regs);
|
||||
cond_local_irq_enable(regs);
|
||||
|
||||
if (!user_mode(regs))
|
||||
die("bounds", regs, error_code);
|
||||
|
@ -443,7 +427,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
|
|||
struct task_struct *tsk;
|
||||
|
||||
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
|
||||
conditional_sti(regs);
|
||||
cond_local_irq_enable(regs);
|
||||
|
||||
if (v8086_mode(regs)) {
|
||||
local_irq_enable();
|
||||
|
@ -517,9 +501,11 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
|
|||
* as we may switch to the interrupt stack.
|
||||
*/
|
||||
debug_stack_usage_inc();
|
||||
preempt_conditional_sti(regs);
|
||||
preempt_disable();
|
||||
cond_local_irq_enable(regs);
|
||||
do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
|
||||
preempt_conditional_cli(regs);
|
||||
cond_local_irq_disable(regs);
|
||||
preempt_enable_no_resched();
|
||||
debug_stack_usage_dec();
|
||||
exit:
|
||||
ist_exit(regs);
|
||||
|
@ -571,6 +557,29 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
|
|||
NOKPROBE_SYMBOL(fixup_bad_iret);
|
||||
#endif
|
||||
|
||||
static bool is_sysenter_singlestep(struct pt_regs *regs)
|
||||
{
|
||||
/*
|
||||
* We don't try for precision here. If we're anywhere in the region of
|
||||
* code that can be single-stepped in the SYSENTER entry path, then
|
||||
* assume that this is a useless single-step trap due to SYSENTER
|
||||
* being invoked with TF set. (We don't know in advance exactly
|
||||
* which instructions will be hit because BTF could plausibly
|
||||
* be set.)
|
||||
*/
|
||||
#ifdef CONFIG_X86_32
|
||||
return (regs->ip - (unsigned long)__begin_SYSENTER_singlestep_region) <
|
||||
(unsigned long)__end_SYSENTER_singlestep_region -
|
||||
(unsigned long)__begin_SYSENTER_singlestep_region;
|
||||
#elif defined(CONFIG_IA32_EMULATION)
|
||||
return (regs->ip - (unsigned long)entry_SYSENTER_compat) <
|
||||
(unsigned long)__end_entry_SYSENTER_compat -
|
||||
(unsigned long)entry_SYSENTER_compat;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Our handling of the processor debug registers is non-trivial.
|
||||
* We do not clear them on entry and exit from the kernel. Therefore
|
||||
|
@ -605,10 +614,41 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
|
|||
ist_enter(regs);
|
||||
|
||||
get_debugreg(dr6, 6);
|
||||
/*
|
||||
* The Intel SDM says:
|
||||
*
|
||||
* Certain debug exceptions may clear bits 0-3. The remaining
|
||||
* contents of the DR6 register are never cleared by the
|
||||
* processor. To avoid confusion in identifying debug
|
||||
* exceptions, debug handlers should clear the register before
|
||||
* returning to the interrupted task.
|
||||
*
|
||||
* Keep it simple: clear DR6 immediately.
|
||||
*/
|
||||
set_debugreg(0, 6);
|
||||
|
||||
/* Filter out all the reserved bits which are preset to 1 */
|
||||
dr6 &= ~DR6_RESERVED;
|
||||
|
||||
/*
|
||||
* The SDM says "The processor clears the BTF flag when it
|
||||
* generates a debug exception." Clear TIF_BLOCKSTEP to keep
|
||||
* TIF_BLOCKSTEP in sync with the hardware BTF flag.
|
||||
*/
|
||||
clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP);
|
||||
|
||||
if (unlikely(!user_mode(regs) && (dr6 & DR_STEP) &&
|
||||
is_sysenter_singlestep(regs))) {
|
||||
dr6 &= ~DR_STEP;
|
||||
if (!dr6)
|
||||
goto exit;
|
||||
/*
|
||||
* else we might have gotten a single-step trap and hit a
|
||||
* watchpoint at the same time, in which case we should fall
|
||||
* through and handle the watchpoint.
|
||||
*/
|
||||
}
|
||||
|
||||
/*
|
||||
* If dr6 has no reason to give us about the origin of this trap,
|
||||
* then it's very likely the result of an icebp/int01 trap.
|
||||
|
@ -617,18 +657,10 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
|
|||
if (!dr6 && user_mode(regs))
|
||||
user_icebp = 1;
|
||||
|
||||
/* Catch kmemcheck conditions first of all! */
|
||||
/* Catch kmemcheck conditions! */
|
||||
if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
|
||||
goto exit;
|
||||
|
||||
/* DR6 may or may not be cleared by the CPU */
|
||||
set_debugreg(0, 6);
|
||||
|
||||
/*
|
||||
* The processor cleared BTF, so don't mark that we need it set.
|
||||
*/
|
||||
clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP);
|
||||
|
||||
/* Store the virtualized DR6 value */
|
||||
tsk->thread.debugreg6 = dr6;
|
||||
|
||||
|
@ -648,24 +680,25 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
|
|||
debug_stack_usage_inc();
|
||||
|
||||
/* It's safe to allow irq's after DR6 has been saved */
|
||||
preempt_conditional_sti(regs);
|
||||
preempt_disable();
|
||||
cond_local_irq_enable(regs);
|
||||
|
||||
if (v8086_mode(regs)) {
|
||||
handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
|
||||
X86_TRAP_DB);
|
||||
preempt_conditional_cli(regs);
|
||||
cond_local_irq_disable(regs);
|
||||
preempt_enable_no_resched();
|
||||
debug_stack_usage_dec();
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if (WARN_ON_ONCE((dr6 & DR_STEP) && !user_mode(regs))) {
|
||||
/*
|
||||
* Single-stepping through system calls: ignore any exceptions in
|
||||
* kernel space, but re-enable TF when returning to user mode.
|
||||
*
|
||||
* We already checked v86 mode above, so we can check for kernel mode
|
||||
* by just checking the CPL of CS.
|
||||
* Historical junk that used to handle SYSENTER single-stepping.
|
||||
* This should be unreachable now. If we survive for a while
|
||||
* without anyone hitting this warning, we'll turn this into
|
||||
* an oops.
|
||||
*/
|
||||
if ((dr6 & DR_STEP) && !user_mode(regs)) {
|
||||
tsk->thread.debugreg6 &= ~DR_STEP;
|
||||
set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
|
||||
regs->flags &= ~X86_EFLAGS_TF;
|
||||
|
@ -673,10 +706,19 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
|
|||
si_code = get_si_code(tsk->thread.debugreg6);
|
||||
if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
|
||||
send_sigtrap(tsk, regs, error_code, si_code);
|
||||
preempt_conditional_cli(regs);
|
||||
cond_local_irq_disable(regs);
|
||||
preempt_enable_no_resched();
|
||||
debug_stack_usage_dec();
|
||||
|
||||
exit:
|
||||
#if defined(CONFIG_X86_32)
|
||||
/*
|
||||
* This is the most likely code path that involves non-trivial use
|
||||
* of the SYSENTER stack. Check that we haven't overrun it.
|
||||
*/
|
||||
WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
|
||||
"Overran or corrupted SYSENTER stack\n");
|
||||
#endif
|
||||
ist_exit(regs);
|
||||
}
|
||||
NOKPROBE_SYMBOL(do_debug);
|
||||
|
@ -696,7 +738,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
|
|||
|
||||
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP)
|
||||
return;
|
||||
conditional_sti(regs);
|
||||
cond_local_irq_enable(regs);
|
||||
|
||||
if (!user_mode(regs)) {
|
||||
if (!fixup_exception(regs, trapnr)) {
|
||||
|
@ -743,7 +785,7 @@ do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
|
|||
dotraplinkage void
|
||||
do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
conditional_sti(regs);
|
||||
cond_local_irq_enable(regs);
|
||||
}
|
||||
|
||||
dotraplinkage void
|
||||
|
@ -756,7 +798,7 @@ do_device_not_available(struct pt_regs *regs, long error_code)
|
|||
if (read_cr0() & X86_CR0_EM) {
|
||||
struct math_emu_info info = { };
|
||||
|
||||
conditional_sti(regs);
|
||||
cond_local_irq_enable(regs);
|
||||
|
||||
info.regs = regs;
|
||||
math_emulate(&info);
|
||||
|
@ -765,7 +807,7 @@ do_device_not_available(struct pt_regs *regs, long error_code)
|
|||
#endif
|
||||
fpu__restore(¤t->thread.fpu); /* interrupts still off */
|
||||
#ifdef CONFIG_X86_32
|
||||
conditional_sti(regs);
|
||||
cond_local_irq_enable(regs);
|
||||
#endif
|
||||
}
|
||||
NOKPROBE_SYMBOL(do_device_not_available);
|
||||
|
@ -868,7 +910,7 @@ void __init trap_init(void)
|
|||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
set_system_trap_gate(IA32_SYSCALL_VECTOR, entry_INT80_32);
|
||||
set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_32);
|
||||
set_bit(IA32_SYSCALL_VECTOR, used_vectors);
|
||||
#endif
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@
|
|||
* appropriately. Either display a message or halt.
|
||||
*/
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/cpufeatures.h>
|
||||
#include <asm/msr-index.h>
|
||||
|
||||
verify_cpu:
|
||||
|
|
|
@ -362,7 +362,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
|
|||
/* make room for real-mode segments */
|
||||
tsk->thread.sp0 += 16;
|
||||
|
||||
if (static_cpu_has_safe(X86_FEATURE_SEP))
|
||||
if (static_cpu_has(X86_FEATURE_SEP))
|
||||
tsk->thread.sysenter_cs = 0;
|
||||
|
||||
load_sp0(tss, &tsk->thread);
|
||||
|
|
|
@ -192,6 +192,17 @@ SECTIONS
|
|||
:init
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Section for code used exclusively before alternatives are run. All
|
||||
* references to such code must be patched out by alternatives, normally
|
||||
* by using X86_FEATURE_ALWAYS CPU feature bit.
|
||||
*
|
||||
* See static_cpu_has() for an example.
|
||||
*/
|
||||
.altinstr_aux : AT(ADDR(.altinstr_aux) - LOAD_OFFSET) {
|
||||
*(.altinstr_aux)
|
||||
}
|
||||
|
||||
INIT_DATA_SECTION(16)
|
||||
|
||||
.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#include <linux/linkage.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/cpufeatures.h>
|
||||
#include <asm/alternative-asm.h>
|
||||
|
||||
/*
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/cpufeatures.h>
|
||||
#include <asm/alternative-asm.h>
|
||||
|
||||
/*
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
#include <asm/current.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/thread_info.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/cpufeatures.h>
|
||||
#include <asm/alternative-asm.h>
|
||||
#include <asm/asm.h>
|
||||
#include <asm/smap.h>
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/* Copyright 2002 Andi Kleen */
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/cpufeatures.h>
|
||||
#include <asm/alternative-asm.h>
|
||||
|
||||
/*
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
* - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
|
||||
*/
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/cpufeatures.h>
|
||||
#include <asm/alternative-asm.h>
|
||||
|
||||
#undef memmove
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/* Copyright 2002 Andi Kleen, SuSE Labs */
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/cpufeatures.h>
|
||||
#include <asm/alternative-asm.h>
|
||||
|
||||
.weak memset
|
||||
|
|
|
@ -943,7 +943,7 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
*prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
|
||||
*prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) |
|
||||
cachemode2protval(pcm));
|
||||
|
||||
return 0;
|
||||
|
@ -959,7 +959,7 @@ int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
|
|||
|
||||
/* Set prot based on lookup */
|
||||
pcm = lookup_memtype(pfn_t_to_phys(pfn));
|
||||
*prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
|
||||
*prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) |
|
||||
cachemode2protval(pcm));
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/proto.h>
|
||||
#include <asm/cpufeature.h>
|
||||
|
||||
static int disable_nx;
|
||||
|
||||
|
|
|
@ -24,7 +24,6 @@
|
|||
#include <asm/nmi.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/cpufeature.h>
|
||||
|
||||
#include "op_x86_model.h"
|
||||
#include "op_counter.h"
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#include <asm/asm.h>
|
||||
#include <asm/segment.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/cpufeatures.h>
|
||||
#include <asm/cmpxchg.h>
|
||||
#include <asm/nops.h>
|
||||
|
||||
|
|
|
@ -25,11 +25,11 @@
|
|||
|
||||
#define old_mmap sys_old_mmap
|
||||
|
||||
#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
|
||||
#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
|
||||
#include <asm/syscalls_32.h>
|
||||
|
||||
#undef __SYSCALL_I386
|
||||
#define __SYSCALL_I386(nr, sym, compat) [ nr ] = sym,
|
||||
#define __SYSCALL_I386(nr, sym, qual) [ nr ] = sym,
|
||||
|
||||
extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
|
||||
|
||||
|
|
|
@ -35,14 +35,11 @@
|
|||
#define stub_execveat sys_execveat
|
||||
#define stub_rt_sigreturn sys_rt_sigreturn
|
||||
|
||||
#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
|
||||
#define __SYSCALL_X32(nr, sym, compat) /* Not supported */
|
||||
|
||||
#define __SYSCALL_64(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
|
||||
#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
|
||||
#include <asm/syscalls_64.h>
|
||||
|
||||
#undef __SYSCALL_64
|
||||
#define __SYSCALL_64(nr, sym, compat) [ nr ] = sym,
|
||||
#define __SYSCALL_64(nr, sym, qual) [ nr ] = sym,
|
||||
|
||||
extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
|
||||
|
||||
|
|
|
@ -9,14 +9,12 @@
|
|||
#include <asm/types.h>
|
||||
|
||||
#ifdef __i386__
|
||||
#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
|
||||
#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
|
||||
static char syscalls[] = {
|
||||
#include <asm/syscalls_32.h>
|
||||
};
|
||||
#else
|
||||
#define __SYSCALL_64(nr, sym, compat) [nr] = 1,
|
||||
#define __SYSCALL_COMMON(nr, sym, compat) [nr] = 1,
|
||||
#define __SYSCALL_X32(nr, sym, compat) /* Not supported */
|
||||
#define __SYSCALL_64(nr, sym, qual) [nr] = 1,
|
||||
static char syscalls[] = {
|
||||
#include <asm/syscalls_64.h>
|
||||
};
|
||||
|
|
|
@ -1431,7 +1431,7 @@ static int __init intel_pstate_init(void)
|
|||
if (!all_cpu_data)
|
||||
return -ENOMEM;
|
||||
|
||||
if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) {
|
||||
if (static_cpu_has(X86_FEATURE_HWP) && !no_hwp) {
|
||||
pr_info("intel_pstate: HWP enabled\n");
|
||||
hwp_active++;
|
||||
}
|
||||
|
|
|
@ -931,7 +931,7 @@ static int check_async_write(struct inode *inode, unsigned long bio_flags)
|
|||
if (bio_flags & EXTENT_BIO_TREE_LOG)
|
||||
return 0;
|
||||
#ifdef CONFIG_X86
|
||||
if (static_cpu_has_safe(X86_FEATURE_XMM4_2))
|
||||
if (static_cpu_has(X86_FEATURE_XMM4_2))
|
||||
return 0;
|
||||
#endif
|
||||
return 1;
|
||||
|
|
|
@ -2139,6 +2139,8 @@ int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
|
|||
int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
|
||||
int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
|
||||
unsigned long pfn);
|
||||
int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
|
||||
unsigned long pfn, pgprot_t pgprot);
|
||||
int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
|
||||
pfn_t pfn);
|
||||
int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len);
|
||||
|
|
|
@ -566,10 +566,26 @@ static inline void clear_tlb_flush_pending(struct mm_struct *mm)
|
|||
}
|
||||
#endif
|
||||
|
||||
struct vm_special_mapping
|
||||
{
|
||||
const char *name;
|
||||
struct vm_fault;
|
||||
|
||||
struct vm_special_mapping {
|
||||
const char *name; /* The name, e.g. "[vdso]". */
|
||||
|
||||
/*
|
||||
* If .fault is not provided, this points to a
|
||||
* NULL-terminated array of pages that back the special mapping.
|
||||
*
|
||||
* This must not be NULL unless .fault is provided.
|
||||
*/
|
||||
struct page **pages;
|
||||
|
||||
/*
|
||||
* If non-NULL, then this is called to resolve page faults
|
||||
* on the special mapping. If used, .pages is not checked.
|
||||
*/
|
||||
int (*fault)(const struct vm_special_mapping *sm,
|
||||
struct vm_area_struct *vma,
|
||||
struct vm_fault *vmf);
|
||||
};
|
||||
|
||||
enum tlb_flush_reason {
|
||||
|
|
|
@ -1178,6 +1178,7 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
|
|||
goto free_area;
|
||||
|
||||
area->xol_mapping.name = "[uprobes]";
|
||||
area->xol_mapping.fault = NULL;
|
||||
area->xol_mapping.pages = area->pages;
|
||||
area->pages[0] = alloc_page(GFP_HIGHUSER);
|
||||
if (!area->pages[0])
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
#include <linux/atomic.h>
|
||||
|
||||
#ifdef CONFIG_X86
|
||||
#include <asm/processor.h> /* for boot_cpu_has below */
|
||||
#include <asm/cpufeature.h> /* for boot_cpu_has below */
|
||||
#endif
|
||||
|
||||
#define TEST(bit, op, c_op, val) \
|
||||
|
|
25
mm/memory.c
25
mm/memory.c
|
@ -1550,9 +1550,30 @@ out:
|
|||
*/
|
||||
int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
|
||||
unsigned long pfn)
|
||||
{
|
||||
return vm_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot);
|
||||
}
|
||||
EXPORT_SYMBOL(vm_insert_pfn);
|
||||
|
||||
/**
|
||||
* vm_insert_pfn_prot - insert single pfn into user vma with specified pgprot
|
||||
* @vma: user vma to map to
|
||||
* @addr: target user address of this page
|
||||
* @pfn: source kernel pfn
|
||||
* @pgprot: pgprot flags for the inserted page
|
||||
*
|
||||
* This is exactly like vm_insert_pfn, except that it allows drivers to
|
||||
* to override pgprot on a per-page basis.
|
||||
*
|
||||
* This only makes sense for IO mappings, and it makes no sense for
|
||||
* cow mappings. In general, using multiple vmas is preferable;
|
||||
* vm_insert_pfn_prot should only be used if using multiple VMAs is
|
||||
* impractical.
|
||||
*/
|
||||
int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
|
||||
unsigned long pfn, pgprot_t pgprot)
|
||||
{
|
||||
int ret;
|
||||
pgprot_t pgprot = vma->vm_page_prot;
|
||||
/*
|
||||
* Technically, architectures with pte_special can avoid all these
|
||||
* restrictions (same for remap_pfn_range). However we would like
|
||||
|
@ -1574,7 +1595,7 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
|
|||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(vm_insert_pfn);
|
||||
EXPORT_SYMBOL(vm_insert_pfn_prot);
|
||||
|
||||
int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
|
||||
pfn_t pfn)
|
||||
|
|
13
mm/mmap.c
13
mm/mmap.c
|
@ -3066,11 +3066,16 @@ static int special_mapping_fault(struct vm_area_struct *vma,
|
|||
pgoff_t pgoff;
|
||||
struct page **pages;
|
||||
|
||||
if (vma->vm_ops == &legacy_special_mapping_vmops)
|
||||
if (vma->vm_ops == &legacy_special_mapping_vmops) {
|
||||
pages = vma->vm_private_data;
|
||||
else
|
||||
pages = ((struct vm_special_mapping *)vma->vm_private_data)->
|
||||
pages;
|
||||
} else {
|
||||
struct vm_special_mapping *sm = vma->vm_private_data;
|
||||
|
||||
if (sm->fault)
|
||||
return sm->fault(sm, vma, vmf);
|
||||
|
||||
pages = sm->pages;
|
||||
}
|
||||
|
||||
for (pgoff = vmf->pgoff; pgoff && *pages; ++pages)
|
||||
pgoff--;
|
||||
|
|
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше
Загрузка…
Ссылка в новой задаче