Merge commit 'upstream-x86-entry' into WIP.x86/mm
Pull in a minimal set of v4.15 entry code changes, as a base for the MM isolation patches.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Commit: 0fd2e9c53d
@@ -4,7 +4,7 @@ ORC unwinder
 Overview
 --------
 
-The kernel CONFIG_ORC_UNWINDER option enables the ORC unwinder, which is
+The kernel CONFIG_UNWINDER_ORC option enables the ORC unwinder, which is
 similar in concept to a DWARF unwinder. The difference is that the
 format of the ORC data is much simpler than DWARF, which in turn allows
 the ORC unwinder to be much simpler and faster.
Makefile (4 changed lines)

@@ -934,8 +934,8 @@ ifdef CONFIG_STACK_VALIDATION
 ifeq ($(has_libelf),1)
 objtool_target := tools/objtool FORCE
 else
-ifdef CONFIG_ORC_UNWINDER
-$(error "Cannot generate ORC metadata for CONFIG_ORC_UNWINDER=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
+ifdef CONFIG_UNWINDER_ORC
+$(error "Cannot generate ORC metadata for CONFIG_UNWINDER_ORC=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
 else
 $(warning "Cannot use CONFIG_STACK_VALIDATION=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
 endif
@@ -171,7 +171,7 @@ config X86
 select HAVE_PERF_USER_STACK_DUMP
 select HAVE_RCU_TABLE_FREE
 select HAVE_REGS_AND_STACK_ACCESS_API
-select HAVE_RELIABLE_STACKTRACE if X86_64 && FRAME_POINTER_UNWINDER && STACK_VALIDATION
+select HAVE_RELIABLE_STACKTRACE if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION
 select HAVE_STACK_VALIDATION if X86_64
 select HAVE_SYSCALL_TRACEPOINTS
 select HAVE_UNSTABLE_SCHED_CLOCK
@@ -359,28 +359,14 @@ config PUNIT_ATOM_DEBUG
 
 choice
 prompt "Choose kernel unwinder"
-default FRAME_POINTER_UNWINDER
+default UNWINDER_ORC if X86_64
+default UNWINDER_FRAME_POINTER if X86_32
 ---help---
 This determines which method will be used for unwinding kernel stack
 traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack,
 livepatch, lockdep, and more.
 
-config FRAME_POINTER_UNWINDER
-bool "Frame pointer unwinder"
-select FRAME_POINTER
----help---
-This option enables the frame pointer unwinder for unwinding kernel
-stack traces.
-
-The unwinder itself is fast and it uses less RAM than the ORC
-unwinder, but the kernel text size will grow by ~3% and the kernel's
-overall performance will degrade by roughly 5-10%.
-
-This option is recommended if you want to use the livepatch
-consistency model, as this is currently the only way to get a
-reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
-
-config ORC_UNWINDER
+config UNWINDER_ORC
 bool "ORC unwinder"
 depends on X86_64
 select STACK_VALIDATION
@@ -396,7 +382,22 @@ config ORC_UNWINDER
 Enabling this option will increase the kernel's runtime memory usage
 by roughly 2-4MB, depending on your kernel config.
 
-config GUESS_UNWINDER
+config UNWINDER_FRAME_POINTER
+bool "Frame pointer unwinder"
+select FRAME_POINTER
+---help---
+This option enables the frame pointer unwinder for unwinding kernel
+stack traces.
+
+The unwinder itself is fast and it uses less RAM than the ORC
+unwinder, but the kernel text size will grow by ~3% and the kernel's
+overall performance will degrade by roughly 5-10%.
+
+This option is recommended if you want to use the livepatch
+consistency model, as this is currently the only way to get a
+reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
+
+config UNWINDER_GUESS
 bool "Guess unwinder"
 depends on EXPERT
 ---help---
@@ -411,7 +412,7 @@ config GUESS_UNWINDER
 endchoice
 
 config FRAME_POINTER
-depends on !ORC_UNWINDER && !GUESS_UNWINDER
+depends on !UNWINDER_ORC && !UNWINDER_GUESS
 bool
 
 endmenu
@@ -1,5 +1,5 @@
 CONFIG_NOHIGHMEM=y
 # CONFIG_HIGHMEM4G is not set
 # CONFIG_HIGHMEM64G is not set
-CONFIG_GUESS_UNWINDER=y
-# CONFIG_FRAME_POINTER_UNWINDER is not set
+CONFIG_UNWINDER_GUESS=y
+# CONFIG_UNWINDER_FRAME_POINTER is not set
@@ -299,6 +299,7 @@ CONFIG_DEBUG_STACKOVERFLOW=y
 # CONFIG_DEBUG_RODATA_TEST is not set
 CONFIG_DEBUG_BOOT_PARAMS=y
 CONFIG_OPTIMIZE_INLINING=y
+CONFIG_UNWINDER_ORC=y
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
 CONFIG_SECURITY_SELINUX=y
@@ -142,56 +142,25 @@ For 32-bit we have the following conventions - kernel is built with
 UNWIND_HINT_REGS offset=\offset
 .endm
 
-.macro RESTORE_EXTRA_REGS offset=0
-movq 0*8+\offset(%rsp), %r15
-movq 1*8+\offset(%rsp), %r14
-movq 2*8+\offset(%rsp), %r13
-movq 3*8+\offset(%rsp), %r12
-movq 4*8+\offset(%rsp), %rbp
-movq 5*8+\offset(%rsp), %rbx
-UNWIND_HINT_REGS offset=\offset extra=0
+.macro POP_EXTRA_REGS
+popq %r15
+popq %r14
+popq %r13
+popq %r12
+popq %rbp
+popq %rbx
 .endm
 
-.macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
-.if \rstor_r11
-movq 6*8(%rsp), %r11
-.endif
-.if \rstor_r8910
-movq 7*8(%rsp), %r10
-movq 8*8(%rsp), %r9
-movq 9*8(%rsp), %r8
-.endif
-.if \rstor_rax
-movq 10*8(%rsp), %rax
-.endif
-.if \rstor_rcx
-movq 11*8(%rsp), %rcx
-.endif
-.if \rstor_rdx
-movq 12*8(%rsp), %rdx
-.endif
-movq 13*8(%rsp), %rsi
-movq 14*8(%rsp), %rdi
-UNWIND_HINT_IRET_REGS offset=16*8
-.endm
-.macro RESTORE_C_REGS
-RESTORE_C_REGS_HELPER 1,1,1,1,1
-.endm
-.macro RESTORE_C_REGS_EXCEPT_RAX
-RESTORE_C_REGS_HELPER 0,1,1,1,1
-.endm
-.macro RESTORE_C_REGS_EXCEPT_RCX
-RESTORE_C_REGS_HELPER 1,0,1,1,1
-.endm
-.macro RESTORE_C_REGS_EXCEPT_R11
-RESTORE_C_REGS_HELPER 1,1,0,1,1
-.endm
-.macro RESTORE_C_REGS_EXCEPT_RCX_R11
-RESTORE_C_REGS_HELPER 1,0,0,1,1
-.endm
-
-.macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
-subq $-(15*8+\addskip), %rsp
+.macro POP_C_REGS
+popq %r11
+popq %r10
+popq %r9
+popq %r8
+popq %rax
+popq %rcx
+popq %rdx
+popq %rsi
+popq %rdi
 .endm
 
 .macro icebp
@ -221,10 +221,9 @@ entry_SYSCALL_64_fastpath:
|
||||||
TRACE_IRQS_ON /* user mode is traced as IRQs on */
|
TRACE_IRQS_ON /* user mode is traced as IRQs on */
|
||||||
movq RIP(%rsp), %rcx
|
movq RIP(%rsp), %rcx
|
||||||
movq EFLAGS(%rsp), %r11
|
movq EFLAGS(%rsp), %r11
|
||||||
RESTORE_C_REGS_EXCEPT_RCX_R11
|
addq $6*8, %rsp /* skip extra regs -- they were preserved */
|
||||||
movq RSP(%rsp), %rsp
|
|
||||||
UNWIND_HINT_EMPTY
|
UNWIND_HINT_EMPTY
|
||||||
USERGS_SYSRET64
|
jmp .Lpop_c_regs_except_rcx_r11_and_sysret
|
||||||
|
|
||||||
1:
|
1:
|
||||||
/*
|
/*
|
||||||
|
@ -246,17 +245,18 @@ entry_SYSCALL64_slow_path:
|
||||||
call do_syscall_64 /* returns with IRQs disabled */
|
call do_syscall_64 /* returns with IRQs disabled */
|
||||||
|
|
||||||
return_from_SYSCALL_64:
|
return_from_SYSCALL_64:
|
||||||
RESTORE_EXTRA_REGS
|
|
||||||
TRACE_IRQS_IRETQ /* we're about to change IF */
|
TRACE_IRQS_IRETQ /* we're about to change IF */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Try to use SYSRET instead of IRET if we're returning to
|
* Try to use SYSRET instead of IRET if we're returning to
|
||||||
* a completely clean 64-bit userspace context.
|
* a completely clean 64-bit userspace context. If we're not,
|
||||||
|
* go to the slow exit path.
|
||||||
*/
|
*/
|
||||||
movq RCX(%rsp), %rcx
|
movq RCX(%rsp), %rcx
|
||||||
movq RIP(%rsp), %r11
|
movq RIP(%rsp), %r11
|
||||||
cmpq %rcx, %r11 /* RCX == RIP */
|
|
||||||
jne opportunistic_sysret_failed
|
cmpq %rcx, %r11 /* SYSRET requires RCX == RIP */
|
||||||
|
jne swapgs_restore_regs_and_return_to_usermode
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
|
* On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
|
||||||
|
@ -274,14 +274,14 @@ return_from_SYSCALL_64:
|
||||||
|
|
||||||
/* If this changed %rcx, it was not canonical */
|
/* If this changed %rcx, it was not canonical */
|
||||||
cmpq %rcx, %r11
|
cmpq %rcx, %r11
|
||||||
jne opportunistic_sysret_failed
|
jne swapgs_restore_regs_and_return_to_usermode
|
||||||
|
|
||||||
cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */
|
cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */
|
||||||
jne opportunistic_sysret_failed
|
jne swapgs_restore_regs_and_return_to_usermode
|
||||||
|
|
||||||
movq R11(%rsp), %r11
|
movq R11(%rsp), %r11
|
||||||
cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */
|
cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */
|
||||||
jne opportunistic_sysret_failed
|
jne swapgs_restore_regs_and_return_to_usermode
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
|
* SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
|
||||||
|
@ -302,12 +302,12 @@ return_from_SYSCALL_64:
|
||||||
* would never get past 'stuck_here'.
|
* would never get past 'stuck_here'.
|
||||||
*/
|
*/
|
||||||
testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
|
testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
|
||||||
jnz opportunistic_sysret_failed
|
jnz swapgs_restore_regs_and_return_to_usermode
|
||||||
|
|
||||||
/* nothing to check for RSP */
|
/* nothing to check for RSP */
|
||||||
|
|
||||||
cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */
|
cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */
|
||||||
jne opportunistic_sysret_failed
|
jne swapgs_restore_regs_and_return_to_usermode
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We win! This label is here just for ease of understanding
|
* We win! This label is here just for ease of understanding
|
||||||
|
@ -315,14 +315,20 @@ return_from_SYSCALL_64:
|
||||||
*/
|
*/
|
||||||
syscall_return_via_sysret:
|
syscall_return_via_sysret:
|
||||||
/* rcx and r11 are already restored (see code above) */
|
/* rcx and r11 are already restored (see code above) */
|
||||||
RESTORE_C_REGS_EXCEPT_RCX_R11
|
|
||||||
movq RSP(%rsp), %rsp
|
|
||||||
UNWIND_HINT_EMPTY
|
UNWIND_HINT_EMPTY
|
||||||
|
POP_EXTRA_REGS
|
||||||
|
.Lpop_c_regs_except_rcx_r11_and_sysret:
|
||||||
|
popq %rsi /* skip r11 */
|
||||||
|
popq %r10
|
||||||
|
popq %r9
|
||||||
|
popq %r8
|
||||||
|
popq %rax
|
||||||
|
popq %rsi /* skip rcx */
|
||||||
|
popq %rdx
|
||||||
|
popq %rsi
|
||||||
|
popq %rdi
|
||||||
|
movq RSP-ORIG_RAX(%rsp), %rsp
|
||||||
USERGS_SYSRET64
|
USERGS_SYSRET64
|
||||||
|
|
||||||
opportunistic_sysret_failed:
|
|
||||||
SWAPGS
|
|
||||||
jmp restore_c_regs_and_iret
|
|
||||||
END(entry_SYSCALL_64)
|
END(entry_SYSCALL_64)
|
||||||
|
|
||||||
ENTRY(stub_ptregs_64)
|
ENTRY(stub_ptregs_64)
|
||||||
|
@ -423,8 +429,7 @@ ENTRY(ret_from_fork)
|
||||||
movq %rsp, %rdi
|
movq %rsp, %rdi
|
||||||
call syscall_return_slowpath /* returns with IRQs disabled */
|
call syscall_return_slowpath /* returns with IRQs disabled */
|
||||||
TRACE_IRQS_ON /* user mode is traced as IRQS on */
|
TRACE_IRQS_ON /* user mode is traced as IRQS on */
|
||||||
SWAPGS
|
jmp swapgs_restore_regs_and_return_to_usermode
|
||||||
jmp restore_regs_and_iret
|
|
||||||
|
|
||||||
1:
|
1:
|
||||||
/* kernel thread */
|
/* kernel thread */
|
||||||
|
@ -612,8 +617,21 @@ GLOBAL(retint_user)
|
||||||
mov %rsp,%rdi
|
mov %rsp,%rdi
|
||||||
call prepare_exit_to_usermode
|
call prepare_exit_to_usermode
|
||||||
TRACE_IRQS_IRETQ
|
TRACE_IRQS_IRETQ
|
||||||
|
|
||||||
|
GLOBAL(swapgs_restore_regs_and_return_to_usermode)
|
||||||
|
#ifdef CONFIG_DEBUG_ENTRY
|
||||||
|
/* Assert that pt_regs indicates user mode. */
|
||||||
|
testb $3, CS(%rsp)
|
||||||
|
jnz 1f
|
||||||
|
ud2
|
||||||
|
1:
|
||||||
|
#endif
|
||||||
SWAPGS
|
SWAPGS
|
||||||
jmp restore_regs_and_iret
|
POP_EXTRA_REGS
|
||||||
|
POP_C_REGS
|
||||||
|
addq $8, %rsp /* skip regs->orig_ax */
|
||||||
|
INTERRUPT_RETURN
|
||||||
|
|
||||||
|
|
||||||
/* Returning to kernel space */
|
/* Returning to kernel space */
|
||||||
retint_kernel:
|
retint_kernel:
|
||||||
|
@ -633,15 +651,17 @@ retint_kernel:
|
||||||
*/
|
*/
|
||||||
TRACE_IRQS_IRETQ
|
TRACE_IRQS_IRETQ
|
||||||
|
|
||||||
/*
|
GLOBAL(restore_regs_and_return_to_kernel)
|
||||||
* At this label, code paths which return to kernel and to user,
|
#ifdef CONFIG_DEBUG_ENTRY
|
||||||
* which come from interrupts/exception and from syscalls, merge.
|
/* Assert that pt_regs indicates kernel mode. */
|
||||||
*/
|
testb $3, CS(%rsp)
|
||||||
GLOBAL(restore_regs_and_iret)
|
jz 1f
|
||||||
RESTORE_EXTRA_REGS
|
ud2
|
||||||
restore_c_regs_and_iret:
|
1:
|
||||||
RESTORE_C_REGS
|
#endif
|
||||||
REMOVE_PT_GPREGS_FROM_STACK 8
|
POP_EXTRA_REGS
|
||||||
|
POP_C_REGS
|
||||||
|
addq $8, %rsp /* skip regs->orig_ax */
|
||||||
INTERRUPT_RETURN
|
INTERRUPT_RETURN
|
||||||
|
|
||||||
ENTRY(native_iret)
|
ENTRY(native_iret)
|
||||||
|
@ -818,7 +838,7 @@ ENTRY(\sym)
|
||||||
|
|
||||||
ASM_CLAC
|
ASM_CLAC
|
||||||
|
|
||||||
.ifeq \has_error_code
|
.if \has_error_code == 0
|
||||||
pushq $-1 /* ORIG_RAX: no syscall to restart */
|
pushq $-1 /* ORIG_RAX: no syscall to restart */
|
||||||
.endif
|
.endif
|
||||||
|
|
||||||
|
@ -1059,6 +1079,7 @@ idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
|
||||||
idtentry stack_segment do_stack_segment has_error_code=1
|
idtentry stack_segment do_stack_segment has_error_code=1
|
||||||
|
|
||||||
#ifdef CONFIG_XEN
|
#ifdef CONFIG_XEN
|
||||||
|
idtentry xennmi do_nmi has_error_code=0
|
||||||
idtentry xendebug do_debug has_error_code=0
|
idtentry xendebug do_debug has_error_code=0
|
||||||
idtentry xenint3 do_int3 has_error_code=0
|
idtentry xenint3 do_int3 has_error_code=0
|
||||||
#endif
|
#endif
|
||||||
|
@ -1112,17 +1133,14 @@ ENTRY(paranoid_exit)
|
||||||
DISABLE_INTERRUPTS(CLBR_ANY)
|
DISABLE_INTERRUPTS(CLBR_ANY)
|
||||||
TRACE_IRQS_OFF_DEBUG
|
TRACE_IRQS_OFF_DEBUG
|
||||||
testl %ebx, %ebx /* swapgs needed? */
|
testl %ebx, %ebx /* swapgs needed? */
|
||||||
jnz paranoid_exit_no_swapgs
|
jnz .Lparanoid_exit_no_swapgs
|
||||||
TRACE_IRQS_IRETQ
|
TRACE_IRQS_IRETQ
|
||||||
SWAPGS_UNSAFE_STACK
|
SWAPGS_UNSAFE_STACK
|
||||||
jmp paranoid_exit_restore
|
jmp .Lparanoid_exit_restore
|
||||||
paranoid_exit_no_swapgs:
|
.Lparanoid_exit_no_swapgs:
|
||||||
TRACE_IRQS_IRETQ_DEBUG
|
TRACE_IRQS_IRETQ_DEBUG
|
||||||
paranoid_exit_restore:
|
.Lparanoid_exit_restore:
|
||||||
RESTORE_EXTRA_REGS
|
jmp restore_regs_and_return_to_kernel
|
||||||
RESTORE_C_REGS
|
|
||||||
REMOVE_PT_GPREGS_FROM_STACK 8
|
|
||||||
INTERRUPT_RETURN
|
|
||||||
END(paranoid_exit)
|
END(paranoid_exit)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1223,10 +1241,13 @@ ENTRY(error_exit)
|
||||||
jmp retint_user
|
jmp retint_user
|
||||||
END(error_exit)
|
END(error_exit)
|
||||||
|
|
||||||
/* Runs on exception stack */
|
/*
|
||||||
/* XXX: broken on Xen PV */
|
* Runs on exception stack. Xen PV does not go through this path at all,
|
||||||
|
* so we can use real assembly here.
|
||||||
|
*/
|
||||||
ENTRY(nmi)
|
ENTRY(nmi)
|
||||||
UNWIND_HINT_IRET_REGS
|
UNWIND_HINT_IRET_REGS
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We allow breakpoints in NMIs. If a breakpoint occurs, then
|
* We allow breakpoints in NMIs. If a breakpoint occurs, then
|
||||||
* the iretq it performs will take us out of NMI context.
|
* the iretq it performs will take us out of NMI context.
|
||||||
|
@ -1284,7 +1305,7 @@ ENTRY(nmi)
|
||||||
* stacks lest we corrupt the "NMI executing" variable.
|
* stacks lest we corrupt the "NMI executing" variable.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
SWAPGS_UNSAFE_STACK
|
swapgs
|
||||||
cld
|
cld
|
||||||
movq %rsp, %rdx
|
movq %rsp, %rdx
|
||||||
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
|
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
|
||||||
|
@ -1328,8 +1349,7 @@ ENTRY(nmi)
|
||||||
* Return back to user mode. We must *not* do the normal exit
|
* Return back to user mode. We must *not* do the normal exit
|
||||||
* work, because we don't want to enable interrupts.
|
* work, because we don't want to enable interrupts.
|
||||||
*/
|
*/
|
||||||
SWAPGS
|
jmp swapgs_restore_regs_and_return_to_usermode
|
||||||
jmp restore_regs_and_iret
|
|
||||||
|
|
||||||
.Lnmi_from_kernel:
|
.Lnmi_from_kernel:
|
||||||
/*
|
/*
|
||||||
|
@ -1450,7 +1470,7 @@ nested_nmi_out:
|
||||||
popq %rdx
|
popq %rdx
|
||||||
|
|
||||||
/* We are returning to kernel mode, so this cannot result in a fault. */
|
/* We are returning to kernel mode, so this cannot result in a fault. */
|
||||||
INTERRUPT_RETURN
|
iretq
|
||||||
|
|
||||||
first_nmi:
|
first_nmi:
|
||||||
/* Restore rdx. */
|
/* Restore rdx. */
|
||||||
|
@ -1481,7 +1501,7 @@ first_nmi:
|
||||||
pushfq /* RFLAGS */
|
pushfq /* RFLAGS */
|
||||||
pushq $__KERNEL_CS /* CS */
|
pushq $__KERNEL_CS /* CS */
|
||||||
pushq $1f /* RIP */
|
pushq $1f /* RIP */
|
||||||
INTERRUPT_RETURN /* continues at repeat_nmi below */
|
iretq /* continues at repeat_nmi below */
|
||||||
UNWIND_HINT_IRET_REGS
|
UNWIND_HINT_IRET_REGS
|
||||||
1:
|
1:
|
||||||
#endif
|
#endif
|
||||||
|
@ -1544,29 +1564,34 @@ end_repeat_nmi:
|
||||||
nmi_swapgs:
|
nmi_swapgs:
|
||||||
SWAPGS_UNSAFE_STACK
|
SWAPGS_UNSAFE_STACK
|
||||||
nmi_restore:
|
nmi_restore:
|
||||||
RESTORE_EXTRA_REGS
|
POP_EXTRA_REGS
|
||||||
RESTORE_C_REGS
|
POP_C_REGS
|
||||||
|
|
||||||
/* Point RSP at the "iret" frame. */
|
/*
|
||||||
REMOVE_PT_GPREGS_FROM_STACK 6*8
|
* Skip orig_ax and the "outermost" frame to point RSP at the "iret"
|
||||||
|
* at the "iret" frame.
|
||||||
|
*/
|
||||||
|
addq $6*8, %rsp
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Clear "NMI executing". Set DF first so that we can easily
|
* Clear "NMI executing". Set DF first so that we can easily
|
||||||
* distinguish the remaining code between here and IRET from
|
* distinguish the remaining code between here and IRET from
|
||||||
* the SYSCALL entry and exit paths. On a native kernel, we
|
* the SYSCALL entry and exit paths.
|
||||||
* could just inspect RIP, but, on paravirt kernels,
|
*
|
||||||
* INTERRUPT_RETURN can translate into a jump into a
|
* We arguably should just inspect RIP instead, but I (Andy) wrote
|
||||||
* hypercall page.
|
* this code when I had the misapprehension that Xen PV supported
|
||||||
|
* NMIs, and Xen PV would break that approach.
|
||||||
*/
|
*/
|
||||||
std
|
std
|
||||||
movq $0, 5*8(%rsp) /* clear "NMI executing" */
|
movq $0, 5*8(%rsp) /* clear "NMI executing" */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* INTERRUPT_RETURN reads the "iret" frame and exits the NMI
|
* iretq reads the "iret" frame and exits the NMI stack in a
|
||||||
* stack in a single instruction. We are returning to kernel
|
* single instruction. We are returning to kernel mode, so this
|
||||||
* mode, so this cannot result in a fault.
|
* cannot result in a fault. Similarly, we don't need to worry
|
||||||
|
* about espfix64 on the way back to kernel mode.
|
||||||
*/
|
*/
|
||||||
INTERRUPT_RETURN
|
iretq
|
||||||
END(nmi)
|
END(nmi)
|
||||||
|
|
||||||
ENTRY(ignore_sysret)
|
ENTRY(ignore_sysret)
|
||||||
|
|
|
@ -337,8 +337,7 @@ ENTRY(entry_INT80_compat)
|
||||||
|
|
||||||
/* Go back to user mode. */
|
/* Go back to user mode. */
|
||||||
TRACE_IRQS_ON
|
TRACE_IRQS_ON
|
||||||
SWAPGS
|
jmp swapgs_restore_regs_and_return_to_usermode
|
||||||
jmp restore_regs_and_iret
|
|
||||||
END(entry_INT80_compat)
|
END(entry_INT80_compat)
|
||||||
|
|
||||||
ENTRY(stub32_clone)
|
ENTRY(stub32_clone)
|
||||||
|
|
|
@ -45,7 +45,7 @@ static inline bool rdrand_long(unsigned long *v)
|
||||||
bool ok;
|
bool ok;
|
||||||
unsigned int retry = RDRAND_RETRY_LOOPS;
|
unsigned int retry = RDRAND_RETRY_LOOPS;
|
||||||
do {
|
do {
|
||||||
asm volatile(RDRAND_LONG "\n\t"
|
asm volatile(RDRAND_LONG
|
||||||
CC_SET(c)
|
CC_SET(c)
|
||||||
: CC_OUT(c) (ok), "=a" (*v));
|
: CC_OUT(c) (ok), "=a" (*v));
|
||||||
if (ok)
|
if (ok)
|
||||||
|
@ -59,7 +59,7 @@ static inline bool rdrand_int(unsigned int *v)
|
||||||
bool ok;
|
bool ok;
|
||||||
unsigned int retry = RDRAND_RETRY_LOOPS;
|
unsigned int retry = RDRAND_RETRY_LOOPS;
|
||||||
do {
|
do {
|
||||||
asm volatile(RDRAND_INT "\n\t"
|
asm volatile(RDRAND_INT
|
||||||
CC_SET(c)
|
CC_SET(c)
|
||||||
: CC_OUT(c) (ok), "=a" (*v));
|
: CC_OUT(c) (ok), "=a" (*v));
|
||||||
if (ok)
|
if (ok)
|
||||||
|
@ -71,7 +71,7 @@ static inline bool rdrand_int(unsigned int *v)
|
||||||
static inline bool rdseed_long(unsigned long *v)
|
static inline bool rdseed_long(unsigned long *v)
|
||||||
{
|
{
|
||||||
bool ok;
|
bool ok;
|
||||||
asm volatile(RDSEED_LONG "\n\t"
|
asm volatile(RDSEED_LONG
|
||||||
CC_SET(c)
|
CC_SET(c)
|
||||||
: CC_OUT(c) (ok), "=a" (*v));
|
: CC_OUT(c) (ok), "=a" (*v));
|
||||||
return ok;
|
return ok;
|
||||||
|
@ -80,7 +80,7 @@ static inline bool rdseed_long(unsigned long *v)
|
||||||
static inline bool rdseed_int(unsigned int *v)
|
static inline bool rdseed_int(unsigned int *v)
|
||||||
{
|
{
|
||||||
bool ok;
|
bool ok;
|
||||||
asm volatile(RDSEED_INT "\n\t"
|
asm volatile(RDSEED_INT
|
||||||
CC_SET(c)
|
CC_SET(c)
|
||||||
: CC_OUT(c) (ok), "=a" (*v));
|
: CC_OUT(c) (ok), "=a" (*v));
|
||||||
return ok;
|
return ok;
|
||||||
|
|
|
@ -143,7 +143,7 @@ static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
|
||||||
static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
|
static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
|
||||||
{
|
{
|
||||||
bool negative;
|
bool negative;
|
||||||
asm volatile(LOCK_PREFIX "andb %2,%1\n\t"
|
asm volatile(LOCK_PREFIX "andb %2,%1"
|
||||||
CC_SET(s)
|
CC_SET(s)
|
||||||
: CC_OUT(s) (negative), ADDR
|
: CC_OUT(s) (negative), ADDR
|
||||||
: "ir" ((char) ~(1 << nr)) : "memory");
|
: "ir" ((char) ~(1 << nr)) : "memory");
|
||||||
|
@ -246,7 +246,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
|
||||||
{
|
{
|
||||||
bool oldbit;
|
bool oldbit;
|
||||||
|
|
||||||
asm("bts %2,%1\n\t"
|
asm("bts %2,%1"
|
||||||
CC_SET(c)
|
CC_SET(c)
|
||||||
: CC_OUT(c) (oldbit), ADDR
|
: CC_OUT(c) (oldbit), ADDR
|
||||||
: "Ir" (nr));
|
: "Ir" (nr));
|
||||||
|
@ -286,7 +286,7 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
|
||||||
{
|
{
|
||||||
bool oldbit;
|
bool oldbit;
|
||||||
|
|
||||||
asm volatile("btr %2,%1\n\t"
|
asm volatile("btr %2,%1"
|
||||||
CC_SET(c)
|
CC_SET(c)
|
||||||
: CC_OUT(c) (oldbit), ADDR
|
: CC_OUT(c) (oldbit), ADDR
|
||||||
: "Ir" (nr));
|
: "Ir" (nr));
|
||||||
|
@ -298,7 +298,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
|
||||||
{
|
{
|
||||||
bool oldbit;
|
bool oldbit;
|
||||||
|
|
||||||
asm volatile("btc %2,%1\n\t"
|
asm volatile("btc %2,%1"
|
||||||
CC_SET(c)
|
CC_SET(c)
|
||||||
: CC_OUT(c) (oldbit), ADDR
|
: CC_OUT(c) (oldbit), ADDR
|
||||||
: "Ir" (nr) : "memory");
|
: "Ir" (nr) : "memory");
|
||||||
|
@ -329,7 +329,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
|
||||||
{
|
{
|
||||||
bool oldbit;
|
bool oldbit;
|
||||||
|
|
||||||
asm volatile("bt %2,%1\n\t"
|
asm volatile("bt %2,%1"
|
||||||
CC_SET(c)
|
CC_SET(c)
|
||||||
: CC_OUT(c) (oldbit)
|
: CC_OUT(c) (oldbit)
|
||||||
: "m" (*(unsigned long *)addr), "Ir" (nr));
|
: "m" (*(unsigned long *)addr), "Ir" (nr));
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
*/
|
*/
|
||||||
#include <linux/types.h>
|
#include <linux/types.h>
|
||||||
#include <linux/sched.h>
|
#include <linux/sched.h>
|
||||||
|
#include <linux/sched/task_stack.h>
|
||||||
#include <asm/processor.h>
|
#include <asm/processor.h>
|
||||||
#include <asm/user32.h>
|
#include <asm/user32.h>
|
||||||
#include <asm/unistd.h>
|
#include <asm/unistd.h>
|
||||||
|
|
|
@ -126,11 +126,10 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
|
||||||
#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)
|
#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)
|
||||||
|
|
||||||
#define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability))
|
#define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability))
|
||||||
#define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability))
|
|
||||||
#define setup_clear_cpu_cap(bit) do { \
|
extern void setup_clear_cpu_cap(unsigned int bit);
|
||||||
clear_cpu_cap(&boot_cpu_data, bit); \
|
extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
|
||||||
set_bit(bit, (unsigned long *)cpu_caps_cleared); \
|
|
||||||
} while (0)
|
|
||||||
#define setup_force_cpu_cap(bit) do { \
|
#define setup_force_cpu_cap(bit) do { \
|
||||||
set_cpu_cap(&boot_cpu_data, bit); \
|
set_cpu_cap(&boot_cpu_data, bit); \
|
||||||
set_bit(bit, (unsigned long *)cpu_caps_set); \
|
set_bit(bit, (unsigned long *)cpu_caps_set); \
|
||||||
|
|
|
@ -22,6 +22,11 @@
|
||||||
* this feature bit is not displayed in /proc/cpuinfo at all.
|
* this feature bit is not displayed in /proc/cpuinfo at all.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* When adding new features here that depend on other features,
|
||||||
|
* please update the table in kernel/cpu/cpuid-deps.c
|
||||||
|
*/
|
||||||
|
|
||||||
/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
|
/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
|
||||||
#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
|
#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
|
||||||
#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
|
#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
|
||||||
|
@ -295,6 +300,12 @@
|
||||||
#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
|
#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
|
||||||
#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
|
#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
|
||||||
#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
|
#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
|
||||||
|
#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
|
||||||
|
#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */
|
||||||
|
#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */
|
||||||
|
#define X86_FEATURE_VPCLMULQDQ (16*32+ 10) /* Carry-Less Multiplication Double Quadword */
|
||||||
|
#define X86_FEATURE_AVX512_VNNI (16*32+ 11) /* Vector Neural Network Instructions */
|
||||||
|
#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB */
|
||||||
#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
|
#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
|
||||||
#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
|
#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
|
||||||
#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
|
#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
#include <asm/orc_types.h>
|
#include <asm/orc_types.h>
|
||||||
|
|
||||||
struct mod_arch_specific {
|
struct mod_arch_specific {
|
||||||
#ifdef CONFIG_ORC_UNWINDER
|
#ifdef CONFIG_UNWINDER_ORC
|
||||||
unsigned int num_orcs;
|
unsigned int num_orcs;
|
||||||
int *orc_unwind_ip;
|
int *orc_unwind_ip;
|
||||||
struct orc_entry *orc_unwind;
|
struct orc_entry *orc_unwind;
|
||||||
|
|
|
@ -16,10 +16,9 @@
|
||||||
#include <linux/cpumask.h>
|
#include <linux/cpumask.h>
|
||||||
#include <asm/frame.h>
|
#include <asm/frame.h>
|
||||||
|
|
||||||
static inline void load_sp0(struct tss_struct *tss,
|
static inline void load_sp0(unsigned long sp0)
|
||||||
struct thread_struct *thread)
|
|
||||||
{
|
{
|
||||||
PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread);
|
PVOP_VCALL1(pv_cpu_ops.load_sp0, sp0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The paravirtualized CPUID instruction. */
|
/* The paravirtualized CPUID instruction. */
|
||||||
|
|
|
@ -134,7 +134,7 @@ struct pv_cpu_ops {
|
||||||
void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries);
|
void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries);
|
||||||
void (*free_ldt)(struct desc_struct *ldt, unsigned entries);
|
void (*free_ldt)(struct desc_struct *ldt, unsigned entries);
|
||||||
|
|
||||||
void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
|
void (*load_sp0)(unsigned long sp0);
|
||||||
|
|
||||||
void (*set_iopl_mask)(unsigned mask);
|
void (*set_iopl_mask)(unsigned mask);
|
||||||
|
|
||||||
|
|
|
@ -526,7 +526,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr,
|
||||||
{
|
{
|
||||||
bool oldbit;
|
bool oldbit;
|
||||||
|
|
||||||
asm volatile("bt "__percpu_arg(2)",%1\n\t"
|
asm volatile("bt "__percpu_arg(2)",%1"
|
||||||
CC_SET(c)
|
CC_SET(c)
|
||||||
: CC_OUT(c) (oldbit)
|
: CC_OUT(c) (oldbit)
|
||||||
: "m" (*(unsigned long __percpu *)addr), "Ir" (nr));
|
: "m" (*(unsigned long __percpu *)addr), "Ir" (nr));
|
||||||
|
|
|
@ -431,7 +431,9 @@ typedef struct {
|
||||||
struct thread_struct {
|
struct thread_struct {
|
||||||
/* Cached TLS descriptors: */
|
/* Cached TLS descriptors: */
|
||||||
struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
|
struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
|
||||||
|
#ifdef CONFIG_X86_32
|
||||||
unsigned long sp0;
|
unsigned long sp0;
|
||||||
|
#endif
|
||||||
unsigned long sp;
|
unsigned long sp;
|
||||||
#ifdef CONFIG_X86_32
|
#ifdef CONFIG_X86_32
|
||||||
unsigned long sysenter_cs;
|
unsigned long sysenter_cs;
|
||||||
|
@ -518,16 +520,9 @@ static inline void native_set_iopl_mask(unsigned mask)
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
|
native_load_sp0(unsigned long sp0)
|
||||||
{
|
{
|
||||||
tss->x86_tss.sp0 = thread->sp0;
|
this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
|
||||||
#ifdef CONFIG_X86_32
|
|
||||||
/* Only happens when SEP is enabled, no need to test "SEP"arately: */
|
|
||||||
if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
|
|
||||||
tss->x86_tss.ss1 = thread->sysenter_cs;
|
|
||||||
wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void native_swapgs(void)
|
static inline void native_swapgs(void)
|
||||||
|
@ -547,15 +542,20 @@ static inline unsigned long current_top_of_stack(void)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool on_thread_stack(void)
|
||||||
|
{
|
||||||
|
return (unsigned long)(current_top_of_stack() -
|
||||||
|
current_stack_pointer) < THREAD_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_PARAVIRT
|
#ifdef CONFIG_PARAVIRT
|
||||||
#include <asm/paravirt.h>
|
#include <asm/paravirt.h>
|
||||||
#else
|
#else
|
||||||
#define __cpuid native_cpuid
|
#define __cpuid native_cpuid
|
||||||
|
|
||||||
static inline void load_sp0(struct tss_struct *tss,
|
static inline void load_sp0(unsigned long sp0)
|
||||||
struct thread_struct *thread)
|
|
||||||
{
|
{
|
||||||
native_load_sp0(tss, thread);
|
native_load_sp0(sp0);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define set_iopl_mask native_set_iopl_mask
|
#define set_iopl_mask native_set_iopl_mask
|
||||||
|
@ -804,6 +804,15 @@ static inline void spin_lock_prefetch(const void *x)
|
||||||
#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
|
#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
|
||||||
TOP_OF_KERNEL_STACK_PADDING)
|
TOP_OF_KERNEL_STACK_PADDING)
|
||||||
|
|
||||||
|
#define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1))
|
||||||
|
|
||||||
|
#define task_pt_regs(task) \
|
||||||
|
({ \
|
||||||
|
unsigned long __ptr = (unsigned long)task_stack_page(task); \
|
||||||
|
__ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \
|
||||||
|
((struct pt_regs *)__ptr) - 1; \
|
||||||
|
})
|
||||||
|
|
||||||
#ifdef CONFIG_X86_32
|
#ifdef CONFIG_X86_32
|
||||||
/*
|
/*
|
||||||
* User space process size: 3GB (default).
|
* User space process size: 3GB (default).
|
||||||
|
@ -823,23 +832,6 @@ static inline void spin_lock_prefetch(const void *x)
|
||||||
.addr_limit = KERNEL_DS, \
|
.addr_limit = KERNEL_DS, \
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
|
|
||||||
* This is necessary to guarantee that the entire "struct pt_regs"
|
|
||||||
* is accessible even if the CPU haven't stored the SS/ESP registers
|
|
||||||
* on the stack (interrupt gate does not save these registers
|
|
||||||
* when switching to the same priv ring).
|
|
||||||
* Therefore beware: accessing the ss/esp fields of the
|
|
||||||
* "struct pt_regs" is possible, but they may contain the
|
|
||||||
* completely wrong values.
|
|
||||||
*/
|
|
||||||
#define task_pt_regs(task) \
|
|
||||||
({ \
|
|
||||||
unsigned long __ptr = (unsigned long)task_stack_page(task); \
|
|
||||||
__ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \
|
|
||||||
((struct pt_regs *)__ptr) - 1; \
|
|
||||||
})
|
|
||||||
|
|
||||||
#define KSTK_ESP(task) (task_pt_regs(task)->sp)
|
#define KSTK_ESP(task) (task_pt_regs(task)->sp)
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
@ -873,11 +865,9 @@ static inline void spin_lock_prefetch(const void *x)
|
||||||
#define STACK_TOP_MAX TASK_SIZE_MAX
|
#define STACK_TOP_MAX TASK_SIZE_MAX
|
||||||
|
|
||||||
#define INIT_THREAD { \
|
#define INIT_THREAD { \
|
||||||
.sp0 = TOP_OF_INIT_STACK, \
|
|
||||||
.addr_limit = KERNEL_DS, \
|
.addr_limit = KERNEL_DS, \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
|
|
||||||
extern unsigned long KSTK_ESP(struct task_struct *task);
|
extern unsigned long KSTK_ESP(struct task_struct *task);
|
||||||
|
|
||||||
#endif /* CONFIG_X86_64 */
|
#endif /* CONFIG_X86_64 */
|
||||||
|
|
|
@ -136,9 +136,9 @@ static inline int v8086_mode(struct pt_regs *regs)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_X86_64
|
|
||||||
static inline bool user_64bit_mode(struct pt_regs *regs)
|
static inline bool user_64bit_mode(struct pt_regs *regs)
|
||||||
{
|
{
|
||||||
|
#ifdef CONFIG_X86_64
|
||||||
#ifndef CONFIG_PARAVIRT
|
#ifndef CONFIG_PARAVIRT
|
||||||
/*
|
/*
|
||||||
* On non-paravirt systems, this is the only long mode CPL 3
|
* On non-paravirt systems, this is the only long mode CPL 3
|
||||||
|
@ -149,8 +149,12 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
|
||||||
/* Headers are too twisted for this to go in paravirt.h. */
|
/* Headers are too twisted for this to go in paravirt.h. */
|
||||||
return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs;
|
return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs;
|
||||||
#endif
|
#endif
|
||||||
|
#else /* !CONFIG_X86_64 */
|
||||||
|
return false;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_X86_64
|
||||||
#define current_user_stack_pointer() current_pt_regs()->sp
|
#define current_user_stack_pointer() current_pt_regs()->sp
|
||||||
#define compat_user_stack_pointer() current_pt_regs()->sp
|
#define compat_user_stack_pointer() current_pt_regs()->sp
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -29,7 +29,7 @@ cc_label: \
|
||||||
#define __GEN_RMWcc(fullop, var, cc, clobbers, ...) \
|
#define __GEN_RMWcc(fullop, var, cc, clobbers, ...) \
|
||||||
do { \
|
do { \
|
||||||
bool c; \
|
bool c; \
|
||||||
asm volatile (fullop ";" CC_SET(cc) \
|
asm volatile (fullop CC_SET(cc) \
|
||||||
: [counter] "+m" (var), CC_OUT(cc) (c) \
|
: [counter] "+m" (var), CC_OUT(cc) (c) \
|
||||||
: __VA_ARGS__ : clobbers); \
|
: __VA_ARGS__ : clobbers); \
|
||||||
return c; \
|
return c; \
|
||||||
|
|
|
@ -2,6 +2,8 @@
|
||||||
#ifndef _ASM_X86_SWITCH_TO_H
|
#ifndef _ASM_X86_SWITCH_TO_H
|
||||||
#define _ASM_X86_SWITCH_TO_H
|
#define _ASM_X86_SWITCH_TO_H
|
||||||
|
|
||||||
|
#include <linux/sched/task_stack.h>
|
||||||
|
|
||||||
struct task_struct; /* one of the stranger aspects of C forward declarations */
|
struct task_struct; /* one of the stranger aspects of C forward declarations */
|
||||||
|
|
||||||
struct task_struct *__switch_to_asm(struct task_struct *prev,
|
struct task_struct *__switch_to_asm(struct task_struct *prev,
|
||||||
|
@ -73,4 +75,26 @@ do { \
|
||||||
((last) = __switch_to_asm((prev), (next))); \
|
((last) = __switch_to_asm((prev), (next))); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
#ifdef CONFIG_X86_32
|
||||||
|
static inline void refresh_sysenter_cs(struct thread_struct *thread)
|
||||||
|
{
|
||||||
|
/* Only happens when SEP is enabled, no need to test "SEP"arately: */
|
||||||
|
if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs))
|
||||||
|
return;
|
||||||
|
|
||||||
|
this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs);
|
||||||
|
wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* This is used when switching tasks or entering/exiting vm86 mode. */
|
||||||
|
static inline void update_sp0(struct task_struct *task)
|
||||||
|
{
|
||||||
|
#ifdef CONFIG_X86_32
|
||||||
|
load_sp0(task->thread.sp0);
|
||||||
|
#else
|
||||||
|
load_sp0(task_top_of_stack(task));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
#endif /* _ASM_X86_SWITCH_TO_H */
|
#endif /* _ASM_X86_SWITCH_TO_H */
|
||||||
|
|
|
@ -21,7 +21,7 @@ asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
|
||||||
asmlinkage long sys_iopl(unsigned int);
|
asmlinkage long sys_iopl(unsigned int);
|
||||||
|
|
||||||
/* kernel/ldt.c */
|
/* kernel/ldt.c */
|
||||||
asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
|
asmlinkage long sys_modify_ldt(int, void __user *, unsigned long);
|
||||||
|
|
||||||
/* kernel/signal.c */
|
/* kernel/signal.c */
|
||||||
asmlinkage long sys_rt_sigreturn(void);
|
asmlinkage long sys_rt_sigreturn(void);
|
||||||
|
|
|
@ -34,11 +34,6 @@ DECLARE_EVENT_CLASS(x86_fpu,
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
DEFINE_EVENT(x86_fpu, x86_fpu_state,
|
|
||||||
TP_PROTO(struct fpu *fpu),
|
|
||||||
TP_ARGS(fpu)
|
|
||||||
);
|
|
||||||
|
|
||||||
DEFINE_EVENT(x86_fpu, x86_fpu_before_save,
|
DEFINE_EVENT(x86_fpu, x86_fpu_before_save,
|
||||||
TP_PROTO(struct fpu *fpu),
|
TP_PROTO(struct fpu *fpu),
|
||||||
TP_ARGS(fpu)
|
TP_ARGS(fpu)
|
||||||
|
@ -74,11 +69,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_activate_state,
|
||||||
TP_ARGS(fpu)
|
TP_ARGS(fpu)
|
||||||
);
|
);
|
||||||
|
|
||||||
DEFINE_EVENT(x86_fpu, x86_fpu_deactivate_state,
|
|
||||||
TP_PROTO(struct fpu *fpu),
|
|
||||||
TP_ARGS(fpu)
|
|
||||||
);
|
|
||||||
|
|
||||||
DEFINE_EVENT(x86_fpu, x86_fpu_init_state,
|
DEFINE_EVENT(x86_fpu, x86_fpu_init_state,
|
||||||
TP_PROTO(struct fpu *fpu),
|
TP_PROTO(struct fpu *fpu),
|
||||||
TP_ARGS(fpu)
|
TP_ARGS(fpu)
|
||||||
|
|
|
@ -38,9 +38,9 @@ asmlinkage void simd_coprocessor_error(void);
|
||||||
|
|
||||||
#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
|
#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
|
||||||
asmlinkage void xen_divide_error(void);
|
asmlinkage void xen_divide_error(void);
|
||||||
|
asmlinkage void xen_xennmi(void);
|
||||||
asmlinkage void xen_xendebug(void);
|
asmlinkage void xen_xendebug(void);
|
||||||
asmlinkage void xen_xenint3(void);
|
asmlinkage void xen_xenint3(void);
|
||||||
asmlinkage void xen_nmi(void);
|
|
||||||
asmlinkage void xen_overflow(void);
|
asmlinkage void xen_overflow(void);
|
||||||
asmlinkage void xen_bounds(void);
|
asmlinkage void xen_bounds(void);
|
||||||
asmlinkage void xen_invalid_op(void);
|
asmlinkage void xen_invalid_op(void);
|
||||||
|
@ -145,4 +145,22 @@ enum {
|
||||||
X86_TRAP_IRET = 32, /* 32, IRET Exception */
|
X86_TRAP_IRET = 32, /* 32, IRET Exception */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Page fault error code bits:
|
||||||
|
*
|
||||||
|
* bit 0 == 0: no page found 1: protection fault
|
||||||
|
* bit 1 == 0: read access 1: write access
|
||||||
|
* bit 2 == 0: kernel-mode access 1: user-mode access
|
||||||
|
* bit 3 == 1: use of reserved bit detected
|
||||||
|
* bit 4 == 1: fault was an instruction fetch
|
||||||
|
* bit 5 == 1: protection keys block access
|
||||||
|
*/
|
||||||
|
enum x86_pf_error_code {
|
||||||
|
X86_PF_PROT = 1 << 0,
|
||||||
|
X86_PF_WRITE = 1 << 1,
|
||||||
|
X86_PF_USER = 1 << 2,
|
||||||
|
X86_PF_RSVD = 1 << 3,
|
||||||
|
X86_PF_INSTR = 1 << 4,
|
||||||
|
X86_PF_PK = 1 << 5,
|
||||||
|
};
|
||||||
#endif /* _ASM_X86_TRAPS_H */
|
#endif /* _ASM_X86_TRAPS_H */
|
||||||
|
|
|
@ -13,11 +13,11 @@ struct unwind_state {
|
||||||
struct task_struct *task;
|
struct task_struct *task;
|
||||||
int graph_idx;
|
int graph_idx;
|
||||||
bool error;
|
bool error;
|
||||||
#if defined(CONFIG_ORC_UNWINDER)
|
#if defined(CONFIG_UNWINDER_ORC)
|
||||||
bool signal, full_regs;
|
bool signal, full_regs;
|
||||||
unsigned long sp, bp, ip;
|
unsigned long sp, bp, ip;
|
||||||
struct pt_regs *regs;
|
struct pt_regs *regs;
|
||||||
#elif defined(CONFIG_FRAME_POINTER_UNWINDER)
|
#elif defined(CONFIG_UNWINDER_FRAME_POINTER)
|
||||||
bool got_irq;
|
bool got_irq;
|
||||||
unsigned long *bp, *orig_sp, ip;
|
unsigned long *bp, *orig_sp, ip;
|
||||||
struct pt_regs *regs;
|
struct pt_regs *regs;
|
||||||
|
@ -51,7 +51,7 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
|
||||||
__unwind_start(state, task, regs, first_frame);
|
__unwind_start(state, task, regs, first_frame);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER_UNWINDER)
|
#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
|
||||||
static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
|
static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
|
||||||
{
|
{
|
||||||
if (unwind_done(state))
|
if (unwind_done(state))
|
||||||
|
@ -66,7 +66,7 @@ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_ORC_UNWINDER
|
#ifdef CONFIG_UNWINDER_ORC
|
||||||
void unwind_init(void);
|
void unwind_init(void);
|
||||||
void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
|
void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
|
||||||
void *orc, size_t orc_size);
|
void *orc, size_t orc_size);
|
||||||
|
|
|
@ -152,5 +152,8 @@
|
||||||
#define CX86_ARR_BASE 0xc4
|
#define CX86_ARR_BASE 0xc4
|
||||||
#define CX86_RCR_BASE 0xdc
|
#define CX86_RCR_BASE 0xdc
|
||||||
|
|
||||||
|
#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
|
||||||
|
X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
|
||||||
|
X86_CR0_PG)
|
||||||
|
|
||||||
#endif /* _UAPI_ASM_X86_PROCESSOR_FLAGS_H */
|
#endif /* _UAPI_ASM_X86_PROCESSOR_FLAGS_H */
|
||||||
|
|
|
@ -27,7 +27,6 @@ KASAN_SANITIZE_dumpstack.o := n
|
||||||
KASAN_SANITIZE_dumpstack_$(BITS).o := n
|
KASAN_SANITIZE_dumpstack_$(BITS).o := n
|
||||||
KASAN_SANITIZE_stacktrace.o := n
|
KASAN_SANITIZE_stacktrace.o := n
|
||||||
|
|
||||||
OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y
|
|
||||||
OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
|
OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
|
||||||
OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
|
OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
|
||||||
OBJECT_FILES_NON_STANDARD_test_nx.o := y
|
OBJECT_FILES_NON_STANDARD_test_nx.o := y
|
||||||
|
@ -128,9 +127,9 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
|
||||||
obj-$(CONFIG_TRACING) += tracepoint.o
|
obj-$(CONFIG_TRACING) += tracepoint.o
|
||||||
obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o
|
obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o
|
||||||
|
|
||||||
obj-$(CONFIG_ORC_UNWINDER) += unwind_orc.o
|
obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o
|
||||||
obj-$(CONFIG_FRAME_POINTER_UNWINDER) += unwind_frame.o
|
obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o
|
||||||
obj-$(CONFIG_GUESS_UNWINDER) += unwind_guess.o
|
obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
|
||||||
|
|
||||||
###
|
###
|
||||||
# 64 bit specific files
|
# 64 bit specific files
|
||||||
|
|
|
@ -23,6 +23,7 @@ obj-y += rdrand.o
|
||||||
obj-y += match.o
|
obj-y += match.o
|
||||||
obj-y += bugs.o
|
obj-y += bugs.o
|
||||||
obj-$(CONFIG_CPU_FREQ) += aperfmperf.o
|
obj-$(CONFIG_CPU_FREQ) += aperfmperf.o
|
||||||
|
obj-y += cpuid-deps.o
|
||||||
|
|
||||||
obj-$(CONFIG_PROC_FS) += proc.o
|
obj-$(CONFIG_PROC_FS) += proc.o
|
||||||
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
|
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
|
||||||
|
|
|
@ -1301,18 +1301,16 @@ void print_cpu_info(struct cpuinfo_x86 *c)
|
||||||
pr_cont(")\n");
|
pr_cont(")\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
static __init int setup_disablecpuid(char *arg)
|
/*
|
||||||
|
* clearcpuid= was already parsed in fpu__init_parse_early_param.
|
||||||
|
* But we need to keep a dummy __setup around otherwise it would
|
||||||
|
* show up as an environment variable for init.
|
||||||
|
*/
|
||||||
|
static __init int setup_clearcpuid(char *arg)
|
||||||
{
|
{
|
||||||
int bit;
|
|
||||||
|
|
||||||
if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32)
|
|
||||||
setup_clear_cpu_cap(bit);
|
|
||||||
else
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
__setup("clearcpuid=", setup_disablecpuid);
|
__setup("clearcpuid=", setup_clearcpuid);
|
||||||
|
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
DEFINE_PER_CPU_FIRST(union irq_stack_union,
|
DEFINE_PER_CPU_FIRST(union irq_stack_union,
|
||||||
|
@ -1572,9 +1570,13 @@ void cpu_init(void)
|
||||||
initialize_tlbstate_and_flush();
|
initialize_tlbstate_and_flush();
|
||||||
enter_lazy_tlb(&init_mm, me);
|
enter_lazy_tlb(&init_mm, me);
|
||||||
|
|
||||||
load_sp0(t, ¤t->thread);
|
/*
|
||||||
|
* Initialize the TSS. Don't bother initializing sp0, as the initial
|
||||||
|
* task never enters user mode.
|
||||||
|
*/
|
||||||
set_tss_desc(cpu, t);
|
set_tss_desc(cpu, t);
|
||||||
load_TR_desc();
|
load_TR_desc();
|
||||||
|
|
||||||
load_mm_ldt(&init_mm);
|
load_mm_ldt(&init_mm);
|
||||||
|
|
||||||
clear_all_debug_regs();
|
clear_all_debug_regs();
|
||||||
|
@ -1596,7 +1598,6 @@ void cpu_init(void)
|
||||||
int cpu = smp_processor_id();
|
int cpu = smp_processor_id();
|
||||||
struct task_struct *curr = current;
|
struct task_struct *curr = current;
|
||||||
struct tss_struct *t = &per_cpu(cpu_tss, cpu);
|
struct tss_struct *t = &per_cpu(cpu_tss, cpu);
|
||||||
struct thread_struct *thread = &curr->thread;
|
|
||||||
|
|
||||||
wait_for_master_cpu(cpu);
|
wait_for_master_cpu(cpu);
|
||||||
|
|
||||||
|
@ -1627,9 +1628,13 @@ void cpu_init(void)
|
||||||
initialize_tlbstate_and_flush();
|
initialize_tlbstate_and_flush();
|
||||||
enter_lazy_tlb(&init_mm, curr);
|
enter_lazy_tlb(&init_mm, curr);
|
||||||
|
|
||||||
load_sp0(t, thread);
|
/*
|
||||||
|
* Initialize the TSS. Don't bother initializing sp0, as the initial
|
||||||
|
* task never enters user mode.
|
||||||
|
*/
|
||||||
set_tss_desc(cpu, t);
|
set_tss_desc(cpu, t);
|
||||||
load_TR_desc();
|
load_TR_desc();
|
||||||
|
|
||||||
load_mm_ldt(&init_mm);
|
load_mm_ldt(&init_mm);
|
||||||
|
|
||||||
t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
|
t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
|
||||||
|
|
|
@ -0,0 +1,125 @@
|
||||||
|
/* Declare dependencies between CPUIDs */
|
||||||
|
#include <linux/kernel.h>
|
||||||
|
#include <linux/init.h>
|
||||||
|
#include <linux/module.h>
|
||||||
|
#include <asm/cpufeature.h>
|
||||||
|
|
||||||
|
struct cpuid_dep {
|
||||||
|
unsigned int feature;
|
||||||
|
unsigned int depends;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Table of CPUID features that depend on others.
|
||||||
|
*
|
||||||
|
* This only includes dependencies that can be usefully disabled, not
|
||||||
|
* features part of the base set (like FPU).
|
||||||
|
*
|
||||||
|
* Note this all is not __init / __initdata because it can be
|
||||||
|
* called from cpu hotplug. It shouldn't do anything in this case,
|
||||||
|
* but it's difficult to tell that to the init reference checker.
|
||||||
|
*/
|
||||||
|
const static struct cpuid_dep cpuid_deps[] = {
|
||||||
|
{ X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE },
|
||||||
|
{ X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE },
|
||||||
|
{ X86_FEATURE_XSAVES, X86_FEATURE_XSAVE },
|
||||||
|
{ X86_FEATURE_AVX, X86_FEATURE_XSAVE },
|
||||||
|
{ X86_FEATURE_PKU, X86_FEATURE_XSAVE },
|
||||||
|
{ X86_FEATURE_MPX, X86_FEATURE_XSAVE },
|
||||||
|
{ X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE },
|
||||||
|
{ X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR },
|
||||||
|
{ X86_FEATURE_XMM, X86_FEATURE_FXSR },
|
||||||
|
{ X86_FEATURE_XMM2, X86_FEATURE_XMM },
|
||||||
|
{ X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
|
||||||
|
{ X86_FEATURE_XMM4_1, X86_FEATURE_XMM2 },
|
||||||
|
{ X86_FEATURE_XMM4_2, X86_FEATURE_XMM2 },
|
||||||
|
{ X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
|
||||||
|
{ X86_FEATURE_PCLMULQDQ, X86_FEATURE_XMM2 },
|
||||||
|
{ X86_FEATURE_SSSE3, X86_FEATURE_XMM2, },
|
||||||
|
{ X86_FEATURE_F16C, X86_FEATURE_XMM2, },
|
||||||
|
{ X86_FEATURE_AES, X86_FEATURE_XMM2 },
|
||||||
|
{ X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 },
|
||||||
|
{ X86_FEATURE_FMA, X86_FEATURE_AVX },
|
||||||
|
{ X86_FEATURE_AVX2, X86_FEATURE_AVX, },
|
||||||
|
{ X86_FEATURE_AVX512F, X86_FEATURE_AVX, },
|
||||||
|
{ X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F },
|
||||||
|
{ X86_FEATURE_AVX512PF, X86_FEATURE_AVX512F },
|
||||||
|
{ X86_FEATURE_AVX512ER, X86_FEATURE_AVX512F },
|
||||||
|
{ X86_FEATURE_AVX512CD, X86_FEATURE_AVX512F },
|
||||||
|
{ X86_FEATURE_AVX512DQ, X86_FEATURE_AVX512F },
|
||||||
|
{ X86_FEATURE_AVX512BW, X86_FEATURE_AVX512F },
|
||||||
|
{ X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F },
|
||||||
|
{ X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F },
|
||||||
|
{ X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL },
|
||||||
|
{ X86_FEATURE_GFNI, X86_FEATURE_AVX512VL },
|
||||||
|
{ X86_FEATURE_VAES, X86_FEATURE_AVX512VL },
|
||||||
|
{ X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL },
|
||||||
|
{ X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL },
|
||||||
|
{ X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL },
|
||||||
|
{ X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F },
|
||||||
|
{ X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F },
|
||||||
|
{ X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F },
|
||||||
|
{}
|
||||||
|
};
|
||||||
|
|
||||||
|
static inline void __clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit)
|
||||||
|
{
|
||||||
|
clear_bit32(bit, c->x86_capability);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void __setup_clear_cpu_cap(unsigned int bit)
|
||||||
|
{
|
||||||
|
clear_cpu_cap(&boot_cpu_data, bit);
|
||||||
|
set_bit32(bit, cpu_caps_cleared);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature)
|
||||||
|
{
|
||||||
|
if (!c)
|
||||||
|
__setup_clear_cpu_cap(feature);
|
||||||
|
else
|
||||||
|
__clear_cpu_cap(c, feature);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Take the capabilities and the BUG bits into account */
|
||||||
|
#define MAX_FEATURE_BITS ((NCAPINTS + NBUGINTS) * sizeof(u32) * 8)
|
||||||
|
|
||||||
|
static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
|
||||||
|
{
|
||||||
|
DECLARE_BITMAP(disable, MAX_FEATURE_BITS);
|
||||||
|
const struct cpuid_dep *d;
|
||||||
|
bool changed;
|
||||||
|
|
||||||
|
if (WARN_ON(feature >= MAX_FEATURE_BITS))
|
||||||
|
return;
|
||||||
|
|
||||||
|
clear_feature(c, feature);
|
||||||
|
|
||||||
|
/* Collect all features to disable, handling dependencies */
|
||||||
|
memset(disable, 0, sizeof(disable));
|
||||||
|
__set_bit(feature, disable);
|
||||||
|
|
||||||
|
/* Loop until we get a stable state. */
|
||||||
|
do {
|
||||||
|
changed = false;
|
||||||
|
for (d = cpuid_deps; d->feature; d++) {
|
||||||
|
if (!test_bit(d->depends, disable))
|
||||||
|
continue;
|
||||||
|
if (__test_and_set_bit(d->feature, disable))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
changed = true;
|
||||||
|
clear_feature(c, d->feature);
|
||||||
|
}
|
||||||
|
} while (changed);
|
||||||
|
}
|
||||||
|
|
||||||
|
void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
|
||||||
|
{
|
||||||
|
do_clear_cpu_cap(c, feature);
|
||||||
|
}
|
||||||
|
|
||||||
|
void setup_clear_cpu_cap(unsigned int feature)
|
||||||
|
{
|
||||||
|
do_clear_cpu_cap(NULL, feature);
|
||||||
|
}
|
|
@ -249,6 +249,10 @@ static void __init fpu__init_system_ctx_switch(void)
|
||||||
*/
|
*/
|
||||||
static void __init fpu__init_parse_early_param(void)
|
static void __init fpu__init_parse_early_param(void)
|
||||||
{
|
{
|
||||||
|
char arg[32];
|
||||||
|
char *argptr = arg;
|
||||||
|
int bit;
|
||||||
|
|
||||||
if (cmdline_find_option_bool(boot_command_line, "no387"))
|
if (cmdline_find_option_bool(boot_command_line, "no387"))
|
||||||
setup_clear_cpu_cap(X86_FEATURE_FPU);
|
setup_clear_cpu_cap(X86_FEATURE_FPU);
|
||||||
|
|
||||||
|
@ -266,6 +270,13 @@ static void __init fpu__init_parse_early_param(void)
|
||||||
|
|
||||||
if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
|
if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
|
||||||
setup_clear_cpu_cap(X86_FEATURE_XSAVES);
|
setup_clear_cpu_cap(X86_FEATURE_XSAVES);
|
||||||
|
|
||||||
|
if (cmdline_find_option(boot_command_line, "clearcpuid", arg,
|
||||||
|
sizeof(arg)) &&
|
||||||
|
get_option(&argptr, &bit) &&
|
||||||
|
bit >= 0 &&
|
||||||
|
bit < NCAPINTS * 32)
|
||||||
|
setup_clear_cpu_cap(bit);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@@ -15,6 +15,7 @@
 #include <asm/fpu/xstate.h>
 
 #include <asm/tlbflush.h>
+#include <asm/cpufeature.h>
 
 /*
  * Although we spell it out in here, the Processor Trace
@@ -36,6 +37,19 @@ static const char *xfeature_names[] =
         "unknown xstate feature" ,
 };
 
+static short xsave_cpuid_features[] __initdata = {
+        X86_FEATURE_FPU,
+        X86_FEATURE_XMM,
+        X86_FEATURE_AVX,
+        X86_FEATURE_MPX,
+        X86_FEATURE_MPX,
+        X86_FEATURE_AVX512F,
+        X86_FEATURE_AVX512F,
+        X86_FEATURE_AVX512F,
+        X86_FEATURE_INTEL_PT,
+        X86_FEATURE_PKU,
+};
+
 /*
  * Mask of xstate features supported by the CPU and the kernel:
  */
@@ -59,26 +73,6 @@ unsigned int fpu_user_xstate_size;
 void fpu__xstate_clear_all_cpu_caps(void)
 {
         setup_clear_cpu_cap(X86_FEATURE_XSAVE);
-        setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
-        setup_clear_cpu_cap(X86_FEATURE_XSAVEC);
-        setup_clear_cpu_cap(X86_FEATURE_XSAVES);
-        setup_clear_cpu_cap(X86_FEATURE_AVX);
-        setup_clear_cpu_cap(X86_FEATURE_AVX2);
-        setup_clear_cpu_cap(X86_FEATURE_AVX512F);
-        setup_clear_cpu_cap(X86_FEATURE_AVX512IFMA);
-        setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
-        setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
-        setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
-        setup_clear_cpu_cap(X86_FEATURE_AVX512DQ);
-        setup_clear_cpu_cap(X86_FEATURE_AVX512BW);
-        setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
-        setup_clear_cpu_cap(X86_FEATURE_MPX);
-        setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
-        setup_clear_cpu_cap(X86_FEATURE_AVX512VBMI);
-        setup_clear_cpu_cap(X86_FEATURE_PKU);
-        setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
-        setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
-        setup_clear_cpu_cap(X86_FEATURE_AVX512_VPOPCNTDQ);
 }
 
 /*
@@ -726,6 +720,7 @@ void __init fpu__init_system_xstate(void)
         unsigned int eax, ebx, ecx, edx;
         static int on_boot_cpu __initdata = 1;
         int err;
+        int i;
 
         WARN_ON_FPU(!on_boot_cpu);
         on_boot_cpu = 0;
@@ -759,6 +754,14 @@ void __init fpu__init_system_xstate(void)
                 goto out_disable;
         }
 
+        /*
+         * Clear XSAVE features that are disabled in the normal CPUID.
+         */
+        for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
+                if (!boot_cpu_has(xsave_cpuid_features[i]))
+                        xfeatures_mask &= ~BIT(i);
+        }
+
         xfeatures_mask &= fpu__get_supported_xfeatures_mask();
 
         /* Enable xstate instructions to be able to continue with initialization: */
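The last xstate hunk drops xfeature bits whose backing CPUID feature is absent, using xsave_cpuid_features[] as the index-to-feature map. A small stand-alone sketch of that masking step — the present[] array and the starting mask are made up for the example:

#include <stdint.h>
#include <stdio.h>

#define BIT(n) (1ULL << (n))

/* present[i] says whether the CPUID feature backing xstate component i exists. */
static uint64_t filter_xfeatures(uint64_t xfeatures_mask,
                                 const int *present, int n)
{
        for (int i = 0; i < n; i++) {
                if (!present[i])
                        xfeatures_mask &= ~BIT(i);
        }
        return xfeatures_mask;
}

int main(void)
{
        /* Components 0..4 advertised by XSAVE; component 3 lacks its CPUID bit. */
        int present[5] = { 1, 1, 1, 0, 1 };
        uint64_t mask = filter_xfeatures(0x1f, present, 5);

        printf("xfeatures mask after filtering: %#llx\n",
               (unsigned long long)mask);
        return 0;
}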
@@ -212,9 +212,6 @@ ENTRY(startup_32_smp)
 #endif
 
 .Ldefault_entry:
-#define CR0_STATE        (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
-                          X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
-                          X86_CR0_PG)
         movl $(CR0_STATE & ~X86_CR0_PG),%eax
         movl %eax,%cr0
 
@@ -402,7 +399,7 @@ ENTRY(early_idt_handler_array)
         # 24(%rsp)  error code
         i = 0
         .rept NUM_EXCEPTION_VECTORS
-        .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
+        .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
         pushl $0                # Dummy error code, to make stack frame uniform
         .endif
         pushl $i                # 20(%esp) Vector number
@@ -50,6 +50,7 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map)
         .code64
         .globl startup_64
 startup_64:
+        UNWIND_HINT_EMPTY
         /*
          * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
          * and someone has loaded an identity mapped page table
@@ -89,6 +90,7 @@ startup_64:
         addq    $(early_top_pgt - __START_KERNEL_map), %rax
         jmp 1f
 ENTRY(secondary_startup_64)
+        UNWIND_HINT_EMPTY
         /*
          * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
          * and someone has loaded a mapped page table.
@@ -133,6 +135,7 @@ ENTRY(secondary_startup_64)
         movq    $1f, %rax
         jmp     *%rax
 1:
+        UNWIND_HINT_EMPTY
 
         /* Check if nx is implemented */
         movl    $0x80000001, %eax
@@ -150,9 +153,6 @@ ENTRY(secondary_startup_64)
 1:      wrmsr                           /* Make changes effective */
 
         /* Setup cr0 */
-#define CR0_STATE        (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
-                          X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
-                          X86_CR0_PG)
         movl    $CR0_STATE, %eax
         /* Make changes effective */
         movq    %rax, %cr0
@@ -235,7 +235,7 @@ ENTRY(secondary_startup_64)
         pushq   %rax            # target address in negative space
         lretq
 .Lafter_lret:
-ENDPROC(secondary_startup_64)
+END(secondary_startup_64)
 
 #include "verify_cpu.S"
 
@@ -247,6 +247,7 @@ ENDPROC(secondary_startup_64)
  */
 ENTRY(start_cpu0)
         movq    initial_stack(%rip), %rsp
+        UNWIND_HINT_EMPTY
         jmp     .Ljump_to_C_code
 ENDPROC(start_cpu0)
 #endif
@@ -266,26 +267,24 @@ ENDPROC(start_cpu0)
         .quad  init_thread_union + THREAD_SIZE - SIZEOF_PTREGS
         __FINITDATA
 
-bad_address:
-        jmp bad_address
-
         __INIT
 ENTRY(early_idt_handler_array)
-        # 104(%rsp) %rflags
-        #  96(%rsp) %cs
-        #  88(%rsp) %rip
-        #  80(%rsp) error code
         i = 0
         .rept NUM_EXCEPTION_VECTORS
-        .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
-        pushq $0                # Dummy error code, to make stack frame uniform
+        .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
+                UNWIND_HINT_IRET_REGS
+                pushq $0        # Dummy error code, to make stack frame uniform
+        .else
+                UNWIND_HINT_IRET_REGS offset=8
         .endif
         pushq $i                # 72(%rsp) Vector number
         jmp early_idt_handler_common
+        UNWIND_HINT_IRET_REGS
         i = i + 1
         .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
         .endr
-ENDPROC(early_idt_handler_array)
+        UNWIND_HINT_IRET_REGS offset=16
+END(early_idt_handler_array)
 
 early_idt_handler_common:
         /*
@@ -313,6 +312,7 @@ early_idt_handler_common:
         pushq %r13                              /* pt_regs->r13 */
         pushq %r14                              /* pt_regs->r14 */
         pushq %r15                              /* pt_regs->r15 */
+        UNWIND_HINT_REGS
 
         cmpq $14,%rsi           /* Page fault? */
         jnz 10f
@@ -327,8 +327,8 @@ early_idt_handler_common:
 
 20:
         decl early_recursion_flag(%rip)
-        jmp restore_regs_and_iret
-ENDPROC(early_idt_handler_common)
+        jmp restore_regs_and_return_to_kernel
+END(early_idt_handler_common)
 
 __INITDATA
 
@@ -435,7 +435,7 @@ ENTRY(phys_base)
 EXPORT_SYMBOL(phys_base)
 
 #include "../../x86/xen/xen-head.S"
 
         __PAGE_ALIGNED_BSS
 NEXT_PAGE(empty_zero_page)
         .skip PAGE_SIZE
@@ -13,6 +13,7 @@
 #include <linux/string.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
+#include <linux/syscalls.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/uaccess.h>
@@ -295,8 +296,8 @@ out:
         return error;
 }
 
-asmlinkage int sys_modify_ldt(int func, void __user *ptr,
-                              unsigned long bytecount)
+SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
+                unsigned long , bytecount)
 {
         int ret = -ENOSYS;
 
@@ -314,5 +315,14 @@ asmlinkage int sys_modify_ldt(int func, void __user *ptr,
                 ret = write_ldt(ptr, bytecount, 0);
                 break;
         }
-        return ret;
+        /*
+         * The SYSCALL_DEFINE() macros give us an 'unsigned long'
+         * return type, but the ABI for sys_modify_ldt() expects
+         * 'int'.  This cast gives us an int-sized value in %rax
+         * for the return code.  The 'unsigned' is necessary so
+         * the compiler does not try to sign-extend the negative
+         * return codes into the high half of the register when
+         * taking the value from int->long.
+         */
+        return (unsigned int)ret;
 }
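The new comment explains why modify_ldt() now returns (unsigned int)ret rather than ret. A tiny user-space demonstration of the difference for a negative error code such as -22 (-EINVAL), assuming a 64-bit long:

#include <stdio.h>

/* What lands in a 64-bit return register for a negative int error code. */
int main(void)
{
        int ret = -22;                          /* e.g. -EINVAL */

        long sign_extended = (long)ret;         /* int -> long: sign-extends        */
        long zero_extended = (unsigned int)ret; /* int -> u32 -> long: high half 0  */

        printf("sign-extended: %#lx\n", (unsigned long)sign_extended);
        printf("zero-extended: %#lx\n", (unsigned long)zero_extended);
        return 0;
}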
@@ -49,7 +49,13 @@
  */
 __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
         .x86_tss = {
-                .sp0 = TOP_OF_INIT_STACK,
+                /*
+                 * .sp0 is only used when entering ring 0 from a lower
+                 * privilege level.  Since the init task never runs anything
+                 * but ring 0 code, there is no need for a valid value here.
+                 * Poison it.
+                 */
+                .sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
 #ifdef CONFIG_X86_32
                 .ss0 = __KERNEL_DS,
                 .ss1 = __KERNEL_CS,
@@ -284,9 +284,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
         /*
          * Reload esp0 and cpu_current_top_of_stack.  This changes
-         * current_thread_info().
+         * current_thread_info(). Refresh the SYSENTER configuration in
+         * case prev or next is vm86.
          */
-        load_sp0(tss, next);
+        update_sp0(next_p);
+        refresh_sysenter_cs(next);
         this_cpu_write(cpu_current_top_of_stack,
                        (unsigned long)task_stack_page(next_p) +
                        THREAD_SIZE);
@@ -274,7 +274,6 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
         struct inactive_task_frame *frame;
         struct task_struct *me = current;
 
-        p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
         childregs = task_pt_regs(p);
         fork_frame = container_of(childregs, struct fork_frame, regs);
         frame = &fork_frame->frame;
@@ -464,8 +463,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
          */
         this_cpu_write(current_task, next_p);
 
-        /* Reload esp0 and ss1.  This changes current_thread_info(). */
-        load_sp0(tss, next);
+        /* Reload sp0. */
+        update_sp0(next_p);
 
         /*
          * Now maybe reload the debug registers and handle I/O bitmaps
@@ -962,8 +962,7 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
 #ifdef CONFIG_X86_32
         /* Stack for startup_32 can be just as for start_secondary onwards */
         irq_ctx_init(cpu);
-        per_cpu(cpu_current_top_of_stack, cpu) =
-                (unsigned long)task_stack_page(idle) + THREAD_SIZE;
+        per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
 #else
         initial_gs = per_cpu_offset(cpu);
 #endif
@@ -141,8 +141,7 @@ void ist_begin_non_atomic(struct pt_regs *regs)
          * will catch asm bugs and any attempt to use ist_preempt_enable
          * from double_fault.
          */
-        BUG_ON((unsigned long)(current_top_of_stack() -
-                               current_stack_pointer) >= THREAD_SIZE);
+        BUG_ON(!on_thread_stack());
 
         preempt_enable_no_resched();
 }
@@ -33,7 +33,7 @@
 #include <asm/cpufeatures.h>
 #include <asm/msr-index.h>
 
-verify_cpu:
+ENTRY(verify_cpu)
         pushf                           # Save caller passed flags
         push    $0                      # Kill any dangerous flags
         popf
@@ -139,3 +139,4 @@ verify_cpu:
         popf                            # Restore caller passed flags
         xorl    %eax, %eax
         ret
+ENDPROC(verify_cpu)
@@ -55,6 +55,7 @@
 #include <asm/irq.h>
 #include <asm/traps.h>
 #include <asm/vm86.h>
+#include <asm/switch_to.h>
 
 /*
  * Known problems:
@@ -94,7 +95,6 @@
 
 void save_v86_state(struct kernel_vm86_regs *regs, int retval)
 {
-        struct tss_struct *tss;
         struct task_struct *tsk = current;
         struct vm86plus_struct __user *user;
         struct vm86 *vm86 = current->thread.vm86;
@@ -146,12 +146,13 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
                 do_exit(SIGSEGV);
         }
 
-        tss = &per_cpu(cpu_tss, get_cpu());
+        preempt_disable();
         tsk->thread.sp0 = vm86->saved_sp0;
         tsk->thread.sysenter_cs = __KERNEL_CS;
-        load_sp0(tss, &tsk->thread);
+        update_sp0(tsk);
+        refresh_sysenter_cs(&tsk->thread);
         vm86->saved_sp0 = 0;
-        put_cpu();
+        preempt_enable();
 
         memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs));
 
@@ -237,7 +238,6 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg)
 
 static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
 {
-        struct tss_struct *tss;
         struct task_struct *tsk = current;
         struct vm86 *vm86 = tsk->thread.vm86;
         struct kernel_vm86_regs vm86regs;
@@ -365,15 +365,17 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
         vm86->saved_sp0 = tsk->thread.sp0;
         lazy_save_gs(vm86->regs32.gs);
 
-        tss = &per_cpu(cpu_tss, get_cpu());
         /* make room for real-mode segments */
+        preempt_disable();
         tsk->thread.sp0 += 16;
 
-        if (static_cpu_has(X86_FEATURE_SEP))
+        if (static_cpu_has(X86_FEATURE_SEP)) {
                 tsk->thread.sysenter_cs = 0;
+                refresh_sysenter_cs(&tsk->thread);
+        }
 
-        load_sp0(tss, &tsk->thread);
-        put_cpu();
+        update_sp0(tsk);
+        preempt_enable();
 
         if (vm86->flags & VM86_SCREEN_BITMAP)
                 mark_screen_rdonly(tsk->mm);
@@ -29,26 +29,6 @@
 #define CREATE_TRACE_POINTS
 #include <asm/trace/exceptions.h>
 
-/*
- * Page fault error code bits:
- *
- *   bit 0 == 0: no page found       1: protection fault
- *   bit 1 == 0: read access         1: write access
- *   bit 2 == 0: kernel-mode access  1: user-mode access
- *   bit 3 ==                        1: use of reserved bit detected
- *   bit 4 ==                        1: fault was an instruction fetch
- *   bit 5 ==                        1: protection keys block access
- */
-enum x86_pf_error_code {
-
-        PF_PROT         =       1 << 0,
-        PF_WRITE        =       1 << 1,
-        PF_USER         =       1 << 2,
-        PF_RSVD         =       1 << 3,
-        PF_INSTR        =       1 << 4,
-        PF_PK           =       1 << 5,
-};
-
 /*
  * Returns 0 if mmiotrace is disabled, or if the fault is not
  * handled by mmiotrace:
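The removed comment documents the page-fault error-code bits that the rest of this file now tests under X86_PF_* names. A sketch that decodes such a value — the PF_* defines below are local to the example and simply mirror the bit positions from that comment, they are not the kernel header:

#include <stdio.h>

/* Bit positions taken from the comment removed above. */
#define PF_PROT  (1u << 0)      /* 0: no page found, 1: protection fault */
#define PF_WRITE (1u << 1)      /* 0: read access,   1: write access     */
#define PF_USER  (1u << 2)      /* 0: kernel-mode,   1: user-mode access */
#define PF_RSVD  (1u << 3)      /* 1: reserved bit set in a paging entry */
#define PF_INSTR (1u << 4)      /* 1: instruction fetch                  */
#define PF_PK    (1u << 5)      /* 1: blocked by a protection key        */

static void decode_pf_error_code(unsigned int code)
{
        printf("%s-mode %s, %s%s%s\n",
               code & PF_USER  ? "user" : "kernel",
               code & PF_INSTR ? "instruction fetch" :
               code & PF_WRITE ? "write" : "read",
               code & PF_PROT  ? "protection fault" : "page not present",
               code & PF_RSVD  ? ", reserved bit set" : "",
               code & PF_PK    ? ", blocked by protection key" : "");
}

int main(void)
{
        decode_pf_error_code(0x7);      /* user-mode write to a protected page */
        return 0;
}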
@@ -150,7 +130,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
          * If it was a exec (instruction fetch) fault on NX page, then
          * do not ignore the fault:
          */
-        if (error_code & PF_INSTR)
+        if (error_code & X86_PF_INSTR)
                 return 0;
 
         instr = (void *)convert_ip_to_linear(current, regs);
@@ -180,7 +160,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
          * siginfo so userspace can discover which protection key was set
          * on the PTE.
          *
-         * If we get here, we know that the hardware signaled a PF_PK
+         * If we get here, we know that the hardware signaled a X86_PF_PK
          * fault and that there was a VMA once we got in the fault
          * handler. It does *not* guarantee that the VMA we find here
          * was the one that we faulted on.
@@ -205,7 +185,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
         /*
          * force_sig_info_fault() is called from a number of
          * contexts, some of which have a VMA and some of which
-         * do not.  The PF_PK handing happens after we have a
+         * do not.  The X86_PF_PK handing happens after we have a
          * valid VMA, so we should never reach this without a
          * valid VMA.
          */
@@ -698,7 +678,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
         if (!oops_may_print())
                 return;
 
-        if (error_code & PF_INSTR) {
+        if (error_code & X86_PF_INSTR) {
                 unsigned int level;
                 pgd_t *pgd;
                 pte_t *pte;
@@ -780,7 +760,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
          */
         if (current->thread.sig_on_uaccess_err && signal) {
                 tsk->thread.trap_nr = X86_TRAP_PF;
-                tsk->thread.error_code = error_code | PF_USER;
+                tsk->thread.error_code = error_code | X86_PF_USER;
                 tsk->thread.cr2 = address;
 
                 /* XXX: hwpoison faults will set the wrong code. */
@@ -898,7 +878,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
         struct task_struct *tsk = current;
 
         /* User mode accesses just cause a SIGSEGV */
-        if (error_code & PF_USER) {
+        if (error_code & X86_PF_USER) {
                 /*
                  * It's possible to have interrupts off here:
                  */
@@ -919,7 +899,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
                  * Instruction fetch faults in the vsyscall page might need
                  * emulation.
                  */
-                if (unlikely((error_code & PF_INSTR) &&
+                if (unlikely((error_code & X86_PF_INSTR) &&
                              ((address & ~0xfff) == VSYSCALL_ADDR))) {
                         if (emulate_vsyscall(regs, address))
                                 return;
@@ -932,7 +912,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
                  * are always protection faults.
                  */
                 if (address >= TASK_SIZE_MAX)
-                        error_code |= PF_PROT;
+                        error_code |= X86_PF_PROT;
 
                 if (likely(show_unhandled_signals))
                         show_signal_msg(regs, error_code, address, tsk);
@@ -993,11 +973,11 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code,
 
         if (!boot_cpu_has(X86_FEATURE_OSPKE))
                 return false;
-        if (error_code & PF_PK)
+        if (error_code & X86_PF_PK)
                 return true;
         /* this checks permission keys on the VMA: */
-        if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE),
-                                (error_code & PF_INSTR), foreign))
+        if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
+                                       (error_code & X86_PF_INSTR), foreign))
                 return true;
         return false;
 }
@@ -1025,7 +1005,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
         int code = BUS_ADRERR;
 
         /* Kernel mode? Handle exceptions or die: */
-        if (!(error_code & PF_USER)) {
+        if (!(error_code & X86_PF_USER)) {
                 no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
                 return;
         }
@@ -1053,14 +1033,14 @@ static noinline void
 mm_fault_error(struct pt_regs *regs, unsigned long error_code,
                unsigned long address, u32 *pkey, unsigned int fault)
 {
-        if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
+        if (fatal_signal_pending(current) && !(error_code & X86_PF_USER)) {
                 no_context(regs, error_code, address, 0, 0);
                 return;
         }
 
         if (fault & VM_FAULT_OOM) {
                 /* Kernel mode? Handle exceptions or die: */
-                if (!(error_code & PF_USER)) {
+                if (!(error_code & X86_PF_USER)) {
                         no_context(regs, error_code, address,
                                    SIGSEGV, SEGV_MAPERR);
                         return;
@@ -1085,16 +1065,16 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 
 static int spurious_fault_check(unsigned long error_code, pte_t *pte)
 {
-        if ((error_code & PF_WRITE) && !pte_write(*pte))
+        if ((error_code & X86_PF_WRITE) && !pte_write(*pte))
                 return 0;
 
-        if ((error_code & PF_INSTR) && !pte_exec(*pte))
+        if ((error_code & X86_PF_INSTR) && !pte_exec(*pte))
                 return 0;
         /*
          * Note: We do not do lazy flushing on protection key
-         * changes, so no spurious fault will ever set PF_PK.
+         * changes, so no spurious fault will ever set X86_PF_PK.
          */
-        if ((error_code & PF_PK))
+        if ((error_code & X86_PF_PK))
                 return 1;
 
         return 1;
@@ -1140,8 +1120,8 @@ spurious_fault(unsigned long error_code, unsigned long address)
          * change, so user accesses are not expected to cause spurious
          * faults.
          */
-        if (error_code != (PF_WRITE | PF_PROT)
-            && error_code != (PF_INSTR | PF_PROT))
+        if (error_code != (X86_PF_WRITE | X86_PF_PROT) &&
+            error_code != (X86_PF_INSTR | X86_PF_PROT))
                 return 0;
 
         pgd = init_mm.pgd + pgd_index(address);
@@ -1201,19 +1181,19 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
          * always an unconditional error and can never result in
          * a follow-up action to resolve the fault, like a COW.
          */
-        if (error_code & PF_PK)
+        if (error_code & X86_PF_PK)
                 return 1;
 
         /*
          * Make sure to check the VMA so that we do not perform
-         * faults just to hit a PF_PK as soon as we fill in a
+         * faults just to hit a X86_PF_PK as soon as we fill in a
          * page.
          */
-        if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE),
-                                (error_code & PF_INSTR), foreign))
+        if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
+                                       (error_code & X86_PF_INSTR), foreign))
                 return 1;
 
-        if (error_code & PF_WRITE) {
+        if (error_code & X86_PF_WRITE) {
                 /* write, present and write, not present: */
                 if (unlikely(!(vma->vm_flags & VM_WRITE)))
                         return 1;
@@ -1221,7 +1201,7 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
         }
 
         /* read, present: */
-        if (unlikely(error_code & PF_PROT))
+        if (unlikely(error_code & X86_PF_PROT))
                 return 1;
 
         /* read, not present: */
@@ -1244,7 +1224,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
         if (!static_cpu_has(X86_FEATURE_SMAP))
                 return false;
 
-        if (error_code & PF_USER)
+        if (error_code & X86_PF_USER)
                 return false;
 
         if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC))
@@ -1297,7 +1277,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
          * protection error (error_code & 9) == 0.
          */
         if (unlikely(fault_in_kernel_space(address))) {
-                if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) {
+                if (!(error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
                         if (vmalloc_fault(address) >= 0)
                                 return;
 
@@ -1325,7 +1305,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
         if (unlikely(kprobes_fault(regs)))
                 return;
 
-        if (unlikely(error_code & PF_RSVD))
+        if (unlikely(error_code & X86_PF_RSVD))
                 pgtable_bad(regs, error_code, address);
 
         if (unlikely(smap_violation(error_code, regs))) {
@@ -1351,7 +1331,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
          */
         if (user_mode(regs)) {
                 local_irq_enable();
-                error_code |= PF_USER;
+                error_code |= X86_PF_USER;
                 flags |= FAULT_FLAG_USER;
         } else {
                 if (regs->flags & X86_EFLAGS_IF)
@@ -1360,9 +1340,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 
         perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
-        if (error_code & PF_WRITE)
+        if (error_code & X86_PF_WRITE)
                 flags |= FAULT_FLAG_WRITE;
-        if (error_code & PF_INSTR)
+        if (error_code & X86_PF_INSTR)
                 flags |= FAULT_FLAG_INSTRUCTION;
 
         /*
@@ -1382,7 +1362,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
          * space check, thus avoiding the deadlock:
          */
         if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
-                if ((error_code & PF_USER) == 0 &&
+                if (!(error_code & X86_PF_USER) &&
                     !search_exception_tables(regs->ip)) {
                         bad_area_nosemaphore(regs, error_code, address, NULL);
                         return;
@@ -1409,7 +1389,7 @@ retry:
                 bad_area(regs, error_code, address);
                 return;
         }
-        if (error_code & PF_USER) {
+        if (error_code & X86_PF_USER) {
                 /*
                  * Accessing the stack below %sp is always a bug.
                  * The large cushion allows instructions like enter
@@ -6,6 +6,7 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/syscalls.h>
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
 #include <os.h>
@@ -369,7 +370,9 @@ void free_ldt(struct mm_context *mm)
         mm->arch.ldt.entry_count = 0;
 }
 
-int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
+SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
+                unsigned long , bytecount)
 {
-        return do_modify_ldt_skas(func, ptr, bytecount);
+        /* See non-um modify_ldt() for why we do this cast */
+        return (unsigned int)do_modify_ldt_skas(func, ptr, bytecount);
 }
@@ -601,7 +601,7 @@ static struct trap_array_entry trap_array[] = {
 #ifdef CONFIG_X86_MCE
         { machine_check,               xen_machine_check,               true },
 #endif
-        { nmi,                         xen_nmi,                         true },
+        { nmi,                         xen_xennmi,                      true },
         { overflow,                    xen_overflow,                    false },
 #ifdef CONFIG_IA32_EMULATION
         { entry_INT80_compat,          xen_entry_INT80_compat,          false },
@@ -811,15 +811,14 @@ static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
         }
 }
 
-static void xen_load_sp0(struct tss_struct *tss,
-                         struct thread_struct *thread)
+static void xen_load_sp0(unsigned long sp0)
 {
         struct multicall_space mcs;
 
         mcs = xen_mc_entry(0);
-        MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
+        MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
         xen_mc_issue(PARAVIRT_LAZY_CPU);
-        tss->x86_tss.sp0 = thread->sp0;
+        this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
 }
 
 void xen_set_iopl_mask(unsigned mask)
@@ -14,6 +14,7 @@
  * single-threaded.
  */
 #include <linux/sched.h>
+#include <linux/sched/task_stack.h>
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/smp.h>
@@ -294,12 +295,19 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 #endif
         memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
 
+        /*
+         * Bring up the CPU in cpu_bringup_and_idle() with the stack
+         * pointing just below where pt_regs would be if it were a normal
+         * kernel entry.
+         */
         ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
         ctxt->flags = VGCF_IN_KERNEL;
         ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
         ctxt->user_regs.ds = __USER_DS;
         ctxt->user_regs.es = __USER_DS;
         ctxt->user_regs.ss = __KERNEL_DS;
+        ctxt->user_regs.cs = __KERNEL_CS;
+        ctxt->user_regs.esp = (unsigned long)task_pt_regs(idle);
 
         xen_copy_trap_info(ctxt->trap_ctxt);
 
@@ -314,8 +322,13 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
         ctxt->gdt_frames[0] = gdt_mfn;
         ctxt->gdt_ents      = GDT_ENTRIES;
 
+        /*
+         * Set SS:SP that Xen will use when entering guest kernel mode
+         * from guest user mode.  Subsequent calls to load_sp0() can
+         * change this value.
+         */
         ctxt->kernel_ss = __KERNEL_DS;
-        ctxt->kernel_sp = idle->thread.sp0;
+        ctxt->kernel_sp = task_top_of_stack(idle);
 
 #ifdef CONFIG_X86_32
         ctxt->event_callback_cs = __KERNEL_CS;
@@ -327,10 +340,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
                 (unsigned long)xen_hypervisor_callback;
         ctxt->failsafe_callback_eip =
                 (unsigned long)xen_failsafe_callback;
-        ctxt->user_regs.cs = __KERNEL_CS;
         per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
 
-        ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
         ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));
         if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt))
                 BUG();
@@ -30,7 +30,7 @@ xen_pv_trap debug
 xen_pv_trap xendebug
 xen_pv_trap int3
 xen_pv_trap xenint3
-xen_pv_trap nmi
+xen_pv_trap xennmi
 xen_pv_trap overflow
 xen_pv_trap bounds
 xen_pv_trap invalid_op
@@ -10,6 +10,7 @@
 #include <asm/boot.h>
 #include <asm/asm.h>
 #include <asm/page_types.h>
+#include <asm/unwind_hints.h>
 
 #include <xen/interface/elfnote.h>
 #include <xen/interface/features.h>
@@ -20,6 +21,7 @@
 #ifdef CONFIG_XEN_PV
         __INIT
 ENTRY(startup_xen)
+        UNWIND_HINT_EMPTY
         cld
 
         /* Clear .bss */
@@ -34,21 +36,24 @@ ENTRY(startup_xen)
         mov $init_thread_union+THREAD_SIZE, %_ASM_SP
 
         jmp xen_start_kernel
+END(startup_xen)
 
         __FINIT
 #endif
 
 .pushsection .text
         .balign PAGE_SIZE
ENTRY(hypercall_page)
-        .skip PAGE_SIZE
+        .rept (PAGE_SIZE / 32)
+                UNWIND_HINT_EMPTY
+                .skip 32
+        .endr
 
 #define HYPERCALL(n) \
         .equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \
         .type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32
 #include <asm/xen-hypercalls.h>
 #undef HYPERCALL
+END(hypercall_page)
 .popsection
 
 ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
@@ -687,7 +687,7 @@
 #define BUG_TABLE
 #endif
 
-#ifdef CONFIG_ORC_UNWINDER
+#ifdef CONFIG_UNWINDER_ORC
 #define ORC_UNWIND_TABLE                                        \
         . = ALIGN(4);                                           \
         .orc_unwind_ip : AT(ADDR(.orc_unwind_ip) - LOAD_OFFSET) { \
@@ -228,6 +228,32 @@ static inline unsigned long __ffs64(u64 word)
         return __ffs((unsigned long)word);
 }
 
+/*
+ * clear_bit32 - Clear a bit in memory for u32 array
+ * @nr: Bit to clear
+ * @addr: u32 * address of bitmap
+ *
+ * Same as clear_bit, but avoids needing casts for u32 arrays.
+ */
+
+static __always_inline void clear_bit32(long nr, volatile u32 *addr)
+{
+        clear_bit(nr, (volatile unsigned long *)addr);
+}
+
+/*
+ * set_bit32 - Set a bit in memory for u32 array
+ * @nr: Bit to clear
+ * @addr: u32 * address of bitmap
+ *
+ * Same as set_bit, but avoids needing casts for u32 arrays.
+ */
+
+static __always_inline void set_bit32(long nr, volatile u32 *addr)
+{
+        set_bit(nr, (volatile unsigned long *)addr);
+}
+
 #ifdef __KERNEL__
 
 #ifndef set_mask_bits
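clear_bit32()/set_bit32() exist so callers holding a u32 array (such as x86_capability[]) do not need a cast at every bitop call site. A user-space sketch of the same wrapping idea — set_bit()/test_bit() below are simplified, non-atomic stand-ins for the kernel helpers, and the capability array is just an example:

#include <stdint.h>
#include <stdio.h>

#define BITS_PER_LONG (8 * sizeof(unsigned long))

/* Minimal non-atomic stand-ins for the kernel's set_bit()/test_bit(). */
static void set_bit(long nr, volatile unsigned long *addr)
{
        addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
}

static int test_bit(long nr, const volatile unsigned long *addr)
{
        return (addr[nr / BITS_PER_LONG] >> (nr % BITS_PER_LONG)) & 1;
}

/* Same wrapping idea as set_bit32(): hide the u32 -> unsigned long cast. */
static void set_bit32(long nr, volatile uint32_t *addr)
{
        set_bit(nr, (volatile unsigned long *)addr);
}

int main(void)
{
        /* e.g. a u32 capability array; aligned so the long-wide access is safe */
        _Alignas(unsigned long) uint32_t caps[4] = { 0 };

        set_bit32(37, caps);
        printf("bit 37 set: %d\n",
               test_bit(37, (const volatile unsigned long *)caps));
        return 0;
}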
@@ -376,7 +376,7 @@ config STACK_VALIDATION
           that runtime stack traces are more reliable.
 
           This is also a prerequisite for generation of ORC unwind data, which
-          is needed for CONFIG_ORC_UNWINDER.
+          is needed for CONFIG_UNWINDER_ORC.
 
           For more information, see
           tools/objtool/Documentation/stack-validation.txt.
@@ -259,7 +259,7 @@ ifneq ($(SKIP_STACK_VALIDATION),1)
 
 __objtool_obj := $(objtree)/tools/objtool/objtool
 
-objtool_args = $(if $(CONFIG_ORC_UNWINDER),orc generate,check)
+objtool_args = $(if $(CONFIG_UNWINDER_ORC),orc generate,check)
 
 ifndef CONFIG_FRAME_POINTER
 objtool_args += --no-fp
@@ -1757,11 +1757,14 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
                 if (insn->dead_end)
                         return 0;
 
-                insn = next_insn;
-                if (!insn) {
+                if (!next_insn) {
+                        if (state.cfa.base == CFI_UNDEFINED)
+                                return 0;
                         WARN("%s: unexpected end of section", sec->name);
                         return 1;
                 }
+
+                insn = next_insn;
         }
 
         return 0;
@@ -70,7 +70,7 @@ static void cmd_usage(void)
 
         printf("\n");
 
-        exit(1);
+        exit(129);
 }
 
 static void handle_options(int *argc, const char ***argv)
@@ -86,9 +86,7 @@ static void handle_options(int *argc, const char ***argv)
                         break;
                 } else {
                         fprintf(stderr, "Unknown option: %s\n", cmd);
-                        fprintf(stderr, "\n Usage: %s\n",
-                                objtool_usage_string);
-                        exit(1);
+                        cmd_usage();
                 }
 
                 (*argv)++;