Merge branch 'x86/pti' into x86/mm, to pick up dependencies
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Коммит
3c76db70eb
5
Makefile
5
Makefile
|
@ -489,6 +489,11 @@ KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
|
|||
KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
|
||||
endif
|
||||
|
||||
RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register
|
||||
RETPOLINE_CFLAGS_CLANG := -mretpoline-external-thunk
|
||||
RETPOLINE_CFLAGS := $(call cc-option,$(RETPOLINE_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_CFLAGS_CLANG)))
|
||||
export RETPOLINE_CFLAGS
|
||||
|
||||
ifeq ($(config-targets),1)
|
||||
# ===========================================================================
|
||||
# *config targets only - make sure prerequisites are updated, and descend
|
||||
|
|
|
@ -430,6 +430,7 @@ config GOLDFISH
|
|||
config RETPOLINE
|
||||
bool "Avoid speculative indirect branches in kernel"
|
||||
default y
|
||||
select STACK_VALIDATION if HAVE_STACK_VALIDATION
|
||||
help
|
||||
Compile kernel with the retpoline compiler options to guard against
|
||||
kernel-to-user data leaks by avoiding speculative indirect
|
||||
|
@ -2315,7 +2316,7 @@ choice
|
|||
it can be used to assist security vulnerability exploitation.
|
||||
|
||||
This setting can be changed at boot time via the kernel command
|
||||
line parameter vsyscall=[native|emulate|none].
|
||||
line parameter vsyscall=[emulate|none].
|
||||
|
||||
On a system with recent enough glibc (2.14 or newer) and no
|
||||
static binaries, you can say None without a performance penalty
|
||||
|
@ -2323,15 +2324,6 @@ choice
|
|||
|
||||
If unsure, select "Emulate".
|
||||
|
||||
config LEGACY_VSYSCALL_NATIVE
|
||||
bool "Native"
|
||||
help
|
||||
Actual executable code is located in the fixed vsyscall
|
||||
address mapping, implementing time() efficiently. Since
|
||||
this makes the mapping executable, it can be used during
|
||||
security vulnerability exploitation (traditionally as
|
||||
ROP gadgets). This configuration is not recommended.
|
||||
|
||||
config LEGACY_VSYSCALL_EMULATE
|
||||
bool "Emulate"
|
||||
help
|
||||
|
|
|
@ -232,10 +232,9 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
|
|||
|
||||
# Avoid indirect branches in kernel to deal with Spectre
|
||||
ifdef CONFIG_RETPOLINE
|
||||
RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
|
||||
ifneq ($(RETPOLINE_CFLAGS),)
|
||||
ifneq ($(RETPOLINE_CFLAGS),)
|
||||
KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
archscripts: scripts_basic
|
||||
|
|
|
@ -97,7 +97,7 @@ For 32-bit we have the following conventions - kernel is built with
|
|||
|
||||
#define SIZEOF_PTREGS 21*8
|
||||
|
||||
.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax
|
||||
.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
|
||||
/*
|
||||
* Push registers and sanitize registers of values that a
|
||||
* speculation attack might otherwise want to exploit. The
|
||||
|
@ -105,32 +105,41 @@ For 32-bit we have the following conventions - kernel is built with
|
|||
* could be put to use in a speculative execution gadget.
|
||||
* Interleave XOR with PUSH for better uop scheduling:
|
||||
*/
|
||||
.if \save_ret
|
||||
pushq %rsi /* pt_regs->si */
|
||||
movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */
|
||||
movq %rdi, 8(%rsp) /* pt_regs->di (overwriting original return address) */
|
||||
.else
|
||||
pushq %rdi /* pt_regs->di */
|
||||
pushq %rsi /* pt_regs->si */
|
||||
.endif
|
||||
pushq \rdx /* pt_regs->dx */
|
||||
pushq %rcx /* pt_regs->cx */
|
||||
pushq \rax /* pt_regs->ax */
|
||||
pushq %r8 /* pt_regs->r8 */
|
||||
xorq %r8, %r8 /* nospec r8 */
|
||||
xorl %r8d, %r8d /* nospec r8 */
|
||||
pushq %r9 /* pt_regs->r9 */
|
||||
xorq %r9, %r9 /* nospec r9 */
|
||||
xorl %r9d, %r9d /* nospec r9 */
|
||||
pushq %r10 /* pt_regs->r10 */
|
||||
xorq %r10, %r10 /* nospec r10 */
|
||||
xorl %r10d, %r10d /* nospec r10 */
|
||||
pushq %r11 /* pt_regs->r11 */
|
||||
xorq %r11, %r11 /* nospec r11*/
|
||||
xorl %r11d, %r11d /* nospec r11*/
|
||||
pushq %rbx /* pt_regs->rbx */
|
||||
xorl %ebx, %ebx /* nospec rbx*/
|
||||
pushq %rbp /* pt_regs->rbp */
|
||||
xorl %ebp, %ebp /* nospec rbp*/
|
||||
pushq %r12 /* pt_regs->r12 */
|
||||
xorq %r12, %r12 /* nospec r12*/
|
||||
xorl %r12d, %r12d /* nospec r12*/
|
||||
pushq %r13 /* pt_regs->r13 */
|
||||
xorq %r13, %r13 /* nospec r13*/
|
||||
xorl %r13d, %r13d /* nospec r13*/
|
||||
pushq %r14 /* pt_regs->r14 */
|
||||
xorq %r14, %r14 /* nospec r14*/
|
||||
xorl %r14d, %r14d /* nospec r14*/
|
||||
pushq %r15 /* pt_regs->r15 */
|
||||
xorq %r15, %r15 /* nospec r15*/
|
||||
xorl %r15d, %r15d /* nospec r15*/
|
||||
UNWIND_HINT_REGS
|
||||
.if \save_ret
|
||||
pushq %rsi /* return address on top of stack */
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro POP_REGS pop_rdi=1 skip_r11rcx=0
|
||||
|
@ -172,12 +181,7 @@ For 32-bit we have the following conventions - kernel is built with
|
|||
*/
|
||||
.macro ENCODE_FRAME_POINTER ptregs_offset=0
|
||||
#ifdef CONFIG_FRAME_POINTER
|
||||
.if \ptregs_offset
|
||||
leaq \ptregs_offset(%rsp), %rbp
|
||||
.else
|
||||
mov %rsp, %rbp
|
||||
.endif
|
||||
orq $0x1, %rbp
|
||||
leaq 1+\ptregs_offset(%rsp), %rbp
|
||||
#endif
|
||||
.endm
|
||||
|
||||
|
|
|
@ -252,8 +252,7 @@ ENTRY(__switch_to_asm)
|
|||
* exist, overwrite the RSB with entries which capture
|
||||
* speculative execution to prevent attack.
|
||||
*/
|
||||
/* Clobbers %ebx */
|
||||
FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
|
||||
FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
|
||||
#endif
|
||||
|
||||
/* restore callee-saved registers */
|
||||
|
|
|
@ -369,8 +369,7 @@ ENTRY(__switch_to_asm)
|
|||
* exist, overwrite the RSB with entries which capture
|
||||
* speculative execution to prevent attack.
|
||||
*/
|
||||
/* Clobbers %rbx */
|
||||
FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
|
||||
FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
|
||||
#endif
|
||||
|
||||
/* restore callee-saved registers */
|
||||
|
@ -454,9 +453,19 @@ END(irq_entries_start)
|
|||
*
|
||||
* The invariant is that, if irq_count != -1, then the IRQ stack is in use.
|
||||
*/
|
||||
.macro ENTER_IRQ_STACK regs=1 old_rsp
|
||||
.macro ENTER_IRQ_STACK regs=1 old_rsp save_ret=0
|
||||
DEBUG_ENTRY_ASSERT_IRQS_OFF
|
||||
|
||||
.if \save_ret
|
||||
/*
|
||||
* If save_ret is set, the original stack contains one additional
|
||||
* entry -- the return address. Therefore, move the address one
|
||||
* entry below %rsp to \old_rsp.
|
||||
*/
|
||||
leaq 8(%rsp), \old_rsp
|
||||
.else
|
||||
movq %rsp, \old_rsp
|
||||
.endif
|
||||
|
||||
.if \regs
|
||||
UNWIND_HINT_REGS base=\old_rsp
|
||||
|
@ -502,6 +511,15 @@ END(irq_entries_start)
|
|||
.if \regs
|
||||
UNWIND_HINT_REGS indirect=1
|
||||
.endif
|
||||
|
||||
.if \save_ret
|
||||
/*
|
||||
* Push the return address to the stack. This return address can
|
||||
* be found at the "real" original RSP, which was offset by 8 at
|
||||
* the beginning of this macro.
|
||||
*/
|
||||
pushq -8(\old_rsp)
|
||||
.endif
|
||||
.endm
|
||||
|
||||
/*
|
||||
|
@ -525,27 +543,65 @@ END(irq_entries_start)
|
|||
.endm
|
||||
|
||||
/*
|
||||
* Interrupt entry/exit.
|
||||
* Interrupt entry helper function.
|
||||
*
|
||||
* Interrupt entry points save only callee clobbered registers in fast path.
|
||||
*
|
||||
* Entry runs with interrupts off.
|
||||
* Entry runs with interrupts off. Stack layout at entry:
|
||||
* +----------------------------------------------------+
|
||||
* | regs->ss |
|
||||
* | regs->rsp |
|
||||
* | regs->eflags |
|
||||
* | regs->cs |
|
||||
* | regs->ip |
|
||||
* +----------------------------------------------------+
|
||||
* | regs->orig_ax = ~(interrupt number) |
|
||||
* +----------------------------------------------------+
|
||||
* | return address |
|
||||
* +----------------------------------------------------+
|
||||
*/
|
||||
|
||||
/* 0(%rsp): ~(interrupt number) */
|
||||
.macro interrupt func
|
||||
ENTRY(interrupt_entry)
|
||||
UNWIND_HINT_FUNC
|
||||
ASM_CLAC
|
||||
cld
|
||||
|
||||
testb $3, CS-ORIG_RAX(%rsp)
|
||||
testb $3, CS-ORIG_RAX+8(%rsp)
|
||||
jz 1f
|
||||
SWAPGS
|
||||
call switch_to_thread_stack
|
||||
|
||||
/*
|
||||
* Switch to the thread stack. The IRET frame and orig_ax are
|
||||
* on the stack, as well as the return address. RDI..R12 are
|
||||
* not (yet) on the stack and space has not (yet) been
|
||||
* allocated for them.
|
||||
*/
|
||||
pushq %rdi
|
||||
|
||||
/* Need to switch before accessing the thread stack. */
|
||||
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
|
||||
movq %rsp, %rdi
|
||||
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
|
||||
|
||||
/*
|
||||
* We have RDI, return address, and orig_ax on the stack on
|
||||
* top of the IRET frame. That means offset=24
|
||||
*/
|
||||
UNWIND_HINT_IRET_REGS base=%rdi offset=24
|
||||
|
||||
pushq 7*8(%rdi) /* regs->ss */
|
||||
pushq 6*8(%rdi) /* regs->rsp */
|
||||
pushq 5*8(%rdi) /* regs->eflags */
|
||||
pushq 4*8(%rdi) /* regs->cs */
|
||||
pushq 3*8(%rdi) /* regs->ip */
|
||||
pushq 2*8(%rdi) /* regs->orig_ax */
|
||||
pushq 8(%rdi) /* return address */
|
||||
UNWIND_HINT_FUNC
|
||||
|
||||
movq (%rdi), %rdi
|
||||
1:
|
||||
|
||||
PUSH_AND_CLEAR_REGS
|
||||
ENCODE_FRAME_POINTER
|
||||
PUSH_AND_CLEAR_REGS save_ret=1
|
||||
ENCODE_FRAME_POINTER 8
|
||||
|
||||
testb $3, CS(%rsp)
|
||||
testb $3, CS+8(%rsp)
|
||||
jz 1f
|
||||
|
||||
/*
|
||||
|
@ -553,7 +609,7 @@ END(irq_entries_start)
|
|||
*
|
||||
* We need to tell lockdep that IRQs are off. We can't do this until
|
||||
* we fix gsbase, and we should do it before enter_from_user_mode
|
||||
* (which can take locks). Since TRACE_IRQS_OFF idempotent,
|
||||
* (which can take locks). Since TRACE_IRQS_OFF is idempotent,
|
||||
* the simplest way to handle it is to just call it twice if
|
||||
* we enter from user mode. There's no reason to optimize this since
|
||||
* TRACE_IRQS_OFF is a no-op if lockdep is off.
|
||||
|
@ -563,12 +619,15 @@ END(irq_entries_start)
|
|||
CALL_enter_from_user_mode
|
||||
|
||||
1:
|
||||
ENTER_IRQ_STACK old_rsp=%rdi
|
||||
ENTER_IRQ_STACK old_rsp=%rdi save_ret=1
|
||||
/* We entered an interrupt context - irqs are off: */
|
||||
TRACE_IRQS_OFF
|
||||
|
||||
call \func /* rdi points to pt_regs */
|
||||
.endm
|
||||
ret
|
||||
END(interrupt_entry)
|
||||
|
||||
|
||||
/* Interrupt entry/exit. */
|
||||
|
||||
/*
|
||||
* The interrupt stubs push (~vector+0x80) onto the stack and
|
||||
|
@ -576,9 +635,10 @@ END(irq_entries_start)
|
|||
*/
|
||||
.p2align CONFIG_X86_L1_CACHE_SHIFT
|
||||
common_interrupt:
|
||||
ASM_CLAC
|
||||
addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
|
||||
interrupt do_IRQ
|
||||
call interrupt_entry
|
||||
UNWIND_HINT_REGS indirect=1
|
||||
call do_IRQ /* rdi points to pt_regs */
|
||||
/* 0(%rsp): old RSP */
|
||||
ret_from_intr:
|
||||
DISABLE_INTERRUPTS(CLBR_ANY)
|
||||
|
@ -771,10 +831,11 @@ END(common_interrupt)
|
|||
.macro apicinterrupt3 num sym do_sym
|
||||
ENTRY(\sym)
|
||||
UNWIND_HINT_IRET_REGS
|
||||
ASM_CLAC
|
||||
pushq $~(\num)
|
||||
.Lcommon_\sym:
|
||||
interrupt \do_sym
|
||||
call interrupt_entry
|
||||
UNWIND_HINT_REGS indirect=1
|
||||
call \do_sym /* rdi points to pt_regs */
|
||||
jmp ret_from_intr
|
||||
END(\sym)
|
||||
.endm
|
||||
|
@ -837,34 +898,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
|
|||
*/
|
||||
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
|
||||
|
||||
/*
|
||||
* Switch to the thread stack. This is called with the IRET frame and
|
||||
* orig_ax on the stack. (That is, RDI..R12 are not on the stack and
|
||||
* space has not been allocated for them.)
|
||||
*/
|
||||
ENTRY(switch_to_thread_stack)
|
||||
UNWIND_HINT_FUNC
|
||||
|
||||
pushq %rdi
|
||||
/* Need to switch before accessing the thread stack. */
|
||||
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
|
||||
movq %rsp, %rdi
|
||||
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
|
||||
UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
|
||||
|
||||
pushq 7*8(%rdi) /* regs->ss */
|
||||
pushq 6*8(%rdi) /* regs->rsp */
|
||||
pushq 5*8(%rdi) /* regs->eflags */
|
||||
pushq 4*8(%rdi) /* regs->cs */
|
||||
pushq 3*8(%rdi) /* regs->ip */
|
||||
pushq 2*8(%rdi) /* regs->orig_ax */
|
||||
pushq 8(%rdi) /* return address */
|
||||
UNWIND_HINT_FUNC
|
||||
|
||||
movq (%rdi), %rdi
|
||||
ret
|
||||
END(switch_to_thread_stack)
|
||||
|
||||
.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
|
||||
ENTRY(\sym)
|
||||
UNWIND_HINT_IRET_REGS offset=\has_error_code*8
|
||||
|
@ -880,12 +913,8 @@ ENTRY(\sym)
|
|||
pushq $-1 /* ORIG_RAX: no syscall to restart */
|
||||
.endif
|
||||
|
||||
/* Save all registers in pt_regs */
|
||||
PUSH_AND_CLEAR_REGS
|
||||
ENCODE_FRAME_POINTER
|
||||
|
||||
.if \paranoid < 2
|
||||
testb $3, CS(%rsp) /* If coming from userspace, switch stacks */
|
||||
testb $3, CS-ORIG_RAX(%rsp) /* If coming from userspace, switch stacks */
|
||||
jnz .Lfrom_usermode_switch_stack_\@
|
||||
.endif
|
||||
|
||||
|
@ -1135,13 +1164,15 @@ idtentry machine_check do_mce has_error_code=0 paranoid=1
|
|||
#endif
|
||||
|
||||
/*
|
||||
* Switch gs if needed.
|
||||
* Save all registers in pt_regs, and switch gs if needed.
|
||||
* Use slow, but surefire "are we in kernel?" check.
|
||||
* Return: ebx=0: need swapgs on exit, ebx=1: otherwise
|
||||
*/
|
||||
ENTRY(paranoid_entry)
|
||||
UNWIND_HINT_FUNC
|
||||
cld
|
||||
PUSH_AND_CLEAR_REGS save_ret=1
|
||||
ENCODE_FRAME_POINTER 8
|
||||
movl $1, %ebx
|
||||
movl $MSR_GS_BASE, %ecx
|
||||
rdmsr
|
||||
|
@ -1186,12 +1217,14 @@ ENTRY(paranoid_exit)
|
|||
END(paranoid_exit)
|
||||
|
||||
/*
|
||||
* Switch gs if needed.
|
||||
* Save all registers in pt_regs, and switch GS if needed.
|
||||
* Return: EBX=0: came from user mode; EBX=1: otherwise
|
||||
*/
|
||||
ENTRY(error_entry)
|
||||
UNWIND_HINT_REGS offset=8
|
||||
UNWIND_HINT_FUNC
|
||||
cld
|
||||
PUSH_AND_CLEAR_REGS save_ret=1
|
||||
ENCODE_FRAME_POINTER 8
|
||||
testb $3, CS+8(%rsp)
|
||||
jz .Lerror_kernelspace
|
||||
|
||||
|
@ -1582,8 +1615,6 @@ end_repeat_nmi:
|
|||
* frame to point back to repeat_nmi.
|
||||
*/
|
||||
pushq $-1 /* ORIG_RAX: no syscall to restart */
|
||||
PUSH_AND_CLEAR_REGS
|
||||
ENCODE_FRAME_POINTER
|
||||
|
||||
/*
|
||||
* Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
|
||||
|
|
|
@ -85,25 +85,25 @@ ENTRY(entry_SYSENTER_compat)
|
|||
pushq %rcx /* pt_regs->cx */
|
||||
pushq $-ENOSYS /* pt_regs->ax */
|
||||
pushq $0 /* pt_regs->r8 = 0 */
|
||||
xorq %r8, %r8 /* nospec r8 */
|
||||
xorl %r8d, %r8d /* nospec r8 */
|
||||
pushq $0 /* pt_regs->r9 = 0 */
|
||||
xorq %r9, %r9 /* nospec r9 */
|
||||
xorl %r9d, %r9d /* nospec r9 */
|
||||
pushq $0 /* pt_regs->r10 = 0 */
|
||||
xorq %r10, %r10 /* nospec r10 */
|
||||
xorl %r10d, %r10d /* nospec r10 */
|
||||
pushq $0 /* pt_regs->r11 = 0 */
|
||||
xorq %r11, %r11 /* nospec r11 */
|
||||
xorl %r11d, %r11d /* nospec r11 */
|
||||
pushq %rbx /* pt_regs->rbx */
|
||||
xorl %ebx, %ebx /* nospec rbx */
|
||||
pushq %rbp /* pt_regs->rbp (will be overwritten) */
|
||||
xorl %ebp, %ebp /* nospec rbp */
|
||||
pushq $0 /* pt_regs->r12 = 0 */
|
||||
xorq %r12, %r12 /* nospec r12 */
|
||||
xorl %r12d, %r12d /* nospec r12 */
|
||||
pushq $0 /* pt_regs->r13 = 0 */
|
||||
xorq %r13, %r13 /* nospec r13 */
|
||||
xorl %r13d, %r13d /* nospec r13 */
|
||||
pushq $0 /* pt_regs->r14 = 0 */
|
||||
xorq %r14, %r14 /* nospec r14 */
|
||||
xorl %r14d, %r14d /* nospec r14 */
|
||||
pushq $0 /* pt_regs->r15 = 0 */
|
||||
xorq %r15, %r15 /* nospec r15 */
|
||||
xorl %r15d, %r15d /* nospec r15 */
|
||||
cld
|
||||
|
||||
/*
|
||||
|
@ -224,25 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
|
|||
pushq %rbp /* pt_regs->cx (stashed in bp) */
|
||||
pushq $-ENOSYS /* pt_regs->ax */
|
||||
pushq $0 /* pt_regs->r8 = 0 */
|
||||
xorq %r8, %r8 /* nospec r8 */
|
||||
xorl %r8d, %r8d /* nospec r8 */
|
||||
pushq $0 /* pt_regs->r9 = 0 */
|
||||
xorq %r9, %r9 /* nospec r9 */
|
||||
xorl %r9d, %r9d /* nospec r9 */
|
||||
pushq $0 /* pt_regs->r10 = 0 */
|
||||
xorq %r10, %r10 /* nospec r10 */
|
||||
xorl %r10d, %r10d /* nospec r10 */
|
||||
pushq $0 /* pt_regs->r11 = 0 */
|
||||
xorq %r11, %r11 /* nospec r11 */
|
||||
xorl %r11d, %r11d /* nospec r11 */
|
||||
pushq %rbx /* pt_regs->rbx */
|
||||
xorl %ebx, %ebx /* nospec rbx */
|
||||
pushq %rbp /* pt_regs->rbp (will be overwritten) */
|
||||
xorl %ebp, %ebp /* nospec rbp */
|
||||
pushq $0 /* pt_regs->r12 = 0 */
|
||||
xorq %r12, %r12 /* nospec r12 */
|
||||
xorl %r12d, %r12d /* nospec r12 */
|
||||
pushq $0 /* pt_regs->r13 = 0 */
|
||||
xorq %r13, %r13 /* nospec r13 */
|
||||
xorl %r13d, %r13d /* nospec r13 */
|
||||
pushq $0 /* pt_regs->r14 = 0 */
|
||||
xorq %r14, %r14 /* nospec r14 */
|
||||
xorl %r14d, %r14d /* nospec r14 */
|
||||
pushq $0 /* pt_regs->r15 = 0 */
|
||||
xorq %r15, %r15 /* nospec r15 */
|
||||
xorl %r15d, %r15d /* nospec r15 */
|
||||
|
||||
/*
|
||||
* User mode is traced as though IRQs are on, and SYSENTER
|
||||
|
@ -298,9 +298,9 @@ sysret32_from_system_call:
|
|||
*/
|
||||
SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
|
||||
|
||||
xorq %r8, %r8
|
||||
xorq %r9, %r9
|
||||
xorq %r10, %r10
|
||||
xorl %r8d, %r8d
|
||||
xorl %r9d, %r9d
|
||||
xorl %r10d, %r10d
|
||||
swapgs
|
||||
sysretl
|
||||
END(entry_SYSCALL_compat)
|
||||
|
@ -347,36 +347,47 @@ ENTRY(entry_INT80_compat)
|
|||
*/
|
||||
movl %eax, %eax
|
||||
|
||||
/* switch to thread stack expects orig_ax and rdi to be pushed */
|
||||
pushq %rax /* pt_regs->orig_ax */
|
||||
|
||||
/* switch to thread stack expects orig_ax to be pushed */
|
||||
call switch_to_thread_stack
|
||||
|
||||
pushq %rdi /* pt_regs->di */
|
||||
|
||||
/* Need to switch before accessing the thread stack. */
|
||||
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
|
||||
movq %rsp, %rdi
|
||||
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
|
||||
|
||||
pushq 6*8(%rdi) /* regs->ss */
|
||||
pushq 5*8(%rdi) /* regs->rsp */
|
||||
pushq 4*8(%rdi) /* regs->eflags */
|
||||
pushq 3*8(%rdi) /* regs->cs */
|
||||
pushq 2*8(%rdi) /* regs->ip */
|
||||
pushq 1*8(%rdi) /* regs->orig_ax */
|
||||
|
||||
pushq (%rdi) /* pt_regs->di */
|
||||
pushq %rsi /* pt_regs->si */
|
||||
pushq %rdx /* pt_regs->dx */
|
||||
pushq %rcx /* pt_regs->cx */
|
||||
pushq $-ENOSYS /* pt_regs->ax */
|
||||
pushq $0 /* pt_regs->r8 = 0 */
|
||||
xorq %r8, %r8 /* nospec r8 */
|
||||
xorl %r8d, %r8d /* nospec r8 */
|
||||
pushq $0 /* pt_regs->r9 = 0 */
|
||||
xorq %r9, %r9 /* nospec r9 */
|
||||
xorl %r9d, %r9d /* nospec r9 */
|
||||
pushq $0 /* pt_regs->r10 = 0 */
|
||||
xorq %r10, %r10 /* nospec r10 */
|
||||
xorl %r10d, %r10d /* nospec r10 */
|
||||
pushq $0 /* pt_regs->r11 = 0 */
|
||||
xorq %r11, %r11 /* nospec r11 */
|
||||
xorl %r11d, %r11d /* nospec r11 */
|
||||
pushq %rbx /* pt_regs->rbx */
|
||||
xorl %ebx, %ebx /* nospec rbx */
|
||||
pushq %rbp /* pt_regs->rbp */
|
||||
xorl %ebp, %ebp /* nospec rbp */
|
||||
pushq %r12 /* pt_regs->r12 */
|
||||
xorq %r12, %r12 /* nospec r12 */
|
||||
xorl %r12d, %r12d /* nospec r12 */
|
||||
pushq %r13 /* pt_regs->r13 */
|
||||
xorq %r13, %r13 /* nospec r13 */
|
||||
xorl %r13d, %r13d /* nospec r13 */
|
||||
pushq %r14 /* pt_regs->r14 */
|
||||
xorq %r14, %r14 /* nospec r14 */
|
||||
xorl %r14d, %r14d /* nospec r14 */
|
||||
pushq %r15 /* pt_regs->r15 */
|
||||
xorq %r15, %r15 /* nospec r15 */
|
||||
xorl %r15d, %r15d /* nospec r15 */
|
||||
cld
|
||||
|
||||
/*
|
||||
|
@ -393,15 +404,3 @@ ENTRY(entry_INT80_compat)
|
|||
TRACE_IRQS_ON
|
||||
jmp swapgs_restore_regs_and_return_to_usermode
|
||||
END(entry_INT80_compat)
|
||||
|
||||
ENTRY(stub32_clone)
|
||||
/*
|
||||
* The 32-bit clone ABI is: clone(..., int tls_val, int *child_tidptr).
|
||||
* The 64-bit clone ABI is: clone(..., int *child_tidptr, int tls_val).
|
||||
*
|
||||
* The native 64-bit kernel's sys_clone() implements the latter,
|
||||
* so we need to swap arguments here before calling it:
|
||||
*/
|
||||
xchg %r8, %rcx
|
||||
jmp sys_clone
|
||||
ENDPROC(stub32_clone)
|
||||
|
|
|
@ -8,12 +8,12 @@
|
|||
#
|
||||
0 i386 restart_syscall sys_restart_syscall
|
||||
1 i386 exit sys_exit
|
||||
2 i386 fork sys_fork sys_fork
|
||||
2 i386 fork sys_fork
|
||||
3 i386 read sys_read
|
||||
4 i386 write sys_write
|
||||
5 i386 open sys_open compat_sys_open
|
||||
6 i386 close sys_close
|
||||
7 i386 waitpid sys_waitpid sys32_waitpid
|
||||
7 i386 waitpid sys_waitpid compat_sys_x86_waitpid
|
||||
8 i386 creat sys_creat
|
||||
9 i386 link sys_link
|
||||
10 i386 unlink sys_unlink
|
||||
|
@ -78,7 +78,7 @@
|
|||
69 i386 ssetmask sys_ssetmask
|
||||
70 i386 setreuid sys_setreuid16
|
||||
71 i386 setregid sys_setregid16
|
||||
72 i386 sigsuspend sys_sigsuspend sys_sigsuspend
|
||||
72 i386 sigsuspend sys_sigsuspend
|
||||
73 i386 sigpending sys_sigpending compat_sys_sigpending
|
||||
74 i386 sethostname sys_sethostname
|
||||
75 i386 setrlimit sys_setrlimit compat_sys_setrlimit
|
||||
|
@ -96,7 +96,7 @@
|
|||
87 i386 swapon sys_swapon
|
||||
88 i386 reboot sys_reboot
|
||||
89 i386 readdir sys_old_readdir compat_sys_old_readdir
|
||||
90 i386 mmap sys_old_mmap sys32_mmap
|
||||
90 i386 mmap sys_old_mmap compat_sys_x86_mmap
|
||||
91 i386 munmap sys_munmap
|
||||
92 i386 truncate sys_truncate compat_sys_truncate
|
||||
93 i386 ftruncate sys_ftruncate compat_sys_ftruncate
|
||||
|
@ -126,7 +126,7 @@
|
|||
117 i386 ipc sys_ipc compat_sys_ipc
|
||||
118 i386 fsync sys_fsync
|
||||
119 i386 sigreturn sys_sigreturn sys32_sigreturn
|
||||
120 i386 clone sys_clone stub32_clone
|
||||
120 i386 clone sys_clone compat_sys_x86_clone
|
||||
121 i386 setdomainname sys_setdomainname
|
||||
122 i386 uname sys_newuname
|
||||
123 i386 modify_ldt sys_modify_ldt
|
||||
|
@ -186,8 +186,8 @@
|
|||
177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait
|
||||
178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo
|
||||
179 i386 rt_sigsuspend sys_rt_sigsuspend
|
||||
180 i386 pread64 sys_pread64 sys32_pread
|
||||
181 i386 pwrite64 sys_pwrite64 sys32_pwrite
|
||||
180 i386 pread64 sys_pread64 compat_sys_x86_pread
|
||||
181 i386 pwrite64 sys_pwrite64 compat_sys_x86_pwrite
|
||||
182 i386 chown sys_chown16
|
||||
183 i386 getcwd sys_getcwd
|
||||
184 i386 capget sys_capget
|
||||
|
@ -196,14 +196,14 @@
|
|||
187 i386 sendfile sys_sendfile compat_sys_sendfile
|
||||
188 i386 getpmsg
|
||||
189 i386 putpmsg
|
||||
190 i386 vfork sys_vfork sys_vfork
|
||||
190 i386 vfork sys_vfork
|
||||
191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit
|
||||
192 i386 mmap2 sys_mmap_pgoff
|
||||
193 i386 truncate64 sys_truncate64 sys32_truncate64
|
||||
194 i386 ftruncate64 sys_ftruncate64 sys32_ftruncate64
|
||||
195 i386 stat64 sys_stat64 sys32_stat64
|
||||
196 i386 lstat64 sys_lstat64 sys32_lstat64
|
||||
197 i386 fstat64 sys_fstat64 sys32_fstat64
|
||||
193 i386 truncate64 sys_truncate64 compat_sys_x86_truncate64
|
||||
194 i386 ftruncate64 sys_ftruncate64 compat_sys_x86_ftruncate64
|
||||
195 i386 stat64 sys_stat64 compat_sys_x86_stat64
|
||||
196 i386 lstat64 sys_lstat64 compat_sys_x86_lstat64
|
||||
197 i386 fstat64 sys_fstat64 compat_sys_x86_fstat64
|
||||
198 i386 lchown32 sys_lchown
|
||||
199 i386 getuid32 sys_getuid
|
||||
200 i386 getgid32 sys_getgid
|
||||
|
@ -231,7 +231,7 @@
|
|||
# 222 is unused
|
||||
# 223 is unused
|
||||
224 i386 gettid sys_gettid
|
||||
225 i386 readahead sys_readahead sys32_readahead
|
||||
225 i386 readahead sys_readahead compat_sys_x86_readahead
|
||||
226 i386 setxattr sys_setxattr
|
||||
227 i386 lsetxattr sys_lsetxattr
|
||||
228 i386 fsetxattr sys_fsetxattr
|
||||
|
@ -256,7 +256,7 @@
|
|||
247 i386 io_getevents sys_io_getevents compat_sys_io_getevents
|
||||
248 i386 io_submit sys_io_submit compat_sys_io_submit
|
||||
249 i386 io_cancel sys_io_cancel
|
||||
250 i386 fadvise64 sys_fadvise64 sys32_fadvise64
|
||||
250 i386 fadvise64 sys_fadvise64 compat_sys_x86_fadvise64
|
||||
# 251 is available for reuse (was briefly sys_set_zone_reclaim)
|
||||
252 i386 exit_group sys_exit_group
|
||||
253 i386 lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie
|
||||
|
@ -278,7 +278,7 @@
|
|||
269 i386 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
|
||||
270 i386 tgkill sys_tgkill
|
||||
271 i386 utimes sys_utimes compat_sys_utimes
|
||||
272 i386 fadvise64_64 sys_fadvise64_64 sys32_fadvise64_64
|
||||
272 i386 fadvise64_64 sys_fadvise64_64 compat_sys_x86_fadvise64_64
|
||||
273 i386 vserver
|
||||
274 i386 mbind sys_mbind
|
||||
275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
|
||||
|
@ -306,7 +306,7 @@
|
|||
297 i386 mknodat sys_mknodat
|
||||
298 i386 fchownat sys_fchownat
|
||||
299 i386 futimesat sys_futimesat compat_sys_futimesat
|
||||
300 i386 fstatat64 sys_fstatat64 sys32_fstatat
|
||||
300 i386 fstatat64 sys_fstatat64 compat_sys_x86_fstatat
|
||||
301 i386 unlinkat sys_unlinkat
|
||||
302 i386 renameat sys_renameat
|
||||
303 i386 linkat sys_linkat
|
||||
|
@ -320,7 +320,7 @@
|
|||
311 i386 set_robust_list sys_set_robust_list compat_sys_set_robust_list
|
||||
312 i386 get_robust_list sys_get_robust_list compat_sys_get_robust_list
|
||||
313 i386 splice sys_splice
|
||||
314 i386 sync_file_range sys_sync_file_range sys32_sync_file_range
|
||||
314 i386 sync_file_range sys_sync_file_range compat_sys_x86_sync_file_range
|
||||
315 i386 tee sys_tee
|
||||
316 i386 vmsplice sys_vmsplice compat_sys_vmsplice
|
||||
317 i386 move_pages sys_move_pages compat_sys_move_pages
|
||||
|
@ -330,7 +330,7 @@
|
|||
321 i386 signalfd sys_signalfd compat_sys_signalfd
|
||||
322 i386 timerfd_create sys_timerfd_create
|
||||
323 i386 eventfd sys_eventfd
|
||||
324 i386 fallocate sys_fallocate sys32_fallocate
|
||||
324 i386 fallocate sys_fallocate compat_sys_x86_fallocate
|
||||
325 i386 timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime
|
||||
326 i386 timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime
|
||||
327 i386 signalfd4 sys_signalfd4 compat_sys_signalfd4
|
||||
|
|
|
@ -42,10 +42,8 @@
|
|||
#define CREATE_TRACE_POINTS
|
||||
#include "vsyscall_trace.h"
|
||||
|
||||
static enum { EMULATE, NATIVE, NONE } vsyscall_mode =
|
||||
#if defined(CONFIG_LEGACY_VSYSCALL_NATIVE)
|
||||
NATIVE;
|
||||
#elif defined(CONFIG_LEGACY_VSYSCALL_NONE)
|
||||
static enum { EMULATE, NONE } vsyscall_mode =
|
||||
#ifdef CONFIG_LEGACY_VSYSCALL_NONE
|
||||
NONE;
|
||||
#else
|
||||
EMULATE;
|
||||
|
@ -56,8 +54,6 @@ static int __init vsyscall_setup(char *str)
|
|||
if (str) {
|
||||
if (!strcmp("emulate", str))
|
||||
vsyscall_mode = EMULATE;
|
||||
else if (!strcmp("native", str))
|
||||
vsyscall_mode = NATIVE;
|
||||
else if (!strcmp("none", str))
|
||||
vsyscall_mode = NONE;
|
||||
else
|
||||
|
@ -139,10 +135,6 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
|
|||
|
||||
WARN_ON_ONCE(address != regs->ip);
|
||||
|
||||
/* This should be unreachable in NATIVE mode. */
|
||||
if (WARN_ON(vsyscall_mode == NATIVE))
|
||||
return false;
|
||||
|
||||
if (vsyscall_mode == NONE) {
|
||||
warn_bad_vsyscall(KERN_INFO, regs,
|
||||
"vsyscall attempted with vsyscall=none");
|
||||
|
@ -370,9 +362,7 @@ void __init map_vsyscall(void)
|
|||
|
||||
if (vsyscall_mode != NONE) {
|
||||
__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
|
||||
vsyscall_mode == NATIVE
|
||||
? PAGE_KERNEL_VSYSCALL
|
||||
: PAGE_KERNEL_VVAR);
|
||||
PAGE_KERNEL_VVAR);
|
||||
set_vsyscall_pgtable_user_bits(swapper_pg_dir);
|
||||
}
|
||||
|
||||
|
|
|
@ -51,15 +51,14 @@
|
|||
#define AA(__x) ((unsigned long)(__x))
|
||||
|
||||
|
||||
asmlinkage long sys32_truncate64(const char __user *filename,
|
||||
unsigned long offset_low,
|
||||
unsigned long offset_high)
|
||||
COMPAT_SYSCALL_DEFINE3(x86_truncate64, const char __user *, filename,
|
||||
unsigned long, offset_low, unsigned long, offset_high)
|
||||
{
|
||||
return sys_truncate(filename, ((loff_t) offset_high << 32) | offset_low);
|
||||
}
|
||||
|
||||
asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long offset_low,
|
||||
unsigned long offset_high)
|
||||
COMPAT_SYSCALL_DEFINE3(x86_ftruncate64, unsigned int, fd,
|
||||
unsigned long, offset_low, unsigned long, offset_high)
|
||||
{
|
||||
return sys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low);
|
||||
}
|
||||
|
@ -96,8 +95,8 @@ static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat)
|
|||
return 0;
|
||||
}
|
||||
|
||||
asmlinkage long sys32_stat64(const char __user *filename,
|
||||
struct stat64 __user *statbuf)
|
||||
COMPAT_SYSCALL_DEFINE2(x86_stat64, const char __user *, filename,
|
||||
struct stat64 __user *, statbuf)
|
||||
{
|
||||
struct kstat stat;
|
||||
int ret = vfs_stat(filename, &stat);
|
||||
|
@ -107,8 +106,8 @@ asmlinkage long sys32_stat64(const char __user *filename,
|
|||
return ret;
|
||||
}
|
||||
|
||||
asmlinkage long sys32_lstat64(const char __user *filename,
|
||||
struct stat64 __user *statbuf)
|
||||
COMPAT_SYSCALL_DEFINE2(x86_lstat64, const char __user *, filename,
|
||||
struct stat64 __user *, statbuf)
|
||||
{
|
||||
struct kstat stat;
|
||||
int ret = vfs_lstat(filename, &stat);
|
||||
|
@ -117,7 +116,8 @@ asmlinkage long sys32_lstat64(const char __user *filename,
|
|||
return ret;
|
||||
}
|
||||
|
||||
asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf)
|
||||
COMPAT_SYSCALL_DEFINE2(x86_fstat64, unsigned int, fd,
|
||||
struct stat64 __user *, statbuf)
|
||||
{
|
||||
struct kstat stat;
|
||||
int ret = vfs_fstat(fd, &stat);
|
||||
|
@ -126,8 +126,9 @@ asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf)
|
|||
return ret;
|
||||
}
|
||||
|
||||
asmlinkage long sys32_fstatat(unsigned int dfd, const char __user *filename,
|
||||
struct stat64 __user *statbuf, int flag)
|
||||
COMPAT_SYSCALL_DEFINE4(x86_fstatat, unsigned int, dfd,
|
||||
const char __user *, filename,
|
||||
struct stat64 __user *, statbuf, int, flag)
|
||||
{
|
||||
struct kstat stat;
|
||||
int error;
|
||||
|
@ -153,7 +154,7 @@ struct mmap_arg_struct32 {
|
|||
unsigned int offset;
|
||||
};
|
||||
|
||||
asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg)
|
||||
COMPAT_SYSCALL_DEFINE1(x86_mmap, struct mmap_arg_struct32 __user *, arg)
|
||||
{
|
||||
struct mmap_arg_struct32 a;
|
||||
|
||||
|
@ -167,22 +168,22 @@ asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg)
|
|||
a.offset>>PAGE_SHIFT);
|
||||
}
|
||||
|
||||
asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int __user *stat_addr,
|
||||
int options)
|
||||
COMPAT_SYSCALL_DEFINE3(x86_waitpid, compat_pid_t, pid, unsigned int __user *,
|
||||
stat_addr, int, options)
|
||||
{
|
||||
return compat_sys_wait4(pid, stat_addr, options, NULL);
|
||||
}
|
||||
|
||||
/* warning: next two assume little endian */
|
||||
asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count,
|
||||
u32 poslo, u32 poshi)
|
||||
COMPAT_SYSCALL_DEFINE5(x86_pread, unsigned int, fd, char __user *, ubuf,
|
||||
u32, count, u32, poslo, u32, poshi)
|
||||
{
|
||||
return sys_pread64(fd, ubuf, count,
|
||||
((loff_t)AA(poshi) << 32) | AA(poslo));
|
||||
}
|
||||
|
||||
asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf,
|
||||
u32 count, u32 poslo, u32 poshi)
|
||||
COMPAT_SYSCALL_DEFINE5(x86_pwrite, unsigned int, fd, const char __user *, ubuf,
|
||||
u32, count, u32, poslo, u32, poshi)
|
||||
{
|
||||
return sys_pwrite64(fd, ubuf, count,
|
||||
((loff_t)AA(poshi) << 32) | AA(poslo));
|
||||
|
@ -193,8 +194,9 @@ asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf,
|
|||
* Some system calls that need sign extended arguments. This could be
|
||||
* done by a generic wrapper.
|
||||
*/
|
||||
long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
|
||||
__u32 len_low, __u32 len_high, int advice)
|
||||
COMPAT_SYSCALL_DEFINE6(x86_fadvise64_64, int, fd, __u32, offset_low,
|
||||
__u32, offset_high, __u32, len_low, __u32, len_high,
|
||||
int, advice)
|
||||
{
|
||||
return sys_fadvise64_64(fd,
|
||||
(((u64)offset_high)<<32) | offset_low,
|
||||
|
@ -202,31 +204,43 @@ long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
|
|||
advice);
|
||||
}
|
||||
|
||||
asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi,
|
||||
size_t count)
|
||||
COMPAT_SYSCALL_DEFINE4(x86_readahead, int, fd, unsigned int, off_lo,
|
||||
unsigned int, off_hi, size_t, count)
|
||||
{
|
||||
return sys_readahead(fd, ((u64)off_hi << 32) | off_lo, count);
|
||||
}
|
||||
|
||||
asmlinkage long sys32_sync_file_range(int fd, unsigned off_low, unsigned off_hi,
|
||||
unsigned n_low, unsigned n_hi, int flags)
|
||||
COMPAT_SYSCALL_DEFINE6(x86_sync_file_range, int, fd, unsigned int, off_low,
|
||||
unsigned int, off_hi, unsigned int, n_low,
|
||||
unsigned int, n_hi, int, flags)
|
||||
{
|
||||
return sys_sync_file_range(fd,
|
||||
((u64)off_hi << 32) | off_low,
|
||||
((u64)n_hi << 32) | n_low, flags);
|
||||
}
|
||||
|
||||
asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi,
|
||||
size_t len, int advice)
|
||||
COMPAT_SYSCALL_DEFINE5(x86_fadvise64, int, fd, unsigned int, offset_lo,
|
||||
unsigned int, offset_hi, size_t, len, int, advice)
|
||||
{
|
||||
return sys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo,
|
||||
len, advice);
|
||||
}
|
||||
|
||||
asmlinkage long sys32_fallocate(int fd, int mode, unsigned offset_lo,
|
||||
unsigned offset_hi, unsigned len_lo,
|
||||
unsigned len_hi)
|
||||
COMPAT_SYSCALL_DEFINE6(x86_fallocate, int, fd, int, mode,
|
||||
unsigned int, offset_lo, unsigned int, offset_hi,
|
||||
unsigned int, len_lo, unsigned int, len_hi)
|
||||
{
|
||||
return sys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo,
|
||||
((u64)len_hi << 32) | len_lo);
|
||||
}
|
||||
|
||||
/*
|
||||
* The 32-bit clone ABI is CONFIG_CLONE_BACKWARDS
|
||||
*/
|
||||
COMPAT_SYSCALL_DEFINE5(x86_clone, unsigned long, clone_flags,
|
||||
unsigned long, newsp, int __user *, parent_tidptr,
|
||||
unsigned long, tls_val, int __user *, child_tidptr)
|
||||
{
|
||||
return sys_clone(clone_flags, newsp, parent_tidptr, child_tidptr,
|
||||
tls_val);
|
||||
}
|
||||
|
|
|
@ -7,6 +7,8 @@
|
|||
#ifndef _ASM_X86_MACH_DEFAULT_APM_H
|
||||
#define _ASM_X86_MACH_DEFAULT_APM_H
|
||||
|
||||
#include <asm/nospec-branch.h>
|
||||
|
||||
#ifdef APM_ZERO_SEGS
|
||||
# define APM_DO_ZERO_SEGS \
|
||||
"pushl %%ds\n\t" \
|
||||
|
@ -32,6 +34,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
|
|||
* N.B. We do NOT need a cld after the BIOS call
|
||||
* because we always save and restore the flags.
|
||||
*/
|
||||
firmware_restrict_branch_speculation_start();
|
||||
__asm__ __volatile__(APM_DO_ZERO_SEGS
|
||||
"pushl %%edi\n\t"
|
||||
"pushl %%ebp\n\t"
|
||||
|
@ -44,6 +47,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
|
|||
"=S" (*esi)
|
||||
: "a" (func), "b" (ebx_in), "c" (ecx_in)
|
||||
: "memory", "cc");
|
||||
firmware_restrict_branch_speculation_end();
|
||||
}
|
||||
|
||||
static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
|
||||
|
@ -56,6 +60,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
|
|||
* N.B. We do NOT need a cld after the BIOS call
|
||||
* because we always save and restore the flags.
|
||||
*/
|
||||
firmware_restrict_branch_speculation_start();
|
||||
__asm__ __volatile__(APM_DO_ZERO_SEGS
|
||||
"pushl %%edi\n\t"
|
||||
"pushl %%ebp\n\t"
|
||||
|
@ -68,6 +73,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
|
|||
"=S" (si)
|
||||
: "a" (func), "b" (ebx_in), "c" (ecx_in)
|
||||
: "memory", "cc");
|
||||
firmware_restrict_branch_speculation_end();
|
||||
return error;
|
||||
}
|
||||
|
||||
|
|
|
@ -38,7 +38,4 @@ INDIRECT_THUNK(dx)
|
|||
INDIRECT_THUNK(si)
|
||||
INDIRECT_THUNK(di)
|
||||
INDIRECT_THUNK(bp)
|
||||
asmlinkage void __fill_rsb(void);
|
||||
asmlinkage void __clear_rsb(void);
|
||||
|
||||
#endif /* CONFIG_RETPOLINE */
|
||||
|
|
|
@ -213,6 +213,7 @@
|
|||
#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */
|
||||
|
||||
#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
|
||||
#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
|
||||
|
||||
/* Virtualization flags: Linux defined, word 8 */
|
||||
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
|
||||
|
@ -315,6 +316,7 @@
|
|||
#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */
|
||||
#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */
|
||||
#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
|
||||
#define X86_FEATURE_TME (16*32+13) /* Intel Total Memory Encryption */
|
||||
#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
|
||||
#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
|
||||
#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
|
||||
|
@ -327,6 +329,7 @@
|
|||
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
|
||||
#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
|
||||
#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
|
||||
#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
|
||||
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
|
||||
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
|
||||
#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include <asm/pgtable.h>
|
||||
#include <asm/processor-flags.h>
|
||||
#include <asm/tlb.h>
|
||||
#include <asm/nospec-branch.h>
|
||||
|
||||
/*
|
||||
* We map the EFI regions needed for runtime services non-contiguously,
|
||||
|
@ -36,8 +37,18 @@
|
|||
|
||||
extern asmlinkage unsigned long efi_call_phys(void *, ...);
|
||||
|
||||
#define arch_efi_call_virt_setup() kernel_fpu_begin()
|
||||
#define arch_efi_call_virt_teardown() kernel_fpu_end()
|
||||
#define arch_efi_call_virt_setup() \
|
||||
({ \
|
||||
kernel_fpu_begin(); \
|
||||
firmware_restrict_branch_speculation_start(); \
|
||||
})
|
||||
|
||||
#define arch_efi_call_virt_teardown() \
|
||||
({ \
|
||||
firmware_restrict_branch_speculation_end(); \
|
||||
kernel_fpu_end(); \
|
||||
})
|
||||
|
||||
|
||||
/*
|
||||
* Wrap all the virtual calls in a way that forces the parameters on the stack.
|
||||
|
@ -73,6 +84,7 @@ struct efi_scratch {
|
|||
efi_sync_low_kernel_mappings(); \
|
||||
preempt_disable(); \
|
||||
__kernel_fpu_begin(); \
|
||||
firmware_restrict_branch_speculation_start(); \
|
||||
\
|
||||
if (efi_scratch.use_pgd) { \
|
||||
efi_scratch.prev_cr3 = __read_cr3(); \
|
||||
|
@ -91,6 +103,7 @@ struct efi_scratch {
|
|||
__flush_tlb_all(); \
|
||||
} \
|
||||
\
|
||||
firmware_restrict_branch_speculation_end(); \
|
||||
__kernel_fpu_end(); \
|
||||
preempt_enable(); \
|
||||
})
|
||||
|
|
|
@ -37,7 +37,12 @@ struct cpu_signature {
|
|||
|
||||
struct device;
|
||||
|
||||
enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND };
|
||||
enum ucode_state {
|
||||
UCODE_OK = 0,
|
||||
UCODE_UPDATED,
|
||||
UCODE_NFOUND,
|
||||
UCODE_ERROR,
|
||||
};
|
||||
|
||||
struct microcode_ops {
|
||||
enum ucode_state (*request_microcode_user) (int cpu,
|
||||
|
@ -54,7 +59,7 @@ struct microcode_ops {
|
|||
* are being called.
|
||||
* See also the "Synchronization" section in microcode_core.c.
|
||||
*/
|
||||
int (*apply_microcode) (int cpu);
|
||||
enum ucode_state (*apply_microcode) (int cpu);
|
||||
int (*collect_cpu_info) (int cpu, struct cpu_signature *csig);
|
||||
};
|
||||
|
||||
|
|
|
@ -74,6 +74,7 @@ static inline void *ldt_slot_va(int slot)
|
|||
return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
|
||||
#else
|
||||
BUG();
|
||||
return (void *)fix_to_virt(FIX_HOLE);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
@ -8,6 +8,50 @@
|
|||
#include <asm/cpufeatures.h>
|
||||
#include <asm/msr-index.h>
|
||||
|
||||
/*
|
||||
* Fill the CPU return stack buffer.
|
||||
*
|
||||
* Each entry in the RSB, if used for a speculative 'ret', contains an
|
||||
* infinite 'pause; lfence; jmp' loop to capture speculative execution.
|
||||
*
|
||||
* This is required in various cases for retpoline and IBRS-based
|
||||
* mitigations for the Spectre variant 2 vulnerability. Sometimes to
|
||||
* eliminate potentially bogus entries from the RSB, and sometimes
|
||||
* purely to ensure that it doesn't get empty, which on some CPUs would
|
||||
* allow predictions from other (unwanted!) sources to be used.
|
||||
*
|
||||
* We define a CPP macro such that it can be used from both .S files and
|
||||
* inline assembly. It's possible to do a .macro and then include that
|
||||
* from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
|
||||
*/
|
||||
|
||||
#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
|
||||
#define RSB_FILL_LOOPS 16 /* To avoid underflow */
|
||||
|
||||
/*
|
||||
* Google experimented with loop-unrolling and this turned out to be
|
||||
* the optimal version — two calls, each with their own speculation
|
||||
* trap should their return address end up getting used, in a loop.
|
||||
*/
|
||||
#define __FILL_RETURN_BUFFER(reg, nr, sp) \
|
||||
mov $(nr/2), reg; \
|
||||
771: \
|
||||
call 772f; \
|
||||
773: /* speculation trap */ \
|
||||
pause; \
|
||||
lfence; \
|
||||
jmp 773b; \
|
||||
772: \
|
||||
call 774f; \
|
||||
775: /* speculation trap */ \
|
||||
pause; \
|
||||
lfence; \
|
||||
jmp 775b; \
|
||||
774: \
|
||||
dec reg; \
|
||||
jnz 771b; \
|
||||
add $(BITS_PER_LONG/8) * nr, sp;
|
||||
|
||||
#ifdef __ASSEMBLY__
|
||||
|
||||
/*
|
||||
|
@ -23,6 +67,18 @@
|
|||
.popsection
|
||||
.endm
|
||||
|
||||
/*
|
||||
* This should be used immediately before an indirect jump/call. It tells
|
||||
* objtool the subsequent indirect jump/call is vouched safe for retpoline
|
||||
* builds.
|
||||
*/
|
||||
.macro ANNOTATE_RETPOLINE_SAFE
|
||||
.Lannotate_\@:
|
||||
.pushsection .discard.retpoline_safe
|
||||
_ASM_PTR .Lannotate_\@
|
||||
.popsection
|
||||
.endm
|
||||
|
||||
/*
|
||||
* These are the bare retpoline primitives for indirect jmp and call.
|
||||
* Do not use these directly; they only exist to make the ALTERNATIVE
|
||||
|
@ -59,9 +115,9 @@
|
|||
.macro JMP_NOSPEC reg:req
|
||||
#ifdef CONFIG_RETPOLINE
|
||||
ANNOTATE_NOSPEC_ALTERNATIVE
|
||||
ALTERNATIVE_2 __stringify(jmp *\reg), \
|
||||
ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg), \
|
||||
__stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
|
||||
__stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
|
||||
__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
|
||||
#else
|
||||
jmp *\reg
|
||||
#endif
|
||||
|
@ -70,18 +126,25 @@
|
|||
.macro CALL_NOSPEC reg:req
|
||||
#ifdef CONFIG_RETPOLINE
|
||||
ANNOTATE_NOSPEC_ALTERNATIVE
|
||||
ALTERNATIVE_2 __stringify(call *\reg), \
|
||||
ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg), \
|
||||
__stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
|
||||
__stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
|
||||
__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD
|
||||
#else
|
||||
call *\reg
|
||||
#endif
|
||||
.endm
|
||||
|
||||
/* This clobbers the BX register */
|
||||
.macro FILL_RETURN_BUFFER nr:req ftr:req
|
||||
/*
|
||||
* A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
|
||||
* monstrosity above, manually.
|
||||
*/
|
||||
.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
|
||||
#ifdef CONFIG_RETPOLINE
|
||||
ALTERNATIVE "", "call __clear_rsb", \ftr
|
||||
ANNOTATE_NOSPEC_ALTERNATIVE
|
||||
ALTERNATIVE "jmp .Lskip_rsb_\@", \
|
||||
__stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
|
||||
\ftr
|
||||
.Lskip_rsb_\@:
|
||||
#endif
|
||||
.endm
|
||||
|
||||
|
@ -93,6 +156,12 @@
|
|||
".long 999b - .\n\t" \
|
||||
".popsection\n\t"
|
||||
|
||||
#define ANNOTATE_RETPOLINE_SAFE \
|
||||
"999:\n\t" \
|
||||
".pushsection .discard.retpoline_safe\n\t" \
|
||||
_ASM_PTR " 999b\n\t" \
|
||||
".popsection\n\t"
|
||||
|
||||
#if defined(CONFIG_X86_64) && defined(RETPOLINE)
|
||||
|
||||
/*
|
||||
|
@ -102,6 +171,7 @@
|
|||
# define CALL_NOSPEC \
|
||||
ANNOTATE_NOSPEC_ALTERNATIVE \
|
||||
ALTERNATIVE( \
|
||||
ANNOTATE_RETPOLINE_SAFE \
|
||||
"call *%[thunk_target]\n", \
|
||||
"call __x86_indirect_thunk_%V[thunk_target]\n", \
|
||||
X86_FEATURE_RETPOLINE)
|
||||
|
@ -156,26 +226,54 @@ extern char __indirect_thunk_end[];
|
|||
static inline void vmexit_fill_RSB(void)
|
||||
{
|
||||
#ifdef CONFIG_RETPOLINE
|
||||
alternative_input("",
|
||||
"call __fill_rsb",
|
||||
X86_FEATURE_RETPOLINE,
|
||||
ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory"));
|
||||
unsigned long loops;
|
||||
|
||||
asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
|
||||
ALTERNATIVE("jmp 910f",
|
||||
__stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
|
||||
X86_FEATURE_RETPOLINE)
|
||||
"910:"
|
||||
: "=r" (loops), ASM_CALL_CONSTRAINT
|
||||
: : "memory" );
|
||||
#endif
|
||||
}
|
||||
|
||||
#define alternative_msr_write(_msr, _val, _feature) \
|
||||
asm volatile(ALTERNATIVE("", \
|
||||
"movl %[msr], %%ecx\n\t" \
|
||||
"movl %[val], %%eax\n\t" \
|
||||
"movl $0, %%edx\n\t" \
|
||||
"wrmsr", \
|
||||
_feature) \
|
||||
: : [msr] "i" (_msr), [val] "i" (_val) \
|
||||
: "eax", "ecx", "edx", "memory")
|
||||
|
||||
static inline void indirect_branch_prediction_barrier(void)
|
||||
{
|
||||
asm volatile(ALTERNATIVE("",
|
||||
"movl %[msr], %%ecx\n\t"
|
||||
"movl %[val], %%eax\n\t"
|
||||
"movl $0, %%edx\n\t"
|
||||
"wrmsr",
|
||||
X86_FEATURE_USE_IBPB)
|
||||
: : [msr] "i" (MSR_IA32_PRED_CMD),
|
||||
[val] "i" (PRED_CMD_IBPB)
|
||||
: "eax", "ecx", "edx", "memory");
|
||||
alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB,
|
||||
X86_FEATURE_USE_IBPB);
|
||||
}
|
||||
|
||||
/*
|
||||
* With retpoline, we must use IBRS to restrict branch prediction
|
||||
* before calling into firmware.
|
||||
*
|
||||
* (Implemented as CPP macros due to header hell.)
|
||||
*/
|
||||
#define firmware_restrict_branch_speculation_start() \
|
||||
do { \
|
||||
preempt_disable(); \
|
||||
alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, \
|
||||
X86_FEATURE_USE_IBRS_FW); \
|
||||
} while (0)
|
||||
|
||||
#define firmware_restrict_branch_speculation_end() \
|
||||
do { \
|
||||
alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, \
|
||||
X86_FEATURE_USE_IBRS_FW); \
|
||||
preempt_enable(); \
|
||||
} while (0)
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
/*
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#ifdef CONFIG_PARAVIRT
|
||||
#include <asm/pgtable_types.h>
|
||||
#include <asm/asm.h>
|
||||
#include <asm/nospec-branch.h>
|
||||
|
||||
#include <asm/paravirt_types.h>
|
||||
|
||||
|
@ -884,23 +885,27 @@ extern void default_banner(void);
|
|||
|
||||
#define INTERRUPT_RETURN \
|
||||
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \
|
||||
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret))
|
||||
ANNOTATE_RETPOLINE_SAFE; \
|
||||
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret);)
|
||||
|
||||
#define DISABLE_INTERRUPTS(clobbers) \
|
||||
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
|
||||
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
|
||||
ANNOTATE_RETPOLINE_SAFE; \
|
||||
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \
|
||||
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
|
||||
|
||||
#define ENABLE_INTERRUPTS(clobbers) \
|
||||
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \
|
||||
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
|
||||
ANNOTATE_RETPOLINE_SAFE; \
|
||||
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \
|
||||
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
#define GET_CR0_INTO_EAX \
|
||||
push %ecx; push %edx; \
|
||||
ANNOTATE_RETPOLINE_SAFE; \
|
||||
call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \
|
||||
pop %edx; pop %ecx
|
||||
#else /* !CONFIG_X86_32 */
|
||||
|
@ -922,21 +927,25 @@ extern void default_banner(void);
|
|||
*/
|
||||
#define SWAPGS \
|
||||
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \
|
||||
call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs) \
|
||||
ANNOTATE_RETPOLINE_SAFE; \
|
||||
call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \
|
||||
)
|
||||
|
||||
#define GET_CR2_INTO_RAX \
|
||||
call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2)
|
||||
ANNOTATE_RETPOLINE_SAFE; \
|
||||
call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2);
|
||||
|
||||
#define USERGS_SYSRET64 \
|
||||
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
|
||||
CLBR_NONE, \
|
||||
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
|
||||
ANNOTATE_RETPOLINE_SAFE; \
|
||||
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64);)
|
||||
|
||||
#ifdef CONFIG_DEBUG_ENTRY
|
||||
#define SAVE_FLAGS(clobbers) \
|
||||
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
|
||||
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
|
||||
ANNOTATE_RETPOLINE_SAFE; \
|
||||
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \
|
||||
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
|
||||
#endif
|
||||
|
|
|
@ -43,6 +43,7 @@
|
|||
#include <asm/desc_defs.h>
|
||||
#include <asm/kmap_types.h>
|
||||
#include <asm/pgtable_types.h>
|
||||
#include <asm/nospec-branch.h>
|
||||
|
||||
struct page;
|
||||
struct thread_struct;
|
||||
|
@ -392,7 +393,9 @@ int paravirt_disable_iospace(void);
|
|||
* offset into the paravirt_patch_template structure, and can therefore be
|
||||
* freely converted back into a structure offset.
|
||||
*/
|
||||
#define PARAVIRT_CALL "call *%c[paravirt_opptr];"
|
||||
#define PARAVIRT_CALL \
|
||||
ANNOTATE_RETPOLINE_SAFE \
|
||||
"call *%c[paravirt_opptr];"
|
||||
|
||||
/*
|
||||
* These macros are intended to wrap calls through one of the paravirt
|
||||
|
|
|
@ -350,14 +350,14 @@ static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)
|
|||
{
|
||||
pmdval_t v = native_pmd_val(pmd);
|
||||
|
||||
return __pmd(v | set);
|
||||
return native_make_pmd(v | set);
|
||||
}
|
||||
|
||||
static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
|
||||
{
|
||||
pmdval_t v = native_pmd_val(pmd);
|
||||
|
||||
return __pmd(v & ~clear);
|
||||
return native_make_pmd(v & ~clear);
|
||||
}
|
||||
|
||||
static inline pmd_t pmd_mkold(pmd_t pmd)
|
||||
|
@ -409,14 +409,14 @@ static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
|
|||
{
|
||||
pudval_t v = native_pud_val(pud);
|
||||
|
||||
return __pud(v | set);
|
||||
return native_make_pud(v | set);
|
||||
}
|
||||
|
||||
static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
|
||||
{
|
||||
pudval_t v = native_pud_val(pud);
|
||||
|
||||
return __pud(v & ~clear);
|
||||
return native_make_pud(v & ~clear);
|
||||
}
|
||||
|
||||
static inline pud_t pud_mkold(pud_t pud)
|
||||
|
|
|
@ -32,6 +32,7 @@ extern pmd_t initial_pg_pmd[];
|
|||
static inline void pgtable_cache_init(void) { }
|
||||
static inline void check_pgt_cache(void) { }
|
||||
void paging_init(void);
|
||||
void sync_initial_page_table(void);
|
||||
|
||||
static inline int pgd_large(pgd_t pgd) { return 0; }
|
||||
|
||||
|
|
|
@ -28,6 +28,7 @@ extern pgd_t init_top_pgt[];
|
|||
#define swapper_pg_dir init_top_pgt
|
||||
|
||||
extern void paging_init(void);
|
||||
static inline void sync_initial_page_table(void) { }
|
||||
|
||||
#define pte_ERROR(e) \
|
||||
pr_err("%s:%d: bad pte %p(%016lx)\n", \
|
||||
|
|
|
@ -174,7 +174,6 @@ enum page_cache_mode {
|
|||
#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
|
||||
#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
|
||||
#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_NOCACHE)
|
||||
#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER)
|
||||
#define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER)
|
||||
#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
|
||||
#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
|
||||
|
@ -206,7 +205,6 @@ enum page_cache_mode {
|
|||
#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
|
||||
#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
|
||||
#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC)
|
||||
#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL | _PAGE_ENC)
|
||||
#define PAGE_KERNEL_VVAR __pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC)
|
||||
|
||||
#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO)
|
||||
|
@ -323,6 +321,11 @@ static inline pudval_t native_pud_val(pud_t pud)
|
|||
#else
|
||||
#include <asm-generic/pgtable-nopud.h>
|
||||
|
||||
static inline pud_t native_make_pud(pudval_t val)
|
||||
{
|
||||
return (pud_t) { .p4d.pgd = native_make_pgd(val) };
|
||||
}
|
||||
|
||||
static inline pudval_t native_pud_val(pud_t pud)
|
||||
{
|
||||
return native_pgd_val(pud.p4d.pgd);
|
||||
|
@ -344,6 +347,11 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
|
|||
#else
|
||||
#include <asm-generic/pgtable-nopmd.h>
|
||||
|
||||
static inline pmd_t native_make_pmd(pmdval_t val)
|
||||
{
|
||||
return (pmd_t) { .pud.p4d.pgd = native_make_pgd(val) };
|
||||
}
|
||||
|
||||
static inline pmdval_t native_pmd_val(pmd_t pmd)
|
||||
{
|
||||
return native_pgd_val(pmd.pud.p4d.pgd);
|
||||
|
|
|
@ -977,4 +977,5 @@ bool xen_set_default_idle(void);
|
|||
|
||||
void stop_this_cpu(void *dummy);
|
||||
void df_debug(struct pt_regs *regs, long error_code);
|
||||
void microcode_check(void);
|
||||
#endif /* _ASM_X86_PROCESSOR_H */
|
||||
|
|
|
@ -67,13 +67,13 @@ static __always_inline __must_check
|
|||
bool refcount_sub_and_test(unsigned int i, refcount_t *r)
|
||||
{
|
||||
GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", REFCOUNT_CHECK_LT_ZERO,
|
||||
r->refs.counter, "er", i, "%0", e);
|
||||
r->refs.counter, "er", i, "%0", e, "cx");
|
||||
}
|
||||
|
||||
static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
|
||||
{
|
||||
GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO,
|
||||
r->refs.counter, "%0", e);
|
||||
r->refs.counter, "%0", e, "cx");
|
||||
}
|
||||
|
||||
static __always_inline __must_check
|
||||
|
|
|
@ -2,8 +2,7 @@
|
|||
#ifndef _ASM_X86_RMWcc
|
||||
#define _ASM_X86_RMWcc
|
||||
|
||||
#define __CLOBBERS_MEM "memory"
|
||||
#define __CLOBBERS_MEM_CC_CX "memory", "cc", "cx"
|
||||
#define __CLOBBERS_MEM(clb...) "memory", ## clb
|
||||
|
||||
#if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO)
|
||||
|
||||
|
@ -40,18 +39,19 @@ do { \
|
|||
#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */
|
||||
|
||||
#define GEN_UNARY_RMWcc(op, var, arg0, cc) \
|
||||
__GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM)
|
||||
__GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM())
|
||||
|
||||
#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc) \
|
||||
#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc, clobbers...)\
|
||||
__GEN_RMWcc(op " " arg0 "\n\t" suffix, var, cc, \
|
||||
__CLOBBERS_MEM_CC_CX)
|
||||
__CLOBBERS_MEM(clobbers))
|
||||
|
||||
#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \
|
||||
__GEN_RMWcc(op __BINARY_RMWcc_ARG arg0, var, cc, \
|
||||
__CLOBBERS_MEM, vcon (val))
|
||||
__CLOBBERS_MEM(), vcon (val))
|
||||
|
||||
#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc) \
|
||||
#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc, \
|
||||
clobbers...) \
|
||||
__GEN_RMWcc(op __BINARY_RMWcc_ARG arg0 "\n\t" suffix, var, cc, \
|
||||
__CLOBBERS_MEM_CC_CX, vcon (val))
|
||||
__CLOBBERS_MEM(clobbers), vcon (val))
|
||||
|
||||
#endif /* _ASM_X86_RMWcc */
|
||||
|
|
|
@ -10,6 +10,7 @@ extern struct exception_table_entry __stop___ex_table[];
|
|||
|
||||
#if defined(CONFIG_X86_64)
|
||||
extern char __end_rodata_hpage_align[];
|
||||
extern char __entry_trampoline_start[], __entry_trampoline_end[];
|
||||
#endif
|
||||
|
||||
#endif /* _ASM_X86_SECTIONS_H */
|
||||
|
|
|
@ -20,31 +20,43 @@
|
|||
#include <asm/ia32.h>
|
||||
|
||||
/* ia32/sys_ia32.c */
|
||||
asmlinkage long sys32_truncate64(const char __user *, unsigned long, unsigned long);
|
||||
asmlinkage long sys32_ftruncate64(unsigned int, unsigned long, unsigned long);
|
||||
asmlinkage long compat_sys_x86_truncate64(const char __user *, unsigned long,
|
||||
unsigned long);
|
||||
asmlinkage long compat_sys_x86_ftruncate64(unsigned int, unsigned long,
|
||||
unsigned long);
|
||||
|
||||
asmlinkage long sys32_stat64(const char __user *, struct stat64 __user *);
|
||||
asmlinkage long sys32_lstat64(const char __user *, struct stat64 __user *);
|
||||
asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *);
|
||||
asmlinkage long sys32_fstatat(unsigned int, const char __user *,
|
||||
asmlinkage long compat_sys_x86_stat64(const char __user *,
|
||||
struct stat64 __user *);
|
||||
asmlinkage long compat_sys_x86_lstat64(const char __user *,
|
||||
struct stat64 __user *);
|
||||
asmlinkage long compat_sys_x86_fstat64(unsigned int, struct stat64 __user *);
|
||||
asmlinkage long compat_sys_x86_fstatat(unsigned int, const char __user *,
|
||||
struct stat64 __user *, int);
|
||||
struct mmap_arg_struct32;
|
||||
asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *);
|
||||
asmlinkage long compat_sys_x86_mmap(struct mmap_arg_struct32 __user *);
|
||||
|
||||
asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int);
|
||||
asmlinkage long compat_sys_x86_waitpid(compat_pid_t, unsigned int __user *,
|
||||
int);
|
||||
|
||||
asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32);
|
||||
asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32);
|
||||
asmlinkage long compat_sys_x86_pread(unsigned int, char __user *, u32, u32,
|
||||
u32);
|
||||
asmlinkage long compat_sys_x86_pwrite(unsigned int, const char __user *, u32,
|
||||
u32, u32);
|
||||
|
||||
long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int);
|
||||
long sys32_vm86_warning(void);
|
||||
asmlinkage long compat_sys_x86_fadvise64_64(int, __u32, __u32, __u32, __u32,
|
||||
int);
|
||||
|
||||
asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t);
|
||||
asmlinkage long sys32_sync_file_range(int, unsigned, unsigned,
|
||||
unsigned, unsigned, int);
|
||||
asmlinkage long sys32_fadvise64(int, unsigned, unsigned, size_t, int);
|
||||
asmlinkage long sys32_fallocate(int, int, unsigned,
|
||||
unsigned, unsigned, unsigned);
|
||||
asmlinkage ssize_t compat_sys_x86_readahead(int, unsigned int, unsigned int,
|
||||
size_t);
|
||||
asmlinkage long compat_sys_x86_sync_file_range(int, unsigned int, unsigned int,
|
||||
unsigned int, unsigned int,
|
||||
int);
|
||||
asmlinkage long compat_sys_x86_fadvise64(int, unsigned int, unsigned int,
|
||||
size_t, int);
|
||||
asmlinkage long compat_sys_x86_fallocate(int, int, unsigned int, unsigned int,
|
||||
unsigned int, unsigned int);
|
||||
asmlinkage long compat_sys_x86_clone(unsigned long, unsigned long, int __user *,
|
||||
unsigned long, int __user *);
|
||||
|
||||
/* ia32/ia32_signal.c */
|
||||
asmlinkage long sys32_sigreturn(void);
|
||||
|
|
|
@ -1603,7 +1603,7 @@ static void __init delay_with_tsc(void)
|
|||
do {
|
||||
rep_nop();
|
||||
now = rdtsc();
|
||||
} while ((now - start) < 40000000000UL / HZ &&
|
||||
} while ((now - start) < 40000000000ULL / HZ &&
|
||||
time_before_eq(jiffies, end));
|
||||
}
|
||||
|
||||
|
|
|
@ -300,6 +300,15 @@ retpoline_auto:
|
|||
setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
|
||||
pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* Retpoline means the kernel is safe because it has no indirect
|
||||
* branches. But firmware isn't, so use IBRS to protect that.
|
||||
*/
|
||||
if (boot_cpu_has(X86_FEATURE_IBRS)) {
|
||||
setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
|
||||
pr_info("Enabling Restricted Speculation for firmware calls\n");
|
||||
}
|
||||
}
|
||||
|
||||
#undef pr_fmt
|
||||
|
@ -326,8 +335,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
|
|||
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
|
||||
return sprintf(buf, "Not affected\n");
|
||||
|
||||
return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
|
||||
return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
|
||||
boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
|
||||
boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
|
||||
spectre_v2_module_string());
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -1749,3 +1749,33 @@ static int __init init_cpu_syscore(void)
|
|||
return 0;
|
||||
}
|
||||
core_initcall(init_cpu_syscore);
|
||||
|
||||
/*
|
||||
* The microcode loader calls this upon late microcode load to recheck features,
|
||||
* only when microcode has been updated. Caller holds microcode_mutex and CPU
|
||||
* hotplug lock.
|
||||
*/
|
||||
void microcode_check(void)
|
||||
{
|
||||
struct cpuinfo_x86 info;
|
||||
|
||||
perf_check_microcode();
|
||||
|
||||
/* Reload CPUID max function as it might've changed. */
|
||||
info.cpuid_level = cpuid_eax(0);
|
||||
|
||||
/*
|
||||
* Copy all capability leafs to pick up the synthetic ones so that
|
||||
* memcmp() below doesn't fail on that. The ones coming from CPUID will
|
||||
* get overwritten in get_cpu_cap().
|
||||
*/
|
||||
memcpy(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability));
|
||||
|
||||
get_cpu_cap(&info);
|
||||
|
||||
if (!memcmp(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability)))
|
||||
return;
|
||||
|
||||
pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n");
|
||||
pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
|
||||
}
|
||||
|
|
|
@ -144,6 +144,13 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
|
|||
{
|
||||
int i;
|
||||
|
||||
/*
|
||||
* We know that the hypervisor lie to us on the microcode version so
|
||||
* we may as well hope that it is running the correct version.
|
||||
*/
|
||||
if (cpu_has(c, X86_FEATURE_HYPERVISOR))
|
||||
return false;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
|
||||
if (c->x86_model == spectre_bad_microcodes[i].model &&
|
||||
c->x86_stepping == spectre_bad_microcodes[i].stepping)
|
||||
|
|
|
@ -498,7 +498,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
|
|||
return patch_size;
|
||||
}
|
||||
|
||||
static int apply_microcode_amd(int cpu)
|
||||
static enum ucode_state apply_microcode_amd(int cpu)
|
||||
{
|
||||
struct cpuinfo_x86 *c = &cpu_data(cpu);
|
||||
struct microcode_amd *mc_amd;
|
||||
|
@ -512,7 +512,7 @@ static int apply_microcode_amd(int cpu)
|
|||
|
||||
p = find_patch(cpu);
|
||||
if (!p)
|
||||
return 0;
|
||||
return UCODE_NFOUND;
|
||||
|
||||
mc_amd = p->data;
|
||||
uci->mc = p->data;
|
||||
|
@ -523,13 +523,13 @@ static int apply_microcode_amd(int cpu)
|
|||
if (rev >= mc_amd->hdr.patch_id) {
|
||||
c->microcode = rev;
|
||||
uci->cpu_sig.rev = rev;
|
||||
return 0;
|
||||
return UCODE_OK;
|
||||
}
|
||||
|
||||
if (__apply_microcode_amd(mc_amd)) {
|
||||
pr_err("CPU%d: update failed for patch_level=0x%08x\n",
|
||||
cpu, mc_amd->hdr.patch_id);
|
||||
return -1;
|
||||
return UCODE_ERROR;
|
||||
}
|
||||
pr_info("CPU%d: new patch_level=0x%08x\n", cpu,
|
||||
mc_amd->hdr.patch_id);
|
||||
|
@ -537,7 +537,7 @@ static int apply_microcode_amd(int cpu)
|
|||
uci->cpu_sig.rev = mc_amd->hdr.patch_id;
|
||||
c->microcode = mc_amd->hdr.patch_id;
|
||||
|
||||
return 0;
|
||||
return UCODE_UPDATED;
|
||||
}
|
||||
|
||||
static int install_equiv_cpu_table(const u8 *buf)
|
||||
|
|
|
@ -22,13 +22,16 @@
|
|||
#define pr_fmt(fmt) "microcode: " fmt
|
||||
|
||||
#include <linux/platform_device.h>
|
||||
#include <linux/stop_machine.h>
|
||||
#include <linux/syscore_ops.h>
|
||||
#include <linux/miscdevice.h>
|
||||
#include <linux/capability.h>
|
||||
#include <linux/firmware.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/nmi.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/mm.h>
|
||||
|
||||
|
@ -64,6 +67,11 @@ LIST_HEAD(microcode_cache);
|
|||
*/
|
||||
static DEFINE_MUTEX(microcode_mutex);
|
||||
|
||||
/*
|
||||
* Serialize late loading so that CPUs get updated one-by-one.
|
||||
*/
|
||||
static DEFINE_SPINLOCK(update_lock);
|
||||
|
||||
struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
|
||||
|
||||
struct cpu_info_ctx {
|
||||
|
@ -373,26 +381,23 @@ static int collect_cpu_info(int cpu)
|
|||
return ret;
|
||||
}
|
||||
|
||||
struct apply_microcode_ctx {
|
||||
int err;
|
||||
};
|
||||
|
||||
static void apply_microcode_local(void *arg)
|
||||
{
|
||||
struct apply_microcode_ctx *ctx = arg;
|
||||
enum ucode_state *err = arg;
|
||||
|
||||
ctx->err = microcode_ops->apply_microcode(smp_processor_id());
|
||||
*err = microcode_ops->apply_microcode(smp_processor_id());
|
||||
}
|
||||
|
||||
static int apply_microcode_on_target(int cpu)
|
||||
{
|
||||
struct apply_microcode_ctx ctx = { .err = 0 };
|
||||
enum ucode_state err;
|
||||
int ret;
|
||||
|
||||
ret = smp_call_function_single(cpu, apply_microcode_local, &ctx, 1);
|
||||
if (!ret)
|
||||
ret = ctx.err;
|
||||
|
||||
ret = smp_call_function_single(cpu, apply_microcode_local, &err, 1);
|
||||
if (!ret) {
|
||||
if (err == UCODE_ERROR)
|
||||
ret = 1;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -489,31 +494,110 @@ static void __exit microcode_dev_exit(void)
|
|||
/* fake device for request_firmware */
|
||||
static struct platform_device *microcode_pdev;
|
||||
|
||||
static int reload_for_cpu(int cpu)
|
||||
/*
|
||||
* Late loading dance. Why the heavy-handed stomp_machine effort?
|
||||
*
|
||||
* - HT siblings must be idle and not execute other code while the other sibling
|
||||
* is loading microcode in order to avoid any negative interactions caused by
|
||||
* the loading.
|
||||
*
|
||||
* - In addition, microcode update on the cores must be serialized until this
|
||||
* requirement can be relaxed in the future. Right now, this is conservative
|
||||
* and good.
|
||||
*/
|
||||
#define SPINUNIT 100 /* 100 nsec */
|
||||
|
||||
static int check_online_cpus(void)
|
||||
{
|
||||
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
|
||||
enum ucode_state ustate;
|
||||
int err = 0;
|
||||
if (num_online_cpus() == num_present_cpus())
|
||||
return 0;
|
||||
|
||||
if (!uci->valid)
|
||||
return err;
|
||||
pr_err("Not all CPUs online, aborting microcode update.\n");
|
||||
|
||||
ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, true);
|
||||
if (ustate == UCODE_OK)
|
||||
apply_microcode_on_target(cpu);
|
||||
else
|
||||
if (ustate == UCODE_ERROR)
|
||||
err = -EINVAL;
|
||||
return err;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static atomic_t late_cpus;
|
||||
|
||||
/*
|
||||
* Returns:
|
||||
* < 0 - on error
|
||||
* 0 - no update done
|
||||
* 1 - microcode was updated
|
||||
*/
|
||||
static int __reload_late(void *info)
|
||||
{
|
||||
unsigned int timeout = NSEC_PER_SEC;
|
||||
int all_cpus = num_online_cpus();
|
||||
int cpu = smp_processor_id();
|
||||
enum ucode_state err;
|
||||
int ret = 0;
|
||||
|
||||
atomic_dec(&late_cpus);
|
||||
|
||||
/*
|
||||
* Wait for all CPUs to arrive. A load will not be attempted unless all
|
||||
* CPUs show up.
|
||||
* */
|
||||
while (atomic_read(&late_cpus)) {
|
||||
if (timeout < SPINUNIT) {
|
||||
pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n",
|
||||
atomic_read(&late_cpus));
|
||||
return -1;
|
||||
}
|
||||
|
||||
ndelay(SPINUNIT);
|
||||
timeout -= SPINUNIT;
|
||||
|
||||
touch_nmi_watchdog();
|
||||
}
|
||||
|
||||
spin_lock(&update_lock);
|
||||
apply_microcode_local(&err);
|
||||
spin_unlock(&update_lock);
|
||||
|
||||
if (err > UCODE_NFOUND) {
|
||||
pr_warn("Error reloading microcode on CPU %d\n", cpu);
|
||||
ret = -1;
|
||||
} else if (err == UCODE_UPDATED) {
|
||||
ret = 1;
|
||||
}
|
||||
|
||||
atomic_inc(&late_cpus);
|
||||
|
||||
while (atomic_read(&late_cpus) != all_cpus)
|
||||
cpu_relax();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Reload microcode late on all CPUs. Wait for a sec until they
|
||||
* all gather together.
|
||||
*/
|
||||
static int microcode_reload_late(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
atomic_set(&late_cpus, num_online_cpus());
|
||||
|
||||
ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
else if (ret > 0)
|
||||
microcode_check();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t reload_store(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
const char *buf, size_t size)
|
||||
{
|
||||
enum ucode_state tmp_ret = UCODE_OK;
|
||||
int bsp = boot_cpu_data.cpu_index;
|
||||
unsigned long val;
|
||||
int cpu;
|
||||
ssize_t ret = 0, tmp_ret;
|
||||
ssize_t ret = 0;
|
||||
|
||||
ret = kstrtoul(buf, 0, &val);
|
||||
if (ret)
|
||||
|
@ -522,23 +606,24 @@ static ssize_t reload_store(struct device *dev,
|
|||
if (val != 1)
|
||||
return size;
|
||||
|
||||
get_online_cpus();
|
||||
mutex_lock(µcode_mutex);
|
||||
for_each_online_cpu(cpu) {
|
||||
tmp_ret = reload_for_cpu(cpu);
|
||||
if (tmp_ret != 0)
|
||||
pr_warn("Error reloading microcode on CPU %d\n", cpu);
|
||||
tmp_ret = microcode_ops->request_microcode_fw(bsp, µcode_pdev->dev, true);
|
||||
if (tmp_ret != UCODE_OK)
|
||||
return size;
|
||||
|
||||
/* save retval of the first encountered reload error */
|
||||
if (!ret)
|
||||
ret = tmp_ret;
|
||||
}
|
||||
if (!ret)
|
||||
perf_check_microcode();
|
||||
get_online_cpus();
|
||||
|
||||
ret = check_online_cpus();
|
||||
if (ret)
|
||||
goto put;
|
||||
|
||||
mutex_lock(µcode_mutex);
|
||||
ret = microcode_reload_late();
|
||||
mutex_unlock(µcode_mutex);
|
||||
|
||||
put:
|
||||
put_online_cpus();
|
||||
|
||||
if (!ret)
|
||||
if (ret >= 0)
|
||||
ret = size;
|
||||
|
||||
return ret;
|
||||
|
|
|
@ -589,6 +589,23 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
|
|||
if (!mc)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Save us the MSR write below - which is a particular expensive
|
||||
* operation - when the other hyperthread has updated the microcode
|
||||
* already.
|
||||
*/
|
||||
rev = intel_get_microcode_revision();
|
||||
if (rev >= mc->hdr.rev) {
|
||||
uci->cpu_sig.rev = rev;
|
||||
return UCODE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
* Writeback and invalidate caches before updating microcode to avoid
|
||||
* internal issues depending on what the microcode is updating.
|
||||
*/
|
||||
native_wbinvd();
|
||||
|
||||
/* write microcode via MSR 0x79 */
|
||||
native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
|
||||
|
||||
|
@ -772,27 +789,44 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int apply_microcode_intel(int cpu)
|
||||
static enum ucode_state apply_microcode_intel(int cpu)
|
||||
{
|
||||
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
|
||||
struct cpuinfo_x86 *c = &cpu_data(cpu);
|
||||
struct microcode_intel *mc;
|
||||
struct ucode_cpu_info *uci;
|
||||
struct cpuinfo_x86 *c;
|
||||
static int prev_rev;
|
||||
u32 rev;
|
||||
|
||||
/* We should bind the task to the CPU */
|
||||
if (WARN_ON(raw_smp_processor_id() != cpu))
|
||||
return -1;
|
||||
return UCODE_ERROR;
|
||||
|
||||
uci = ucode_cpu_info + cpu;
|
||||
mc = uci->mc;
|
||||
if (!mc) {
|
||||
/* Look for a newer patch in our cache: */
|
||||
mc = find_patch(uci);
|
||||
if (!mc) {
|
||||
mc = uci->mc;
|
||||
if (!mc)
|
||||
return 0;
|
||||
return UCODE_NFOUND;
|
||||
}
|
||||
|
||||
/*
|
||||
* Save us the MSR write below - which is a particular expensive
|
||||
* operation - when the other hyperthread has updated the microcode
|
||||
* already.
|
||||
*/
|
||||
rev = intel_get_microcode_revision();
|
||||
if (rev >= mc->hdr.rev) {
|
||||
uci->cpu_sig.rev = rev;
|
||||
c->microcode = rev;
|
||||
return UCODE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
* Writeback and invalidate caches before updating microcode to avoid
|
||||
* internal issues depending on what the microcode is updating.
|
||||
*/
|
||||
native_wbinvd();
|
||||
|
||||
/* write microcode via MSR 0x79 */
|
||||
wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
|
||||
|
||||
|
@ -801,7 +835,7 @@ static int apply_microcode_intel(int cpu)
|
|||
if (rev != mc->hdr.rev) {
|
||||
pr_err("CPU%d update to revision 0x%x failed\n",
|
||||
cpu, mc->hdr.rev);
|
||||
return -1;
|
||||
return UCODE_ERROR;
|
||||
}
|
||||
|
||||
if (rev != prev_rev) {
|
||||
|
@ -813,12 +847,10 @@ static int apply_microcode_intel(int cpu)
|
|||
prev_rev = rev;
|
||||
}
|
||||
|
||||
c = &cpu_data(cpu);
|
||||
|
||||
uci->cpu_sig.rev = rev;
|
||||
c->microcode = rev;
|
||||
|
||||
return 0;
|
||||
return UCODE_UPDATED;
|
||||
}
|
||||
|
||||
static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include <asm/nops.h>
|
||||
#include "../entry/calling.h"
|
||||
#include <asm/export.h>
|
||||
#include <asm/nospec-branch.h>
|
||||
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
#include <asm/asm-offsets.h>
|
||||
|
@ -137,6 +138,7 @@ ENTRY(secondary_startup_64)
|
|||
|
||||
/* Ensure I am executing from virtual addresses */
|
||||
movq $1f, %rax
|
||||
ANNOTATE_RETPOLINE_SAFE
|
||||
jmp *%rax
|
||||
1:
|
||||
UNWIND_HINT_EMPTY
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
/*
|
||||
* this changes the io permissions bitmap in the current task.
|
||||
*/
|
||||
asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
|
||||
SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on)
|
||||
{
|
||||
struct thread_struct *t = ¤t->thread;
|
||||
struct tss_struct *tss;
|
||||
|
|
|
@ -1168,10 +1168,18 @@ NOKPROBE_SYMBOL(longjmp_break_handler);
|
|||
|
||||
bool arch_within_kprobe_blacklist(unsigned long addr)
|
||||
{
|
||||
bool is_in_entry_trampoline_section = false;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
is_in_entry_trampoline_section =
|
||||
(addr >= (unsigned long)__entry_trampoline_start &&
|
||||
addr < (unsigned long)__entry_trampoline_end);
|
||||
#endif
|
||||
return (addr >= (unsigned long)__kprobes_text_start &&
|
||||
addr < (unsigned long)__kprobes_text_end) ||
|
||||
(addr >= (unsigned long)__entry_text_start &&
|
||||
addr < (unsigned long)__entry_text_end);
|
||||
addr < (unsigned long)__entry_text_end) ||
|
||||
is_in_entry_trampoline_section;
|
||||
}
|
||||
|
||||
int __init arch_init_kprobes(void)
|
||||
|
|
|
@ -1203,20 +1203,13 @@ void __init setup_arch(char **cmdline_p)
|
|||
|
||||
kasan_init();
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/* sync back kernel address range */
|
||||
clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
|
||||
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
|
||||
KERNEL_PGD_PTRS);
|
||||
|
||||
/*
|
||||
* sync back low identity map too. It is used for example
|
||||
* in the 32-bit EFI stub.
|
||||
* Sync back kernel address range.
|
||||
*
|
||||
* FIXME: Can the later sync in setup_cpu_entry_areas() replace
|
||||
* this call?
|
||||
*/
|
||||
clone_pgd_range(initial_page_table,
|
||||
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
|
||||
min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
|
||||
#endif
|
||||
sync_initial_page_table();
|
||||
|
||||
tboot_probe();
|
||||
|
||||
|
|
|
@ -287,24 +287,15 @@ void __init setup_per_cpu_areas(void)
|
|||
/* Setup cpu initialized, callin, callout masks */
|
||||
setup_cpu_local_masks();
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/*
|
||||
* Sync back kernel address range again. We already did this in
|
||||
* setup_arch(), but percpu data also needs to be available in
|
||||
* the smpboot asm. We can't reliably pick up percpu mappings
|
||||
* using vmalloc_fault(), because exception dispatch needs
|
||||
* percpu data.
|
||||
*
|
||||
* FIXME: Can the later sync in setup_cpu_entry_areas() replace
|
||||
* this call?
|
||||
*/
|
||||
clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
|
||||
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
|
||||
KERNEL_PGD_PTRS);
|
||||
|
||||
/*
|
||||
* sync back low identity map too. It is used for example
|
||||
* in the 32-bit EFI stub.
|
||||
*/
|
||||
clone_pgd_range(initial_page_table,
|
||||
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
|
||||
min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
|
||||
#endif
|
||||
sync_initial_page_table();
|
||||
}
|
||||
|
|
|
@ -5,7 +5,6 @@
|
|||
#include <asm/unwind.h>
|
||||
#include <asm/orc_types.h>
|
||||
#include <asm/orc_lookup.h>
|
||||
#include <asm/sections.h>
|
||||
|
||||
#define orc_warn(fmt, ...) \
|
||||
printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__)
|
||||
|
@ -148,7 +147,7 @@ static struct orc_entry *orc_find(unsigned long ip)
|
|||
}
|
||||
|
||||
/* vmlinux .init slow lookup: */
|
||||
if (ip >= (unsigned long)_sinittext && ip < (unsigned long)_einittext)
|
||||
if (init_kernel_text(ip))
|
||||
return __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
|
||||
__stop_orc_unwind_ip - __start_orc_unwind_ip, ip);
|
||||
|
||||
|
|
|
@ -118,9 +118,11 @@ SECTIONS
|
|||
|
||||
#ifdef CONFIG_X86_64
|
||||
. = ALIGN(PAGE_SIZE);
|
||||
VMLINUX_SYMBOL(__entry_trampoline_start) = .;
|
||||
_entry_trampoline = .;
|
||||
*(.entry_trampoline)
|
||||
. = ALIGN(PAGE_SIZE);
|
||||
VMLINUX_SYMBOL(__entry_trampoline_end) = .;
|
||||
ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
|
||||
#endif
|
||||
|
||||
|
|
|
@ -49,6 +49,7 @@
|
|||
#include <asm/debugreg.h>
|
||||
#include <asm/kvm_para.h>
|
||||
#include <asm/irq_remapping.h>
|
||||
#include <asm/microcode.h>
|
||||
#include <asm/nospec-branch.h>
|
||||
|
||||
#include <asm/virtext.h>
|
||||
|
@ -5355,7 +5356,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
|
|||
* being speculatively taken.
|
||||
*/
|
||||
if (svm->spec_ctrl)
|
||||
wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
|
||||
native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
|
||||
|
||||
asm volatile (
|
||||
"push %%" _ASM_BP "; \n\t"
|
||||
|
@ -5464,11 +5465,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
|
|||
* If the L02 MSR bitmap does not intercept the MSR, then we need to
|
||||
* save it.
|
||||
*/
|
||||
if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
|
||||
rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
|
||||
if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
|
||||
svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
|
||||
|
||||
if (svm->spec_ctrl)
|
||||
wrmsrl(MSR_IA32_SPEC_CTRL, 0);
|
||||
native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
|
||||
|
||||
/* Eliminate branch target predictions from guest mode */
|
||||
vmexit_fill_RSB();
|
||||
|
|
|
@ -51,6 +51,7 @@
|
|||
#include <asm/apic.h>
|
||||
#include <asm/irq_remapping.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/microcode.h>
|
||||
#include <asm/nospec-branch.h>
|
||||
|
||||
#include "trace.h"
|
||||
|
@ -9452,7 +9453,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
|||
* being speculatively taken.
|
||||
*/
|
||||
if (vmx->spec_ctrl)
|
||||
wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
|
||||
native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
|
||||
|
||||
vmx->__launched = vmx->loaded_vmcs->launched;
|
||||
asm(
|
||||
|
@ -9587,11 +9588,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
|||
* If the L02 MSR bitmap does not intercept the MSR, then we need to
|
||||
* save it.
|
||||
*/
|
||||
if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
|
||||
rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
|
||||
if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
|
||||
vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
|
||||
|
||||
if (vmx->spec_ctrl)
|
||||
wrmsrl(MSR_IA32_SPEC_CTRL, 0);
|
||||
native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
|
||||
|
||||
/* Eliminate branch target predictions from guest mode */
|
||||
vmexit_fill_RSB();
|
||||
|
|
|
@ -28,7 +28,6 @@ lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
|
|||
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
|
||||
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
|
||||
lib-$(CONFIG_RETPOLINE) += retpoline.o
|
||||
OBJECT_FILES_NON_STANDARD_retpoline.o :=y
|
||||
|
||||
obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
|
||||
|
||||
|
|
|
@ -7,7 +7,6 @@
|
|||
#include <asm/alternative-asm.h>
|
||||
#include <asm/export.h>
|
||||
#include <asm/nospec-branch.h>
|
||||
#include <asm/bitsperlong.h>
|
||||
|
||||
.macro THUNK reg
|
||||
.section .text.__x86.indirect_thunk
|
||||
|
@ -47,58 +46,3 @@ GENERATE_THUNK(r13)
|
|||
GENERATE_THUNK(r14)
|
||||
GENERATE_THUNK(r15)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Fill the CPU return stack buffer.
|
||||
*
|
||||
* Each entry in the RSB, if used for a speculative 'ret', contains an
|
||||
* infinite 'pause; lfence; jmp' loop to capture speculative execution.
|
||||
*
|
||||
* This is required in various cases for retpoline and IBRS-based
|
||||
* mitigations for the Spectre variant 2 vulnerability. Sometimes to
|
||||
* eliminate potentially bogus entries from the RSB, and sometimes
|
||||
* purely to ensure that it doesn't get empty, which on some CPUs would
|
||||
* allow predictions from other (unwanted!) sources to be used.
|
||||
*
|
||||
* Google experimented with loop-unrolling and this turned out to be
|
||||
* the optimal version - two calls, each with their own speculation
|
||||
* trap should their return address end up getting used, in a loop.
|
||||
*/
|
||||
.macro STUFF_RSB nr:req sp:req
|
||||
mov $(\nr / 2), %_ASM_BX
|
||||
.align 16
|
||||
771:
|
||||
call 772f
|
||||
773: /* speculation trap */
|
||||
pause
|
||||
lfence
|
||||
jmp 773b
|
||||
.align 16
|
||||
772:
|
||||
call 774f
|
||||
775: /* speculation trap */
|
||||
pause
|
||||
lfence
|
||||
jmp 775b
|
||||
.align 16
|
||||
774:
|
||||
dec %_ASM_BX
|
||||
jnz 771b
|
||||
add $((BITS_PER_LONG/8) * \nr), \sp
|
||||
.endm
|
||||
|
||||
#define RSB_FILL_LOOPS 16 /* To avoid underflow */
|
||||
|
||||
ENTRY(__fill_rsb)
|
||||
STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP
|
||||
ret
|
||||
END(__fill_rsb)
|
||||
EXPORT_SYMBOL_GPL(__fill_rsb)
|
||||
|
||||
#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
|
||||
|
||||
ENTRY(__clear_rsb)
|
||||
STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP
|
||||
ret
|
||||
END(__clear_rsb)
|
||||
EXPORT_SYMBOL_GPL(__clear_rsb)
|
||||
|
|
|
@ -163,4 +163,10 @@ void __init setup_cpu_entry_areas(void)
|
|||
|
||||
for_each_possible_cpu(cpu)
|
||||
setup_cpu_entry_area(cpu);
|
||||
|
||||
/*
|
||||
* This is the last essential update to swapper_pgdir which needs
|
||||
* to be synchronized to initial_page_table on 32bit.
|
||||
*/
|
||||
sync_initial_page_table();
|
||||
}
|
||||
|
|
|
@ -1248,10 +1248,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
|
|||
tsk = current;
|
||||
mm = tsk->mm;
|
||||
|
||||
/*
|
||||
* Detect and handle instructions that would cause a page fault for
|
||||
* both a tracked kernel page and a userspace page.
|
||||
*/
|
||||
prefetchw(&mm->mmap_sem);
|
||||
|
||||
if (unlikely(kmmio_fault(regs, address)))
|
||||
|
|
|
@ -453,6 +453,21 @@ static inline void permanent_kmaps_init(pgd_t *pgd_base)
|
|||
}
|
||||
#endif /* CONFIG_HIGHMEM */
|
||||
|
||||
void __init sync_initial_page_table(void)
|
||||
{
|
||||
clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
|
||||
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
|
||||
KERNEL_PGD_PTRS);
|
||||
|
||||
/*
|
||||
* sync back low identity map too. It is used for example
|
||||
* in the 32-bit EFI stub.
|
||||
*/
|
||||
clone_pgd_range(initial_page_table,
|
||||
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
|
||||
min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
|
||||
}
|
||||
|
||||
void __init native_pagetable_init(void)
|
||||
{
|
||||
unsigned long pfn, va;
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include <asm/page.h>
|
||||
#include <asm/processor-flags.h>
|
||||
#include <asm/msr-index.h>
|
||||
#include <asm/nospec-branch.h>
|
||||
|
||||
.text
|
||||
.code64
|
||||
|
@ -59,6 +60,7 @@ ENTRY(sme_encrypt_execute)
|
|||
movq %rax, %r8 /* Workarea encryption routine */
|
||||
addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */
|
||||
|
||||
ANNOTATE_RETPOLINE_SAFE
|
||||
call *%rax /* Call the encryption routine */
|
||||
|
||||
pop %r12
|
||||
|
|
|
@ -332,7 +332,7 @@ static void __init pti_clone_user_shared(void)
|
|||
}
|
||||
|
||||
/*
|
||||
* Clone the ESPFIX P4D into the user space visinble page table
|
||||
* Clone the ESPFIX P4D into the user space visible page table
|
||||
*/
|
||||
static void __init pti_setup_espfix64(void)
|
||||
{
|
||||
|
|
|
@ -102,7 +102,7 @@ ENTRY(startup_32)
|
|||
* don't we'll eventually crash trying to execute encrypted
|
||||
* instructions.
|
||||
*/
|
||||
bt $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags
|
||||
btl $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags
|
||||
jnc .Ldone
|
||||
movl $MSR_K8_SYSCFG, %ecx
|
||||
rdmsr
|
||||
|
|
|
@ -1,12 +1,15 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <linux/types.h>
|
||||
#include <linux/tick.h>
|
||||
#include <linux/percpu-defs.h>
|
||||
|
||||
#include <xen/xen.h>
|
||||
#include <xen/interface/xen.h>
|
||||
#include <xen/grant_table.h>
|
||||
#include <xen/events.h>
|
||||
|
||||
#include <asm/cpufeatures.h>
|
||||
#include <asm/msr-index.h>
|
||||
#include <asm/xen/hypercall.h>
|
||||
#include <asm/xen/page.h>
|
||||
#include <asm/fixmap.h>
|
||||
|
@ -15,6 +18,8 @@
|
|||
#include "mmu.h"
|
||||
#include "pmu.h"
|
||||
|
||||
static DEFINE_PER_CPU(u64, spec_ctrl);
|
||||
|
||||
void xen_arch_pre_suspend(void)
|
||||
{
|
||||
xen_save_time_memory_area();
|
||||
|
@ -35,6 +40,9 @@ void xen_arch_post_suspend(int cancelled)
|
|||
|
||||
static void xen_vcpu_notify_restore(void *data)
|
||||
{
|
||||
if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL))
|
||||
wrmsrl(MSR_IA32_SPEC_CTRL, this_cpu_read(spec_ctrl));
|
||||
|
||||
/* Boot processor notified via generic timekeeping_resume() */
|
||||
if (smp_processor_id() == 0)
|
||||
return;
|
||||
|
@ -44,7 +52,15 @@ static void xen_vcpu_notify_restore(void *data)
|
|||
|
||||
static void xen_vcpu_notify_suspend(void *data)
|
||||
{
|
||||
u64 tmp;
|
||||
|
||||
tick_suspend_local();
|
||||
|
||||
if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL)) {
|
||||
rdmsrl(MSR_IA32_SPEC_CTRL, tmp);
|
||||
this_cpu_write(spec_ctrl, tmp);
|
||||
wrmsrl(MSR_IA32_SPEC_CTRL, 0);
|
||||
}
|
||||
}
|
||||
|
||||
void xen_arch_resume(void)
|
||||
|
|
|
@ -27,3 +27,8 @@
|
|||
#if __has_feature(address_sanitizer)
|
||||
#define __SANITIZE_ADDRESS__
|
||||
#endif
|
||||
|
||||
/* Clang doesn't have a way to turn it off per-function, yet. */
|
||||
#ifdef __noretpoline
|
||||
#undef __noretpoline
|
||||
#endif
|
||||
|
|
|
@ -93,6 +93,10 @@
|
|||
#define __weak __attribute__((weak))
|
||||
#define __alias(symbol) __attribute__((alias(#symbol)))
|
||||
|
||||
#ifdef RETPOLINE
|
||||
#define __noretpoline __attribute__((indirect_branch("keep")))
|
||||
#endif
|
||||
|
||||
/*
|
||||
* it doesn't make sense on ARM (currently the only user of __naked)
|
||||
* to trace naked functions because then mcount is called without
|
||||
|
|
|
@ -6,10 +6,10 @@
|
|||
#include <linux/types.h>
|
||||
|
||||
/* Built-in __init functions needn't be compiled with retpoline */
|
||||
#if defined(RETPOLINE) && !defined(MODULE)
|
||||
#define __noretpoline __attribute__((indirect_branch("keep")))
|
||||
#if defined(__noretpoline) && !defined(MODULE)
|
||||
#define __noinitretpoline __noretpoline
|
||||
#else
|
||||
#define __noretpoline
|
||||
#define __noinitretpoline
|
||||
#endif
|
||||
|
||||
/* These macros are used to mark some functions or
|
||||
|
@ -47,7 +47,7 @@
|
|||
|
||||
/* These are for everybody (although not all archs will actually
|
||||
discard it in modules) */
|
||||
#define __init __section(.init.text) __cold __latent_entropy __noretpoline
|
||||
#define __init __section(.init.text) __cold __latent_entropy __noinitretpoline
|
||||
#define __initdata __section(.init.data)
|
||||
#define __initconst __section(.init.rodata)
|
||||
#define __exitdata __section(.exit.data)
|
||||
|
|
|
@ -151,6 +151,7 @@ extern struct jump_entry __start___jump_table[];
|
|||
extern struct jump_entry __stop___jump_table[];
|
||||
|
||||
extern void jump_label_init(void);
|
||||
extern void jump_label_invalidate_init(void);
|
||||
extern void jump_label_lock(void);
|
||||
extern void jump_label_unlock(void);
|
||||
extern void arch_jump_label_transform(struct jump_entry *entry,
|
||||
|
@ -198,6 +199,8 @@ static __always_inline void jump_label_init(void)
|
|||
static_key_initialized = true;
|
||||
}
|
||||
|
||||
static inline void jump_label_invalidate_init(void) {}
|
||||
|
||||
static __always_inline bool static_key_false(struct static_key *key)
|
||||
{
|
||||
if (unlikely(static_key_count(key) > 0))
|
||||
|
|
|
@ -472,6 +472,7 @@ extern bool parse_option_str(const char *str, const char *option);
|
|||
extern char *next_arg(char *args, char **param, char **val);
|
||||
|
||||
extern int core_kernel_text(unsigned long addr);
|
||||
extern int init_kernel_text(unsigned long addr);
|
||||
extern int core_kernel_data(unsigned long addr);
|
||||
extern int __kernel_text_address(unsigned long addr);
|
||||
extern int kernel_text_address(unsigned long addr);
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
#ifndef _LINUX_NOSPEC_H
|
||||
#define _LINUX_NOSPEC_H
|
||||
#include <asm/barrier.h>
|
||||
|
||||
/**
|
||||
* array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise
|
||||
|
@ -29,26 +30,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
|
|||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Warn developers about inappropriate array_index_nospec() usage.
|
||||
*
|
||||
* Even if the CPU speculates past the WARN_ONCE branch, the
|
||||
* sign bit of @index is taken into account when generating the
|
||||
* mask.
|
||||
*
|
||||
* This warning is compiled out when the compiler can infer that
|
||||
* @index and @size are less than LONG_MAX.
|
||||
*/
|
||||
#define array_index_mask_nospec_check(index, size) \
|
||||
({ \
|
||||
if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, \
|
||||
"array_index_nospec() limited to range of [0, LONG_MAX]\n")) \
|
||||
_mask = 0; \
|
||||
else \
|
||||
_mask = array_index_mask_nospec(index, size); \
|
||||
_mask; \
|
||||
})
|
||||
|
||||
/*
|
||||
* array_index_nospec - sanitize an array index after a bounds check
|
||||
*
|
||||
|
@ -67,12 +48,11 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
|
|||
({ \
|
||||
typeof(index) _i = (index); \
|
||||
typeof(size) _s = (size); \
|
||||
unsigned long _mask = array_index_mask_nospec_check(_i, _s); \
|
||||
unsigned long _mask = array_index_mask_nospec(_i, _s); \
|
||||
\
|
||||
BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \
|
||||
BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \
|
||||
\
|
||||
_i &= _mask; \
|
||||
_i; \
|
||||
(typeof(_i)) (_i & _mask); \
|
||||
})
|
||||
#endif /* _LINUX_NOSPEC_H */
|
||||
|
|
|
@ -89,6 +89,7 @@
|
|||
#include <linux/io.h>
|
||||
#include <linux/cache.h>
|
||||
#include <linux/rodata_test.h>
|
||||
#include <linux/jump_label.h>
|
||||
|
||||
#include <asm/io.h>
|
||||
#include <asm/bugs.h>
|
||||
|
@ -1000,6 +1001,7 @@ static int __ref kernel_init(void *unused)
|
|||
/* need to finish all async __init code before freeing the memory */
|
||||
async_synchronize_full();
|
||||
ftrace_free_init_mem();
|
||||
jump_label_invalidate_init();
|
||||
free_initmem();
|
||||
mark_readonly();
|
||||
system_state = SYSTEM_RUNNING;
|
||||
|
|
|
@ -64,7 +64,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
|
|||
return e;
|
||||
}
|
||||
|
||||
static inline int init_kernel_text(unsigned long addr)
|
||||
int init_kernel_text(unsigned long addr)
|
||||
{
|
||||
if (addr >= (unsigned long)_sinittext &&
|
||||
addr < (unsigned long)_einittext)
|
||||
|
|
|
@ -366,12 +366,15 @@ static void __jump_label_update(struct static_key *key,
|
|||
{
|
||||
for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) {
|
||||
/*
|
||||
* entry->code set to 0 invalidates module init text sections
|
||||
* kernel_text_address() verifies we are not in core kernel
|
||||
* init code, see jump_label_invalidate_module_init().
|
||||
* An entry->code of 0 indicates an entry which has been
|
||||
* disabled because it was in an init text area.
|
||||
*/
|
||||
if (entry->code && kernel_text_address(entry->code))
|
||||
if (entry->code) {
|
||||
if (kernel_text_address(entry->code))
|
||||
arch_jump_label_transform(entry, jump_label_type(entry));
|
||||
else
|
||||
WARN_ONCE(1, "can't patch jump_label at %pS", (void *)entry->code);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -417,6 +420,19 @@ void __init jump_label_init(void)
|
|||
cpus_read_unlock();
|
||||
}
|
||||
|
||||
/* Disable any jump label entries in __init code */
|
||||
void __init jump_label_invalidate_init(void)
|
||||
{
|
||||
struct jump_entry *iter_start = __start___jump_table;
|
||||
struct jump_entry *iter_stop = __stop___jump_table;
|
||||
struct jump_entry *iter;
|
||||
|
||||
for (iter = iter_start; iter < iter_stop; iter++) {
|
||||
if (init_kernel_text(iter->code))
|
||||
iter->code = 0;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MODULES
|
||||
|
||||
static enum jump_label_type jump_label_init_type(struct jump_entry *entry)
|
||||
|
@ -633,6 +649,7 @@ static void jump_label_del_module(struct module *mod)
|
|||
}
|
||||
}
|
||||
|
||||
/* Disable any jump label entries in module init code */
|
||||
static void jump_label_invalidate_module_init(struct module *mod)
|
||||
{
|
||||
struct jump_entry *iter_start = mod->jump_entries;
|
||||
|
|
|
@ -256,6 +256,8 @@ __objtool_obj := $(objtree)/tools/objtool/objtool
|
|||
|
||||
objtool_args = $(if $(CONFIG_UNWINDER_ORC),orc generate,check)
|
||||
|
||||
objtool_args += $(if $(part-of-module), --module,)
|
||||
|
||||
ifndef CONFIG_FRAME_POINTER
|
||||
objtool_args += --no-fp
|
||||
endif
|
||||
|
@ -264,6 +266,12 @@ objtool_args += --no-unreachable
|
|||
else
|
||||
objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable)
|
||||
endif
|
||||
ifdef CONFIG_RETPOLINE
|
||||
ifneq ($(RETPOLINE_CFLAGS),)
|
||||
objtool_args += --retpoline
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
ifdef CONFIG_MODVERSIONS
|
||||
objtool_o = $(@D)/.tmp_$(@F)
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
#include "builtin.h"
|
||||
#include "check.h"
|
||||
|
||||
bool no_fp, no_unreachable;
|
||||
bool no_fp, no_unreachable, retpoline, module;
|
||||
|
||||
static const char * const check_usage[] = {
|
||||
"objtool check [<options>] file.o",
|
||||
|
@ -39,6 +39,8 @@ static const char * const check_usage[] = {
|
|||
const struct option check_options[] = {
|
||||
OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"),
|
||||
OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"),
|
||||
OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"),
|
||||
OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"),
|
||||
OPT_END(),
|
||||
};
|
||||
|
||||
|
@ -53,5 +55,5 @@ int cmd_check(int argc, const char **argv)
|
|||
|
||||
objname = argv[0];
|
||||
|
||||
return check(objname, no_fp, no_unreachable, false);
|
||||
return check(objname, false);
|
||||
}
|
||||
|
|
|
@ -25,7 +25,6 @@
|
|||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <subcmd/parse-options.h>
|
||||
#include "builtin.h"
|
||||
#include "check.h"
|
||||
|
||||
|
@ -36,9 +35,6 @@ static const char *orc_usage[] = {
|
|||
NULL,
|
||||
};
|
||||
|
||||
extern const struct option check_options[];
|
||||
extern bool no_fp, no_unreachable;
|
||||
|
||||
int cmd_orc(int argc, const char **argv)
|
||||
{
|
||||
const char *objname;
|
||||
|
@ -54,7 +50,7 @@ int cmd_orc(int argc, const char **argv)
|
|||
|
||||
objname = argv[0];
|
||||
|
||||
return check(objname, no_fp, no_unreachable, true);
|
||||
return check(objname, true);
|
||||
}
|
||||
|
||||
if (!strcmp(argv[0], "dump")) {
|
||||
|
|
|
@ -17,6 +17,11 @@
|
|||
#ifndef _BUILTIN_H
|
||||
#define _BUILTIN_H
|
||||
|
||||
#include <subcmd/parse-options.h>
|
||||
|
||||
extern const struct option check_options[];
|
||||
extern bool no_fp, no_unreachable, retpoline, module;
|
||||
|
||||
extern int cmd_check(int argc, const char **argv);
|
||||
extern int cmd_orc(int argc, const char **argv);
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "builtin.h"
|
||||
#include "check.h"
|
||||
#include "elf.h"
|
||||
#include "special.h"
|
||||
|
@ -33,7 +34,6 @@ struct alternative {
|
|||
};
|
||||
|
||||
const char *objname;
|
||||
static bool no_fp;
|
||||
struct cfi_state initial_func_cfi;
|
||||
|
||||
struct instruction *find_insn(struct objtool_file *file,
|
||||
|
@ -497,6 +497,7 @@ static int add_jump_destinations(struct objtool_file *file)
|
|||
* disguise, so convert them accordingly.
|
||||
*/
|
||||
insn->type = INSN_JUMP_DYNAMIC;
|
||||
insn->retpoline_safe = true;
|
||||
continue;
|
||||
} else {
|
||||
/* sibling call */
|
||||
|
@ -548,6 +549,7 @@ static int add_call_destinations(struct objtool_file *file)
|
|||
if (!insn->call_dest && !insn->ignore) {
|
||||
WARN_FUNC("unsupported intra-function call",
|
||||
insn->sec, insn->offset);
|
||||
if (retpoline)
|
||||
WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE.");
|
||||
return -1;
|
||||
}
|
||||
|
@ -923,7 +925,11 @@ static struct rela *find_switch_table(struct objtool_file *file,
|
|||
if (find_symbol_containing(file->rodata, text_rela->addend))
|
||||
continue;
|
||||
|
||||
return find_rela_by_dest(file->rodata, text_rela->addend);
|
||||
rodata_rela = find_rela_by_dest(file->rodata, text_rela->addend);
|
||||
if (!rodata_rela)
|
||||
continue;
|
||||
|
||||
return rodata_rela;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
|
@ -1108,6 +1114,41 @@ static int read_unwind_hints(struct objtool_file *file)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int read_retpoline_hints(struct objtool_file *file)
|
||||
{
|
||||
struct section *sec;
|
||||
struct instruction *insn;
|
||||
struct rela *rela;
|
||||
|
||||
sec = find_section_by_name(file->elf, ".rela.discard.retpoline_safe");
|
||||
if (!sec)
|
||||
return 0;
|
||||
|
||||
list_for_each_entry(rela, &sec->rela_list, list) {
|
||||
if (rela->sym->type != STT_SECTION) {
|
||||
WARN("unexpected relocation symbol type in %s", sec->name);
|
||||
return -1;
|
||||
}
|
||||
|
||||
insn = find_insn(file, rela->sym->sec, rela->addend);
|
||||
if (!insn) {
|
||||
WARN("bad .discard.retpoline_safe entry");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (insn->type != INSN_JUMP_DYNAMIC &&
|
||||
insn->type != INSN_CALL_DYNAMIC) {
|
||||
WARN_FUNC("retpoline_safe hint not an indirect jump/call",
|
||||
insn->sec, insn->offset);
|
||||
return -1;
|
||||
}
|
||||
|
||||
insn->retpoline_safe = true;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int decode_sections(struct objtool_file *file)
|
||||
{
|
||||
int ret;
|
||||
|
@ -1146,6 +1187,10 @@ static int decode_sections(struct objtool_file *file)
|
|||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = read_retpoline_hints(file);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1891,6 +1936,38 @@ static int validate_unwind_hints(struct objtool_file *file)
|
|||
return warnings;
|
||||
}
|
||||
|
||||
static int validate_retpoline(struct objtool_file *file)
|
||||
{
|
||||
struct instruction *insn;
|
||||
int warnings = 0;
|
||||
|
||||
for_each_insn(file, insn) {
|
||||
if (insn->type != INSN_JUMP_DYNAMIC &&
|
||||
insn->type != INSN_CALL_DYNAMIC)
|
||||
continue;
|
||||
|
||||
if (insn->retpoline_safe)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* .init.text code is ran before userspace and thus doesn't
|
||||
* strictly need retpolines, except for modules which are
|
||||
* loaded late, they very much do need retpoline in their
|
||||
* .init.text
|
||||
*/
|
||||
if (!strcmp(insn->sec->name, ".init.text") && !module)
|
||||
continue;
|
||||
|
||||
WARN_FUNC("indirect %s found in RETPOLINE build",
|
||||
insn->sec, insn->offset,
|
||||
insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
|
||||
|
||||
warnings++;
|
||||
}
|
||||
|
||||
return warnings;
|
||||
}
|
||||
|
||||
static bool is_kasan_insn(struct instruction *insn)
|
||||
{
|
||||
return (insn->type == INSN_CALL &&
|
||||
|
@ -2022,13 +2099,12 @@ static void cleanup(struct objtool_file *file)
|
|||
elf_close(file->elf);
|
||||
}
|
||||
|
||||
int check(const char *_objname, bool _no_fp, bool no_unreachable, bool orc)
|
||||
int check(const char *_objname, bool orc)
|
||||
{
|
||||
struct objtool_file file;
|
||||
int ret, warnings = 0;
|
||||
|
||||
objname = _objname;
|
||||
no_fp = _no_fp;
|
||||
|
||||
file.elf = elf_open(objname, orc ? O_RDWR : O_RDONLY);
|
||||
if (!file.elf)
|
||||
|
@ -2052,6 +2128,13 @@ int check(const char *_objname, bool _no_fp, bool no_unreachable, bool orc)
|
|||
if (list_empty(&file.insn_list))
|
||||
goto out;
|
||||
|
||||
if (retpoline) {
|
||||
ret = validate_retpoline(&file);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
warnings += ret;
|
||||
}
|
||||
|
||||
ret = validate_functions(&file);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
|
|
@ -45,6 +45,7 @@ struct instruction {
|
|||
unsigned char type;
|
||||
unsigned long immediate;
|
||||
bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts;
|
||||
bool retpoline_safe;
|
||||
struct symbol *call_dest;
|
||||
struct instruction *jump_dest;
|
||||
struct instruction *first_jump_src;
|
||||
|
@ -63,7 +64,7 @@ struct objtool_file {
|
|||
bool ignore_unreachables, c_file, hints;
|
||||
};
|
||||
|
||||
int check(const char *objname, bool no_fp, bool no_unreachable, bool orc);
|
||||
int check(const char *objname, bool orc);
|
||||
|
||||
struct instruction *find_insn(struct objtool_file *file,
|
||||
struct section *sec, unsigned long offset);
|
||||
|
|
|
@ -450,7 +450,7 @@ static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
|
|||
num_vsyscall_traps++;
|
||||
}
|
||||
|
||||
static int test_native_vsyscall(void)
|
||||
static int test_emulation(void)
|
||||
{
|
||||
time_t tmp;
|
||||
bool is_native;
|
||||
|
@ -458,7 +458,7 @@ static int test_native_vsyscall(void)
|
|||
if (!vtime)
|
||||
return 0;
|
||||
|
||||
printf("[RUN]\tchecking for native vsyscall\n");
|
||||
printf("[RUN]\tchecking that vsyscalls are emulated\n");
|
||||
sethandler(SIGTRAP, sigtrap, 0);
|
||||
set_eflags(get_eflags() | X86_EFLAGS_TF);
|
||||
vtime(&tmp);
|
||||
|
@ -474,11 +474,12 @@ static int test_native_vsyscall(void)
|
|||
*/
|
||||
is_native = (num_vsyscall_traps > 1);
|
||||
|
||||
printf("\tvsyscalls are %s (%d instructions in vsyscall page)\n",
|
||||
printf("[%s]\tvsyscalls are %s (%d instructions in vsyscall page)\n",
|
||||
(is_native ? "FAIL" : "OK"),
|
||||
(is_native ? "native" : "emulated"),
|
||||
(int)num_vsyscall_traps);
|
||||
|
||||
return 0;
|
||||
return is_native;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -498,7 +499,7 @@ int main(int argc, char **argv)
|
|||
nerrs += test_vsys_r();
|
||||
|
||||
#ifdef __x86_64__
|
||||
nerrs += test_native_vsyscall();
|
||||
nerrs += test_emulation();
|
||||
#endif
|
||||
|
||||
return nerrs ? 1 : 0;
|
||||
|
|
Загрузка…
Ссылка в новой задаче