The conversion of X86 syscall, interrupt and exception entry/exit handling
to the generic code. Pretty much a straight forward 1:1 conversion plus the consolidation of the KVM handling of pending work before entering guest mode. -----BEGIN PGP SIGNATURE----- iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAl8pEFgTHHRnbHhAbGlu dXRyb25peC5kZQAKCRCmGPVMDXSYocEwD/474Eb7LzZ8yahyUBirWJP3k3qzgs9j dZUxqB6LNuDOstEyTGLPdx1dmQP2vHbFfjoM7YBOH37EGcHsqjGliLvn2Y05ZD7O 6kYwjz6qVnJcm3IMtfSUn/8LkfO5pGUdKd3U5ngDmPLpkeaQ4nPKqiO0uIb0wzwa cO7l10tG4YjMCWQxPNIaOh8kncLieQBediJPFjkQjV+Fh33kSU3LWTl3fccz6b5+ mgSUFL0qjQpp+Nl7lCaDQQiAop9GTUETfDtximRydZauiM2NpCfz+QBmQzq50Xv1 G3DWZoBIZBjmWJUgfSmS/s4GOYkBTBnT/fUcZmIDcgdRwvtEvRzIhcP87/wn7P3N UKpLdHqmvA0BFDXZbNZgS362++29pj5Lnb+u3QbWSKQ9UqHN0NUlSY4wzfTLXsGp Mzpp4TW0u/8kyOlo7wK3lVDgNJaPG31aiNVuDPgLe4cEluO5cq7/7g2GcFBqF1Ly SqNGD1IccteNQTNvDopczPy7qUl5Lal+Ia06szNSPR48gLrvhSWdyYr2i1sD7vx4 hAhR0Hsi9dacGv46TrRw1OdDzq9bOW68G8GIgLJgDXaayPXLnx6TQEUjzQtIkE/i ydTPUarp5QOFByt+RBjI90ZcW4RuLgMTOEVONPXtSn8IoCP2Kdg9u3gD9AmUW3Q2 JFkKMiSiJPGxlw== =84y7 -----END PGP SIGNATURE----- Merge tag 'x86-entry-2020-08-04' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull x86 conversion to generic entry code from Thomas Gleixner: "The conversion of X86 syscall, interrupt and exception entry/exit handling to the generic code. 
Pretty much a straight-forward 1:1 conversion plus the consolidation of the KVM handling of pending work before entering guest mode" * tag 'x86-entry-2020-08-04' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/kvm: Use __xfer_to_guest_mode_work_pending() in kvm_run_vcpu() x86/kvm: Use generic xfer to guest work function x86/entry: Cleanup idtentry_enter/exit x86/entry: Use generic interrupt entry/exit code x86/entry: Cleanup idtentry_entry/exit_user x86/entry: Use generic syscall exit functionality x86/entry: Use generic syscall entry function x86/ptrace: Provide pt_regs helper for entry/exit x86/entry: Move user return notifier out of loop x86/entry: Consolidate 32/64 bit syscall entry x86/entry: Consolidate check_user_regs() x86: Correct noinstr qualifiers x86/idtentry: Remove stale comment
This commit is contained in:
Коммит
125cfa0d4d
|
@ -115,6 +115,7 @@ config X86
|
|||
select GENERIC_CPU_AUTOPROBE
|
||||
select GENERIC_CPU_VULNERABILITIES
|
||||
select GENERIC_EARLY_IOREMAP
|
||||
select GENERIC_ENTRY
|
||||
select GENERIC_FIND_FIRST_BIT
|
||||
select GENERIC_IOMAP
|
||||
select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP
|
||||
|
|
|
@ -10,20 +10,13 @@
|
|||
#include <linux/kernel.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/task_stack.h>
|
||||
#include <linux/entry-common.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/ptrace.h>
|
||||
#include <linux/tracehook.h>
|
||||
#include <linux/audit.h>
|
||||
#include <linux/seccomp.h>
|
||||
#include <linux/signal.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/context_tracking.h>
|
||||
#include <linux/user-return-notifier.h>
|
||||
#include <linux/nospec.h>
|
||||
#include <linux/uprobes.h>
|
||||
#include <linux/livepatch.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
|
@ -42,343 +35,12 @@
|
|||
#include <asm/syscall.h>
|
||||
#include <asm/irq_stack.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/syscalls.h>
|
||||
|
||||
/* Check that the stack and regs on entry from user mode are sane. */
|
||||
static noinstr void check_user_regs(struct pt_regs *regs)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) {
|
||||
/*
|
||||
* Make sure that the entry code gave us a sensible EFLAGS
|
||||
* register. Native because we want to check the actual CPU
|
||||
* state, not the interrupt state as imagined by Xen.
|
||||
*/
|
||||
unsigned long flags = native_save_fl();
|
||||
WARN_ON_ONCE(flags & (X86_EFLAGS_AC | X86_EFLAGS_DF |
|
||||
X86_EFLAGS_NT));
|
||||
|
||||
/* We think we came from user mode. Make sure pt_regs agrees. */
|
||||
WARN_ON_ONCE(!user_mode(regs));
|
||||
|
||||
/*
|
||||
* All entries from user mode (except #DF) should be on the
|
||||
* normal thread stack and should have user pt_regs in the
|
||||
* correct location.
|
||||
*/
|
||||
WARN_ON_ONCE(!on_thread_stack());
|
||||
WARN_ON_ONCE(regs != task_pt_regs(current));
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CONTEXT_TRACKING
|
||||
/**
|
||||
* enter_from_user_mode - Establish state when coming from user mode
|
||||
*
|
||||
* Syscall entry disables interrupts, but user mode is traced as interrupts
|
||||
* enabled. Also with NO_HZ_FULL RCU might be idle.
|
||||
*
|
||||
* 1) Tell lockdep that interrupts are disabled
|
||||
* 2) Invoke context tracking if enabled to reactivate RCU
|
||||
* 3) Trace interrupts off state
|
||||
*/
|
||||
static noinstr void enter_from_user_mode(void)
|
||||
{
|
||||
enum ctx_state state = ct_state();
|
||||
|
||||
lockdep_hardirqs_off(CALLER_ADDR0);
|
||||
user_exit_irqoff();
|
||||
|
||||
instrumentation_begin();
|
||||
CT_WARN_ON(state != CONTEXT_USER);
|
||||
trace_hardirqs_off_finish();
|
||||
instrumentation_end();
|
||||
}
|
||||
#else
|
||||
static __always_inline void enter_from_user_mode(void)
|
||||
{
|
||||
lockdep_hardirqs_off(CALLER_ADDR0);
|
||||
instrumentation_begin();
|
||||
trace_hardirqs_off_finish();
|
||||
instrumentation_end();
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* exit_to_user_mode - Fixup state when exiting to user mode
|
||||
*
|
||||
* Syscall exit enables interrupts, but the kernel state is interrupts
|
||||
* disabled when this is invoked. Also tell RCU about it.
|
||||
*
|
||||
* 1) Trace interrupts on state
|
||||
* 2) Invoke context tracking if enabled to adjust RCU state
|
||||
* 3) Clear CPU buffers if CPU is affected by MDS and the mitigation is on.
|
||||
* 4) Tell lockdep that interrupts are enabled
|
||||
*/
|
||||
static __always_inline void exit_to_user_mode(void)
|
||||
{
|
||||
instrumentation_begin();
|
||||
trace_hardirqs_on_prepare();
|
||||
lockdep_hardirqs_on_prepare(CALLER_ADDR0);
|
||||
instrumentation_end();
|
||||
|
||||
user_enter_irqoff();
|
||||
mds_user_clear_cpu_buffers();
|
||||
lockdep_hardirqs_on(CALLER_ADDR0);
|
||||
}
|
||||
|
||||
static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
if (arch == AUDIT_ARCH_X86_64) {
|
||||
audit_syscall_entry(regs->orig_ax, regs->di,
|
||||
regs->si, regs->dx, regs->r10);
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
audit_syscall_entry(regs->orig_ax, regs->bx,
|
||||
regs->cx, regs->dx, regs->si);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the syscall nr to run (which should match regs->orig_ax) or -1
|
||||
* to skip the syscall.
|
||||
*/
|
||||
static long syscall_trace_enter(struct pt_regs *regs)
|
||||
{
|
||||
u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
|
||||
|
||||
struct thread_info *ti = current_thread_info();
|
||||
unsigned long ret = 0;
|
||||
u32 work;
|
||||
|
||||
work = READ_ONCE(ti->flags);
|
||||
|
||||
if (work & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU)) {
|
||||
ret = tracehook_report_syscall_entry(regs);
|
||||
if (ret || (work & _TIF_SYSCALL_EMU))
|
||||
return -1L;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SECCOMP
|
||||
/*
|
||||
* Do seccomp after ptrace, to catch any tracer changes.
|
||||
*/
|
||||
if (work & _TIF_SECCOMP) {
|
||||
struct seccomp_data sd;
|
||||
|
||||
sd.arch = arch;
|
||||
sd.nr = regs->orig_ax;
|
||||
sd.instruction_pointer = regs->ip;
|
||||
#ifdef CONFIG_X86_64
|
||||
if (arch == AUDIT_ARCH_X86_64) {
|
||||
sd.args[0] = regs->di;
|
||||
sd.args[1] = regs->si;
|
||||
sd.args[2] = regs->dx;
|
||||
sd.args[3] = regs->r10;
|
||||
sd.args[4] = regs->r8;
|
||||
sd.args[5] = regs->r9;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
sd.args[0] = regs->bx;
|
||||
sd.args[1] = regs->cx;
|
||||
sd.args[2] = regs->dx;
|
||||
sd.args[3] = regs->si;
|
||||
sd.args[4] = regs->di;
|
||||
sd.args[5] = regs->bp;
|
||||
}
|
||||
|
||||
ret = __secure_computing(&sd);
|
||||
if (ret == -1)
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
|
||||
trace_sys_enter(regs, regs->orig_ax);
|
||||
|
||||
do_audit_syscall_entry(regs, arch);
|
||||
|
||||
return ret ?: regs->orig_ax;
|
||||
}
|
||||
|
||||
#define EXIT_TO_USERMODE_LOOP_FLAGS \
|
||||
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
|
||||
_TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY | _TIF_PATCH_PENDING)
|
||||
|
||||
static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
|
||||
{
|
||||
/*
|
||||
* In order to return to user mode, we need to have IRQs off with
|
||||
* none of EXIT_TO_USERMODE_LOOP_FLAGS set. Several of these flags
|
||||
* can be set at any time on preemptible kernels if we have IRQs on,
|
||||
* so we need to loop. Disabling preemption wouldn't help: doing the
|
||||
* work to clear some of the flags can sleep.
|
||||
*/
|
||||
while (true) {
|
||||
/* We have work to do. */
|
||||
local_irq_enable();
|
||||
|
||||
if (cached_flags & _TIF_NEED_RESCHED)
|
||||
schedule();
|
||||
|
||||
if (cached_flags & _TIF_UPROBE)
|
||||
uprobe_notify_resume(regs);
|
||||
|
||||
if (cached_flags & _TIF_PATCH_PENDING)
|
||||
klp_update_patch_state(current);
|
||||
|
||||
/* deal with pending signal delivery */
|
||||
if (cached_flags & _TIF_SIGPENDING)
|
||||
do_signal(regs);
|
||||
|
||||
if (cached_flags & _TIF_NOTIFY_RESUME) {
|
||||
clear_thread_flag(TIF_NOTIFY_RESUME);
|
||||
tracehook_notify_resume(regs);
|
||||
rseq_handle_notify_resume(NULL, regs);
|
||||
}
|
||||
|
||||
if (cached_flags & _TIF_USER_RETURN_NOTIFY)
|
||||
fire_user_return_notifiers();
|
||||
|
||||
/* Disable IRQs and retry */
|
||||
local_irq_disable();
|
||||
|
||||
cached_flags = READ_ONCE(current_thread_info()->flags);
|
||||
|
||||
if (!(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void __prepare_exit_to_usermode(struct pt_regs *regs)
|
||||
{
|
||||
struct thread_info *ti = current_thread_info();
|
||||
u32 cached_flags;
|
||||
|
||||
addr_limit_user_check();
|
||||
|
||||
lockdep_assert_irqs_disabled();
|
||||
lockdep_sys_exit();
|
||||
|
||||
cached_flags = READ_ONCE(ti->flags);
|
||||
|
||||
if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
|
||||
exit_to_usermode_loop(regs, cached_flags);
|
||||
|
||||
/* Reload ti->flags; we may have rescheduled above. */
|
||||
cached_flags = READ_ONCE(ti->flags);
|
||||
|
||||
if (unlikely(cached_flags & _TIF_IO_BITMAP))
|
||||
tss_update_io_bitmap();
|
||||
|
||||
fpregs_assert_state_consistent();
|
||||
if (unlikely(cached_flags & _TIF_NEED_FPU_LOAD))
|
||||
switch_fpu_return();
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
/*
|
||||
* Compat syscalls set TS_COMPAT. Make sure we clear it before
|
||||
* returning to user mode. We need to clear it *after* signal
|
||||
* handling, because syscall restart has a fixup for compat
|
||||
* syscalls. The fixup is exercised by the ptrace_syscall_32
|
||||
* selftest.
|
||||
*
|
||||
* We also need to clear TS_I386_REGS_POKED: the 32-bit tracer
|
||||
* special case only applies after poking regs and before the
|
||||
* very next return to user mode.
|
||||
*/
|
||||
ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
|
||||
#endif
|
||||
}
|
||||
|
||||
static noinstr void prepare_exit_to_usermode(struct pt_regs *regs)
|
||||
{
|
||||
instrumentation_begin();
|
||||
__prepare_exit_to_usermode(regs);
|
||||
instrumentation_end();
|
||||
exit_to_user_mode();
|
||||
}
|
||||
|
||||
#define SYSCALL_EXIT_WORK_FLAGS \
|
||||
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
|
||||
_TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)
|
||||
|
||||
static void syscall_slow_exit_work(struct pt_regs *regs, u32 cached_flags)
|
||||
{
|
||||
bool step;
|
||||
|
||||
audit_syscall_exit(regs);
|
||||
|
||||
if (cached_flags & _TIF_SYSCALL_TRACEPOINT)
|
||||
trace_sys_exit(regs, regs->ax);
|
||||
|
||||
/*
|
||||
* If TIF_SYSCALL_EMU is set, we only get here because of
|
||||
* TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
|
||||
* We already reported this syscall instruction in
|
||||
* syscall_trace_enter().
|
||||
*/
|
||||
step = unlikely(
|
||||
(cached_flags & (_TIF_SINGLESTEP | _TIF_SYSCALL_EMU))
|
||||
== _TIF_SINGLESTEP);
|
||||
if (step || cached_flags & _TIF_SYSCALL_TRACE)
|
||||
tracehook_report_syscall_exit(regs, step);
|
||||
}
|
||||
|
||||
static void __syscall_return_slowpath(struct pt_regs *regs)
|
||||
{
|
||||
struct thread_info *ti = current_thread_info();
|
||||
u32 cached_flags = READ_ONCE(ti->flags);
|
||||
|
||||
CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
|
||||
|
||||
if (IS_ENABLED(CONFIG_PROVE_LOCKING) &&
|
||||
WARN(irqs_disabled(), "syscall %ld left IRQs disabled", regs->orig_ax))
|
||||
local_irq_enable();
|
||||
|
||||
rseq_syscall(regs);
|
||||
|
||||
/*
|
||||
* First do one-time work. If these work items are enabled, we
|
||||
* want to run them exactly once per syscall exit with IRQs on.
|
||||
*/
|
||||
if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS))
|
||||
syscall_slow_exit_work(regs, cached_flags);
|
||||
|
||||
local_irq_disable();
|
||||
__prepare_exit_to_usermode(regs);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called with IRQs on and fully valid regs. Returns with IRQs off in a
|
||||
* state such that we can immediately switch to user mode.
|
||||
*/
|
||||
__visible noinstr void syscall_return_slowpath(struct pt_regs *regs)
|
||||
{
|
||||
instrumentation_begin();
|
||||
__syscall_return_slowpath(regs);
|
||||
instrumentation_end();
|
||||
exit_to_user_mode();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
__visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
|
||||
{
|
||||
struct thread_info *ti;
|
||||
nr = syscall_enter_from_user_mode(regs, nr);
|
||||
|
||||
check_user_regs(regs);
|
||||
|
||||
enter_from_user_mode();
|
||||
instrumentation_begin();
|
||||
|
||||
local_irq_enable();
|
||||
ti = current_thread_info();
|
||||
if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
|
||||
nr = syscall_trace_enter(regs);
|
||||
|
||||
if (likely(nr < NR_syscalls)) {
|
||||
nr = array_index_nospec(nr, NR_syscalls);
|
||||
regs->ax = sys_call_table[nr](regs);
|
||||
|
@ -390,66 +52,55 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
|
|||
regs->ax = x32_sys_call_table[nr](regs);
|
||||
#endif
|
||||
}
|
||||
__syscall_return_slowpath(regs);
|
||||
|
||||
instrumentation_end();
|
||||
exit_to_user_mode();
|
||||
syscall_exit_to_user_mode(regs);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
|
||||
/*
|
||||
* Does a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. Does
|
||||
* all entry and exit work and returns with IRQs off. This function is
|
||||
* extremely hot in workloads that use it, and it's usually called from
|
||||
* do_fast_syscall_32, so forcibly inline it to improve performance.
|
||||
*/
|
||||
static void do_syscall_32_irqs_on(struct pt_regs *regs)
|
||||
static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs)
|
||||
{
|
||||
struct thread_info *ti = current_thread_info();
|
||||
unsigned int nr = (unsigned int)regs->orig_ax;
|
||||
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
ti->status |= TS_COMPAT;
|
||||
#endif
|
||||
|
||||
if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
|
||||
if (IS_ENABLED(CONFIG_IA32_EMULATION))
|
||||
current_thread_info()->status |= TS_COMPAT;
|
||||
/*
|
||||
* Subtlety here: if ptrace pokes something larger than
|
||||
* 2^32-1 into orig_ax, this truncates it. This may or
|
||||
* may not be necessary, but it matches the old asm
|
||||
* behavior.
|
||||
* Subtlety here: if ptrace pokes something larger than 2^32-1 into
|
||||
* orig_ax, the unsigned int return value truncates it. This may
|
||||
* or may not be necessary, but it matches the old asm behavior.
|
||||
*/
|
||||
nr = syscall_trace_enter(regs);
|
||||
return (unsigned int)syscall_enter_from_user_mode(regs, nr);
|
||||
}
|
||||
|
||||
/*
|
||||
* Invoke a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL.
|
||||
*/
|
||||
static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs,
|
||||
unsigned int nr)
|
||||
{
|
||||
if (likely(nr < IA32_NR_syscalls)) {
|
||||
instrumentation_begin();
|
||||
nr = array_index_nospec(nr, IA32_NR_syscalls);
|
||||
regs->ax = ia32_sys_call_table[nr](regs);
|
||||
instrumentation_end();
|
||||
}
|
||||
|
||||
__syscall_return_slowpath(regs);
|
||||
}
|
||||
|
||||
/* Handles int $0x80 */
|
||||
__visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
|
||||
{
|
||||
check_user_regs(regs);
|
||||
unsigned int nr = syscall_32_enter(regs);
|
||||
|
||||
enter_from_user_mode();
|
||||
instrumentation_begin();
|
||||
|
||||
local_irq_enable();
|
||||
do_syscall_32_irqs_on(regs);
|
||||
|
||||
instrumentation_end();
|
||||
exit_to_user_mode();
|
||||
do_syscall_32_irqs_on(regs, nr);
|
||||
syscall_exit_to_user_mode(regs);
|
||||
}
|
||||
|
||||
static bool __do_fast_syscall_32(struct pt_regs *regs)
|
||||
static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
|
||||
{
|
||||
unsigned int nr = syscall_32_enter(regs);
|
||||
int res;
|
||||
|
||||
instrumentation_begin();
|
||||
/* Fetch EBP from where the vDSO stashed it. */
|
||||
if (IS_ENABLED(CONFIG_X86_64)) {
|
||||
/*
|
||||
|
@ -462,17 +113,18 @@ static bool __do_fast_syscall_32(struct pt_regs *regs)
|
|||
res = get_user(*(u32 *)&regs->bp,
|
||||
(u32 __user __force *)(unsigned long)(u32)regs->sp);
|
||||
}
|
||||
instrumentation_end();
|
||||
|
||||
if (res) {
|
||||
/* User code screwed up. */
|
||||
regs->ax = -EFAULT;
|
||||
local_irq_disable();
|
||||
__prepare_exit_to_usermode(regs);
|
||||
syscall_exit_to_user_mode(regs);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Now this is just like a normal syscall. */
|
||||
do_syscall_32_irqs_on(regs);
|
||||
do_syscall_32_irqs_on(regs, nr);
|
||||
syscall_exit_to_user_mode(regs);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -485,9 +137,6 @@ __visible noinstr long do_fast_syscall_32(struct pt_regs *regs)
|
|||
*/
|
||||
unsigned long landing_pad = (unsigned long)current->mm->context.vdso +
|
||||
vdso_image_32.sym_int80_landing_pad;
|
||||
bool success;
|
||||
|
||||
check_user_regs(regs);
|
||||
|
||||
/*
|
||||
* SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
|
||||
|
@ -496,17 +145,8 @@ __visible noinstr long do_fast_syscall_32(struct pt_regs *regs)
|
|||
*/
|
||||
regs->ip = landing_pad;
|
||||
|
||||
enter_from_user_mode();
|
||||
instrumentation_begin();
|
||||
|
||||
local_irq_enable();
|
||||
success = __do_fast_syscall_32(regs);
|
||||
|
||||
instrumentation_end();
|
||||
exit_to_user_mode();
|
||||
|
||||
/* If it failed, keep it simple: use IRET. */
|
||||
if (!success)
|
||||
/* Invoke the syscall. If it failed, keep it simple: use IRET. */
|
||||
if (!__do_fast_syscall_32(regs))
|
||||
return 0;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
@ -558,204 +198,6 @@ SYSCALL_DEFINE0(ni_syscall)
|
|||
return -ENOSYS;
|
||||
}
|
||||
|
||||
/**
|
||||
* idtentry_enter - Handle state tracking on ordinary idtentries
|
||||
* @regs: Pointer to pt_regs of interrupted context
|
||||
*
|
||||
* Invokes:
|
||||
* - lockdep irqflag state tracking as low level ASM entry disabled
|
||||
* interrupts.
|
||||
*
|
||||
* - Context tracking if the exception hit user mode.
|
||||
*
|
||||
* - The hardirq tracer to keep the state consistent as low level ASM
|
||||
* entry disabled interrupts.
|
||||
*
|
||||
* As a precondition, this requires that the entry came from user mode,
|
||||
* idle, or a kernel context in which RCU is watching.
|
||||
*
|
||||
* For kernel mode entries RCU handling is done conditionally. If RCU is
|
||||
* watching then the only RCU requirement is to check whether the tick has
|
||||
* to be restarted. If RCU is not watching then rcu_irq_enter() has to be
|
||||
* invoked on entry and rcu_irq_exit() on exit.
|
||||
*
|
||||
* Avoiding the rcu_irq_enter/exit() calls is an optimization but also
|
||||
* solves the problem of kernel mode pagefaults which can schedule, which
|
||||
* is not possible after invoking rcu_irq_enter() without undoing it.
|
||||
*
|
||||
* For user mode entries enter_from_user_mode() must be invoked to
|
||||
* establish the proper context for NOHZ_FULL. Otherwise scheduling on exit
|
||||
* would not be possible.
|
||||
*
|
||||
* Returns: An opaque object that must be passed to idtentry_exit()
|
||||
*
|
||||
* The return value must be fed into the state argument of
|
||||
* idtentry_exit().
|
||||
*/
|
||||
noinstr idtentry_state_t idtentry_enter(struct pt_regs *regs)
|
||||
{
|
||||
idtentry_state_t ret = {
|
||||
.exit_rcu = false,
|
||||
};
|
||||
|
||||
if (user_mode(regs)) {
|
||||
check_user_regs(regs);
|
||||
enter_from_user_mode();
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* If this entry hit the idle task invoke rcu_irq_enter() whether
|
||||
* RCU is watching or not.
|
||||
*
|
||||
* Interrupts can nest when the first interrupt invokes softirq
|
||||
* processing on return which enables interrupts.
|
||||
*
|
||||
* Scheduler ticks in the idle task can mark quiescent state and
|
||||
* terminate a grace period, if and only if the timer interrupt is
|
||||
* not nested into another interrupt.
|
||||
*
|
||||
* Checking for __rcu_is_watching() here would prevent the nesting
|
||||
* interrupt to invoke rcu_irq_enter(). If that nested interrupt is
|
||||
* the tick then rcu_flavor_sched_clock_irq() would wrongfully
|
||||
* assume that it is the first interrupt and eventually claim
|
||||
* quiescent state and end grace periods prematurely.
|
||||
*
|
||||
* Unconditionally invoke rcu_irq_enter() so RCU state stays
|
||||
* consistent.
|
||||
*
|
||||
* TINY_RCU does not support EQS, so let the compiler eliminate
|
||||
* this part when enabled.
|
||||
*/
|
||||
if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) {
|
||||
/*
|
||||
* If RCU is not watching then the same careful
|
||||
* sequence vs. lockdep and tracing is required
|
||||
* as in enter_from_user_mode().
|
||||
*/
|
||||
lockdep_hardirqs_off(CALLER_ADDR0);
|
||||
rcu_irq_enter();
|
||||
instrumentation_begin();
|
||||
trace_hardirqs_off_finish();
|
||||
instrumentation_end();
|
||||
|
||||
ret.exit_rcu = true;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* If RCU is watching then RCU only wants to check whether it needs
|
||||
* to restart the tick in NOHZ mode. rcu_irq_enter_check_tick()
|
||||
* already contains a warning when RCU is not watching, so no point
|
||||
* in having another one here.
|
||||
*/
|
||||
instrumentation_begin();
|
||||
rcu_irq_enter_check_tick();
|
||||
/* Use the combo lockdep/tracing function */
|
||||
trace_hardirqs_off();
|
||||
instrumentation_end();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void idtentry_exit_cond_resched(struct pt_regs *regs, bool may_sched)
|
||||
{
|
||||
if (may_sched && !preempt_count()) {
|
||||
/* Sanity check RCU and thread stack */
|
||||
rcu_irq_exit_check_preempt();
|
||||
if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
|
||||
WARN_ON_ONCE(!on_thread_stack());
|
||||
if (need_resched())
|
||||
preempt_schedule_irq();
|
||||
}
|
||||
/* Covers both tracing and lockdep */
|
||||
trace_hardirqs_on();
|
||||
}
|
||||
|
||||
/**
|
||||
* idtentry_exit - Handle return from exception that used idtentry_enter()
|
||||
* @regs: Pointer to pt_regs (exception entry regs)
|
||||
* @state: Return value from matching call to idtentry_enter()
|
||||
*
|
||||
* Depending on the return target (kernel/user) this runs the necessary
|
||||
* preemption and work checks if possible and required and returns to
|
||||
* the caller with interrupts disabled and no further work pending.
|
||||
*
|
||||
* This is the last action before returning to the low level ASM code which
|
||||
* just needs to return to the appropriate context.
|
||||
*
|
||||
* Counterpart to idtentry_enter(). The return value of the entry
|
||||
* function must be fed into the @state argument.
|
||||
*/
|
||||
noinstr void idtentry_exit(struct pt_regs *regs, idtentry_state_t state)
|
||||
{
|
||||
lockdep_assert_irqs_disabled();
|
||||
|
||||
/* Check whether this returns to user mode */
|
||||
if (user_mode(regs)) {
|
||||
prepare_exit_to_usermode(regs);
|
||||
} else if (regs->flags & X86_EFLAGS_IF) {
|
||||
/*
|
||||
* If RCU was not watching on entry this needs to be done
|
||||
* carefully and needs the same ordering of lockdep/tracing
|
||||
* and RCU as the return to user mode path.
|
||||
*/
|
||||
if (state.exit_rcu) {
|
||||
instrumentation_begin();
|
||||
/* Tell the tracer that IRET will enable interrupts */
|
||||
trace_hardirqs_on_prepare();
|
||||
lockdep_hardirqs_on_prepare(CALLER_ADDR0);
|
||||
instrumentation_end();
|
||||
rcu_irq_exit();
|
||||
lockdep_hardirqs_on(CALLER_ADDR0);
|
||||
return;
|
||||
}
|
||||
|
||||
instrumentation_begin();
|
||||
idtentry_exit_cond_resched(regs, IS_ENABLED(CONFIG_PREEMPTION));
|
||||
instrumentation_end();
|
||||
} else {
|
||||
/*
|
||||
* IRQ flags state is correct already. Just tell RCU if it
|
||||
* was not watching on entry.
|
||||
*/
|
||||
if (state.exit_rcu)
|
||||
rcu_irq_exit();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* idtentry_enter_user - Handle state tracking on idtentry from user mode
|
||||
* @regs: Pointer to pt_regs of interrupted context
|
||||
*
|
||||
* Invokes enter_from_user_mode() to establish the proper context for
|
||||
* NOHZ_FULL. Otherwise scheduling on exit would not be possible.
|
||||
*/
|
||||
noinstr void idtentry_enter_user(struct pt_regs *regs)
|
||||
{
|
||||
check_user_regs(regs);
|
||||
enter_from_user_mode();
|
||||
}
|
||||
|
||||
/**
|
||||
* idtentry_exit_user - Handle return from exception to user mode
|
||||
* @regs: Pointer to pt_regs (exception entry regs)
|
||||
*
|
||||
* Runs the necessary preemption and work checks and returns to the caller
|
||||
* with interrupts disabled and no further work pending.
|
||||
*
|
||||
* This is the last action before returning to the low level ASM code which
|
||||
* just needs to return to the appropriate context.
|
||||
*
|
||||
* Counterpart to idtentry_enter_user().
|
||||
*/
|
||||
noinstr void idtentry_exit_user(struct pt_regs *regs)
|
||||
{
|
||||
lockdep_assert_irqs_disabled();
|
||||
|
||||
prepare_exit_to_usermode(regs);
|
||||
}
|
||||
|
||||
noinstr bool idtentry_enter_nmi(struct pt_regs *regs)
|
||||
{
|
||||
bool irq_state = lockdep_hardirqs_enabled();
|
||||
|
@ -840,9 +282,9 @@ __visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs)
|
|||
{
|
||||
struct pt_regs *old_regs;
|
||||
bool inhcall;
|
||||
idtentry_state_t state;
|
||||
irqentry_state_t state;
|
||||
|
||||
state = idtentry_enter(regs);
|
||||
state = irqentry_enter(regs);
|
||||
old_regs = set_irq_regs(regs);
|
||||
|
||||
instrumentation_begin();
|
||||
|
@ -854,11 +296,11 @@ __visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs)
|
|||
inhcall = get_and_clear_inhcall();
|
||||
if (inhcall && !WARN_ON_ONCE(state.exit_rcu)) {
|
||||
instrumentation_begin();
|
||||
idtentry_exit_cond_resched(regs, true);
|
||||
irqentry_exit_cond_resched();
|
||||
instrumentation_end();
|
||||
restore_inhcall(inhcall);
|
||||
} else {
|
||||
idtentry_exit(regs, state);
|
||||
irqentry_exit(regs, state);
|
||||
}
|
||||
}
|
||||
#endif /* CONFIG_XEN_PV */
|
||||
|
|
|
@ -846,7 +846,7 @@ SYM_CODE_START(ret_from_fork)
|
|||
2:
|
||||
/* When we fork, we trace the syscall return in the child, too. */
|
||||
movl %esp, %eax
|
||||
call syscall_return_slowpath
|
||||
call syscall_exit_to_user_mode
|
||||
jmp .Lsyscall_32_done
|
||||
|
||||
/* kernel thread */
|
||||
|
|
|
@ -283,7 +283,7 @@ SYM_CODE_START(ret_from_fork)
|
|||
2:
|
||||
UNWIND_HINT_REGS
|
||||
movq %rsp, %rdi
|
||||
call syscall_return_slowpath /* returns with IRQs disabled */
|
||||
call syscall_exit_to_user_mode /* returns with IRQs disabled */
|
||||
jmp swapgs_restore_regs_and_return_to_usermode
|
||||
|
||||
1:
|
||||
|
|
|
@ -0,0 +1,76 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
#ifndef _ASM_X86_ENTRY_COMMON_H
|
||||
#define _ASM_X86_ENTRY_COMMON_H
|
||||
|
||||
#include <linux/user-return-notifier.h>
|
||||
|
||||
#include <asm/nospec-branch.h>
|
||||
#include <asm/io_bitmap.h>
|
||||
#include <asm/fpu/api.h>
|
||||
|
||||
/* Check that the stack and regs on entry from user mode are sane. */
|
||||
static __always_inline void arch_check_user_regs(struct pt_regs *regs)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) {
|
||||
/*
|
||||
* Make sure that the entry code gave us a sensible EFLAGS
|
||||
* register. Native because we want to check the actual CPU
|
||||
* state, not the interrupt state as imagined by Xen.
|
||||
*/
|
||||
unsigned long flags = native_save_fl();
|
||||
WARN_ON_ONCE(flags & (X86_EFLAGS_AC | X86_EFLAGS_DF |
|
||||
X86_EFLAGS_NT));
|
||||
|
||||
/* We think we came from user mode. Make sure pt_regs agrees. */
|
||||
WARN_ON_ONCE(!user_mode(regs));
|
||||
|
||||
/*
|
||||
* All entries from user mode (except #DF) should be on the
|
||||
* normal thread stack and should have user pt_regs in the
|
||||
* correct location.
|
||||
*/
|
||||
WARN_ON_ONCE(!on_thread_stack());
|
||||
WARN_ON_ONCE(regs != task_pt_regs(current));
|
||||
}
|
||||
}
|
||||
#define arch_check_user_regs arch_check_user_regs
|
||||
|
||||
#define ARCH_SYSCALL_EXIT_WORK (_TIF_SINGLESTEP)
|
||||
|
||||
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
|
||||
unsigned long ti_work)
|
||||
{
|
||||
if (ti_work & _TIF_USER_RETURN_NOTIFY)
|
||||
fire_user_return_notifiers();
|
||||
|
||||
if (unlikely(ti_work & _TIF_IO_BITMAP))
|
||||
tss_update_io_bitmap();
|
||||
|
||||
fpregs_assert_state_consistent();
|
||||
if (unlikely(ti_work & _TIF_NEED_FPU_LOAD))
|
||||
switch_fpu_return();
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
/*
|
||||
* Compat syscalls set TS_COMPAT. Make sure we clear it before
|
||||
* returning to user mode. We need to clear it *after* signal
|
||||
* handling, because syscall restart has a fixup for compat
|
||||
* syscalls. The fixup is exercised by the ptrace_syscall_32
|
||||
* selftest.
|
||||
*
|
||||
* We also need to clear TS_I386_REGS_POKED: the 32-bit tracer
|
||||
* special case only applies after poking regs and before the
|
||||
* very next return to user mode.
|
||||
*/
|
||||
current_thread_info()->status &= ~(TS_COMPAT | TS_I386_REGS_POKED);
|
||||
#endif
|
||||
}
|
||||
#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
|
||||
|
||||
static __always_inline void arch_exit_to_user_mode(void)
|
||||
{
|
||||
mds_user_clear_cpu_buffers();
|
||||
}
|
||||
#define arch_exit_to_user_mode arch_exit_to_user_mode
|
||||
|
||||
#endif
|
|
@ -6,20 +6,11 @@
|
|||
#include <asm/trapnr.h>
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
#include <linux/entry-common.h>
|
||||
#include <linux/hardirq.h>
|
||||
|
||||
#include <asm/irq_stack.h>
|
||||
|
||||
void idtentry_enter_user(struct pt_regs *regs);
|
||||
void idtentry_exit_user(struct pt_regs *regs);
|
||||
|
||||
typedef struct idtentry_state {
|
||||
bool exit_rcu;
|
||||
} idtentry_state_t;
|
||||
|
||||
idtentry_state_t idtentry_enter(struct pt_regs *regs);
|
||||
void idtentry_exit(struct pt_regs *regs, idtentry_state_t state);
|
||||
|
||||
bool idtentry_enter_nmi(struct pt_regs *regs);
|
||||
void idtentry_exit_nmi(struct pt_regs *regs, bool irq_state);
|
||||
|
||||
|
@ -52,8 +43,8 @@ void idtentry_exit_nmi(struct pt_regs *regs, bool irq_state);
|
|||
* The macro is written so it acts as function definition. Append the
|
||||
* body with a pair of curly brackets.
|
||||
*
|
||||
* idtentry_enter() contains common code which has to be invoked before
|
||||
* arbitrary code in the body. idtentry_exit() contains common code
|
||||
* irqentry_enter() contains common code which has to be invoked before
|
||||
* arbitrary code in the body. irqentry_exit() contains common code
|
||||
* which has to run before returning to the low level assembly code.
|
||||
*/
|
||||
#define DEFINE_IDTENTRY(func) \
|
||||
|
@ -61,12 +52,12 @@ static __always_inline void __##func(struct pt_regs *regs); \
|
|||
\
|
||||
__visible noinstr void func(struct pt_regs *regs) \
|
||||
{ \
|
||||
idtentry_state_t state = idtentry_enter(regs); \
|
||||
irqentry_state_t state = irqentry_enter(regs); \
|
||||
\
|
||||
instrumentation_begin(); \
|
||||
__##func (regs); \
|
||||
instrumentation_end(); \
|
||||
idtentry_exit(regs, state); \
|
||||
irqentry_exit(regs, state); \
|
||||
} \
|
||||
\
|
||||
static __always_inline void __##func(struct pt_regs *regs)
|
||||
|
@ -108,12 +99,12 @@ static __always_inline void __##func(struct pt_regs *regs, \
|
|||
__visible noinstr void func(struct pt_regs *regs, \
|
||||
unsigned long error_code) \
|
||||
{ \
|
||||
idtentry_state_t state = idtentry_enter(regs); \
|
||||
irqentry_state_t state = irqentry_enter(regs); \
|
||||
\
|
||||
instrumentation_begin(); \
|
||||
__##func (regs, error_code); \
|
||||
instrumentation_end(); \
|
||||
idtentry_exit(regs, state); \
|
||||
irqentry_exit(regs, state); \
|
||||
} \
|
||||
\
|
||||
static __always_inline void __##func(struct pt_regs *regs, \
|
||||
|
@ -168,7 +159,7 @@ __visible noinstr void func(struct pt_regs *regs)
|
|||
* body with a pair of curly brackets.
|
||||
*
|
||||
* Contrary to DEFINE_IDTENTRY_ERRORCODE() this does not invoke the
|
||||
* idtentry_enter/exit() helpers before and after the body invocation. This
|
||||
* irqentry_enter/exit() helpers before and after the body invocation. This
|
||||
* needs to be done in the body itself if applicable. Use if extra work
|
||||
* is required before the enter/exit() helpers are invoked.
|
||||
*/
|
||||
|
@ -194,11 +185,9 @@ __visible noinstr void func(struct pt_regs *regs, unsigned long error_code)
|
|||
* to the function as error_code argument which needs to be truncated
|
||||
* to an u8 because the push is sign extending.
|
||||
*
|
||||
* On 64-bit idtentry_enter/exit() are invoked in the ASM entry code before
|
||||
* and after switching to the interrupt stack. On 32-bit this happens in C.
|
||||
*
|
||||
* irq_enter/exit_rcu() are invoked before the function body and the
|
||||
* KVM L1D flush request is set.
|
||||
* KVM L1D flush request is set. Stack switching to the interrupt stack
|
||||
* has to be done in the function body if necessary.
|
||||
*/
|
||||
#define DEFINE_IDTENTRY_IRQ(func) \
|
||||
static __always_inline void __##func(struct pt_regs *regs, u8 vector); \
|
||||
|
@ -206,7 +195,7 @@ static __always_inline void __##func(struct pt_regs *regs, u8 vector); \
|
|||
__visible noinstr void func(struct pt_regs *regs, \
|
||||
unsigned long error_code) \
|
||||
{ \
|
||||
idtentry_state_t state = idtentry_enter(regs); \
|
||||
irqentry_state_t state = irqentry_enter(regs); \
|
||||
\
|
||||
instrumentation_begin(); \
|
||||
irq_enter_rcu(); \
|
||||
|
@ -214,7 +203,7 @@ __visible noinstr void func(struct pt_regs *regs, \
|
|||
__##func (regs, (u8)error_code); \
|
||||
irq_exit_rcu(); \
|
||||
instrumentation_end(); \
|
||||
idtentry_exit(regs, state); \
|
||||
irqentry_exit(regs, state); \
|
||||
} \
|
||||
\
|
||||
static __always_inline void __##func(struct pt_regs *regs, u8 vector)
|
||||
|
@ -238,7 +227,7 @@ static __always_inline void __##func(struct pt_regs *regs, u8 vector)
|
|||
* DEFINE_IDTENTRY_SYSVEC - Emit code for system vector IDT entry points
|
||||
* @func: Function name of the entry point
|
||||
*
|
||||
* idtentry_enter/exit() and irq_enter/exit_rcu() are invoked before the
|
||||
* irqentry_enter/exit() and irq_enter/exit_rcu() are invoked before the
|
||||
* function body. KVM L1D flush request is set.
|
||||
*
|
||||
* Runs the function on the interrupt stack if the entry hit kernel mode
|
||||
|
@ -248,7 +237,7 @@ static void __##func(struct pt_regs *regs); \
|
|||
\
|
||||
__visible noinstr void func(struct pt_regs *regs) \
|
||||
{ \
|
||||
idtentry_state_t state = idtentry_enter(regs); \
|
||||
irqentry_state_t state = irqentry_enter(regs); \
|
||||
\
|
||||
instrumentation_begin(); \
|
||||
irq_enter_rcu(); \
|
||||
|
@ -256,7 +245,7 @@ __visible noinstr void func(struct pt_regs *regs) \
|
|||
run_on_irqstack_cond(__##func, regs, regs); \
|
||||
irq_exit_rcu(); \
|
||||
instrumentation_end(); \
|
||||
idtentry_exit(regs, state); \
|
||||
irqentry_exit(regs, state); \
|
||||
} \
|
||||
\
|
||||
static noinline void __##func(struct pt_regs *regs)
|
||||
|
@ -277,7 +266,7 @@ static __always_inline void __##func(struct pt_regs *regs); \
|
|||
\
|
||||
__visible noinstr void func(struct pt_regs *regs) \
|
||||
{ \
|
||||
idtentry_state_t state = idtentry_enter(regs); \
|
||||
irqentry_state_t state = irqentry_enter(regs); \
|
||||
\
|
||||
instrumentation_begin(); \
|
||||
__irq_enter_raw(); \
|
||||
|
@ -285,7 +274,7 @@ __visible noinstr void func(struct pt_regs *regs) \
|
|||
__##func (regs); \
|
||||
__irq_exit_raw(); \
|
||||
instrumentation_end(); \
|
||||
idtentry_exit(regs, state); \
|
||||
irqentry_exit(regs, state); \
|
||||
} \
|
||||
\
|
||||
static __always_inline void __##func(struct pt_regs *regs)
|
||||
|
|
|
@ -209,6 +209,11 @@ static inline void user_stack_pointer_set(struct pt_regs *regs,
|
|||
regs->sp = val;
|
||||
}
|
||||
|
||||
static __always_inline bool regs_irqs_disabled(struct pt_regs *regs)
|
||||
{
|
||||
return !(regs->flags & X86_EFLAGS_IF);
|
||||
}
|
||||
|
||||
/* Query offset/name of register from its name/offset */
|
||||
extern int regs_query_register_offset(const char *name);
|
||||
extern const char *regs_query_register_name(unsigned int offset);
|
||||
|
|
|
@ -35,7 +35,6 @@ typedef sigset_t compat_sigset_t;
|
|||
#endif /* __ASSEMBLY__ */
|
||||
#include <uapi/asm/signal.h>
|
||||
#ifndef __ASSEMBLY__
|
||||
extern void do_signal(struct pt_regs *regs);
|
||||
|
||||
#define __ARCH_HAS_SA_RESTORER
|
||||
|
||||
|
|
|
@ -133,11 +133,6 @@ struct thread_info {
|
|||
#define _TIF_X32 (1 << TIF_X32)
|
||||
#define _TIF_FSCHECK (1 << TIF_FSCHECK)
|
||||
|
||||
/* Work to do before invoking the actual syscall. */
|
||||
#define _TIF_WORK_SYSCALL_ENTRY \
|
||||
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \
|
||||
_TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT)
|
||||
|
||||
/* flags to check in __switch_to() */
|
||||
#define _TIF_WORK_CTXSW_BASE \
|
||||
(_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP | \
|
||||
|
|
|
@ -1047,7 +1047,7 @@ static __always_inline int patch_cmp(const void *key, const void *elt)
|
|||
return 0;
|
||||
}
|
||||
|
||||
int noinstr poke_int3_handler(struct pt_regs *regs)
|
||||
noinstr int poke_int3_handler(struct pt_regs *regs)
|
||||
{
|
||||
struct bp_patching_desc *desc;
|
||||
struct text_poke_loc *tp;
|
||||
|
|
|
@ -1215,7 +1215,7 @@ static void kill_me_maybe(struct callback_head *cb)
|
|||
* backing the user stack, tracing that reads the user stack will cause
|
||||
* potentially infinite recursion.
|
||||
*/
|
||||
void noinstr do_machine_check(struct pt_regs *regs)
|
||||
noinstr void do_machine_check(struct pt_regs *regs)
|
||||
{
|
||||
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
|
||||
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
|
||||
|
@ -1930,11 +1930,11 @@ static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
|
|||
|
||||
static __always_inline void exc_machine_check_user(struct pt_regs *regs)
|
||||
{
|
||||
idtentry_enter_user(regs);
|
||||
irqentry_enter_from_user_mode(regs);
|
||||
instrumentation_begin();
|
||||
machine_check_vector(regs);
|
||||
instrumentation_end();
|
||||
idtentry_exit_user(regs);
|
||||
irqentry_exit_to_user_mode(regs);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
|
|
@ -233,7 +233,7 @@ EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags);
|
|||
noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
|
||||
{
|
||||
u32 reason = kvm_read_and_reset_apf_flags();
|
||||
idtentry_state_t state;
|
||||
irqentry_state_t state;
|
||||
|
||||
switch (reason) {
|
||||
case KVM_PV_REASON_PAGE_NOT_PRESENT:
|
||||
|
@ -243,7 +243,7 @@ noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
|
|||
return false;
|
||||
}
|
||||
|
||||
state = idtentry_enter(regs);
|
||||
state = irqentry_enter(regs);
|
||||
instrumentation_begin();
|
||||
|
||||
/*
|
||||
|
@ -264,7 +264,7 @@ noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
|
|||
}
|
||||
|
||||
instrumentation_end();
|
||||
idtentry_exit(regs, state);
|
||||
irqentry_exit(regs, state);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include <linux/user-return-notifier.h>
|
||||
#include <linux/uprobes.h>
|
||||
#include <linux/context_tracking.h>
|
||||
#include <linux/entry-common.h>
|
||||
#include <linux/syscalls.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
|
@ -803,7 +804,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
|
|||
* want to handle. Thus you cannot kill init even with a SIGKILL even by
|
||||
* mistake.
|
||||
*/
|
||||
void do_signal(struct pt_regs *regs)
|
||||
void arch_do_signal(struct pt_regs *regs)
|
||||
{
|
||||
struct ksignal ksig;
|
||||
|
||||
|
|
|
@ -245,7 +245,7 @@ static noinstr bool handle_bug(struct pt_regs *regs)
|
|||
|
||||
DEFINE_IDTENTRY_RAW(exc_invalid_op)
|
||||
{
|
||||
idtentry_state_t state;
|
||||
irqentry_state_t state;
|
||||
|
||||
/*
|
||||
* We use UD2 as a short encoding for 'CALL __WARN', as such
|
||||
|
@ -255,11 +255,11 @@ DEFINE_IDTENTRY_RAW(exc_invalid_op)
|
|||
if (!user_mode(regs) && handle_bug(regs))
|
||||
return;
|
||||
|
||||
state = idtentry_enter(regs);
|
||||
state = irqentry_enter(regs);
|
||||
instrumentation_begin();
|
||||
handle_invalid_op(regs);
|
||||
instrumentation_end();
|
||||
idtentry_exit(regs, state);
|
||||
irqentry_exit(regs, state);
|
||||
}
|
||||
|
||||
DEFINE_IDTENTRY(exc_coproc_segment_overrun)
|
||||
|
@ -638,18 +638,18 @@ DEFINE_IDTENTRY_RAW(exc_int3)
|
|||
return;
|
||||
|
||||
/*
|
||||
* idtentry_enter_user() uses static_branch_{,un}likely() and therefore
|
||||
* can trigger INT3, hence poke_int3_handler() must be done
|
||||
* before. If the entry came from kernel mode, then use nmi_enter()
|
||||
* because the INT3 could have been hit in any context including
|
||||
* NMI.
|
||||
* irqentry_enter_from_user_mode() uses static_branch_{,un}likely()
|
||||
* and therefore can trigger INT3, hence poke_int3_handler() must
|
||||
* be done before. If the entry came from kernel mode, then use
|
||||
* nmi_enter() because the INT3 could have been hit in any context
|
||||
* including NMI.
|
||||
*/
|
||||
if (user_mode(regs)) {
|
||||
idtentry_enter_user(regs);
|
||||
irqentry_enter_from_user_mode(regs);
|
||||
instrumentation_begin();
|
||||
do_int3_user(regs);
|
||||
instrumentation_end();
|
||||
idtentry_exit_user(regs);
|
||||
irqentry_exit_to_user_mode(regs);
|
||||
} else {
|
||||
bool irq_state = idtentry_enter_nmi(regs);
|
||||
instrumentation_begin();
|
||||
|
@ -895,13 +895,13 @@ static __always_inline void exc_debug_user(struct pt_regs *regs,
|
|||
*/
|
||||
WARN_ON_ONCE(!user_mode(regs));
|
||||
|
||||
idtentry_enter_user(regs);
|
||||
irqentry_enter_from_user_mode(regs);
|
||||
instrumentation_begin();
|
||||
|
||||
handle_debug(regs, dr6, true);
|
||||
|
||||
instrumentation_end();
|
||||
idtentry_exit_user(regs);
|
||||
irqentry_exit_to_user_mode(regs);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
|
|
@ -42,6 +42,7 @@ config KVM
|
|||
select HAVE_KVM_MSI
|
||||
select HAVE_KVM_CPU_RELAX_INTERCEPT
|
||||
select HAVE_KVM_NO_POLL
|
||||
select KVM_XFER_TO_GUEST_WORK
|
||||
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
|
||||
select KVM_VFIO
|
||||
select SRCU
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
#include <linux/slab.h>
|
||||
#include <linux/tboot.h>
|
||||
#include <linux/trace_events.h>
|
||||
#include <linux/entry-kvm.h>
|
||||
|
||||
#include <asm/apic.h>
|
||||
#include <asm/asm.h>
|
||||
|
@ -5373,14 +5374,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
|
||||
/*
|
||||
* Note, return 1 and not 0, vcpu_run() is responsible for
|
||||
* morphing the pending signal into the proper return code.
|
||||
* Note, return 1 and not 0, vcpu_run() will invoke
|
||||
* xfer_to_guest_mode() which will create a proper return
|
||||
* code.
|
||||
*/
|
||||
if (signal_pending(current))
|
||||
if (__xfer_to_guest_mode_work_pending())
|
||||
return 1;
|
||||
|
||||
if (need_resched())
|
||||
schedule();
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
|
|
@ -56,6 +56,7 @@
|
|||
#include <linux/sched/stat.h>
|
||||
#include <linux/sched/isolation.h>
|
||||
#include <linux/mem_encrypt.h>
|
||||
#include <linux/entry-kvm.h>
|
||||
|
||||
#include <trace/events/kvm.h>
|
||||
|
||||
|
@ -1587,7 +1588,7 @@ EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
|
|||
bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) ||
|
||||
need_resched() || signal_pending(current);
|
||||
xfer_to_guest_mode_work_pending();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_vcpu_exit_request);
|
||||
|
||||
|
@ -8681,15 +8682,11 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
|
|||
break;
|
||||
}
|
||||
|
||||
if (signal_pending(current)) {
|
||||
r = -EINTR;
|
||||
vcpu->run->exit_reason = KVM_EXIT_INTR;
|
||||
++vcpu->stat.signal_exits;
|
||||
break;
|
||||
}
|
||||
if (need_resched()) {
|
||||
if (__xfer_to_guest_mode_work_pending()) {
|
||||
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
|
||||
cond_resched();
|
||||
r = xfer_to_guest_mode_handle_work(vcpu);
|
||||
if (r)
|
||||
return r;
|
||||
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1377,7 +1377,7 @@ handle_page_fault(struct pt_regs *regs, unsigned long error_code,
|
|||
DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
|
||||
{
|
||||
unsigned long address = read_cr2();
|
||||
idtentry_state_t state;
|
||||
irqentry_state_t state;
|
||||
|
||||
prefetchw(&current->mm->mmap_lock);
|
||||
|
||||
|
@ -1412,11 +1412,11 @@ DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
|
|||
* code reenabled RCU to avoid subsequent wreckage which helps
|
||||
* debuggability.
|
||||
*/
|
||||
state = idtentry_enter(regs);
|
||||
state = irqentry_enter(regs);
|
||||
|
||||
instrumentation_begin();
|
||||
handle_page_fault(regs, error_code, address);
|
||||
instrumentation_end();
|
||||
|
||||
idtentry_exit(regs, state);
|
||||
irqentry_exit(regs, state);
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче