Merge branch 'rcu/idle' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu into core/rcu
Pull the RCU adaptive-idle feature from Paul E. McKenney:

 "This series adds RCU APIs that allow non-idle tasks to enter RCU idle
  mode and provides x86 code to make use of them, allowing RCU to treat
  user-mode execution as an extended quiescent state when the new
  RCU_USER_QS kernel configuration parameter is specified.  Work is in
  progress to port this to a few other architectures, but is not part
  of this series."

Signed-off-by: Ingo Molnar <mingo@kernel.org>

This commit is contained in: fa34da708c
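For orientation, a minimal sketch of how the new API pair is meant to be used (illustrative only; the two arch hook names below are hypothetical and not part of this series): an architecture that selects HAVE_RCU_USER_QS calls rcu_user_enter() on its last kernel path before resuming userspace and rcu_user_exit() on every kernel entry, so RCU can treat the whole user-mode interval as an extended quiescent state and stop depending on that CPU's timer tick.

/*
 * Sketch only -- assumes a hypothetical architecture slow path driven by
 * the TIF_NOHZ flag, as the arch/Kconfig help text below describes.
 */
#include <linux/rcupdate.h>

static void arch_enter_from_user(void)   /* hypothetical kernel-entry hook */
{
        rcu_user_exit();        /* userspace EQS ends: RCU watches this CPU again */
        /* ...syscall or exception handling; rcu_read_lock() is legal here... */
}

static void arch_return_to_user(void)    /* hypothetical return-to-user hook */
{
        rcu_user_enter();       /* userspace EQS begins: no RCU read-side use past this point */
        /* ...restore registers and return to userspace... */
}
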
arch/Kconfig | 10

@@ -281,4 +281,14 @@ config SECCOMP_FILTER
           See Documentation/prctl/seccomp_filter.txt for details.
 
+config HAVE_RCU_USER_QS
+        bool
+        help
+          Provide kernel entry/exit hooks necessary for userspace
+          RCU extended quiescent state. Syscalls need to be wrapped inside
+          rcu_user_exit()-rcu_user_enter() through the slow path using
+          TIF_NOHZ flag. Exceptions handlers must be wrapped as well. Irqs
+          are already protected inside rcu_irq_enter/rcu_irq_exit() but
+          preemption or signal handling on irq exit still need to be protected.
+
 source "kernel/gcov/Kconfig"

@@ -705,6 +705,7 @@ static void stack_proc(void *arg)
         struct task_struct *from = current, *to = arg;
 
         to->thread.saved_task = from;
+        rcu_switch(from, to);
         switch_to(from, to, from);
 }
 

@@ -97,6 +97,7 @@ config X86
         select KTIME_SCALAR if X86_32
         select GENERIC_STRNCPY_FROM_USER
         select GENERIC_STRNLEN_USER
+        select HAVE_RCU_USER_QS if X86_64
 
 config INSTRUCTION_DECODER
         def_bool (KPROBES || PERF_EVENTS || UPROBES)

@@ -0,0 +1,32 @@
+#ifndef _ASM_X86_RCU_H
+#define _ASM_X86_RCU_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/rcupdate.h>
+#include <asm/ptrace.h>
+
+static inline void exception_enter(struct pt_regs *regs)
+{
+        rcu_user_exit();
+}
+
+static inline void exception_exit(struct pt_regs *regs)
+{
+#ifdef CONFIG_RCU_USER_QS
+        if (user_mode(regs))
+                rcu_user_enter();
+#endif
+}
+
+#else /* __ASSEMBLY__ */
+
+#ifdef CONFIG_RCU_USER_QS
+# define SCHEDULE_USER call schedule_user
+#else
+# define SCHEDULE_USER call schedule
+#endif
+
+#endif /* !__ASSEMBLY__ */
+
+#endif

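Usage note: the exception_enter()/exception_exit() helpers above are the wrappers that the traps.c and fault.c hunks further down place around each handler body. A hedged sketch of that shape (the handler name here is illustrative, not from the series):

dotraplinkage void do_example_trap(struct pt_regs *regs, long error_code)
{
        exception_enter(regs);  /* leave the userspace EQS if we came from user mode */
        /* ...normal trap handling; RCU read-side critical sections are allowed... */
        exception_exit(regs);   /* re-enter the userspace EQS only if user_mode(regs) */
}
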
@@ -89,6 +89,7 @@ struct thread_info {
 #define TIF_NOTSC 16 /* TSC is not accessible in userland */
 #define TIF_IA32 17 /* IA32 compatibility process */
 #define TIF_FORK 18 /* ret_from_fork */
+#define TIF_NOHZ 19 /* in adaptive nohz mode */
 #define TIF_MEMDIE 20 /* is terminating due to OOM killer */
 #define TIF_DEBUG 21 /* uses debug registers */
 #define TIF_IO_BITMAP 22 /* uses I/O bitmap */

@@ -114,6 +115,7 @@ struct thread_info {
 #define _TIF_NOTSC (1 << TIF_NOTSC)
 #define _TIF_IA32 (1 << TIF_IA32)
 #define _TIF_FORK (1 << TIF_FORK)
+#define _TIF_NOHZ (1 << TIF_NOHZ)
 #define _TIF_DEBUG (1 << TIF_DEBUG)
 #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
 #define _TIF_FORCED_TF (1 << TIF_FORCED_TF)

@@ -126,12 +128,13 @@ struct thread_info {
 /* work to do in syscall_trace_enter() */
 #define _TIF_WORK_SYSCALL_ENTRY \
         (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \
-         _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)
+         _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \
+         _TIF_NOHZ)
 
 /* work to do in syscall_trace_leave() */
 #define _TIF_WORK_SYSCALL_EXIT \
         (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \
-         _TIF_SYSCALL_TRACEPOINT)
+         _TIF_SYSCALL_TRACEPOINT | _TIF_NOHZ)
 
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK \

@@ -141,7 +144,8 @@ struct thread_info {
 
 /* work to do on any return to user space */
 #define _TIF_ALLWORK_MASK \
-        ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT)
+        ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT | \
+         _TIF_NOHZ)
 
 /* Only used for 64 bit */
 #define _TIF_DO_NOTIFY_MASK \

@@ -56,6 +56,7 @@
 #include <asm/ftrace.h>
 #include <asm/percpu.h>
 #include <asm/asm.h>
+#include <asm/rcu.h>
 #include <linux/err.h>
 
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */

@@ -565,7 +566,7 @@ sysret_careful:
         TRACE_IRQS_ON
         ENABLE_INTERRUPTS(CLBR_NONE)
         pushq_cfi %rdi
-        call schedule
+        SCHEDULE_USER
         popq_cfi %rdi
         jmp sysret_check
 

@@ -678,7 +679,7 @@ int_careful:
         TRACE_IRQS_ON
         ENABLE_INTERRUPTS(CLBR_NONE)
         pushq_cfi %rdi
-        call schedule
+        SCHEDULE_USER
         popq_cfi %rdi
         DISABLE_INTERRUPTS(CLBR_NONE)
         TRACE_IRQS_OFF

@@ -974,7 +975,7 @@ retint_careful:
         TRACE_IRQS_ON
         ENABLE_INTERRUPTS(CLBR_NONE)
         pushq_cfi %rdi
-        call schedule
+        SCHEDULE_USER
         popq_cfi %rdi
         GET_THREAD_INFO(%rcx)
         DISABLE_INTERRUPTS(CLBR_NONE)

@@ -1449,7 +1450,7 @@ paranoid_userspace:
 paranoid_schedule:
         TRACE_IRQS_ON
         ENABLE_INTERRUPTS(CLBR_ANY)
-        call schedule
+        SCHEDULE_USER
         DISABLE_INTERRUPTS(CLBR_ANY)
         TRACE_IRQS_OFF
         jmp paranoid_userspace

@@ -21,6 +21,7 @@
 #include <linux/signal.h>
 #include <linux/perf_event.h>
 #include <linux/hw_breakpoint.h>
+#include <linux/rcupdate.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>

@@ -1463,6 +1464,8 @@ long syscall_trace_enter(struct pt_regs *regs)
 {
         long ret = 0;
 
+        rcu_user_exit();
+
         /*
          * If we stepped into a sysenter/syscall insn, it trapped in
          * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.

@@ -1526,4 +1529,6 @@ void syscall_trace_leave(struct pt_regs *regs)
                         !test_thread_flag(TIF_SYSCALL_EMU);
         if (step || test_thread_flag(TIF_SYSCALL_TRACE))
                 tracehook_report_syscall_exit(regs, step);
+
+        rcu_user_enter();
 }

@@ -779,6 +779,8 @@ static void do_signal(struct pt_regs *regs)
 void
 do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 {
+        rcu_user_exit();
+
 #ifdef CONFIG_X86_MCE
         /* notify userspace of pending MCEs */
         if (thread_info_flags & _TIF_MCE_NOTIFY)

@@ -804,6 +806,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 #ifdef CONFIG_X86_32
         clear_thread_flag(TIF_IRET);
 #endif /* CONFIG_X86_32 */
+
+        rcu_user_enter();
 }
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where)

@@ -55,6 +55,7 @@
 #include <asm/i387.h>
 #include <asm/fpu-internal.h>
 #include <asm/mce.h>
+#include <asm/rcu.h>
 
 #include <asm/mach_traps.h>
 

@@ -180,11 +181,15 @@ vm86_trap:
 #define DO_ERROR(trapnr, signr, str, name) \
 dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
 { \
-        if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
-                                                == NOTIFY_STOP) \
+        exception_enter(regs); \
+        if (notify_die(DIE_TRAP, str, regs, error_code, \
+                        trapnr, signr) == NOTIFY_STOP) { \
+                exception_exit(regs); \
                 return; \
+        } \
         conditional_sti(regs); \
         do_trap(trapnr, signr, str, regs, error_code, NULL); \
+        exception_exit(regs); \
 }
 
 #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \

@@ -195,11 +200,15 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
         info.si_errno = 0; \
         info.si_code = sicode; \
         info.si_addr = (void __user *)siaddr; \
-        if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
-                                                == NOTIFY_STOP) \
+        exception_enter(regs); \
+        if (notify_die(DIE_TRAP, str, regs, error_code, \
+                        trapnr, signr) == NOTIFY_STOP) { \
+                exception_exit(regs); \
                 return; \
+        } \
         conditional_sti(regs); \
         do_trap(trapnr, signr, str, regs, error_code, &info); \
+        exception_exit(regs); \
 }
 
 DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV,

@@ -222,12 +231,14 @@ DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check,
 /* Runs on IST stack */
 dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
 {
+        exception_enter(regs);
         if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
-                       X86_TRAP_SS, SIGBUS) == NOTIFY_STOP)
-                return;
-        preempt_conditional_sti(regs);
-        do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
-        preempt_conditional_cli(regs);
+                       X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) {
+                preempt_conditional_sti(regs);
+                do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
+                preempt_conditional_cli(regs);
+        }
+        exception_exit(regs);
 }
 
 dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)

@@ -235,6 +246,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
         static const char str[] = "double fault";
         struct task_struct *tsk = current;
 
+        exception_enter(regs);
         /* Return not checked because double check cannot be ignored */
         notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
 

@@ -255,16 +267,29 @@ do_general_protection(struct pt_regs *regs, long error_code)
 {
         struct task_struct *tsk;
 
+        exception_enter(regs);
         conditional_sti(regs);
 
 #ifdef CONFIG_X86_32
-        if (regs->flags & X86_VM_MASK)
-                goto gp_in_vm86;
+        if (regs->flags & X86_VM_MASK) {
+                local_irq_enable();
+                handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
+                goto exit;
+        }
 #endif
 
         tsk = current;
-        if (!user_mode(regs))
-                goto gp_in_kernel;
+        if (!user_mode(regs)) {
+                if (fixup_exception(regs))
+                        goto exit;
+
+                tsk->thread.error_code = error_code;
+                tsk->thread.trap_nr = X86_TRAP_GP;
+                if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
+                               X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
+                        die("general protection fault", regs, error_code);
+                goto exit;
+        }
 
         tsk->thread.error_code = error_code;
         tsk->thread.trap_nr = X86_TRAP_GP;

@@ -279,25 +304,8 @@ do_general_protection(struct pt_regs *regs, long error_code)
         }
 
         force_sig(SIGSEGV, tsk);
-        return;
-
-#ifdef CONFIG_X86_32
-gp_in_vm86:
-        local_irq_enable();
-        handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
-        return;
-#endif
-
-gp_in_kernel:
-        if (fixup_exception(regs))
-                return;
-
-        tsk->thread.error_code = error_code;
-        tsk->thread.trap_nr = X86_TRAP_GP;
-        if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
-                       X86_TRAP_GP, SIGSEGV) == NOTIFY_STOP)
-                return;
-        die("general protection fault", regs, error_code);
+exit:
+        exception_exit(regs);
 }
 
 /* May run on IST stack. */

@@ -312,15 +320,16 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
             ftrace_int3_handler(regs))
                 return;
 #endif
+        exception_enter(regs);
 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
         if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
                          SIGTRAP) == NOTIFY_STOP)
-                return;
+                goto exit;
 #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
 
         if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
                        SIGTRAP) == NOTIFY_STOP)
-                return;
+                goto exit;
 
         /*
          * Let others (NMI) know that the debug stack is in use

@@ -331,6 +340,8 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
         do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
         preempt_conditional_cli(regs);
         debug_stack_usage_dec();
+exit:
+        exception_exit(regs);
 }
 
 #ifdef CONFIG_X86_64

@@ -391,6 +402,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
         unsigned long dr6;
         int si_code;
 
+        exception_enter(regs);
+
         get_debugreg(dr6, 6);
 
         /* Filter out all the reserved bits which are preset to 1 */

@@ -406,7 +419,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 
         /* Catch kmemcheck conditions first of all! */
         if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
-                return;
+                goto exit;
 
         /* DR6 may or may not be cleared by the CPU */
         set_debugreg(0, 6);

@@ -421,7 +434,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 
         if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
                        SIGTRAP) == NOTIFY_STOP)
-                return;
+                goto exit;
 
         /*
          * Let others (NMI) know that the debug stack is in use

@@ -437,7 +450,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
                                X86_TRAP_DB);
                 preempt_conditional_cli(regs);
                 debug_stack_usage_dec();
-                return;
+                goto exit;
         }
 
         /*

@@ -458,7 +471,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
         preempt_conditional_cli(regs);
         debug_stack_usage_dec();
 
-        return;
+exit:
+        exception_exit(regs);
 }
 
 /*

@@ -555,14 +569,17 @@ dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
 #ifdef CONFIG_X86_32
         ignore_fpu_irq = 1;
 #endif
 
+        exception_enter(regs);
         math_error(regs, error_code, X86_TRAP_MF);
+        exception_exit(regs);
 }
 
 dotraplinkage void
 do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
 {
+        exception_enter(regs);
         math_error(regs, error_code, X86_TRAP_XF);
+        exception_exit(regs);
 }
 
 dotraplinkage void

@@ -629,6 +646,7 @@ EXPORT_SYMBOL_GPL(math_state_restore);
 dotraplinkage void __kprobes
 do_device_not_available(struct pt_regs *regs, long error_code)
 {
+        exception_enter(regs);
 #ifdef CONFIG_MATH_EMULATION
         if (read_cr0() & X86_CR0_EM) {
                 struct math_emu_info info = { };

@@ -637,6 +655,7 @@ do_device_not_available(struct pt_regs *regs, long error_code)
 
                 info.regs = regs;
                 math_emulate(&info);
+                exception_exit(regs);
                 return;
         }
 #endif

@@ -644,12 +663,15 @@ do_device_not_available(struct pt_regs *regs, long error_code)
 #ifdef CONFIG_X86_32
         conditional_sti(regs);
 #endif
+        exception_exit(regs);
 }
 
 #ifdef CONFIG_X86_32
 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 {
         siginfo_t info;
+
+        exception_enter(regs);
         local_irq_enable();
 
         info.si_signo = SIGILL;

@@ -657,10 +679,11 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
         info.si_code = ILL_BADSTK;
         info.si_addr = NULL;
         if (notify_die(DIE_TRAP, "iret exception", regs, error_code,
-                       X86_TRAP_IRET, SIGILL) == NOTIFY_STOP)
-                return;
-        do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
-                &info);
+                       X86_TRAP_IRET, SIGILL) != NOTIFY_STOP) {
+                do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
+                        &info);
+        }
+        exception_exit(regs);
 }
 #endif

@@ -18,6 +18,7 @@
 #include <asm/pgalloc.h> /* pgd_*(), ... */
 #include <asm/kmemcheck.h> /* kmemcheck_*(), ... */
 #include <asm/fixmap.h> /* VSYSCALL_START */
+#include <asm/rcu.h> /* exception_enter(), ... */
 
 /*
  * Page fault error code bits:

@@ -1000,8 +1001,8 @@ static int fault_in_kernel_space(unsigned long address)
  * and the problem, and then passes it off to one of the appropriate
  * routines.
  */
-dotraplinkage void __kprobes
-do_page_fault(struct pt_regs *regs, unsigned long error_code)
+static void __kprobes
+__do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
         struct vm_area_struct *vma;
         struct task_struct *tsk;

@@ -1209,3 +1210,11 @@ good_area:
 
         up_read(&mm->mmap_sem);
 }
+
+dotraplinkage void __kprobes
+do_page_fault(struct pt_regs *regs, unsigned long error_code)
+{
+        exception_enter(regs);
+        __do_page_fault(regs, error_code);
+        exception_exit(regs);
+}

@@ -191,6 +191,21 @@ extern void rcu_idle_enter(void);
 extern void rcu_idle_exit(void);
 extern void rcu_irq_enter(void);
 extern void rcu_irq_exit(void);
+
+#ifdef CONFIG_RCU_USER_QS
+extern void rcu_user_enter(void);
+extern void rcu_user_exit(void);
+extern void rcu_user_enter_after_irq(void);
+extern void rcu_user_exit_after_irq(void);
+extern void rcu_user_hooks_switch(struct task_struct *prev,
+                                  struct task_struct *next);
+#else
+static inline void rcu_user_enter(void) { }
+static inline void rcu_user_exit(void) { }
+static inline void rcu_user_enter_after_irq(void) { }
+static inline void rcu_user_exit_after_irq(void) { }
+#endif /* CONFIG_RCU_USER_QS */
 
 extern void exit_rcu(void);
 
 /**

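A small illustration of the design choice above (assumption-level sketch, not code from the series): because the #else branch provides empty inline stubs, callers such as the x86 syscall_trace_enter()/syscall_trace_leave() hunks earlier can invoke the hooks unconditionally, with no #ifdef CONFIG_RCU_USER_QS at the call site.

#include <linux/rcupdate.h>

long example_syscall_entry_hook(void)   /* hypothetical caller */
{
        rcu_user_exit();        /* compiles to nothing when CONFIG_RCU_USER_QS is off */
        return 0;
}
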
@@ -1885,6 +1885,14 @@ static inline void rcu_copy_process(struct task_struct *p)
 
 #endif
 
+static inline void rcu_switch(struct task_struct *prev,
+                              struct task_struct *next)
+{
+#ifdef CONFIG_RCU_USER_QS
+        rcu_user_hooks_switch(prev, next);
+#endif
+}
+
 static inline void tsk_restore_flags(struct task_struct *task,
                                      unsigned long orig_flags, unsigned long flags)
 {

init/Kconfig | 18

@@ -441,6 +441,24 @@ config PREEMPT_RCU
           This option enables preemptible-RCU code that is common between
           the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations.
 
+config RCU_USER_QS
+        bool "Consider userspace as in RCU extended quiescent state"
+        depends on HAVE_RCU_USER_QS && SMP
+        help
+          This option sets hooks on kernel / userspace boundaries and
+          puts RCU in extended quiescent state when the CPU runs in
+          userspace. It means that when a CPU runs in userspace, it is
+          excluded from the global RCU state machine and thus doesn't
+          to keep the timer tick on for RCU.
+
+config RCU_USER_QS_FORCE
+        bool "Force userspace extended QS by default"
+        depends on RCU_USER_QS
+        help
+          Set the hooks in user/kernel boundaries by default in order to
+          test this feature that treats userspace as an extended quiescent
+          state until we have a real user like a full adaptive nohz option.
+
 config RCU_FANOUT
         int "Tree-based hierarchical RCU fanout value"
         range 2 64 if 64BIT

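As a configuration example (hedged: the option names come from the hunk above, and the x86_64 dependency from the arch/x86/Kconfig hunk earlier), a test build exercising the feature could carry a .config fragment along these lines:

# HAVE_RCU_USER_QS is selected by the architecture (x86_64 in this series)
CONFIG_SMP=y
CONFIG_RCU_USER_QS=y
# Testing aid only: force the user/kernel hooks on by default
# until a real adaptive-nohz user exists.
CONFIG_RCU_USER_QS_FORCE=y
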
kernel/rcutree.c | 212

@@ -206,6 +206,9 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
         .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
         .dynticks = ATOMIC_INIT(1),
+#if defined(CONFIG_RCU_USER_QS) && !defined(CONFIG_RCU_USER_QS_FORCE)
+        .ignore_user_qs = true,
+#endif
 };
 
 static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */

@@ -322,16 +325,17 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
 }
 
 /*
- * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle
+ * rcu_eqs_enter_common - current CPU is moving towards extended quiescent state
  *
  * If the new value of the ->dynticks_nesting counter now is zero,
  * we really have entered idle, and must do the appropriate accounting.
  * The caller must have disabled interrupts.
  */
-static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
+static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
+                                 bool user)
 {
         trace_rcu_dyntick("Start", oldval, 0);
-        if (!is_idle_task(current)) {
+        if (!user && !is_idle_task(current)) {
                 struct task_struct *idle = idle_task(smp_processor_id());
 
                 trace_rcu_dyntick("Error on entry: not idle task", oldval, 0);

@@ -348,7 +352,7 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
         WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
 
         /*
-         * The idle task is not permitted to enter the idle loop while
+         * It is illegal to enter an extended quiescent state while
          * in an RCU read-side critical section.
          */
         rcu_lockdep_assert(!lock_is_held(&rcu_lock_map),

@@ -359,6 +363,25 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
                            "Illegal idle entry in RCU-sched read-side critical section.");
 }
 
+/*
+ * Enter an RCU extended quiescent state, which can be either the
+ * idle loop or adaptive-tickless usermode execution.
+ */
+static void rcu_eqs_enter(bool user)
+{
+        long long oldval;
+        struct rcu_dynticks *rdtp;
+
+        rdtp = &__get_cpu_var(rcu_dynticks);
+        oldval = rdtp->dynticks_nesting;
+        WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0);
+        if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE)
+                rdtp->dynticks_nesting = 0;
+        else
+                rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE;
+        rcu_eqs_enter_common(rdtp, oldval, user);
+}
+
 /**
  * rcu_idle_enter - inform RCU that current CPU is entering idle
  *

@@ -374,21 +397,70 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
 void rcu_idle_enter(void)
 {
         unsigned long flags;
-        long long oldval;
 
         local_irq_save(flags);
+        rcu_eqs_enter(false);
+        local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(rcu_idle_enter);
+
+#ifdef CONFIG_RCU_USER_QS
+/**
+ * rcu_user_enter - inform RCU that we are resuming userspace.
+ *
+ * Enter RCU idle mode right before resuming userspace. No use of RCU
+ * is permitted between this call and rcu_user_exit(). This way the
+ * CPU doesn't need to maintain the tick for RCU maintenance purposes
+ * when the CPU runs in userspace.
+ */
+void rcu_user_enter(void)
+{
+        unsigned long flags;
+        struct rcu_dynticks *rdtp;
+
+        /*
+         * Some contexts may involve an exception occuring in an irq,
+         * leading to that nesting:
+         * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
+         * This would mess up the dyntick_nesting count though. And rcu_irq_*()
+         * helpers are enough to protect RCU uses inside the exception. So
+         * just return immediately if we detect we are in an IRQ.
+         */
+        if (in_interrupt())
+                return;
+
+        WARN_ON_ONCE(!current->mm);
+
+        local_irq_save(flags);
+        rdtp = &__get_cpu_var(rcu_dynticks);
+        if (!rdtp->ignore_user_qs && !rdtp->in_user) {
+                rdtp->in_user = true;
+                rcu_eqs_enter(true);
+        }
+        local_irq_restore(flags);
+}
+
+/**
+ * rcu_user_enter_after_irq - inform RCU that we are going to resume userspace
+ * after the current irq returns.
+ *
+ * This is similar to rcu_user_enter() but in the context of a non-nesting
+ * irq. After this call, RCU enters into idle mode when the interrupt
+ * returns.
+ */
+void rcu_user_enter_after_irq(void)
+{
+        unsigned long flags;
+        struct rcu_dynticks *rdtp;
+
+        local_irq_save(flags);
         rdtp = &__get_cpu_var(rcu_dynticks);
-        oldval = rdtp->dynticks_nesting;
-        WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0);
-        if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE)
-                rdtp->dynticks_nesting = 0;
-        else
-                rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE;
-        rcu_idle_enter_common(rdtp, oldval);
+        /* Ensure this irq is interrupting a non-idle RCU state. */
+        WARN_ON_ONCE(!(rdtp->dynticks_nesting & DYNTICK_TASK_MASK));
+        rdtp->dynticks_nesting = 1;
         local_irq_restore(flags);
 }
-EXPORT_SYMBOL_GPL(rcu_idle_enter);
+#endif /* CONFIG_RCU_USER_QS */
 
 /**
  * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle

@@ -420,18 +492,19 @@ void rcu_irq_exit(void)
         if (rdtp->dynticks_nesting)
                 trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting);
         else
-                rcu_idle_enter_common(rdtp, oldval);
+                rcu_eqs_enter_common(rdtp, oldval, true);
         local_irq_restore(flags);
 }
 
 /*
- * rcu_idle_exit_common - inform RCU that current CPU is moving away from idle
+ * rcu_eqs_exit_common - current CPU moving away from extended quiescent state
  *
  * If the new value of the ->dynticks_nesting counter was previously zero,
  * we really have exited idle, and must do the appropriate accounting.
  * The caller must have disabled interrupts.
  */
-static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
+static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
+                                int user)
 {
         smp_mb__before_atomic_inc();  /* Force ordering w/previous sojourn. */
         atomic_inc(&rdtp->dynticks);

@@ -440,7 +513,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
         WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
         rcu_cleanup_after_idle(smp_processor_id());
         trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting);
-        if (!is_idle_task(current)) {
+        if (!user && !is_idle_task(current)) {
                 struct task_struct *idle = idle_task(smp_processor_id());
 
                 trace_rcu_dyntick("Error on exit: not idle task",

@@ -452,6 +525,25 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
         }
 }
 
+/*
+ * Exit an RCU extended quiescent state, which can be either the
+ * idle loop or adaptive-tickless usermode execution.
+ */
+static void rcu_eqs_exit(bool user)
+{
+        struct rcu_dynticks *rdtp;
+        long long oldval;
+
+        rdtp = &__get_cpu_var(rcu_dynticks);
+        oldval = rdtp->dynticks_nesting;
+        WARN_ON_ONCE(oldval < 0);
+        if (oldval & DYNTICK_TASK_NEST_MASK)
+                rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
+        else
+                rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
+        rcu_eqs_exit_common(rdtp, oldval, user);
+}
+
 /**
  * rcu_idle_exit - inform RCU that current CPU is leaving idle
  *

@@ -466,22 +558,68 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
 void rcu_idle_exit(void)
 {
         unsigned long flags;
-        struct rcu_dynticks *rdtp;
-        long long oldval;
 
         local_irq_save(flags);
-        rdtp = &__get_cpu_var(rcu_dynticks);
-        oldval = rdtp->dynticks_nesting;
-        WARN_ON_ONCE(oldval < 0);
-        if (oldval & DYNTICK_TASK_NEST_MASK)
-                rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
-        else
-                rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
-        rcu_idle_exit_common(rdtp, oldval);
+        rcu_eqs_exit(false);
         local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(rcu_idle_exit);
 
+#ifdef CONFIG_RCU_USER_QS
+/**
+ * rcu_user_exit - inform RCU that we are exiting userspace.
+ *
+ * Exit RCU idle mode while entering the kernel because it can
+ * run a RCU read side critical section anytime.
+ */
+void rcu_user_exit(void)
+{
+        unsigned long flags;
+        struct rcu_dynticks *rdtp;
+
+        /*
+         * Some contexts may involve an exception occuring in an irq,
+         * leading to that nesting:
+         * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
+         * This would mess up the dyntick_nesting count though. And rcu_irq_*()
+         * helpers are enough to protect RCU uses inside the exception. So
+         * just return immediately if we detect we are in an IRQ.
+         */
+        if (in_interrupt())
+                return;
+
+        local_irq_save(flags);
+        rdtp = &__get_cpu_var(rcu_dynticks);
+        if (rdtp->in_user) {
+                rdtp->in_user = false;
+                rcu_eqs_exit(true);
+        }
+        local_irq_restore(flags);
+}
+
+/**
+ * rcu_user_exit_after_irq - inform RCU that we won't resume to userspace
+ * idle mode after the current non-nesting irq returns.
+ *
+ * This is similar to rcu_user_exit() but in the context of an irq.
+ * This is called when the irq has interrupted a userspace RCU idle mode
+ * context. When the current non-nesting interrupt returns after this call,
+ * the CPU won't restore the RCU idle mode.
+ */
+void rcu_user_exit_after_irq(void)
+{
+        unsigned long flags;
+        struct rcu_dynticks *rdtp;
+
+        local_irq_save(flags);
+        rdtp = &__get_cpu_var(rcu_dynticks);
+        /* Ensure we are interrupting an RCU idle mode. */
+        WARN_ON_ONCE(rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK);
+        rdtp->dynticks_nesting += DYNTICK_TASK_EXIT_IDLE;
+        local_irq_restore(flags);
+}
+#endif /* CONFIG_RCU_USER_QS */
+
 /**
  * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
  *

@@ -515,7 +653,7 @@ void rcu_irq_enter(void)
         if (oldval)
                 trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting);
         else
-                rcu_idle_exit_common(rdtp, oldval);
+                rcu_eqs_exit_common(rdtp, oldval, true);
         local_irq_restore(flags);
 }
 

@@ -579,6 +717,21 @@ int rcu_is_cpu_idle(void)
 }
 EXPORT_SYMBOL(rcu_is_cpu_idle);
 
+#ifdef CONFIG_RCU_USER_QS
+void rcu_user_hooks_switch(struct task_struct *prev,
+                           struct task_struct *next)
+{
+        struct rcu_dynticks *rdtp;
+
+        /* Interrupts are disabled in context switch */
+        rdtp = &__get_cpu_var(rcu_dynticks);
+        if (!rdtp->ignore_user_qs) {
+                clear_tsk_thread_flag(prev, TIF_NOHZ);
+                set_tsk_thread_flag(next, TIF_NOHZ);
+        }
+}
+#endif /* #ifdef CONFIG_RCU_USER_QS */
+
 #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
 
 /*

@@ -2473,6 +2626,9 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
         rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
         WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
         WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
+#ifdef CONFIG_RCU_USER_QS
+        WARN_ON_ONCE(rdp->dynticks->in_user);
+#endif
         rdp->cpu = cpu;
         rdp->rsp = rsp;
         raw_spin_unlock_irqrestore(&rnp->lock, flags);

@@ -102,6 +102,10 @@ struct rcu_dynticks {
                                     /* idle-period nonlazy_posted snapshot. */
         int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
+#ifdef CONFIG_RCU_USER_QS
+        bool ignore_user_qs;        /* Treat userspace as extended QS or not */
+        bool in_user;               /* Is the CPU in userland from RCU POV? */
+#endif
 };
 
 /* RCU's kthread states for tracing. */

@@ -1757,6 +1757,26 @@ static void rcu_prepare_for_idle(int cpu)
         if (!tne)
                 return;
 
+        /* Adaptive-tick mode, where usermode execution is idle to RCU. */
+        if (!is_idle_task(current)) {
+                rdtp->dyntick_holdoff = jiffies - 1;
+                if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
+                        trace_rcu_prep_idle("User dyntick with callbacks");
+                        rdtp->idle_gp_timer_expires =
+                                round_up(jiffies + RCU_IDLE_GP_DELAY,
+                                         RCU_IDLE_GP_DELAY);
+                } else if (rcu_cpu_has_callbacks(cpu)) {
+                        rdtp->idle_gp_timer_expires =
+                                round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY);
+                        trace_rcu_prep_idle("User dyntick with lazy callbacks");
+                } else {
+                        return;
+                }
+                tp = &rdtp->idle_gp_timer;
+                mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
+                return;
+        }
+
         /*
          * If this is an idle re-entry, for example, due to use of
          * RCU_NONIDLE() or the new idle-loop tracing API within the idle

@@ -2081,6 +2081,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 #endif
 
         /* Here we just switch the register state and the stack. */
+        rcu_switch(prev, next);
         switch_to(prev, next, prev);
 
         barrier();

@@ -3468,6 +3469,21 @@ asmlinkage void __sched schedule(void)
 }
 EXPORT_SYMBOL(schedule);
 
+#ifdef CONFIG_RCU_USER_QS
+asmlinkage void __sched schedule_user(void)
+{
+        /*
+         * If we come here after a random call to set_need_resched(),
+         * or we have been woken up remotely but the IPI has not yet arrived,
+         * we haven't yet exited the RCU idle mode. Do it here manually until
+         * we find a better solution.
+         */
+        rcu_user_exit();
+        schedule();
+        rcu_user_enter();
+}
+#endif
+
 /**
  * schedule_preempt_disabled - called with preemption disabled
  *

@@ -3569,6 +3585,7 @@ asmlinkage void __sched preempt_schedule_irq(void)
         /* Catch callers which need to be fixed */
         BUG_ON(ti->preempt_count || !irqs_disabled());
 
+        rcu_user_exit();
         do {
                 add_preempt_count(PREEMPT_ACTIVE);
                 local_irq_enable();

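A design note on the scheduler hooks above: with CONFIG_RCU_USER_QS=y, the SCHEDULE_USER macro from the asm/rcu.h hunk routes the user-return preemption paths in entry_64.S to schedule_user(), which restores the "RCU is watching" state around the context switch. An illustrative call sequence (hedged inference from the hunks in this merge, not code from the series), assuming the interrupted CPU was in the userspace extended quiescent state:

/*
 *   rcu_irq_enter()        -- irq entry: RCU temporarily watches the CPU
 *   ...handler sets TIF_NEED_RESCHED...
 *   rcu_irq_exit()         -- irq exit: back toward the userspace EQS
 *   SCHEDULE_USER          -- expands to "call schedule_user"
 *     rcu_user_exit()      -- leave the EQS before scheduling
 *     schedule()
 *     rcu_user_enter()     -- re-enter the EQS before resuming userspace
 */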