Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fpu changes from Ingo Molnar:
 "Various x86 FPU handling cleanups, refactorings and fixes (Borislav
  Petkov, Oleg Nesterov, Rik van Riel)"

* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits)
  x86/fpu: Kill eager_fpu_init_bp()
  x86/fpu: Don't allocate fpu->state for swapper/0
  x86/fpu: Rename drop_init_fpu() to fpu_reset_state()
  x86/fpu: Fold __drop_fpu() into its sole user
  x86/fpu: Don't abuse drop_init_fpu() in flush_thread()
  x86/fpu: Use restore_init_xstate() instead of math_state_restore() on kthread exec
  x86/fpu: Introduce restore_init_xstate()
  x86/fpu: Document user_fpu_begin()
  x86/fpu: Factor out memset(xstate, 0) in fpu_finit() paths
  x86/fpu: Change xstateregs_get()/set() to use ->xsave.i387 rather than ->fxsave
  x86/fpu: Don't abuse FPU in kernel threads if use_eager_fpu()
  x86/fpu: Always allow FPU in interrupt if use_eager_fpu()
  x86/fpu: __kernel_fpu_begin() should clear fpu_owner_task even if use_eager_fpu()
  x86/fpu: Also check fpu_lazy_restore() when use_eager_fpu()
  x86/fpu: Use task_disable_lazy_fpu_restore() helper
  x86/fpu: Use an explicit if/else in switch_fpu_prepare()
  x86/fpu: Introduce task_disable_lazy_fpu_restore() helper
  x86/fpu: Move lazy restore functions up a few lines
  x86/fpu: Change math_error() to use unlazy_fpu(), kill (now) unused save_init_fpu()
  x86/fpu: Don't do __thread_fpu_end() if use_eager_fpu()
  ...
This commit is contained in:
Commit 421ec9017f
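For readers new to the two FPU-switching modes this series reworks, here is a minimal user-space C sketch of the eager-vs-lazy reset decision encoded by the new fpu_reset_state() helper introduced in the hunks below. The stubs use_eager_fpu(), drop_fpu() and restore_init_xstate() only print what the kernel helpers of the same name would do; this is an illustration under those assumptions, not kernel code.

#include <stdbool.h>
#include <stdio.h>

/* Stand-in stubs; in the kernel these touch real FPU/xstate registers. */
static bool use_eager_fpu(void)        { return true; }   /* as if booted with eagerfpu=on */
static void drop_fpu(void)             { puts("lazy: drop state, reinit on next use"); }
static void restore_init_xstate(void)  { puts("eager: reload init xstate now"); }

/* Mirrors the shape of fpu_reset_state() from the diff below. */
static void fpu_reset_state(void)
{
        if (!use_eager_fpu())
                drop_fpu();
        else
                restore_init_xstate();
}

int main(void)
{
        fpu_reset_state();
        return 0;
}
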
@@ -67,6 +67,34 @@ extern void finit_soft_fpu(struct i387_soft_struct *soft);
 static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
 #endif
 
+/*
+ * Must be run with preemption disabled: this clears the fpu_owner_task,
+ * on this CPU.
+ *
+ * This will disable any lazy FPU state restore of the current FPU state,
+ * but if the current thread owns the FPU, it will still be saved by.
+ */
+static inline void __cpu_disable_lazy_restore(unsigned int cpu)
+{
+        per_cpu(fpu_owner_task, cpu) = NULL;
+}
+
+/*
+ * Used to indicate that the FPU state in memory is newer than the FPU
+ * state in registers, and the FPU state should be reloaded next time the
+ * task is run. Only safe on the current task, or non-running tasks.
+ */
+static inline void task_disable_lazy_fpu_restore(struct task_struct *tsk)
+{
+        tsk->thread.fpu.last_cpu = ~0;
+}
+
+static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
+{
+        return new == this_cpu_read_stable(fpu_owner_task) &&
+               cpu == new->thread.fpu.last_cpu;
+}
+
 static inline int is_ia32_compat_frame(void)
 {
         return config_enabled(CONFIG_IA32_EMULATION) &&
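The helpers added above track whether a lazy restore can be skipped using a per-CPU owner pointer plus a per-task last_cpu field. Below is a hedged user-space model of that handshake: a plain array stands in for per_cpu(), the task struct is reduced to the one field that matters, and the function names mirror (but are not) the kernel helpers.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct task { unsigned int last_cpu; };

#define NR_CPUS 4
static struct task *fpu_owner_task[NR_CPUS];    /* models per_cpu(fpu_owner_task) */

static void cpu_disable_lazy_restore(unsigned int cpu)
{
        fpu_owner_task[cpu] = NULL;              /* someone else used the FPU on this CPU */
}

static void task_disable_lazy_fpu_restore(struct task *tsk)
{
        tsk->last_cpu = ~0u;                     /* in-memory state is newer than registers */
}

static bool fpu_lazy_restore(struct task *next, unsigned int cpu)
{
        /* Registers are still valid only if both sides of the handshake agree. */
        return next == fpu_owner_task[cpu] && cpu == next->last_cpu;
}

int main(void)
{
        struct task t = { .last_cpu = 1 };

        fpu_owner_task[1] = &t;
        printf("skip restore: %d\n", fpu_lazy_restore(&t, 1));  /* 1: both sides agree */

        cpu_disable_lazy_restore(1);             /* another user clobbered CPU 1's registers */
        printf("skip restore: %d\n", fpu_lazy_restore(&t, 1));  /* 0 */

        fpu_owner_task[1] = &t;
        task_disable_lazy_fpu_restore(&t);       /* task's memory image is now the newer copy */
        printf("skip restore: %d\n", fpu_lazy_restore(&t, 1));  /* 0 */
        return 0;
}
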
@@ -107,7 +135,6 @@ static __always_inline __pure bool use_fxsr(void)
 
 static inline void fx_finit(struct i387_fxsave_struct *fx)
 {
-        memset(fx, 0, xstate_size);
         fx->cwd = 0x37f;
         fx->mxcsr = MXCSR_DEFAULT;
 }
@@ -351,17 +378,6 @@ static inline void __thread_fpu_begin(struct task_struct *tsk)
         __thread_set_has_fpu(tsk);
 }
 
-static inline void __drop_fpu(struct task_struct *tsk)
-{
-        if (__thread_has_fpu(tsk)) {
-                /* Ignore delayed exceptions from user space */
-                asm volatile("1: fwait\n"
-                             "2:\n"
-                             _ASM_EXTABLE(1b, 2b));
-                __thread_fpu_end(tsk);
-        }
-}
-
 static inline void drop_fpu(struct task_struct *tsk)
 {
         /*
@@ -369,21 +385,37 @@ static inline void drop_fpu(struct task_struct *tsk)
          */
         preempt_disable();
         tsk->thread.fpu_counter = 0;
-        __drop_fpu(tsk);
+
+        if (__thread_has_fpu(tsk)) {
+                /* Ignore delayed exceptions from user space */
+                asm volatile("1: fwait\n"
+                             "2:\n"
+                             _ASM_EXTABLE(1b, 2b));
+                __thread_fpu_end(tsk);
+        }
+
         clear_stopped_child_used_math(tsk);
         preempt_enable();
 }
 
-static inline void drop_init_fpu(struct task_struct *tsk)
+static inline void restore_init_xstate(void)
+{
+        if (use_xsave())
+                xrstor_state(init_xstate_buf, -1);
+        else
+                fxrstor_checking(&init_xstate_buf->i387);
+}
+
+/*
+ * Reset the FPU state in the eager case and drop it in the lazy case (later use
+ * will reinit it).
+ */
+static inline void fpu_reset_state(struct task_struct *tsk)
 {
         if (!use_eager_fpu())
                 drop_fpu(tsk);
-        else {
-                if (use_xsave())
-                        xrstor_state(init_xstate_buf, -1);
-                else
-                        fxrstor_checking(&init_xstate_buf->i387);
-        }
+        else
+                restore_init_xstate();
 }
 
 /*
@@ -400,24 +432,6 @@ static inline void drop_init_fpu(struct task_struct *tsk)
  */
 typedef struct { int preload; } fpu_switch_t;
 
-/*
- * Must be run with preemption disabled: this clears the fpu_owner_task,
- * on this CPU.
- *
- * This will disable any lazy FPU state restore of the current FPU state,
- * but if the current thread owns the FPU, it will still be saved by.
- */
-static inline void __cpu_disable_lazy_restore(unsigned int cpu)
-{
-        per_cpu(fpu_owner_task, cpu) = NULL;
-}
-
-static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
-{
-        return new == this_cpu_read_stable(fpu_owner_task) &&
-               cpu == new->thread.fpu.last_cpu;
-}
-
 static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
 {
         fpu_switch_t fpu;
@@ -426,13 +440,17 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
          * If the task has used the math, pre-load the FPU on xsave processors
          * or if the past 5 consecutive context-switches used math.
          */
-        fpu.preload = tsk_used_math(new) && (use_eager_fpu() ||
-                                             new->thread.fpu_counter > 5);
+        fpu.preload = tsk_used_math(new) &&
+                      (use_eager_fpu() || new->thread.fpu_counter > 5);
+
         if (__thread_has_fpu(old)) {
                 if (!__save_init_fpu(old))
-                        cpu = ~0;
-                old->thread.fpu.last_cpu = cpu;
-                old->thread.fpu.has_fpu = 0;    /* But leave fpu_owner_task! */
+                        task_disable_lazy_fpu_restore(old);
+                else
+                        old->thread.fpu.last_cpu = cpu;
+
+                /* But leave fpu_owner_task! */
+                old->thread.fpu.has_fpu = 0;
 
                 /* Don't change CR0.TS if we just switch! */
                 if (fpu.preload) {
@@ -443,10 +461,10 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
                         stts();
         } else {
                 old->thread.fpu_counter = 0;
-                old->thread.fpu.last_cpu = ~0;
+                task_disable_lazy_fpu_restore(old);
                 if (fpu.preload) {
                         new->thread.fpu_counter++;
-                        if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
+                        if (fpu_lazy_restore(new, cpu))
                                 fpu.preload = 0;
                         else
                                 prefetch(new->thread.fpu.state);
@@ -466,7 +484,7 @@ static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
 {
         if (fpu.preload) {
                 if (unlikely(restore_fpu_checking(new)))
-                        drop_init_fpu(new);
+                        fpu_reset_state(new);
         }
 }
 
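The hunks above finish reshaping the two-phase context-switch path: switch_fpu_prepare() decides, before the actual stack switch, whether the incoming task's FPU state should be preloaded, and switch_fpu_finish() performs the deferred restore afterwards. The following is a simplified user-space sketch of that flow under assumed stubs for save/restore; only the fpu_counter > 5 heuristic is taken from the hunk above, everything else is illustrative.

#include <stdbool.h>
#include <stdio.h>

struct task { bool used_math; unsigned char fpu_counter; };
typedef struct { int preload; } fpu_switch_t;

static bool use_eager_fpu(void) { return false; }              /* lazy mode for the demo */
static void save_fpu(struct task *t)    { (void)t; puts("save old task's FPU state"); }
static void restore_fpu(struct task *t) { (void)t; puts("restore new task's FPU state"); }

/* Phase 1: while still running the old task, decide whether the new task's
 * state should be loaded, and save the old task's state. */
static fpu_switch_t switch_fpu_prepare(struct task *old, struct task *new)
{
        fpu_switch_t fpu;

        fpu.preload = new->used_math &&
                      (use_eager_fpu() || new->fpu_counter > 5);
        save_fpu(old);
        return fpu;
}

/* Phase 2: after the stack/register switch, do the deferred restore. */
static void switch_fpu_finish(struct task *new, fpu_switch_t fpu)
{
        if (fpu.preload)
                restore_fpu(new);
}

int main(void)
{
        struct task prev = { .used_math = true, .fpu_counter = 9 };
        struct task next = { .used_math = true, .fpu_counter = 9 };
        fpu_switch_t f = switch_fpu_prepare(&prev, &next);
        switch_fpu_finish(&next, f);
        return 0;
}
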
@@ -495,10 +513,12 @@ static inline int restore_xstate_sig(void __user *buf, int ia32_frame)
 }
 
 /*
- * Need to be preemption-safe.
+ * Needs to be preemption-safe.
  *
  * NOTE! user_fpu_begin() must be used only immediately before restoring
- * it. This function does not do any save/restore on their own.
+ * the save state. It does not do any saving/restoring on its own. In
+ * lazy FPU mode, it is just an optimization to avoid a #NM exception,
+ * the task can lose the FPU right after preempt_enable().
  */
 static inline void user_fpu_begin(void)
 {
@@ -519,24 +539,6 @@ static inline void __save_fpu(struct task_struct *tsk)
                 fpu_fxsave(&tsk->thread.fpu);
 }
 
-/*
- * These disable preemption on their own and are safe
- */
-static inline void save_init_fpu(struct task_struct *tsk)
-{
-        WARN_ON_ONCE(!__thread_has_fpu(tsk));
-
-        if (use_eager_fpu()) {
-                __save_fpu(tsk);
-                return;
-        }
-
-        preempt_disable();
-        __save_init_fpu(tsk);
-        __thread_fpu_end(tsk);
-        preempt_enable();
-}
-
 /*
  * i387 state interaction
  */
@@ -42,8 +42,8 @@ void kernel_fpu_enable(void)
  * be set (so that the clts/stts pair does nothing that is
  * visible in the interrupted kernel thread).
  *
- * Except for the eagerfpu case when we return 1 unless we've already
- * been eager and saved the state in kernel_fpu_begin().
+ * Except for the eagerfpu case when we return true; in the likely case
+ * the thread has FPU but we are not going to set/clear TS.
  */
 static inline bool interrupted_kernel_fpu_idle(void)
 {
@@ -51,7 +51,7 @@ static inline bool interrupted_kernel_fpu_idle(void)
                 return false;
 
         if (use_eager_fpu())
-                return __thread_has_fpu(current);
+                return true;
 
         return !__thread_has_fpu(current) &&
                 (read_cr0() & X86_CR0_TS);
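The net effect of the two i387.c hunks above: with eager FPU an interrupt may always use kernel_fpu_begin(), while in lazy mode it is safe only when the interrupted context does not own the FPU registers and CR0.TS is set (so any stray use would fault and be handled). A rough user-space model follows; the in_kernel_fpu check and other details are deliberately omitted, and the three booleans are assumed stand-ins for the kernel predicates.

#include <stdbool.h>
#include <stdio.h>

static bool eager_fpu      = true;    /* models use_eager_fpu()            */
static bool thread_has_fpu = false;   /* models __thread_has_fpu(current)  */
static bool cr0_ts         = false;   /* models read_cr0() & X86_CR0_TS    */

/* New behaviour from the hunk above: always safe when eager FPU is in use. */
static bool interrupted_kernel_fpu_idle(void)
{
        if (eager_fpu)
                return true;

        return !thread_has_fpu && cr0_ts;
}

int main(void)
{
        printf("FPU usable from interrupt: %d\n", interrupted_kernel_fpu_idle());
        return 0;
}
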
@@ -94,9 +94,10 @@ void __kernel_fpu_begin(void)
 
         if (__thread_has_fpu(me)) {
                 __save_init_fpu(me);
-        } else if (!use_eager_fpu()) {
+        } else {
                 this_cpu_write(fpu_owner_task, NULL);
-                clts();
+                if (!use_eager_fpu())
+                        clts();
         }
 }
 EXPORT_SYMBOL(__kernel_fpu_begin);
@@ -107,7 +108,7 @@ void __kernel_fpu_end(void)
 
         if (__thread_has_fpu(me)) {
                 if (WARN_ON(restore_fpu_checking(me)))
-                        drop_init_fpu(me);
+                        fpu_reset_state(me);
         } else if (!use_eager_fpu()) {
                 stts();
         }
@@ -120,10 +121,13 @@ void unlazy_fpu(struct task_struct *tsk)
 {
         preempt_disable();
         if (__thread_has_fpu(tsk)) {
-                __save_init_fpu(tsk);
-                __thread_fpu_end(tsk);
-        } else
-                tsk->thread.fpu_counter = 0;
+                if (use_eager_fpu()) {
+                        __save_fpu(tsk);
+                } else {
+                        __save_init_fpu(tsk);
+                        __thread_fpu_end(tsk);
+                }
+        }
         preempt_enable();
 }
 EXPORT_SYMBOL(unlazy_fpu);
@@ -221,11 +225,12 @@ void fpu_finit(struct fpu *fpu)
                 return;
         }
 
+        memset(fpu->state, 0, xstate_size);
+
         if (cpu_has_fxsr) {
                 fx_finit(&fpu->state->fxsave);
         } else {
                 struct i387_fsave_struct *fp = &fpu->state->fsave;
-                memset(fp, 0, xstate_size);
                 fp->cwd = 0xffff037fu;
                 fp->swd = 0xffff0000u;
                 fp->twd = 0xffffffffu;
@@ -247,7 +252,7 @@ int init_fpu(struct task_struct *tsk)
         if (tsk_used_math(tsk)) {
                 if (cpu_has_fpu && tsk == current)
                         unlazy_fpu(tsk);
-                tsk->thread.fpu.last_cpu = ~0;
+                task_disable_lazy_fpu_restore(tsk);
                 return 0;
         }
 
@@ -336,6 +341,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
                        unsigned int pos, unsigned int count,
                        void *kbuf, void __user *ubuf)
 {
+        struct xsave_struct *xsave = &target->thread.fpu.state->xsave;
         int ret;
 
         if (!cpu_has_xsave)
@@ -350,14 +356,12 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
          * memory layout in the thread struct, so that we can copy the entire
          * xstateregs to the user using one user_regset_copyout().
          */
-        memcpy(&target->thread.fpu.state->fxsave.sw_reserved,
-               xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
-
+        memcpy(&xsave->i387.sw_reserved,
+               xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
         /*
          * Copy the xstate memory layout.
          */
-        ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-                                  &target->thread.fpu.state->xsave, 0, -1);
+        ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
         return ret;
 }
 
@@ -365,8 +369,8 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
                   unsigned int pos, unsigned int count,
                   const void *kbuf, const void __user *ubuf)
 {
+        struct xsave_struct *xsave = &target->thread.fpu.state->xsave;
         int ret;
-        struct xsave_hdr_struct *xsave_hdr;
 
         if (!cpu_has_xsave)
                 return -ENODEV;
@@ -375,22 +379,16 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
         if (ret)
                 return ret;
 
-        ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-                                 &target->thread.fpu.state->xsave, 0, -1);
-
+        ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
         /*
          * mxcsr reserved bits must be masked to zero for security reasons.
          */
-        target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;
-
-        xsave_hdr = &target->thread.fpu.state->xsave.xsave_hdr;
-
-        xsave_hdr->xstate_bv &= pcntxt_mask;
+        xsave->i387.mxcsr &= mxcsr_feature_mask;
+        xsave->xsave_hdr.xstate_bv &= pcntxt_mask;
         /*
          * These bits must be zero.
          */
-        memset(xsave_hdr->reserved, 0, 48);
-
+        memset(&xsave->xsave_hdr.reserved, 0, 48);
         return ret;
 }
 
@@ -89,8 +89,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 
         dst->thread.fpu_counter = 0;
         dst->thread.fpu.has_fpu = 0;
-        dst->thread.fpu.last_cpu = ~0;
         dst->thread.fpu.state = NULL;
+        task_disable_lazy_fpu_restore(dst);
         if (tsk_used_math(src)) {
                 int err = fpu_alloc(&dst->thread.fpu);
                 if (err)
@@ -151,13 +151,18 @@ void flush_thread(void)
 
         flush_ptrace_hw_breakpoint(tsk);
         memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
-        drop_init_fpu(tsk);
-        /*
-         * Free the FPU state for non xsave platforms. They get reallocated
-         * lazily at the first use.
-         */
-        if (!use_eager_fpu())
+
+        if (!use_eager_fpu()) {
+                /* FPU state will be reallocated lazily at the first use. */
+                drop_fpu(tsk);
                 free_thread_xstate(tsk);
+        } else if (!used_math()) {
+                /* kthread execs. TODO: cleanup this horror. */
+                if (WARN_ON(init_fpu(tsk)))
+                        force_sig(SIGKILL, tsk);
+                user_fpu_begin();
+                restore_init_xstate();
+        }
 }
 
 static void hard_disable_TSC(void)
@@ -680,7 +680,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
                  * Ensure the signal handler starts with the new fpu state.
                  */
                 if (used_math())
-                        drop_init_fpu(current);
+                        fpu_reset_state(current);
         }
         signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP));
 }
@@ -731,7 +731,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
         /*
          * Save the info for the exception handler and clear the error.
          */
-        save_init_fpu(task);
+        unlazy_fpu(task);
         task->thread.trap_nr = trapnr;
         task->thread.error_code = error_code;
         info.si_signo = SIGFPE;
@@ -860,7 +860,7 @@ void math_state_restore(void)
         kernel_fpu_disable();
         __thread_fpu_begin(tsk);
         if (unlikely(restore_fpu_checking(tsk))) {
-                drop_init_fpu(tsk);
+                fpu_reset_state(tsk);
                 force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
         } else {
                 tsk->thread.fpu_counter++;
@@ -342,7 +342,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
                  config_enabled(CONFIG_IA32_EMULATION));
 
         if (!buf) {
-                drop_init_fpu(tsk);
+                fpu_reset_state(tsk);
                 return 0;
         }
 
@@ -416,7 +416,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
                  */
                 user_fpu_begin();
                 if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) {
-                        drop_init_fpu(tsk);
+                        fpu_reset_state(tsk);
                         return -1;
                 }
         }
@@ -678,19 +678,13 @@ void xsave_init(void)
         this_func();
 }
 
-static inline void __init eager_fpu_init_bp(void)
-{
-        current->thread.fpu.state =
-                alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct));
-        if (!init_xstate_buf)
-                setup_init_fpu_buf();
-}
-
-void eager_fpu_init(void)
-{
-        static __refdata void (*boot_func)(void) = eager_fpu_init_bp;
-
-        clear_used_math();
+/*
+ * setup_init_fpu_buf() is __init and it is OK to call it here because
+ * init_xstate_buf will be unset only once during boot.
+ */
+void __init_refok eager_fpu_init(void)
+{
+        WARN_ON(used_math());
         current_thread_info()->status = 0;
 
         if (eagerfpu == ENABLE)
@@ -701,21 +695,8 @@ void eager_fpu_init(void)
                 return;
         }
 
-        if (boot_func) {
-                boot_func();
-                boot_func = NULL;
-        }
-
-        /*
-         * This is same as math_state_restore(). But use_xsave() is
-         * not yet patched to use math_state_restore().
-         */
-        init_fpu(current);
-        __thread_fpu_begin(current);
-        if (cpu_has_xsave)
-                xrstor_state(init_xstate_buf, -1);
-        else
-                fxrstor_checking(&init_xstate_buf->i387);
+        if (!init_xstate_buf)
+                setup_init_fpu_buf();
 }
 
 /*