diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 72ba21a8b5fc..da5e96756570 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -67,6 +67,34 @@ extern void finit_soft_fpu(struct i387_soft_struct *soft);
 static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
 #endif

+/*
+ * Must be run with preemption disabled: this clears the fpu_owner_task
+ * on this CPU.
+ *
+ * This will disable any lazy FPU state restore of the current FPU state,
+ * but if the current thread owns the FPU, its state will still be saved.
+ */
+static inline void __cpu_disable_lazy_restore(unsigned int cpu)
+{
+        per_cpu(fpu_owner_task, cpu) = NULL;
+}
+
+/*
+ * Used to indicate that the FPU state in memory is newer than the FPU
+ * state in registers, and the FPU state should be reloaded next time the
+ * task is run. Only safe on the current task, or non-running tasks.
+ */
+static inline void task_disable_lazy_fpu_restore(struct task_struct *tsk)
+{
+        tsk->thread.fpu.last_cpu = ~0;
+}
+
+static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
+{
+        return new == this_cpu_read_stable(fpu_owner_task) &&
+                cpu == new->thread.fpu.last_cpu;
+}
+
 static inline int is_ia32_compat_frame(void)
 {
         return config_enabled(CONFIG_IA32_EMULATION) &&
@@ -107,7 +135,6 @@ static __always_inline __pure bool use_fxsr(void)

 static inline void fx_finit(struct i387_fxsave_struct *fx)
 {
-        memset(fx, 0, xstate_size);
         fx->cwd = 0x37f;
         fx->mxcsr = MXCSR_DEFAULT;
 }
@@ -351,17 +378,6 @@ static inline void __thread_fpu_begin(struct task_struct *tsk)
         __thread_set_has_fpu(tsk);
 }

-static inline void __drop_fpu(struct task_struct *tsk)
-{
-        if (__thread_has_fpu(tsk)) {
-                /* Ignore delayed exceptions from user space */
-                asm volatile("1: fwait\n"
-                             "2:\n"
-                             _ASM_EXTABLE(1b, 2b));
-                __thread_fpu_end(tsk);
-        }
-}
-
 static inline void drop_fpu(struct task_struct *tsk)
 {
         /*
@@ -369,21 +385,37 @@ static inline void drop_fpu(struct task_struct *tsk)
          */
         preempt_disable();
         tsk->thread.fpu_counter = 0;
-        __drop_fpu(tsk);
+
+        if (__thread_has_fpu(tsk)) {
+                /* Ignore delayed exceptions from user space */
+                asm volatile("1: fwait\n"
+                             "2:\n"
+                             _ASM_EXTABLE(1b, 2b));
+                __thread_fpu_end(tsk);
+        }
+
         clear_stopped_child_used_math(tsk);
         preempt_enable();
 }

-static inline void drop_init_fpu(struct task_struct *tsk)
+static inline void restore_init_xstate(void)
+{
+        if (use_xsave())
+                xrstor_state(init_xstate_buf, -1);
+        else
+                fxrstor_checking(&init_xstate_buf->i387);
+}
+
+/*
+ * Reset the FPU state in the eager case and drop it in the lazy case (later use
+ * will reinit it).
+ */
+static inline void fpu_reset_state(struct task_struct *tsk)
 {
         if (!use_eager_fpu())
                 drop_fpu(tsk);
-        else {
-                if (use_xsave())
-                        xrstor_state(init_xstate_buf, -1);
-                else
-                        fxrstor_checking(&init_xstate_buf->i387);
-        }
+        else
+                restore_init_xstate();
 }

 /*
@@ -400,24 +432,6 @@ static inline void drop_init_fpu(struct task_struct *tsk)
  */
 typedef struct { int preload; } fpu_switch_t;

-/*
- * Must be run with preemption disabled: this clears the fpu_owner_task,
- * on this CPU.
- *
- * This will disable any lazy FPU state restore of the current FPU state,
- * but if the current thread owns the FPU, it will still be saved by.
- */
-static inline void __cpu_disable_lazy_restore(unsigned int cpu)
-{
-        per_cpu(fpu_owner_task, cpu) = NULL;
-}
-
-static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
-{
-        return new == this_cpu_read_stable(fpu_owner_task) &&
-                cpu == new->thread.fpu.last_cpu;
-}
-
 static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
 {
         fpu_switch_t fpu;
@@ -426,13 +440,17 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
          * If the task has used the math, pre-load the FPU on xsave processors
          * or if the past 5 consecutive context-switches used math.
          */
-        fpu.preload = tsk_used_math(new) && (use_eager_fpu() ||
-                                             new->thread.fpu_counter > 5);
+        fpu.preload = tsk_used_math(new) &&
+                      (use_eager_fpu() || new->thread.fpu_counter > 5);
+
         if (__thread_has_fpu(old)) {
                 if (!__save_init_fpu(old))
-                        cpu = ~0;
-                old->thread.fpu.last_cpu = cpu;
-                old->thread.fpu.has_fpu = 0;    /* But leave fpu_owner_task! */
+                        task_disable_lazy_fpu_restore(old);
+                else
+                        old->thread.fpu.last_cpu = cpu;
+
+                /* But leave fpu_owner_task! */
+                old->thread.fpu.has_fpu = 0;

                 /* Don't change CR0.TS if we just switch! */
                 if (fpu.preload) {
@@ -443,10 +461,10 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
                         stts();
         } else {
                 old->thread.fpu_counter = 0;
-                old->thread.fpu.last_cpu = ~0;
+                task_disable_lazy_fpu_restore(old);
                 if (fpu.preload) {
                         new->thread.fpu_counter++;
-                        if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
+                        if (fpu_lazy_restore(new, cpu))
                                 fpu.preload = 0;
                         else
                                 prefetch(new->thread.fpu.state);
@@ -466,7 +484,7 @@ static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
 {
         if (fpu.preload) {
                 if (unlikely(restore_fpu_checking(new)))
-                        drop_init_fpu(new);
+                        fpu_reset_state(new);
         }
 }

@@ -495,10 +513,12 @@ static inline int restore_xstate_sig(void __user *buf, int ia32_frame)
 }

 /*
- * Need to be preemption-safe.
+ * Needs to be preemption-safe.
  *
  * NOTE! user_fpu_begin() must be used only immediately before restoring
- * it. This function does not do any save/restore on their own.
+ * the save state. It does not do any saving/restoring on its own. In
+ * lazy FPU mode, it is just an optimization to avoid a #NM exception;
+ * the task can lose the FPU right after preempt_enable().
  */
 static inline void user_fpu_begin(void)
 {
@@ -519,24 +539,6 @@ static inline void __save_fpu(struct task_struct *tsk)
                 fpu_fxsave(&tsk->thread.fpu);
 }

-/*
- * These disable preemption on their own and are safe
- */
-static inline void save_init_fpu(struct task_struct *tsk)
-{
-        WARN_ON_ONCE(!__thread_has_fpu(tsk));
-
-        if (use_eager_fpu()) {
-                __save_fpu(tsk);
-                return;
-        }
-
-        preempt_disable();
-        __save_init_fpu(tsk);
-        __thread_fpu_end(tsk);
-        preempt_enable();
-}
-
 /*
  * i387 state interaction
  */
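The fpu-internal.h changes above all feed one question in switch_fpu_prepare(): may the restore of the incoming task's FPU state be skipped? That is only safe when this CPU's fpu_owner_task still points at the incoming task and the task's last_cpu is this CPU; task_disable_lazy_fpu_restore() deliberately breaks the second condition whenever the in-memory copy becomes newer than whatever is in the registers. The user-space sketch below models just that check; every name in it (mock_task, fpu_owner, NR_CPUS) is invented for illustration and is not kernel API.

/* Build with: cc -Wall -o lazy_model lazy_model.c   (user-space model only) */
#include <stdio.h>

#define NR_CPUS 4

struct mock_task {
        unsigned int last_cpu;  /* CPU whose registers last held our state */
};

/* Per-CPU analogue of fpu_owner_task: whose state is live in the registers. */
static struct mock_task *fpu_owner[NR_CPUS];

/* Analogue of task_disable_lazy_fpu_restore(): memory is newer than registers. */
static void disable_lazy_restore(struct mock_task *tsk)
{
        tsk->last_cpu = ~0u;
}

/* Analogue of fpu_lazy_restore(): registers can be reused as-is only if this
 * CPU still owns the task's state AND the task last ran on this CPU. */
static int lazy_restore_ok(struct mock_task *tsk, unsigned int cpu)
{
        return fpu_owner[cpu] == tsk && tsk->last_cpu == cpu;
}

int main(void)
{
        struct mock_task a = { .last_cpu = 1 };

        fpu_owner[1] = &a;
        printf("same cpu, still owner: %d\n", lazy_restore_ok(&a, 1)); /* 1 */
        printf("migrated to cpu 2:     %d\n", lazy_restore_ok(&a, 2)); /* 0 */

        disable_lazy_restore(&a);       /* e.g. ptrace just rewrote the saved state */
        printf("after disable:         %d\n", lazy_restore_ok(&a, 1)); /* 0 */
        return 0;
}

Both halves of the test matter: migrating to another CPU invalidates last_cpu, while anything else using the FPU on the same CPU (another task, or __kernel_fpu_begin() writing NULL) takes away fpu_owner_task.
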
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 29c740deafec..367f39d35e9c 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -42,8 +42,8 @@ void kernel_fpu_enable(void)
  * be set (so that the clts/stts pair does nothing that is
  * visible in the interrupted kernel thread).
  *
- * Except for the eagerfpu case when we return 1 unless we've already
- * been eager and saved the state in kernel_fpu_begin().
+ * Except for the eagerfpu case when we return true; in the likely case
+ * the thread has the FPU but we are not going to set/clear TS.
  */
 static inline bool interrupted_kernel_fpu_idle(void)
 {
@@ -51,7 +51,7 @@ static inline bool interrupted_kernel_fpu_idle(void)
                 return false;

         if (use_eager_fpu())
-                return __thread_has_fpu(current);
+                return true;

         return !__thread_has_fpu(current) &&
                 (read_cr0() & X86_CR0_TS);
@@ -94,9 +94,10 @@ void __kernel_fpu_begin(void)

         if (__thread_has_fpu(me)) {
                 __save_init_fpu(me);
-        } else if (!use_eager_fpu()) {
+        } else {
                 this_cpu_write(fpu_owner_task, NULL);
-                clts();
+                if (!use_eager_fpu())
+                        clts();
         }
 }
 EXPORT_SYMBOL(__kernel_fpu_begin);
@@ -107,7 +108,7 @@ void __kernel_fpu_end(void)

         if (__thread_has_fpu(me)) {
                 if (WARN_ON(restore_fpu_checking(me)))
-                        drop_init_fpu(me);
+                        fpu_reset_state(me);
         } else if (!use_eager_fpu()) {
                 stts();
         }
@@ -120,10 +121,13 @@ void unlazy_fpu(struct task_struct *tsk)
 {
         preempt_disable();
         if (__thread_has_fpu(tsk)) {
-                __save_init_fpu(tsk);
-                __thread_fpu_end(tsk);
-        } else
-                tsk->thread.fpu_counter = 0;
+                if (use_eager_fpu()) {
+                        __save_fpu(tsk);
+                } else {
+                        __save_init_fpu(tsk);
+                        __thread_fpu_end(tsk);
+                }
+        }
         preempt_enable();
 }
 EXPORT_SYMBOL(unlazy_fpu);
@@ -221,11 +225,12 @@ void fpu_finit(struct fpu *fpu)
                 return;
         }

+        memset(fpu->state, 0, xstate_size);
+
         if (cpu_has_fxsr) {
                 fx_finit(&fpu->state->fxsave);
         } else {
                 struct i387_fsave_struct *fp = &fpu->state->fsave;
-                memset(fp, 0, xstate_size);
                 fp->cwd = 0xffff037fu;
                 fp->swd = 0xffff0000u;
                 fp->twd = 0xffffffffu;
@@ -247,7 +252,7 @@ int init_fpu(struct task_struct *tsk)
         if (tsk_used_math(tsk)) {
                 if (cpu_has_fpu && tsk == current)
                         unlazy_fpu(tsk);
-                tsk->thread.fpu.last_cpu = ~0;
+                task_disable_lazy_fpu_restore(tsk);
                 return 0;
         }

@@ -336,6 +341,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
                 unsigned int pos, unsigned int count,
                 void *kbuf, void __user *ubuf)
 {
+        struct xsave_struct *xsave = &target->thread.fpu.state->xsave;
         int ret;

         if (!cpu_has_xsave)
@@ -350,14 +356,12 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
          * memory layout in the thread struct, so that we can copy the entire
          * xstateregs to the user using one user_regset_copyout().
          */
-        memcpy(&target->thread.fpu.state->fxsave.sw_reserved,
-               xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
-
+        memcpy(&xsave->i387.sw_reserved,
+                xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
         /*
          * Copy the xstate memory layout.
          */
-        ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-                                  &target->thread.fpu.state->xsave, 0, -1);
+        ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);

         return ret;
 }

@@ -365,8 +369,8 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
                   unsigned int pos, unsigned int count,
                   const void *kbuf, const void __user *ubuf)
 {
+        struct xsave_struct *xsave = &target->thread.fpu.state->xsave;
         int ret;
-        struct xsave_hdr_struct *xsave_hdr;

         if (!cpu_has_xsave)
                 return -ENODEV;
@@ -375,22 +379,16 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
         if (ret)
                 return ret;

-        ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-                                 &target->thread.fpu.state->xsave, 0, -1);
-
+        ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
         /*
          * mxcsr reserved bits must be masked to zero for security reasons.
          */
-        target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;
-
-        xsave_hdr = &target->thread.fpu.state->xsave.xsave_hdr;
-
-        xsave_hdr->xstate_bv &= pcntxt_mask;
+        xsave->i387.mxcsr &= mxcsr_feature_mask;
+        xsave->xsave_hdr.xstate_bv &= pcntxt_mask;
         /*
          * These bits must be zero.
          */
-        memset(xsave_hdr->reserved, 0, 48);
-
+        memset(&xsave->xsave_hdr.reserved, 0, 48);
         return ret;
 }

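Most of the i387.c hunks are mechanical: a local xsave pointer replaces the repeated target->thread.fpu.state dereferences, and the memset() moves from fx_finit() up into fpu_finit() so both the FXSR and FSAVE paths start from a zeroed buffer. The part worth spelling out is the sanitising that xstateregs_set() keeps doing on user-supplied state. A standalone sketch follows; the mask values are made up for the example, the kernel derives the real ones from CPUID at boot.

/* Standalone illustration of the checks xstateregs_set() performs on state
 * copied in from user space. The mask values are examples only. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        const uint32_t mxcsr_feature_mask = 0x0000ffbf; /* example value only */
        const uint64_t pcntxt_mask        = 0x7;        /* say: x87 | SSE | AVX */

        uint32_t mxcsr     = 0xdead1f80;        /* mxcsr as written by the tracer */
        uint64_t xstate_bv = 0xff;              /* claims features never enabled */

        mxcsr     &= mxcsr_feature_mask;        /* reserved bits must be zero */
        xstate_bv &= pcntxt_mask;               /* only features the kernel handles */

        printf("mxcsr     = %#x\n", (unsigned int)mxcsr);             /* 0x1f80 */
        printf("xstate_bv = %#llx\n", (unsigned long long)xstate_bv); /* 0x7 */
        return 0;
}

Masking xstate_bv down to pcntxt_mask also keeps XRSTOR from being handed feature bits the kernel never enabled, which would fault.
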
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 0c8992dbead5..8213da62b1b7 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -89,8 +89,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
         dst->thread.fpu_counter = 0;
         dst->thread.fpu.has_fpu = 0;
-        dst->thread.fpu.last_cpu = ~0;
         dst->thread.fpu.state = NULL;
+        task_disable_lazy_fpu_restore(dst);
         if (tsk_used_math(src)) {
                 int err = fpu_alloc(&dst->thread.fpu);
                 if (err)
@@ -151,13 +151,18 @@ void flush_thread(void)
         flush_ptrace_hw_breakpoint(tsk);
         memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
-        drop_init_fpu(tsk);
-        /*
-         * Free the FPU state for non xsave platforms. They get reallocated
-         * lazily at the first use.
-         */
-        if (!use_eager_fpu())
+
+        if (!use_eager_fpu()) {
+                /* FPU state will be reallocated lazily at the first use. */
+                drop_fpu(tsk);
                 free_thread_xstate(tsk);
+        } else if (!used_math()) {
+                /* kthread execs. TODO: cleanup this horror. */
+                if (WARN_ON(init_fpu(tsk)))
+                        force_sig(SIGKILL, tsk);
+                user_fpu_begin();
+                restore_init_xstate();
+        }
 }

 static void hard_disable_TSC(void)
 {
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 53cc4085c3d7..3e581865c8e2 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -680,7 +680,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
                  * Ensure the signal handler starts with the new fpu state.
                  */
                 if (used_math())
-                        drop_init_fpu(current);
+                        fpu_reset_state(current);
         }
         signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP));
 }
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 6751c5c58eec..f4fa991406cd 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -731,7 +731,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
         /*
          * Save the info for the exception handler and clear the error.
          */
-        save_init_fpu(task);
+        unlazy_fpu(task);
         task->thread.trap_nr = trapnr;
         task->thread.error_code = error_code;
         info.si_signo = SIGFPE;
@@ -860,7 +860,7 @@ void math_state_restore(void)
         kernel_fpu_disable();
         __thread_fpu_begin(tsk);
         if (unlikely(restore_fpu_checking(tsk))) {
-                drop_init_fpu(tsk);
+                fpu_reset_state(tsk);
                 force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
         } else {
                 tsk->thread.fpu_counter++;
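The process.c, signal.c and traps.c hunks are call-site conversions to the renamed helpers. The rename matters because the two behaviours of fpu_reset_state() differ more than the old name drop_init_fpu() suggested: in lazy mode the state is dropped and rebuilt at the next #NM fault, in eager mode the registers are reloaded with the init state on the spot. The toy model below shows the observable difference; all mock_ names are invented and nothing here is kernel API.

/* Toy model of the fpu_reset_state() split; mock_* names are invented. */
#include <stdbool.h>
#include <stdio.h>

enum fpu_mode { FPU_LAZY, FPU_EAGER };

struct mock_task {
        bool has_fpu;           /* registers currently hold this task's state */
        bool used_math;         /* task has any FPU state at all */
};

static void mock_drop_fpu(struct mock_task *t)
{
        t->has_fpu = false;
        t->used_math = false;   /* state is recreated lazily at the next use */
}

static void mock_restore_init_xstate(void)
{
        puts("registers <- init state");      /* stand-in for xrstor/fxrstor */
}

static void mock_fpu_reset_state(enum fpu_mode mode, struct mock_task *t)
{
        if (mode == FPU_LAZY)
                mock_drop_fpu(t);
        else
                mock_restore_init_xstate();
}

int main(void)
{
        struct mock_task t = { .has_fpu = true, .used_math = true };

        mock_fpu_reset_state(FPU_LAZY, &t);
        printf("lazy:  used_math=%d\n", t.used_math);   /* 0: rebuilt on demand */

        t.has_fpu = t.used_math = true;
        mock_fpu_reset_state(FPU_EAGER, &t);            /* prints the restore line */
        printf("eager: used_math=%d\n", t.used_math);   /* 1: state kept, registers reset */
        return 0;
}

That asymmetry is also why flush_thread() grows an explicit eager branch: an execing kthread has no state to drop, so it has to be handed the init state before it can return to user mode.
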
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index cdc6cf903078..87a815b85f3e 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -342,7 +342,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
                         config_enabled(CONFIG_IA32_EMULATION));

         if (!buf) {
-                drop_init_fpu(tsk);
+                fpu_reset_state(tsk);
                 return 0;
         }

@@ -416,7 +416,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
                  */
                 user_fpu_begin();
                 if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) {
-                        drop_init_fpu(tsk);
+                        fpu_reset_state(tsk);
                         return -1;
                 }
         }
@@ -678,19 +678,13 @@ void xsave_init(void)
         this_func();
 }

-static inline void __init eager_fpu_init_bp(void)
+/*
+ * setup_init_fpu_buf() is __init and it is OK to call it here because
+ * init_xstate_buf is only unset during boot.
+ */
+void __init_refok eager_fpu_init(void)
 {
-        current->thread.fpu.state =
-                alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct));
-        if (!init_xstate_buf)
-                setup_init_fpu_buf();
-}
-
-void eager_fpu_init(void)
-{
-        static __refdata void (*boot_func)(void) = eager_fpu_init_bp;
-
-        clear_used_math();
+        WARN_ON(used_math());
         current_thread_info()->status = 0;

         if (eagerfpu == ENABLE)
@@ -701,21 +695,8 @@ void eager_fpu_init(void)
                 return;
         }

-        if (boot_func) {
-                boot_func();
-                boot_func = NULL;
-        }
-
-        /*
-         * This is same as math_state_restore(). But use_xsave() is
-         * not yet patched to use math_state_restore().
-         */
-        init_fpu(current);
-        __thread_fpu_begin(current);
-        if (cpu_has_xsave)
-                xrstor_state(init_xstate_buf, -1);
-        else
-                fxrstor_checking(&init_xstate_buf->i387);
+        if (!init_xstate_buf)
+                setup_init_fpu_buf();
 }

 /*
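To close, here is the whole context-switch decision from the fpu-internal.h part of the patch reduced to a user-space model. It is only a sketch: every mock_ name is invented, CR0.TS handling, prefetching and the real register save/restore are omitted, and mock_save_init_fpu() simply pretends the registers stayed valid after the save.

#include <stdbool.h>
#include <stdio.h>

#define MOCK_NR_CPUS 4

struct mock_task {
        bool used_math;
        bool has_fpu;
        unsigned int fpu_counter;
        unsigned int last_cpu;
};

static struct mock_task *mock_fpu_owner[MOCK_NR_CPUS];
static bool mock_eager_fpu;     /* false: model the lazy-FPU configuration */

/* Stand-in for __save_init_fpu(): pretend the registers stay valid after the
 * save, i.e. the "init optimisation" never kicks in. */
static bool mock_save_init_fpu(struct mock_task *t)
{
        (void)t;
        return true;
}

static bool mock_lazy_restore_ok(struct mock_task *t, unsigned int cpu)
{
        return mock_fpu_owner[cpu] == t && t->last_cpu == cpu;
}

/* Returns true when the caller must reload @new's state after the switch
 * (the switch_fpu_finish() half of the job). */
static bool mock_switch_fpu_prepare(struct mock_task *old,
                                    struct mock_task *new, unsigned int cpu)
{
        bool preload = new->used_math &&
                       (mock_eager_fpu || new->fpu_counter > 5);

        if (old->has_fpu) {
                if (!mock_save_init_fpu(old))
                        old->last_cpu = ~0u;    /* registers no longer match memory */
                else
                        old->last_cpu = cpu;
                old->has_fpu = false;           /* but leave mock_fpu_owner alone */
        } else {
                old->fpu_counter = 0;
                old->last_cpu = ~0u;
        }

        if (preload) {
                new->fpu_counter++;
                if (mock_lazy_restore_ok(new, cpu))
                        preload = false;        /* registers already hold new's state */
                mock_fpu_owner[cpu] = new;
                new->has_fpu = true;
        }
        return preload;
}

int main(void)
{
        struct mock_task prev = { .used_math = true, .has_fpu = true, .fpu_counter = 9 };
        struct mock_task kthread = { 0 };       /* never uses the FPU */

        mock_fpu_owner[0] = &prev;              /* prev acquired the FPU earlier on CPU 0 */

        /* prev -> kthread: prev's registers are saved, nothing to preload. */
        printf("preload kthread? %d\n", mock_switch_fpu_prepare(&prev, &kthread, 0)); /* 0 */

        /* kthread -> prev: owner and last_cpu still match, reload is skipped. */
        printf("reload prev?     %d\n", mock_switch_fpu_prepare(&kthread, &prev, 0)); /* 0 */
        return 0;
}

The second call returns 0 because the kthread never touched the FPU: the registers still hold prev's state, the owner and last_cpu checks both pass, and the reload is skipped; that is exactly the case the lazy-restore bookkeeping exists to catch.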