2005-04-17 02:20:36 +04:00
|
|
|
/* thread_info.h: i386 low-level thread information
|
|
|
|
*
|
|
|
|
* Copyright (C) 2002 David Howells (dhowells@redhat.com)
|
|
|
|
* - Incorporating suggestions made by Linus Torvalds and Dave Miller
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _ASM_THREAD_INFO_H
|
|
|
|
#define _ASM_THREAD_INFO_H
|
|
|
|
|
|
|
|
#ifdef __KERNEL__
|
|
|
|
|
|
|
|
#include <linux/compiler.h>
|
|
|
|
#include <asm/page.h>
|
|
|
|
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
#include <asm/processor.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* low level task data that entry.S needs immediate access to
|
|
|
|
* - this struct should fit entirely inside of one cache line
|
|
|
|
* - this struct shares the supervisor stack pages
|
|
|
|
* - if the contents of this structure are changed, the assembly constants must also be changed
|
|
|
|
*/
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
|
|
|
|
/*
 * Per-thread low-level state.
 * The final member, supervisor_stack[0], is a zero-length array.
 * Layout changes here must be mirrored in the assembly constants (the
 * assembly branch of this header includes <asm/asm-offsets.h> instead of
 * seeing this definition).
 */
struct thread_info {
|
|
|
|
struct task_struct *task; /* main task structure */
|
|
|
|
struct exec_domain *exec_domain; /* execution domain */
|
|
|
|
unsigned long flags; /* low level flags */
|
|
|
|
unsigned long status; /* thread-synchronous flags */
|
|
|
|
__u32 cpu; /* current CPU */
|
2005-06-23 11:09:07 +04:00
|
|
|
int preempt_count; /* 0 => preemptable, <0 => BUG */
|
2005-04-17 02:20:36 +04:00
|
|
|
|
|
|
|
|
|
|
|
|
mm_segment_t addr_limit; /* thread address space:
|
|
|
|
0-0xBFFFFFFF for user-thread
|
|
|
|
0-0xFFFFFFFF for kernel-thread
|
|
|
|
*/
|
[PATCH] vdso: randomize the i386 vDSO by moving it into a vma
Move the i386 VDSO down into a vma and thus randomize it.
Besides the security implications, this feature also helps debuggers, which
can COW a vma-backed VDSO just like a normal DSO and can thus do
single-stepping and other debugging features.
It's good for hypervisors (Xen, VMWare) too, which typically live in the same
high-mapped address space as the VDSO, hence whenever the VDSO is used, they
get lots of guest pagefaults and have to fix such guest accesses up - which
slows things down instead of speeding things up (the primary purpose of the
VDSO).
There's a new CONFIG_COMPAT_VDSO (default=y) option, which provides support
for older glibcs that still rely on a prelinked high-mapped VDSO. Newer
distributions (using glibc 2.3.3 or later) can turn this option off. Turning
it off is also recommended for security reasons: attackers cannot use the
predictable high-mapped VDSO page as syscall trampoline anymore.
There is a new vdso=[0|1] boot option as well, and a runtime
/proc/sys/vm/vdso_enabled sysctl switch, that allows the VDSO to be turned
on/off.
(This version of the VDSO-randomization patch also has working ELF
coredumping, the previous patch crashed in the coredumping code.)
This code is a combined work of the exec-shield VDSO randomization
code and Gerd Hoffmann's hypervisor-centric VDSO patch. Rusty Russell
started this patch and i completed it.
[akpm@osdl.org: cleanups]
[akpm@osdl.org: compile fix]
[akpm@osdl.org: compile fix 2]
[akpm@osdl.org: compile fix 3]
[akpm@osdl.org: revernt MAXMEM change]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Cc: Gerd Hoffmann <kraxel@suse.de>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Zachary Amsden <zach@vmware.com>
Cc: Andi Kleen <ak@muc.de>
Cc: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-06-27 13:53:50 +04:00
|
|
|
void *sysenter_return;
|
2005-04-17 02:20:36 +04:00
|
|
|
|
|
struct restart_block restart_block;
|
|
|
|
|
|
|
|
unsigned long previous_esp; /* ESP of the previous stack in case
|
|
|
|
of nested (IRQ) stacks
|
|
|
|
*/
|
|
|
|
__u8 supervisor_stack[0];
|
|
|
|
};
|
|
|
|
|
|
|
|
#else /* !__ASSEMBLY__ */
|
|
|
|
|
2005-09-09 21:28:28 +04:00
|
|
|
#include <asm/asm-offsets.h>
|
2005-04-17 02:20:36 +04:00
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#define PREEMPT_ACTIVE 0x10000000
|
|
|
|
/*
 * THREAD_SIZE: size in bytes of the per-thread area handed out by
 * alloc_thread_info(): 4096 with CONFIG_4KSTACKS, 8192 otherwise.
 * Both values are powers of two; current_thread_info() and GET_THREAD_INFO
 * depend on that because they locate the structure by masking the stack
 * pointer with ~(THREAD_SIZE - 1) / -THREAD_SIZE.
 */
#ifdef CONFIG_4KSTACKS
|
|
|
|
#define THREAD_SIZE (4096)
|
|
|
|
#else
|
|
|
|
#define THREAD_SIZE (8192)
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#define STACK_WARN (THREAD_SIZE/8)
|
|
|
|
/*
|
|
|
|
* macros/functions for gaining access to the thread information structure
|
|
|
|
*
|
|
|
|
* preempt_count needs to be 1 initially, until the scheduler is functional.
|
|
|
|
*/
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
|
|
|
|
/*
 * INIT_THREAD_INFO(tsk) - static initializer for a struct thread_info.
 *
 * Expands to a designated-initializer list that points .task at tsk,
 * selects default_exec_domain, clears flags and cpu, starts preempt_count
 * at 1, sets addr_limit to KERNEL_DS and installs do_no_restart_syscall as
 * the restart handler. Members not named here (e.g. status,
 * sysenter_return, previous_esp) are zero-initialized by the usual C
 * rules for designated initializers.
 *
 * NOTE(review): the argument is expanded unparenthesized as "&tsk", so
 * callers should pass a plain object name rather than an expression.
 */
#define INIT_THREAD_INFO(tsk) \
|
|
|
|
{ \
|
|
|
|
.task = &tsk, \
|
|
|
|
.exec_domain = &default_exec_domain, \
|
|
|
|
.flags = 0, \
|
|
|
|
.cpu = 0, \
|
|
|
|
.preempt_count = 1, \
|
|
|
|
.addr_limit = KERNEL_DS, \
|
|
|
|
.restart_block = { \
|
|
|
|
.fn = do_no_restart_syscall, \
|
|
|
|
}, \
|
|
|
|
}
|
|
|
|
|
|
|
|
#define init_thread_info (init_thread_union.thread_info)
|
|
|
|
#define init_stack (init_thread_union.stack)
|
|
|
|
|
|
|
|
|
2006-06-27 13:53:47 +04:00
|
|
|
/* how to get the current stack pointer from C */
|
|
|
|
register unsigned long current_stack_pointer asm("esp") __attribute_used__;
|
|
|
|
|
2005-04-17 02:20:36 +04:00
|
|
|
/* how to get the thread information struct from C */
|
|
|
|
/*
 * current_thread_info - locate the thread_info of the running context.
 *
 * Clears the low bits of the current stack pointer (mask
 * ~(THREAD_SIZE - 1)), i.e. rounds it down to a THREAD_SIZE boundary, and
 * returns that address as a struct thread_info pointer. This relies on
 * THREAD_SIZE being a power of two.
 * NOTE(review): presumably also relies on the stack area being
 * THREAD_SIZE-aligned with thread_info at its base - confirm against the
 * allocator.
 */
static inline struct thread_info *current_thread_info(void)
|
|
|
|
{
|
2006-06-27 13:53:47 +04:00
|
|
|
return (struct thread_info *)(current_stack_pointer & ~(THREAD_SIZE - 1));
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* thread information allocation */
|
|
|
|
/*
 * alloc_thread_info(tsk) - allocate THREAD_SIZE bytes with
 * kmalloc(GFP_KERNEL) for a thread_info area.
 *
 * With CONFIG_DEBUG_STACK_USAGE the whole area is zeroed after a
 * successful allocation (the memset is skipped when kmalloc returns a
 * null pointer); otherwise the memory is returned as kmalloc delivered it.
 * Either variant evaluates to the kmalloc result, which may be null.
 * The tsk argument is not used by either variant.
 * NOTE(review): the zero fill is presumably what lets debug code measure
 * how much of the stack was ever touched - confirm against the users of
 * CONFIG_DEBUG_STACK_USAGE.
 */
#ifdef CONFIG_DEBUG_STACK_USAGE
|
|
|
|
#define alloc_thread_info(tsk) \
|
|
|
|
({ \
|
|
|
|
struct thread_info *ret; \
|
|
|
|
\
|
|
|
|
ret = kmalloc(THREAD_SIZE, GFP_KERNEL); \
|
|
|
|
if (ret) \
|
|
|
|
memset(ret, 0, THREAD_SIZE); \
|
|
|
|
ret; \
|
|
|
|
})
|
|
|
|
#else
|
|
|
|
#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL)
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Release an area obtained from alloc_thread_info(); forwards to kfree(). */
#define free_thread_info(info) kfree(info)
|
|
|
|
|
|
|
|
#else /* !__ASSEMBLY__ */
|
|
|
|
|
|
|
|
/* how to get the thread information struct from ASM */
|
|
|
|
/*
 * GET_THREAD_INFO(reg): load -THREAD_SIZE into reg, then AND it with %esp.
 * reg ends up holding %esp rounded down to a THREAD_SIZE boundary - the
 * same masking current_thread_info() performs in C. Overwrites reg;
 * %esp itself is only read.
 */
#define GET_THREAD_INFO(reg) \
|
|
|
|
movl $-THREAD_SIZE, reg; \
|
|
|
|
andl %esp, reg
|
|
|
|
|
|
|
|
/* use this one if reg already contains %esp */
|
|
|
|
/*
 * GET_THREAD_INFO_WITH_ESP(reg): single-instruction variant that masks reg
 * in place with -THREAD_SIZE; the caller must already have copied %esp
 * into reg.
 */
#define GET_THREAD_INFO_WITH_ESP(reg) \
|
|
|
|
andl $-THREAD_SIZE, reg
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* thread information flags
|
|
|
|
* - these are process state flags that various assembly files may need to access
|
|
|
|
* - pending work-to-be-done flags are in LSW
|
|
|
|
* - other flags in MSW
|
|
|
|
*/
|
|
|
|
#define TIF_SYSCALL_TRACE 0 /* syscall trace active */
|
|
|
|
#define TIF_NOTIFY_RESUME 1 /* resumption notification requested */
|
|
|
|
#define TIF_SIGPENDING 2 /* signal pending */
|
|
|
|
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
|
|
|
|
#define TIF_SINGLESTEP 4 /* restore singlestep on return to user mode */
|
|
|
|
#define TIF_IRET 5 /* return with iret */
|
[PATCH] UML Support - Ptrace: adds the host SYSEMU support, for UML and general usage
Jeff Dike <jdike@addtoit.com>,
Paolo 'Blaisorblade' Giarrusso <blaisorblade_spam@yahoo.it>,
Bodo Stroesser <bstroesser@fujitsu-siemens.com>
Adds a new ptrace(2) mode, called PTRACE_SYSEMU, resembling PTRACE_SYSCALL
except that the kernel does not execute the requested syscall; this is useful
to improve performance for virtual environments, like UML, which want to run
the syscall on their own.
In fact, using PTRACE_SYSCALL means stopping child execution twice, on entry
and on exit, and each time you also have two context switches; with SYSEMU you
avoid the 2nd stop and so save two context switches per syscall.
Also, some architectures don't have support in the host for changing the
syscall number via ptrace(), which is currently needed to skip syscall
execution (UML turns any syscall into getpid() to avoid it being executed on
the host). Fixing that is hard, while SYSEMU is easier to implement.
* This version of the patch includes some suggestions of Jeff Dike to avoid
adding any instructions to the syscall fast path, plus some other little
changes, by myself, to make it work even when the syscall is executed with
SYSENTER (but I'm unsure about them). It has been widely tested for quite a
lot of time.
* Various fixes were included to handle the various switches between
various states, i.e. when for instance a syscall entry is traced with one of
PT_SYSCALL / _SYSEMU / _SINGLESTEP and another one is used on exit.
Basically, this is done by remembering which one of them was used even after
the call to ptrace_notify().
* We're combining TIF_SYSCALL_EMU with TIF_SYSCALL_TRACE or TIF_SINGLESTEP
to make do_syscall_trace() notice that the current syscall was started with
SYSEMU on entry, so that no notification ought to be done in the exit path;
this is a bit of a hack, so this problem is solved in another way in next
patches.
* Also, the effects of the patch:
"Ptrace - i386: fix Syscall Audit interaction with singlestep"
are cancelled; they are restored back in the last patch of this series.
Detailed descriptions of the patches doing this kind of processing follow (but
I've already summed everything up).
* Fix behaviour when changing interception kind #1.
In do_syscall_trace(), we check the status of the TIF_SYSCALL_EMU flag
only after doing the debugger notification; but the debugger might have
changed the status of this flag because he continued execution with
PTRACE_SYSCALL, so this is wrong. This patch fixes it by saving the flag
status before calling ptrace_notify().
* Fix behaviour when changing interception kind #2:
avoid intercepting syscall on return when using SYSCALL again.
A guest process switching from using PTRACE_SYSEMU to PTRACE_SYSCALL
crashes.
The problem is in arch/i386/kernel/entry.S. The current SYSEMU patch
inhibits the syscall-handler to be called, but does not prevent
do_syscall_trace() to be called after this for syscall completion
interception.
The appended patch fixes this. It reuses the flag TIF_SYSCALL_EMU to
remember "we come from PTRACE_SYSEMU and now are in PTRACE_SYSCALL", since
the flag is unused in the depicted situation.
* Fix behaviour when changing interception kind #3:
avoid intercepting syscall on return when using SINGLESTEP.
When testing 2.6.9 and the skas3.v6 patch, with my latest patch and had
problems with singlestepping on UML in SKAS with SYSEMU. It looped
receiving SIGTRAPs without moving forward. EIP of the traced process was
the same for all SIGTRAPs.
What's missing is to handle switching from PTRACE_SYSCALL_EMU to
PTRACE_SINGLESTEP in a way very similar to what is done for the change from
PTRACE_SYSCALL_EMU to PTRACE_SYSCALL_TRACE.
I.e., after calling ptrace(PTRACE_SYSEMU), on the return path, the debugger is
notified and then wakes up the process; the syscall is executed (or skipped,
when do_syscall_trace() returns 0, i.e. when using PTRACE_SYSEMU), and
do_syscall_trace() is called again. Since we are on the return path of a
SYSEMU'd syscall, if the wake up is performed through ptrace(PTRACE_SYSCALL),
we must still avoid notifying the parent of the syscall exit. Now, this
behaviour is extended even to resuming with PTRACE_SINGLESTEP.
Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Cc: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-04 02:57:18 +04:00
|
|
|
#define TIF_SYSCALL_EMU 6 /* syscall emulation active */
|
2005-04-17 02:20:36 +04:00
|
|
|
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
|
|
|
|
#define TIF_SECCOMP 8 /* secure computing */
|
2006-01-19 04:44:00 +03:00
|
|
|
#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */
|
2006-06-26 15:59:11 +04:00
|
|
|
#define TIF_MEMDIE 16
|
2006-07-10 05:12:39 +04:00
|
|
|
#define TIF_DEBUG 17 /* uses debug registers */
|
|
|
|
#define TIF_IO_BITMAP 18 /* uses I/O bitmap */
|
2005-04-17 02:20:36 +04:00
|
|
|
|
|
|
|
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
|
|
|
|
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
|
|
|
|
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
|
|
|
|
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
|
|
|
|
#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
|
|
|
|
#define _TIF_IRET (1<<TIF_IRET)
|
[PATCH] UML Support - Ptrace: adds the host SYSEMU support, for UML and general usage
Jeff Dike <jdike@addtoit.com>,
Paolo 'Blaisorblade' Giarrusso <blaisorblade_spam@yahoo.it>,
Bodo Stroesser <bstroesser@fujitsu-siemens.com>
Adds a new ptrace(2) mode, called PTRACE_SYSEMU, resembling PTRACE_SYSCALL
except that the kernel does not execute the requested syscall; this is useful
to improve performance for virtual environments, like UML, which want to run
the syscall on their own.
In fact, using PTRACE_SYSCALL means stopping child execution twice, on entry
and on exit, and each time you also have two context switches; with SYSEMU you
avoid the 2nd stop and so save two context switches per syscall.
Also, some architectures don't have support in the host for changing the
syscall number via ptrace(), which is currently needed to skip syscall
execution (UML turns any syscall into getpid() to avoid it being executed on
the host). Fixing that is hard, while SYSEMU is easier to implement.
* This version of the patch includes some suggestions of Jeff Dike to avoid
adding any instructions to the syscall fast path, plus some other little
changes, by myself, to make it work even when the syscall is executed with
SYSENTER (but I'm unsure about them). It has been widely tested for quite a
lot of time.
* Various fixes were included to handle the various switches between
various states, i.e. when for instance a syscall entry is traced with one of
PT_SYSCALL / _SYSEMU / _SINGLESTEP and another one is used on exit.
Basically, this is done by remembering which one of them was used even after
the call to ptrace_notify().
* We're combining TIF_SYSCALL_EMU with TIF_SYSCALL_TRACE or TIF_SINGLESTEP
to make do_syscall_trace() notice that the current syscall was started with
SYSEMU on entry, so that no notification ought to be done in the exit path;
this is a bit of a hack, so this problem is solved in another way in next
patches.
* Also, the effects of the patch:
"Ptrace - i386: fix Syscall Audit interaction with singlestep"
are cancelled; they are restored back in the last patch of this series.
Detailed descriptions of the patches doing this kind of processing follow (but
I've already summed everything up).
* Fix behaviour when changing interception kind #1.
In do_syscall_trace(), we check the status of the TIF_SYSCALL_EMU flag
only after doing the debugger notification; but the debugger might have
changed the status of this flag because he continued execution with
PTRACE_SYSCALL, so this is wrong. This patch fixes it by saving the flag
status before calling ptrace_notify().
* Fix behaviour when changing interception kind #2:
avoid intercepting syscall on return when using SYSCALL again.
A guest process switching from using PTRACE_SYSEMU to PTRACE_SYSCALL
crashes.
The problem is in arch/i386/kernel/entry.S. The current SYSEMU patch
inhibits the syscall-handler to be called, but does not prevent
do_syscall_trace() to be called after this for syscall completion
interception.
The appended patch fixes this. It reuses the flag TIF_SYSCALL_EMU to
remember "we come from PTRACE_SYSEMU and now are in PTRACE_SYSCALL", since
the flag is unused in the depicted situation.
* Fix behaviour when changing interception kind #3:
avoid intercepting syscall on return when using SINGLESTEP.
When testing 2.6.9 and the skas3.v6 patch, with my latest patch and had
problems with singlestepping on UML in SKAS with SYSEMU. It looped
receiving SIGTRAPs without moving forward. EIP of the traced process was
the same for all SIGTRAPs.
What's missing is to handle switching from PTRACE_SYSCALL_EMU to
PTRACE_SINGLESTEP in a way very similar to what is done for the change from
PTRACE_SYSCALL_EMU to PTRACE_SYSCALL_TRACE.
I.e., after calling ptrace(PTRACE_SYSEMU), on the return path, the debugger is
notified and then wakes up the process; the syscall is executed (or skipped,
when do_syscall_trace() returns 0, i.e. when using PTRACE_SYSEMU), and
do_syscall_trace() is called again. Since we are on the return path of a
SYSEMU'd syscall, if the wake up is performed through ptrace(PTRACE_SYSCALL),
we must still avoid notifying the parent of the syscall exit. Now, this
behaviour is extended even to resuming with PTRACE_SINGLESTEP.
Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Cc: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-04 02:57:18 +04:00
|
|
|
#define _TIF_SYSCALL_EMU (1<<TIF_SYSCALL_EMU)
|
2005-04-17 02:20:36 +04:00
|
|
|
#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
|
|
|
|
#define _TIF_SECCOMP (1<<TIF_SECCOMP)
|
2006-01-19 04:44:00 +03:00
|
|
|
#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
|
2006-07-10 05:12:39 +04:00
|
|
|
#define _TIF_DEBUG (1<<TIF_DEBUG)
|
|
|
|
#define _TIF_IO_BITMAP (1<<TIF_IO_BITMAP)
|
2005-04-17 02:20:36 +04:00
|
|
|
|
|
|
|
/* work to do on interrupt/exception return */
|
|
|
|
/*
 * _TIF_WORK_MASK: every flag in the low 16 bits except
 * _TIF_SYSCALL_TRACE, _TIF_SYSCALL_AUDIT, _TIF_SECCOMP and
 * _TIF_SYSCALL_EMU.
 */
#define _TIF_WORK_MASK \
|
2006-02-17 11:16:55 +03:00
|
|
|
(0x0000FFFF & ~(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
|
|
|
|
_TIF_SECCOMP | _TIF_SYSCALL_EMU))
|
2005-04-17 02:20:36 +04:00
|
|
|
/* work to do on any return to u-space */
|
|
|
|
/* _TIF_ALLWORK_MASK: every flag in the low 16 bits except _TIF_SECCOMP. */
#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP)
|
|
|
|
|
2006-07-10 05:12:39 +04:00
|
|
|
/* flags to check in __switch_to() */
|
|
|
|
#define _TIF_WORK_CTXSW (_TIF_DEBUG|_TIF_IO_BITMAP)
|
|
|
|
|
2005-04-17 02:20:36 +04:00
|
|
|
/*
|
|
|
|
* Thread-synchronous status.
|
|
|
|
*
|
|
|
|
* This is different from the flags in that nobody else
|
|
|
|
* ever touches our thread-synchronous status, so we don't
|
|
|
|
* have to worry about atomic accesses.
|
|
|
|
*/
|
|
|
|
#define TS_USEDFPU 0x0001 /* FPU was used by this task this quantum (SMP) */
|
2006-06-26 15:59:11 +04:00
|
|
|
#define TS_POLLING 0x0002 /* True if in idle loop and not sleeping */
|
|
|
|
|
|
|
|
/*
 * tsk_is_polling(t): non-zero when TS_POLLING is set in the status word of
 * t->thread_info; t must be a pointer to an object with a thread_info
 * pointer member. The result is the masked bit itself, not a normalized
 * 0/1 value.
 */
#define tsk_is_polling(t) ((t)->thread_info->status & TS_POLLING)
|
2005-04-17 02:20:36 +04:00
|
|
|
|
|
|
|
#endif /* __KERNEL__ */
|
|
|
|
|
|
|
|
#endif /* _ASM_THREAD_INFO_H */
|